LCOV - combined.info - drivers/gpu/drm/i915/gt/uc/intel_guc

LCOV - code coverage report

Current view:	top level - drivers/gpu/drm/i915/gt/uc - intel_guc_submission.c (source / functions)		Hit	Total	Coverage
Test:	combined.info	Lines:	0	247	0.0 %
Date:	2022-03-28 16:04:14	Functions:	0	18	0.0 %
		Branches:	0	64	0.0 %

           Branch data     Line data    Source code

       1                 :            : // SPDX-License-Identifier: MIT
       2                 :            : /*
       3                 :            :  * Copyright © 2014 Intel Corporation
       4                 :            :  */
       5                 :            : 
       6                 :            : #include <linux/circ_buf.h>
       7                 :            : 
       8                 :            : #include "gem/i915_gem_context.h"
       9                 :            : #include "gt/intel_context.h"
      10                 :            : #include "gt/intel_engine_pm.h"
      11                 :            : #include "gt/intel_gt.h"
      12                 :            : #include "gt/intel_gt_pm.h"
      13                 :            : #include "gt/intel_lrc_reg.h"
      14                 :            : #include "gt/intel_ring.h"
      15                 :            : 
      16                 :            : #include "intel_guc_submission.h"
      17                 :            : 
      18                 :            : #include "i915_drv.h"
      19                 :            : #include "i915_trace.h"
      20                 :            : 
      21                 :            : /**
      22                 :            :  * DOC: GuC-based command submission
      23                 :            :  *
      24                 :            :  * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC
      25                 :            :  * firmware is moving to an updated submission interface and we plan to
      26                 :            :  * turn submission back on when that lands. The below documentation (and related
      27                 :            :  * code) matches the old submission model and will be updated as part of the
      28                 :            :  * upgrade to the new flow.
      29                 :            :  *
      30                 :            :  * GuC stage descriptor:
      31                 :            :  * During initialization, the driver allocates a static pool of 1024 such
      32                 :            :  * descriptors, and shares them with the GuC. Currently, we only use one
      33                 :            :  * descriptor. This stage descriptor lets the GuC know about the workqueue and
      34                 :            :  * process descriptor. Theoretically, it also lets the GuC know about our HW
      35                 :            :  * contexts (context ID, etc...), but we actually employ a kind of submission
      36                 :            :  * where the GuC uses the LRCA sent via the work item instead. This is called
      37                 :            :  * a "proxy" submission.
      38                 :            :  *
      39                 :            :  * The Scratch registers:
       40                 :            :  * There are 16 MMIO-based registers starting at 0xC180. The kernel driver writes
       41                 :            :  * a value to the action register (SOFT_SCRATCH_0) along with any data. It then
       42                 :            :  * triggers an interrupt on the GuC via another register write (0xC4C8).
       43                 :            :  * The firmware writes a success/fail code back to the action register after
       44                 :            :  * it processes the request. The kernel driver polls waiting for this update and
       45                 :            :  * then proceeds.
      46                 :            :  *
      47                 :            :  * Work Items:
      48                 :            :  * There are several types of work items that the host may place into a
       49                 :            :  * workqueue, each with its own requirements and limitations. Currently only
       50                 :            :  * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
       51                 :            :  * represents an in-order queue. The kernel driver packs the ring tail pointer
       52                 :            :  * and an ELSP context descriptor dword into the Work Item.
      53                 :            :  * See guc_add_request()
      54                 :            :  *
      55                 :            :  */
      56                 :            : 
      57                 :          0 : static inline struct i915_priolist *to_priolist(struct rb_node *rb)
      58                 :            : {
      59                 :          0 :         return rb_entry(rb, struct i915_priolist, node);
      60                 :            : }
      61                 :            : 
      62                 :          0 : static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
      63                 :            : {
      64                 :          0 :         struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;
      65                 :            : 
      66                 :          0 :         return &base[id];
      67                 :            : }
      68                 :            : 
      69                 :          0 : static int guc_workqueue_create(struct intel_guc *guc)
      70                 :            : {
      71                 :          0 :         return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue,
      72                 :            :                                               &guc->workqueue_vaddr);
      73                 :            : }
      74                 :            : 
      75                 :          0 : static void guc_workqueue_destroy(struct intel_guc *guc)
      76                 :            : {
      77                 :          0 :         i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP);
      78                 :          0 : }
      79                 :            : 
      80                 :            : /*
      81                 :            :  * Initialise the process descriptor shared with the GuC firmware.
      82                 :            :  */
      83                 :          0 : static int guc_proc_desc_create(struct intel_guc *guc)
      84                 :            : {
      85                 :          0 :         const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc));
      86                 :            : 
      87                 :          0 :         return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc,
      88                 :            :                                               &guc->proc_desc_vaddr);
      89                 :            : }
      90                 :            : 
      91                 :          0 : static void guc_proc_desc_destroy(struct intel_guc *guc)
      92                 :            : {
      93                 :          0 :         i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP);
      94                 :            : }
      95                 :            : 
      96                 :            : static void guc_proc_desc_init(struct intel_guc *guc)
      97                 :            : {
      98                 :            :         struct guc_process_desc *desc;
      99                 :            : 
     100                 :            :         desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc));
     101                 :            : 
     102                 :            :         /*
     103                 :            :          * XXX: pDoorbell and WQVBaseAddress are pointers in process address
     104                 :            :          * space for ring3 clients (set them as in mmap_ioctl) or kernel
     105                 :            :          * space for kernel clients (map on demand instead? May make debug
     106                 :            :          * easier to have it mapped).
     107                 :            :          */
     108                 :            :         desc->wq_base_addr = 0;
     109                 :            :         desc->db_base_addr = 0;
     110                 :            : 
     111                 :            :         desc->wq_size_bytes = GUC_WQ_SIZE;
     112                 :            :         desc->wq_status = WQ_STATUS_ACTIVE;
     113                 :            :         desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
     114                 :            : }
     115                 :            : 
     116                 :          0 : static void guc_proc_desc_fini(struct intel_guc *guc)
     117                 :            : {
     118                 :          0 :         memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc));
     119                 :            : }
     120                 :            : 
     121                 :          0 : static int guc_stage_desc_pool_create(struct intel_guc *guc)
     122                 :            : {
     123                 :          0 :         u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) *
     124                 :            :                               GUC_MAX_STAGE_DESCRIPTORS);
     125                 :            : 
     126                 :          0 :         return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool,
     127                 :            :                                               &guc->stage_desc_pool_vaddr);
     128                 :            : }
     129                 :            : 
     130                 :          0 : static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
     131                 :            : {
     132                 :          0 :         i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP);
     133                 :          0 : }
     134                 :            : 
     135                 :            : /*
     136                 :            :  * Initialise/clear the stage descriptor shared with the GuC firmware.
     137                 :            :  *
      138                 :            :  * This descriptor tells the GuC where (in GGTT space) to find the important
      139                 :            :  * data structures related to work submission (process descriptor, work queue,
      140                 :            :  * etc).
     141                 :            :  */
     142                 :          0 : static void guc_stage_desc_init(struct intel_guc *guc)
     143                 :            : {
     144                 :          0 :         struct guc_stage_desc *desc;
     145                 :            : 
     146                 :            :         /* we only use 1 stage desc, so hardcode it to 0 */
     147                 :          0 :         desc = __get_stage_desc(guc, 0);
     148                 :          0 :         memset(desc, 0, sizeof(*desc));
     149                 :            : 
     150                 :          0 :         desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
     151                 :            :                           GUC_STAGE_DESC_ATTR_KERNEL;
     152                 :            : 
     153                 :          0 :         desc->stage_id = 0;
     154                 :          0 :         desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
     155                 :            : 
     156                 :          0 :         desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc);
     157                 :          0 :         desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue);
     158                 :          0 :         desc->wq_size = GUC_WQ_SIZE;
     159                 :          0 : }
     160                 :            : 
     161                 :          0 : static void guc_stage_desc_fini(struct intel_guc *guc)
     162                 :            : {
     163                 :          0 :         struct guc_stage_desc *desc;
     164                 :            : 
     165                 :          0 :         desc = __get_stage_desc(guc, 0);
     166                 :          0 :         memset(desc, 0, sizeof(*desc));
     167                 :            : }
     168                 :            : 
     169                 :            : /* Construct a Work Item and append it to the GuC's Work Queue */
     170                 :          0 : static void guc_wq_item_append(struct intel_guc *guc,
     171                 :            :                                u32 target_engine, u32 context_desc,
     172                 :            :                                u32 ring_tail, u32 fence_id)
     173                 :            : {
     174                 :            :         /* wqi_len is in DWords, and does not include the one-word header */
     175                 :          0 :         const size_t wqi_size = sizeof(struct guc_wq_item);
     176                 :          0 :         const u32 wqi_len = wqi_size / sizeof(u32) - 1;
     177                 :          0 :         struct guc_process_desc *desc = guc->proc_desc_vaddr;
     178                 :          0 :         struct guc_wq_item *wqi;
     179                 :          0 :         u32 wq_off;
     180                 :            : 
     181                 :          0 :         lockdep_assert_held(&guc->wq_lock);
     182                 :            : 
      183                 :            :         /* For now a workqueue item is 4 DWs and the workqueue buffer is 2 pages,
      184                 :            :          * so a wqi structure should never straddle a page boundary nor wrap
      185                 :            :          * around to the beginning. This simplifies the implementation below.
      186                 :            :          *
      187                 :            :          * XXX: if that is ever not the case, we need to save the data to a
      188                 :            :          * temporary wqi and copy it to the workqueue buffer dw by dw.
      189                 :            :          */
     190                 :          0 :         BUILD_BUG_ON(wqi_size != 16);
     191                 :            : 
     192                 :            :         /* We expect the WQ to be active if we're appending items to it */
     193                 :          0 :         GEM_BUG_ON(desc->wq_status != WQ_STATUS_ACTIVE);
     194                 :            : 
     195                 :            :         /* Free space is guaranteed. */
     196                 :          0 :         wq_off = READ_ONCE(desc->tail);
     197                 :          0 :         GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head),
     198                 :            :                               GUC_WQ_SIZE) < wqi_size);
     199                 :          0 :         GEM_BUG_ON(wq_off & (wqi_size - 1));
     200                 :            : 
     201                 :          0 :         wqi = guc->workqueue_vaddr + wq_off;
     202                 :            : 
     203                 :            :         /* Now fill in the 4-word work queue item */
     204                 :          0 :         wqi->header = WQ_TYPE_INORDER |
     205                 :            :                       (wqi_len << WQ_LEN_SHIFT) |
     206                 :          0 :                       (target_engine << WQ_TARGET_SHIFT) |
     207                 :            :                       WQ_NO_WCFLUSH_WAIT;
     208                 :          0 :         wqi->context_desc = context_desc;
     209                 :          0 :         wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
     210                 :          0 :         GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
     211                 :          0 :         wqi->fence_id = fence_id;
     212                 :            : 
     213                 :            :         /* Make the update visible to GuC */
     214                 :          0 :         WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1));
     215                 :            : }
     216                 :            : 
     217                 :          0 : static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
     218                 :            : {
     219                 :          0 :         struct intel_engine_cs *engine = rq->engine;
     220                 :          0 :         u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
     221                 :          0 :         u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
     222                 :            : 
     223                 :          0 :         guc_wq_item_append(guc, engine->guc_id, ctx_desc,
     224                 :          0 :                            ring_tail, rq->fence.seqno);
     225                 :            : }
     226                 :            : 
     227                 :            : /*
      228                 :            :  * When we're doing submissions using the regular execlists backend, writing to
      229                 :            :  * ELSP from the CPU side is enough to make sure that writes to ringbuffer pages
      230                 :            :  * pinned in the mappable aperture portion of GGTT are visible to the command
      231                 :            :  * streamer. Writes done by GuC on our behalf do not guarantee such ordering,
      232                 :            :  * therefore, to ensure the flush, we're issuing a POSTING READ.
     233                 :            :  */
     234                 :          0 : static void flush_ggtt_writes(struct i915_vma *vma)
     235                 :            : {
     236         [ #  # ]:          0 :         if (i915_vma_is_map_and_fenceable(vma))
     237                 :          0 :                 intel_uncore_posting_read_fw(vma->vm->gt->uncore,
     238                 :            :                                              GUC_STATUS);
     239                 :          0 : }
     240                 :            : 
     241                 :            : static void guc_submit(struct intel_engine_cs *engine,
     242                 :            :                        struct i915_request **out,
     243                 :            :                        struct i915_request **end)
     244                 :            : {
     245                 :            :         struct intel_guc *guc = &engine->gt->uc.guc;
     246                 :            : 
     247                 :            :         spin_lock(&guc->wq_lock);
     248                 :            : 
     249                 :            :         do {
     250                 :            :                 struct i915_request *rq = *out++;
     251                 :            : 
     252                 :            :                 flush_ggtt_writes(rq->ring->vma);
     253                 :            :                 guc_add_request(guc, rq);
     254                 :            :         } while (out != end);
     255                 :            : 
     256                 :            :         spin_unlock(&guc->wq_lock);
     257                 :            : }
     258                 :            : 
     259                 :            : static inline int rq_prio(const struct i915_request *rq)
     260                 :            : {
     261                 :            :         return rq->sched.attr.priority | __NO_PREEMPTION;
     262                 :            : }
     263                 :            : 
     264                 :            : static struct i915_request *schedule_in(struct i915_request *rq, int idx)
     265                 :            : {
     266                 :            :         trace_i915_request_in(rq, idx);
     267                 :            : 
     268                 :            :         /*
     269                 :            :          * Currently we are not tracking the rq->context being inflight
     270                 :            :          * (ce->inflight = rq->engine). It is only used by the execlists
     271                 :            :          * backend at the moment, a similar counting strategy would be
     272                 :            :          * required if we generalise the inflight tracking.
     273                 :            :          */
     274                 :            : 
     275                 :            :         __intel_gt_pm_get(rq->engine->gt);
     276                 :            :         return i915_request_get(rq);
     277                 :            : }
     278                 :            : 
     279                 :          0 : static void schedule_out(struct i915_request *rq)
     280                 :            : {
     281                 :          0 :         trace_i915_request_out(rq);
     282                 :            : 
     283                 :          0 :         intel_gt_pm_put_async(rq->engine->gt);
     284                 :          0 :         i915_request_put(rq);
     285                 :          0 : }
     286                 :            : 
     287                 :          0 : static void __guc_dequeue(struct intel_engine_cs *engine)
     288                 :            : {
     289                 :          0 :         struct intel_engine_execlists * const execlists = &engine->execlists;
     290                 :          0 :         struct i915_request **first = execlists->inflight;
     291                 :          0 :         struct i915_request ** const last_port = first + execlists->port_mask;
     292                 :          0 :         struct i915_request *last = first[0];
     293                 :          0 :         struct i915_request **port;
     294                 :          0 :         bool submit = false;
     295                 :          0 :         struct rb_node *rb;
     296                 :            : 
     297                 :          0 :         lockdep_assert_held(&engine->active.lock);
     298                 :            : 
     299         [ #  # ]:          0 :         if (last) {
     300         [ #  # ]:          0 :                 if (*++first)
     301                 :            :                         return;
     302                 :            : 
     303                 :            :                 last = NULL;
     304                 :            :         }
     305                 :            : 
     306                 :            :         /*
     307                 :            :          * We write directly into the execlists->inflight queue and don't use
     308                 :            :          * the execlists->pending queue, as we don't have a distinct switch
     309                 :            :          * event.
     310                 :            :          */
     311                 :          0 :         port = first;
     312         [ #  # ]:          0 :         while ((rb = rb_first_cached(&execlists->queue))) {
     313                 :          0 :                 struct i915_priolist *p = to_priolist(rb);
     314                 :          0 :                 struct i915_request *rq, *rn;
     315                 :          0 :                 int i;
     316                 :            : 
     317   [ #  #  #  # ]:          0 :                 priolist_for_each_request_consume(rq, rn, p, i) {
     318   [ #  #  #  # ]:          0 :                         if (last && rq->context != last->context) {
     319         [ #  # ]:          0 :                                 if (port == last_port)
     320                 :          0 :                                         goto done;
     321                 :            : 
     322                 :          0 :                                 *port = schedule_in(last,
     323                 :            :                                                     port - execlists->inflight);
     324                 :          0 :                                 port++;
     325                 :            :                         }
     326                 :            : 
     327                 :          0 :                         list_del_init(&rq->sched.link);
     328                 :          0 :                         __i915_request_submit(rq);
     329                 :          0 :                         submit = true;
     330                 :          0 :                         last = rq;
     331                 :            :                 }
     332                 :            : 
     333                 :          0 :                 rb_erase_cached(&p->node, &execlists->queue);
     334         [ #  # ]:          0 :                 i915_priolist_free(p);
     335                 :            :         }
     336                 :          0 : done:
     337                 :          0 :         execlists->queue_priority_hint =
     338         [ #  # ]:          0 :                 rb ? to_priolist(rb)->priority : INT_MIN;
     339         [ #  # ]:          0 :         if (submit) {
     340                 :          0 :                 *port = schedule_in(last, port - execlists->inflight);
     341                 :          0 :                 *++port = NULL;
     342                 :          0 :                 guc_submit(engine, first, port);
     343                 :            :         }
     344                 :          0 :         execlists->active = execlists->inflight;
     345                 :            : }
     346                 :            : 
     347                 :          0 : static void guc_submission_tasklet(unsigned long data)
     348                 :            : {
     349                 :          0 :         struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
     350                 :          0 :         struct intel_engine_execlists * const execlists = &engine->execlists;
     351                 :          0 :         struct i915_request **port, *rq;
     352                 :          0 :         unsigned long flags;
     353                 :            : 
     354                 :          0 :         spin_lock_irqsave(&engine->active.lock, flags);
     355                 :            : 
     356         [ #  # ]:          0 :         for (port = execlists->inflight; (rq = *port); port++) {
     357         [ #  # ]:          0 :                 if (!i915_request_completed(rq))
     358                 :            :                         break;
     359                 :            : 
     360                 :          0 :                 schedule_out(rq);
     361                 :            :         }
     362         [ #  # ]:          0 :         if (port != execlists->inflight) {
     363                 :          0 :                 int idx = port - execlists->inflight;
     364                 :          0 :                 int rem = ARRAY_SIZE(execlists->inflight) - idx;
     365                 :          0 :                 memmove(execlists->inflight, port, rem * sizeof(*port));
     366                 :            :         }
     367                 :            : 
     368                 :          0 :         __guc_dequeue(engine);
     369                 :            : 
     370                 :          0 :         spin_unlock_irqrestore(&engine->active.lock, flags);
     371                 :          0 : }
     372                 :            : 
     373                 :          0 : static void guc_reset_prepare(struct intel_engine_cs *engine)
     374                 :            : {
     375                 :          0 :         struct intel_engine_execlists * const execlists = &engine->execlists;
     376                 :            : 
     377                 :          0 :         ENGINE_TRACE(engine, "\n");
     378                 :            : 
     379                 :            :         /*
     380                 :            :          * Prevent request submission to the hardware until we have
     381                 :            :          * completed the reset in i915_gem_reset_finish(). If a request
     382                 :            :          * is completed by one engine, it may then queue a request
     383                 :            :          * to a second via its execlists->tasklet *just* as we are
     384                 :            :          * calling engine->init_hw() and also writing the ELSP.
     385                 :            :          * Turning off the execlists->tasklet until the reset is over
     386                 :            :          * prevents the race.
     387                 :            :          */
     388                 :          0 :         __tasklet_disable_sync_once(&execlists->tasklet);
     389                 :          0 : }
     390                 :            : 
     391                 :            : static void
     392                 :          0 : cancel_port_requests(struct intel_engine_execlists * const execlists)
     393                 :            : {
     394                 :          0 :         struct i915_request * const *port, *rq;
     395                 :            : 
     396                 :            :         /* Note we are only using the inflight and not the pending queue */
     397                 :            : 
     398         [ #  # ]:          0 :         for (port = execlists->active; (rq = *port); port++)
     399                 :          0 :                 schedule_out(rq);
     400                 :          0 :         execlists->active =
     401                 :          0 :                 memset(execlists->inflight, 0, sizeof(execlists->inflight));
     402                 :          0 : }
     403                 :            : 
     404                 :          0 : static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
     405                 :            : {
                                                /*
                                                 * Reset hook, installed as engine->reset.rewind by
                                                 * guc_set_default_submission(). With engine->active.lock held
                                                 * and irqs saved, it drops the in-flight port tracking, unwinds
                                                 * the incomplete requests, and then resets the request handed
                                                 * back by the unwind helper together with its context.
                                                 *
                                                 * @stalled is forwarded to __i915_request_reset() and
                                                 * intel_lr_context_reset(); it is forced to false below when
                                                 * that request has not started yet.
                                                 */
     406                 :          0 :         struct intel_engine_execlists * const execlists = &engine->execlists;
     407                 :          0 :         struct i915_request *rq;
     408                 :          0 :         unsigned long flags;
     409                 :            : 
     410                 :          0 :         spin_lock_irqsave(&engine->active.lock, flags);
     411                 :            : 
     412                 :          0 :         cancel_port_requests(execlists);
     413                 :            : 
     414                 :            :         /* Push back any incomplete requests for replay after the reset. */
     415                 :          0 :         rq = execlists_unwind_incomplete_requests(execlists);
                                                /* No request returned by the unwind: nothing left to reset. */
     416         [ #  # ]:          0 :         if (!rq)
     417                 :          0 :                 goto out_unlock;
     418                 :            : 
                                                /*
                                                 * A request that has not started is never treated as stalled.
                                                 * NOTE(review): presumably because it cannot be the request
                                                 * that hung the engine - confirm.
                                                 */
     419         [ #  # ]:          0 :         if (!i915_request_started(rq))
     420                 :          0 :                 stalled = false;
     421                 :            : 
     422                 :          0 :         __i915_request_reset(rq, stalled);
     423                 :          0 :         intel_lr_context_reset(engine, rq->context, rq->head, stalled);
     424                 :            : 
     425                 :          0 : out_unlock:
     426                 :          0 :         spin_unlock_irqrestore(&engine->active.lock, flags);
     427                 :          0 : }
     428                 :            : 
     429                 :          0 : static void guc_reset_cancel(struct intel_engine_cs *engine)
     430                 :            : {
                                                /*
                                                 * Reset hook, installed as engine->reset.cancel by
                                                 * guc_set_default_submission(). Under engine->active.lock it:
                                                 *  1. clears the in-flight port tracking,
                                                 *  2. marks every request on engine->active.requests complete,
                                                 *     setting -EIO on the fence unless it already signaled,
                                                 *  3. drains execlists->queue, submitting each queued request and
                                                 *     marking it -EIO and complete, then frees each priolist,
                                                 *  4. resets queue_priority_hint and the queue root.
                                                 */
     431                 :          0 :         struct intel_engine_execlists * const execlists = &engine->execlists;
     432                 :          0 :         struct i915_request *rq, *rn;
     433                 :          0 :         struct rb_node *rb;
     434                 :          0 :         unsigned long flags;
     435                 :            : 
     436                 :          0 :         ENGINE_TRACE(engine, "\n");
     437                 :            : 
     438                 :            :         /*
     439                 :            :          * Before we call engine->cancel_requests(), we should have exclusive
     440                 :            :          * access to the submission state. This is arranged for us by the
     441                 :            :          * caller disabling the interrupt generation, the tasklet and other
     442                 :            :          * threads that may then access the same state, giving us a free hand
     443                 :            :          * to reset state. However, we still need to let lockdep be aware that
     444                 :            :          * we know this state may be accessed in hardirq context, so we
     445                 :            :          * disable the irq around this manipulation and we want to keep
     446                 :            :          * the spinlock focused on its duties and not accidentally conflate
     447                 :            :          * coverage to the submission's irq state. (Similarly, although we
     448                 :            :          * shouldn't need to disable irq around the manipulation of the
     449                 :            :          * submission's irq state, we also wish to remind ourselves that
     450                 :            :          * it is irq state.)
     451                 :            :          */
     452                 :          0 :         spin_lock_irqsave(&engine->active.lock, flags);
     453                 :            : 
     454                 :            :         /* Cancel the requests on the HW and clear the ELSP tracker. */
     455                 :          0 :         cancel_port_requests(execlists);
     456                 :            : 
     457                 :            :         /* Mark all executing requests as skipped. */
     458         [ #  # ]:          0 :         list_for_each_entry(rq, &engine->active.requests, sched.link) {
                                                        /* Only flag an error on fences that have not signaled yet. */
     459         [ #  # ]:          0 :                 if (!i915_request_signaled(rq))
     460                 :          0 :                         dma_fence_set_error(&rq->fence, -EIO);
     461                 :            : 
     462                 :          0 :                 i915_request_mark_complete(rq);
     463                 :            :         }
     464                 :            : 
     465                 :            :         /* Flush the queued requests to the timeline list (for retiring). */
     466         [ #  # ]:          0 :         while ((rb = rb_first_cached(&execlists->queue))) {
     467                 :          0 :                 struct i915_priolist *p = to_priolist(rb);
     468                 :          0 :                 int i;
     469                 :            : 
                                                        /*
                                                         * Each queued request is unlinked from the priolist, submitted,
                                                         * and then failed with -EIO and marked complete.
                                                         */
     470   [ #  #  #  # ]:          0 :                 priolist_for_each_request_consume(rq, rn, p, i) {
     471                 :          0 :                         list_del_init(&rq->sched.link);
     472                 :          0 :                         __i915_request_submit(rq);
     473                 :          0 :                         dma_fence_set_error(&rq->fence, -EIO);
     474                 :          0 :                         i915_request_mark_complete(rq);
     475                 :            :                 }
     476                 :            : 
                                                        /* The priolist has been consumed: drop it from the tree, free it. */
     477                 :          0 :                 rb_erase_cached(&p->node, &execlists->queue);
     478         [ #  # ]:          0 :                 i915_priolist_free(p);
     479                 :            :         }
     480                 :            : 
     481                 :            :         /* Remaining _unready_ requests will be nop'ed when submitted */
     482                 :            : 
                                                /*
                                                 * The loop above ran until rb_first_cached() returned NULL, so
                                                 * the queue holds no nodes here; reset the hint and the root.
                                                 */
     483                 :          0 :         execlists->queue_priority_hint = INT_MIN;
     484                 :          0 :         execlists->queue = RB_ROOT_CACHED;
     485                 :            : 
     486                 :          0 :         spin_unlock_irqrestore(&engine->active.lock, flags);
     487                 :          0 : }
     488                 :            : 
     489                 :          0 : static void guc_reset_finish(struct intel_engine_cs *engine)
     490                 :            : {
                                                /*
                                                 * Reset hook, installed as engine->reset.finish by
                                                 * guc_set_default_submission(). Re-enables the submission
                                                 * tasklet, schedules it when __tasklet_enable() returns true,
                                                 * and traces the resulting tasklet.count.
                                                 * NOTE(review): __tasklet_enable() presumably returns true only
                                                 * once the tasklet is fully enabled again - confirm.
                                                 */
     491                 :          0 :         struct intel_engine_execlists * const execlists = &engine->execlists;
     492                 :            : 
     493         [ #  # ]:          0 :         if (__tasklet_enable(&execlists->tasklet))
     494                 :            :                 /* And kick in case we missed a new request submission. */
     495                 :          0 :                 tasklet_hi_schedule(&execlists->tasklet);
     496                 :            : 
     497                 :          0 :         ENGINE_TRACE(engine, "depth->%d\n",
     498                 :            :                      atomic_read(&execlists->tasklet.count));
     499                 :          0 : }
     500                 :            : 
     501                 :            : /*
     502                 :            :  * Everything below here is concerned with setup & teardown, and is
     503                 :            :  * therefore not part of the somewhat time-critical batch-submission
     504                 :            :  * path of guc_submit() above.
     505                 :            :  */
     506                 :            : 
     507                 :            : /*
     508                 :            :  * Set up the memory resources to be shared with the GuC (via the GGTT)
     509                 :            :  * at firmware loading time.
     510                 :            :  */
     511                 :          0 : int intel_guc_submission_init(struct intel_guc *guc)
     512                 :            : {
                                                /*
                                                 * Creates, in order, the stage descriptor pool, the workqueue
                                                 * and the process descriptor, then initialises guc->wq_lock.
                                                 *
                                                 * Returns 0 on success, or immediately when
                                                 * guc->stage_desc_pool is already set. On failure it returns
                                                 * the error code of the create helper that failed, after
                                                 * destroying what had already been created in reverse order.
                                                 */
     513                 :          0 :         int ret;
     514                 :            : 
                                                /* Pool already present: nothing to do. */
     515         [ #  # ]:          0 :         if (guc->stage_desc_pool)
     516                 :            :                 return 0;
     517                 :            : 
     518                 :          0 :         ret = guc_stage_desc_pool_create(guc);
     519         [ #  # ]:          0 :         if (ret)
     520                 :            :                 return ret;
     521                 :            :         /*
     522                 :            :          * Keep static analysers happy, let them know that we allocated the
     523                 :            :          * vma after testing that it didn't exist earlier.
     524                 :            :          */
     525                 :          0 :         GEM_BUG_ON(!guc->stage_desc_pool);
     526                 :            : 
     527                 :          0 :         ret = guc_workqueue_create(guc);
     528         [ #  # ]:          0 :         if (ret)
     529                 :          0 :                 goto err_pool;
     530                 :            : 
     531                 :          0 :         ret = guc_proc_desc_create(guc);
     532         [ #  # ]:          0 :         if (ret)
     533                 :          0 :                 goto err_workqueue;
     534                 :            : 
     535                 :          0 :         spin_lock_init(&guc->wq_lock);
     536                 :            : 
     537                 :          0 :         return 0;
     538                 :            : 
                                                /* Error unwind: each label falls through to the ones below it. */
     539                 :            : err_workqueue:
     540                 :          0 :         guc_workqueue_destroy(guc);
     541                 :          0 : err_pool:
     542                 :          0 :         guc_stage_desc_pool_destroy(guc);
     543                 :          0 :         return ret;
     544                 :            : }
     545                 :            : 
     546                 :          0 : void intel_guc_submission_fini(struct intel_guc *guc)
     547                 :            : {
                                                /*
                                                 * Counterpart of intel_guc_submission_init(): destroys the
                                                 * process descriptor, the workqueue and the stage descriptor
                                                 * pool, i.e. in the reverse of their creation order. Does
                                                 * nothing when guc->stage_desc_pool is not set.
                                                 */
     548         [ #  # ]:          0 :         if (guc->stage_desc_pool) {
     549                 :          0 :                 guc_proc_desc_destroy(guc);
     550                 :          0 :                 guc_workqueue_destroy(guc);
     551                 :          0 :                 guc_stage_desc_pool_destroy(guc);
     552                 :            :         }
     553                 :          0 : }
     554                 :            : 
     555                 :            : static void guc_interrupts_capture(struct intel_gt *gt)
     556                 :            : {
                                                /*
                                                 * Called from intel_guc_submission_enable(). Issues a
                                                 * read-modify-write on the two GEN11 interrupt-enable
                                                 * registers, passing dmask as the third argument and 0 as the
                                                 * fourth. dmask carries GT_CONTEXT_SWITCH_INTERRUPT in both the
                                                 * low and the high 16 bits.
                                                 * NOTE(review): presumably the third argument of
                                                 * intel_uncore_rmw() is the clear mask and the fourth the set
                                                 * mask, and each 16-bit half serves one of the engine classes
                                                 * sharing the register - confirm.
                                                 */
     557                 :            :         struct intel_uncore *uncore = gt->uncore;
     558                 :            :         u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
     559                 :            :         u32 dmask = irqs << 16 | irqs;
     560                 :            : 
                                                /* Only the GEN11 register names are used below. */
     561                 :            :         GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);
     562                 :            : 
     563                 :            :         /* Don't handle the ctx switch interrupt in GuC submission mode */
     564                 :            :         intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask, 0);
     565                 :            :         intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask, 0);
     566                 :            : }
     567                 :            : 
     568                 :            : static void guc_interrupts_release(struct intel_gt *gt)
     569                 :            : {
                                                /*
                                                 * Called from intel_guc_submission_disable(); mirror image of
                                                 * guc_interrupts_capture(). The same dmask is passed to
                                                 * intel_uncore_rmw() with the third and fourth arguments
                                                 * swapped (0, dmask) for the same two registers.
                                                 * NOTE(review): presumably this sets the bits that capture
                                                 * cleared - confirm against intel_uncore_rmw().
                                                 */
     570                 :            :         struct intel_uncore *uncore = gt->uncore;
     571                 :            :         u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
     572                 :            :         u32 dmask = irqs << 16 | irqs;
     573                 :            : 
     574                 :            :         GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);
     575                 :            : 
     576                 :            :         /* Handle ctx switch interrupts again */
     577                 :            :         intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0, dmask);
     578                 :            :         intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask);
     579                 :            : }
     580                 :            : 
     581                 :          0 : static void guc_set_default_submission(struct intel_engine_cs *engine)
     582                 :            : {
                                                /*
                                                 * Per-engine hook assigned to engine->set_default_submission
                                                 * by intel_guc_submission_enable(). It first applies the
                                                 * execlists defaults and then overrides the tasklet function,
                                                 * the park/unpark hooks, the four reset hooks and two engine
                                                 * flags.
                                                 */
     583                 :            :         /*
     584                 :            :          * We inherit a bunch of functions from execlists that we'd like
     585                 :            :          * to keep using:
     586                 :            :          *
     587                 :            :          *    engine->submit_request = execlists_submit_request;
     588                 :            :          *    engine->cancel_requests = execlists_cancel_requests;
     589                 :            :          *    engine->schedule = execlists_schedule;
     590                 :            :          *
     591                 :            :          * But we need to override the actual submission backend in order
     592                 :            :          * to talk to the GuC.
     593                 :            :          */
     594                 :          0 :         intel_execlists_set_default_submission(engine);
     595                 :            : 
     596                 :          0 :         engine->execlists.tasklet.func = guc_submission_tasklet;
     597                 :            : 
     598                 :            :         /* do not use execlists park/unpark */
     599                 :          0 :         engine->park = engine->unpark = NULL;
     600                 :            : 
                                                /* Route all four engine reset stages to the GuC variants. */
     601                 :          0 :         engine->reset.prepare = guc_reset_prepare;
     602                 :          0 :         engine->reset.rewind = guc_reset_rewind;
     603                 :          0 :         engine->reset.cancel = guc_reset_cancel;
     604                 :          0 :         engine->reset.finish = guc_reset_finish;
     605                 :            : 
                                                /* Drop the stats flag and request the breadcrumb tasklet. */
     606                 :          0 :         engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
     607                 :          0 :         engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
     608                 :            : 
     609                 :            :         /*
     610                 :            :          * For the breadcrumb irq to work we need the interrupts to stay
     611                 :            :          * enabled. However, on all platforms on which we'll have support for
     612                 :            :          * GuC submission we don't allow disabling the interrupts at runtime, so
     613                 :            :          * we're always safe with the current flow.
     614                 :            :          */
     615                 :          0 :         GEM_BUG_ON(engine->irq_enable || engine->irq_disable);
     616                 :          0 : }
     617                 :            : 
     618                 :          0 : void intel_guc_submission_enable(struct intel_guc *guc)
     619                 :            : {
                                                /*
                                                 * Switches the GT owning @guc over to GuC submission:
                                                 * initialises the process and stage descriptors, calls
                                                 * guc_interrupts_capture(), and then installs and immediately
                                                 * invokes guc_set_default_submission() on every engine of
                                                 * the GT.
                                                 */
     620                 :          0 :         struct intel_gt *gt = guc_to_gt(guc);
     621                 :          0 :         struct intel_engine_cs *engine;
     622                 :          0 :         enum intel_engine_id id;
     623                 :            : 
     624                 :            :         /*
     625                 :            :          * We're using GuC work items for submitting work through GuC. Since
     626                 :            :          * we're coalescing multiple requests from a single context into a
     627                 :            :          * single work item prior to assigning it to execlist_port, we can
     628                 :            :          * never have more work items than the total number of ports (for all
     629                 :            :          * engines). The GuC firmware is controlling the HEAD of work queue,
     630                 :            :          * and it is guaranteed that it will remove the work item from the
     631                 :            :          * queue before our request is completed.
     632                 :            :          */
     633                 :          0 :         BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) *
     634                 :            :                      sizeof(struct guc_wq_item) *
     635                 :            :                      I915_NUM_ENGINES > GUC_WQ_SIZE);
     636                 :            : 
     637                 :          0 :         guc_proc_desc_init(guc);
     638                 :          0 :         guc_stage_desc_init(guc);
     639                 :            : 
     640                 :            :         /* Take over from manual control of ELSP (execlists) */
     641                 :          0 :         guc_interrupts_capture(gt);
     642                 :            : 
                                                /*
                                                 * The stored hook pointer is also what
                                                 * intel_engine_in_guc_submission_mode() compares against.
                                                 */
     643   [ #  #  #  # ]:          0 :         for_each_engine(engine, gt, id) {
     644                 :          0 :                 engine->set_default_submission = guc_set_default_submission;
     645                 :          0 :                 engine->set_default_submission(engine);
     646                 :            :         }
     647                 :          0 : }
     648                 :            : 
     649                 :          0 : void intel_guc_submission_disable(struct intel_guc *guc)
     650                 :            : {
                                                /*
                                                 * Counterpart of intel_guc_submission_enable(): calls
                                                 * guc_interrupts_release() and then finalises the stage and
                                                 * process descriptors, in the reverse of their init order.
                                                 * The per-engine set_default_submission hooks are not touched
                                                 * here.
                                                 */
     651                 :          0 :         struct intel_gt *gt = guc_to_gt(guc);
     652                 :            : 
     653                 :          0 :         GEM_BUG_ON(gt->awake); /* GT should be parked first */
     654                 :            : 
     655                 :            :         /* Note: By the time we're here, GuC may have already been reset */
     656                 :            : 
     657                 :          0 :         guc_interrupts_release(gt);
     658                 :            : 
     659                 :          0 :         guc_stage_desc_fini(guc);
     660                 :          0 :         guc_proc_desc_fini(guc);
     661                 :          0 : }
     662                 :            : 
     663                 :          0 : static bool __guc_submission_support(struct intel_guc *guc)
     664                 :            : {
                                                /*
                                                 * Reports whether GuC submission may be used. It currently
                                                 * always returns false: the unconditional return below makes
                                                 * the intel_guc_is_supported() and enable_guc modparam checks
                                                 * unreachable.
                                                 * NOTE(review): the dead checks look intentionally retained for
                                                 * when submission is turned back on - confirm before removing.
                                                 */
     665                 :            :         /* XXX: GuC submission is unavailable for now */
     666                 :          0 :         return false;
     667                 :            : 
     668                 :            :         if (!intel_guc_is_supported(guc))
     669                 :            :                 return false;
     670                 :            : 
     671                 :            :         return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION;
     672                 :            : }
     673                 :            : 
     674                 :          0 : void intel_guc_submission_init_early(struct intel_guc *guc)
     675                 :            : {
                                                /*
                                                 * Caches the result of __guc_submission_support() in
                                                 * guc->submission_supported.
                                                 */
     676                 :          0 :         guc->submission_supported = __guc_submission_support(guc);
     677                 :          0 : }
     678                 :            : 
     679                 :          0 : bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
     680                 :            : {
                                                /*
                                                 * Returns true when the engine's set_default_submission hook is
                                                 * guc_set_default_submission(), which is the value assigned per
                                                 * engine by intel_guc_submission_enable().
                                                 */
     681                 :          0 :         return engine->set_default_submission == guc_set_default_submission;
     682                 :            : }

Generated by: LCOV version 1.14