Skip to content

Commit

Permalink
drm/i915/gt: Use the local HWSP offset during submission
Browse files Browse the repository at this point in the history
We wrap the timeline on construction of the next request, but there may
still be requests in flight that have not yet finalized the breadcrumb.
(The breadcrumb is delayed as we need engine-local offsets, and for the
virtual engine that is not known until execution.) As such, by the time
we write to the timeline's HWSP offset it may have changed, and we
should use the value we preserved in the request instead.

Though the window is small and infrequent (at full flow we can expect a
timeline's seqno to wrap once every 30 minutes), the impact of writing
the old seqno into the new HWSP is severe: the old requests are never
completed, and the new requests are completed before they are even
submitted.

Fixes: ebece75 ("drm/i915: Keep timeline HWSP allocated until idle across the system")
Signed-off-by: Chris Wilson <[email protected]>
Cc: Tvrtko Ursulin <[email protected]>
Cc: Joonas Lahtinen <[email protected]>
Cc: <[email protected]> # v5.2+
Reviewed-by: Mika Kuoppala <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
(cherry picked from commit c10f601)
Signed-off-by: Rodrigo Vivi <[email protected]>
  • Loading branch information
ickle authored and rodrigovivi committed Nov 4, 2020
1 parent 59dd13a commit 8ce7099
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 16 deletions.
27 changes: 19 additions & 8 deletions drivers/gpu/drm/i915/gt/intel_lrc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3547,6 +3547,19 @@ static const struct intel_context_ops execlists_context_ops = {
.destroy = execlists_context_destroy,
};

/*
 * Pick the HWSP offset to use when emitting this request's seqno writes.
 *
 * The offset stored in the request's own cacheline is preferred; the
 * timeline's hwsp_offset is used only when the request has no cacheline.
 */
static u32 hwsp_offset(const struct i915_request *rq)
{
	const struct intel_timeline_cacheline *cacheline =
		rcu_dereference_protected(rq->hwsp_cacheline, 1);

	/* Before the request is executed, the timeline/cacheline is fixed */
	if (!cacheline)
		return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;

	return cacheline->ggtt_offset;
}

static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
Expand All @@ -3569,7 +3582,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
*cs++ = MI_NOOP;

*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = i915_request_timeline(rq)->hwsp_offset;
*cs++ = hwsp_offset(rq);
*cs++ = 0;
*cs++ = rq->fence.seqno - 1;

Expand Down Expand Up @@ -4886,11 +4899,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
return gen8_emit_wa_tail(request, cs);
}

static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
u32 addr = i915_request_active_timeline(request)->hwsp_offset;

return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}

static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
Expand All @@ -4909,7 +4920,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
i915_request_active_timeline(request)->hwsp_offset,
hwsp_offset(request),
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);

Expand All @@ -4921,7 +4932,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
i915_request_active_timeline(request)->hwsp_offset,
hwsp_offset(request),
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
Expand Down Expand Up @@ -4991,7 +5002,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
cs = gen12_emit_ggtt_write_rcs(cs,
request->fence.seqno,
i915_request_active_timeline(request)->hwsp_offset,
hwsp_offset(request),
PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
Expand Down
18 changes: 10 additions & 8 deletions drivers/gpu/drm/i915/gt/intel_timeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,14 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
return cl;
}

static void cacheline_acquire(struct intel_timeline_cacheline *cl)
static void cacheline_acquire(struct intel_timeline_cacheline *cl,
u32 ggtt_offset)
{
if (cl)
i915_active_acquire(&cl->active);
if (!cl)
return;

cl->ggtt_offset = ggtt_offset;
i915_active_acquire(&cl->active);
}

static void cacheline_release(struct intel_timeline_cacheline *cl)
Expand Down Expand Up @@ -340,7 +344,7 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
tl->fence_context, tl->hwsp_offset);

cacheline_acquire(tl->hwsp_cacheline);
cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
if (atomic_fetch_inc(&tl->pin_count)) {
cacheline_release(tl->hwsp_cacheline);
__i915_vma_unpin(tl->hwsp_ggtt);
Expand Down Expand Up @@ -515,7 +519,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
tl->fence_context, tl->hwsp_offset);

cacheline_acquire(cl);
cacheline_acquire(cl, tl->hwsp_offset);
tl->hwsp_cacheline = cl;

*seqno = timeline_advance(tl);
Expand Down Expand Up @@ -573,9 +577,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
if (err)
goto out;

*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;

*hwsp = cl->ggtt_offset;
out:
i915_active_release(&cl->active);
return err;
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_timeline_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ struct intel_timeline_cacheline {
struct intel_timeline_hwsp *hwsp;
void *vaddr;

u32 ggtt_offset;

struct rcu_head rcu;
};

Expand Down

0 comments on commit 8ce7099

Please sign in to comment.