Skip to content

Commit

Permalink
drm/i915: Keep timeline HWSP allocated until idle across the system
Browse files Browse the repository at this point in the history
In preparation for enabling HW semaphores, we need to keep each in-flight
timeline HWSP alive until its use across the entire system has completed,
as any other timeline active on the GPU may still refer back to the
already retired timeline. We have to delay both recycling available
cachelines and unpinning old HWSP until the next idle point.

An easy option would be to simply keep all used HWSP until the system as
a whole was idle, i.e. we could release them all at once on parking.
However, on a busy system, we may never see a global idle point,
essentially meaning the resource will be leaked until we are forced to
do a GC pass. We already employ a fine-grained idle detection mechanism
for vma, which we can reuse here so that each cacheline can be freed
immediately after the last request using it is retired.

v3: Keep track of the activity of each cacheline.
v4: cacheline_free() on canceling the seqno tracking
v5: Finally with a testcase to exercise wraparound
v6: Pack cacheline into empty bits of page-aligned vaddr
v7: Use i915_utils to hide the pointer casting around bit manipulation

Signed-off-by: Chris Wilson <[email protected]>
Reviewed-by: Tvrtko Ursulin <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
  • Loading branch information
ickle committed Mar 1, 2019
1 parent 1e3f697 commit ebece75
Show file tree
Hide file tree
Showing 5 changed files with 420 additions and 39 deletions.
31 changes: 16 additions & 15 deletions drivers/gpu/drm/i915/i915_request.c
Original file line number Diff line number Diff line change
Expand Up @@ -325,11 +325,6 @@ void i915_request_retire_upto(struct i915_request *rq)
} while (tmp != rq);
}

/*
 * Advance the timeline's seqno and return the updated value.
 *
 * The counter moves forward by one, plus the value of
 * tl->has_initial_breadcrumb.
 */
static u32 timeline_get_seqno(struct i915_timeline *tl)
{
	tl->seqno += 1 + tl->has_initial_breadcrumb;

	return tl->seqno;
}

static void move_to_timeline(struct i915_request *request,
struct i915_timeline *timeline)
{
Expand Down Expand Up @@ -532,8 +527,10 @@ struct i915_request *
i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
{
struct drm_i915_private *i915 = engine->i915;
struct i915_request *rq;
struct intel_context *ce;
struct i915_timeline *tl;
struct i915_request *rq;
u32 seqno;
int ret;

lockdep_assert_held(&i915->drm.struct_mutex);
Expand Down Expand Up @@ -610,24 +607,27 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
}
}

rq->rcustate = get_state_synchronize_rcu();

INIT_LIST_HEAD(&rq->active_list);

tl = ce->ring->timeline;
ret = i915_timeline_get_seqno(tl, rq, &seqno);
if (ret)
goto err_free;

rq->i915 = i915;
rq->engine = engine;
rq->gem_context = ctx;
rq->hw_context = ce;
rq->ring = ce->ring;
rq->timeline = ce->ring->timeline;
rq->timeline = tl;
GEM_BUG_ON(rq->timeline == &engine->timeline);
rq->hwsp_seqno = rq->timeline->hwsp_seqno;
rq->hwsp_seqno = tl->hwsp_seqno;
rq->hwsp_cacheline = tl->hwsp_cacheline;
rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */

spin_lock_init(&rq->lock);
dma_fence_init(&rq->fence,
&i915_fence_ops,
&rq->lock,
rq->timeline->fence_context,
timeline_get_seqno(rq->timeline));
dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
tl->fence_context, seqno);

/* We bump the ref for the fence chain */
i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
Expand Down Expand Up @@ -687,6 +687,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));

err_free:
kmem_cache_free(global.slab_requests, rq);
err_unreserve:
mutex_unlock(&ce->ring->timeline->mutex);
Expand Down
11 changes: 11 additions & 0 deletions drivers/gpu/drm/i915/i915_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ struct drm_file;
struct drm_i915_gem_object;
struct i915_request;
struct i915_timeline;
struct i915_timeline_cacheline;

struct i915_capture_list {
struct i915_capture_list *next;
Expand Down Expand Up @@ -148,6 +149,16 @@ struct i915_request {
*/
const u32 *hwsp_seqno;

/*
* If we need to access the timeline's seqno for this request in
* another request, we need to keep a read reference to this associated
* cacheline, so that we do not free and recycle it before the foreign
* observers have completed. Hence, we keep a pointer to the cacheline
* inside the timeline's HWSP vma, but it is only valid while this
* request has not completed and is guarded by the timeline mutex.
*/
struct i915_timeline_cacheline *hwsp_cacheline;

/** Position in the ring of the start of the request */
u32 head;

Expand Down
Loading

0 comments on commit ebece75

Please sign in to comment.