Skip to content

Commit 2935ed5

Browse files
committed
drm/i915: Remove logical HW ID
With the introduction of ctx->engines[] we allow multiple logical contexts to be used on the same engine (e.g. with virtual engines). According to bspec, each logical context requires a unique tag in order for context-switching to occur correctly between them. [Simple experiments show that it is not so easy to trick the HW into performing a lite-restore with matching logical IDs, though my memory from early Broadwell experiments does suggest that it should be generating lite-restores.] We only need to keep a unique tag for the active lifetime of the context, and for as long as we need to identify that context. The HW uses the tag to determine if it should use a lite-restore (why not the LRCA?) and passes the tag back for various status identifiers. The only status we need to track is for OA, so when using perf, we assign the specific context a unique tag. v2: Calculate required number of tags to fill ELSP. Fixes: 976b55f ("drm/i915: Allow a context to define its set of engines") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111895 Signed-off-by: Chris Wilson <[email protected]> Acked-by: Daniele Ceraolo Spurio <[email protected]> Reviewed-by: Tvrtko Ursulin <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent a2b4dea commit 2935ed5

18 files changed

+57
-302
lines changed

drivers/gpu/drm/i915/gem/i915_gem_context.c

Lines changed: 0 additions & 153 deletions
Original file line numberDiff line numberDiff line change
@@ -167,97 +167,6 @@ lookup_user_engine(struct i915_gem_context *ctx,
167167
return i915_gem_context_get_engine(ctx, idx);
168168
}
169169

170-
static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
171-
{
172-
unsigned int max;
173-
174-
lockdep_assert_held(&i915->contexts.mutex);
175-
176-
if (INTEL_GEN(i915) >= 12)
177-
max = GEN12_MAX_CONTEXT_HW_ID;
178-
else if (INTEL_GEN(i915) >= 11)
179-
max = GEN11_MAX_CONTEXT_HW_ID;
180-
else if (USES_GUC_SUBMISSION(i915))
181-
/*
182-
* When using GuC in proxy submission, GuC consumes the
183-
* highest bit in the context id to indicate proxy submission.
184-
*/
185-
max = MAX_GUC_CONTEXT_HW_ID;
186-
else
187-
max = MAX_CONTEXT_HW_ID;
188-
189-
return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
190-
}
191-
192-
static int steal_hw_id(struct drm_i915_private *i915)
193-
{
194-
struct i915_gem_context *ctx, *cn;
195-
LIST_HEAD(pinned);
196-
int id = -ENOSPC;
197-
198-
lockdep_assert_held(&i915->contexts.mutex);
199-
200-
list_for_each_entry_safe(ctx, cn,
201-
&i915->contexts.hw_id_list, hw_id_link) {
202-
if (atomic_read(&ctx->hw_id_pin_count)) {
203-
list_move_tail(&ctx->hw_id_link, &pinned);
204-
continue;
205-
}
206-
207-
GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
208-
list_del_init(&ctx->hw_id_link);
209-
id = ctx->hw_id;
210-
break;
211-
}
212-
213-
/*
214-
* Remember how far we got up on the last repossession scan, so the
215-
* list is kept in a "least recently scanned" order.
216-
*/
217-
list_splice_tail(&pinned, &i915->contexts.hw_id_list);
218-
return id;
219-
}
220-
221-
static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
222-
{
223-
int ret;
224-
225-
lockdep_assert_held(&i915->contexts.mutex);
226-
227-
/*
228-
* We prefer to steal/stall ourselves and our users over that of the
229-
* entire system. That may be a little unfair to our users, and
230-
* even hurt high priority clients. The choice is whether to oomkill
231-
* something else, or steal a context id.
232-
*/
233-
ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
234-
if (unlikely(ret < 0)) {
235-
ret = steal_hw_id(i915);
236-
if (ret < 0) /* once again for the correct errno code */
237-
ret = new_hw_id(i915, GFP_KERNEL);
238-
if (ret < 0)
239-
return ret;
240-
}
241-
242-
*out = ret;
243-
return 0;
244-
}
245-
246-
static void release_hw_id(struct i915_gem_context *ctx)
247-
{
248-
struct drm_i915_private *i915 = ctx->i915;
249-
250-
if (list_empty(&ctx->hw_id_link))
251-
return;
252-
253-
mutex_lock(&i915->contexts.mutex);
254-
if (!list_empty(&ctx->hw_id_link)) {
255-
ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
256-
list_del_init(&ctx->hw_id_link);
257-
}
258-
mutex_unlock(&i915->contexts.mutex);
259-
}
260-
261170
static void __free_engines(struct i915_gem_engines *e, unsigned int count)
262171
{
263172
while (count--) {
@@ -312,8 +221,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
312221
lockdep_assert_held(&ctx->i915->drm.struct_mutex);
313222
GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
314223

315-
release_hw_id(ctx);
316-
317224
free_engines(rcu_access_pointer(ctx->engines));
318225
mutex_destroy(&ctx->engines_mutex);
319226

@@ -386,12 +293,6 @@ static void context_close(struct i915_gem_context *ctx)
386293

387294
ctx->file_priv = ERR_PTR(-EBADF);
388295

389-
/*
390-
* This context will never again be assigned to HW, so we can
391-
* reuse its ID for the next context.
392-
*/
393-
release_hw_id(ctx);
394-
395296
/*
396297
* The LUT uses the VMA as a backpointer to unref the object,
397298
* so we need to clear the LUT before we close all the VMA (inside
@@ -430,7 +331,6 @@ __create_context(struct drm_i915_private *i915)
430331
RCU_INIT_POINTER(ctx->engines, e);
431332

432333
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
433-
INIT_LIST_HEAD(&ctx->hw_id_link);
434334

435335
/* NB: Mark all slices as needing a remap so that when the context first
436336
* loads it will restore whatever remap state already exists. If there
@@ -584,18 +484,11 @@ struct i915_gem_context *
584484
i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
585485
{
586486
struct i915_gem_context *ctx;
587-
int err;
588487

589488
ctx = i915_gem_create_context(i915, 0);
590489
if (IS_ERR(ctx))
591490
return ctx;
592491

593-
err = i915_gem_context_pin_hw_id(ctx);
594-
if (err) {
595-
destroy_kernel_context(&ctx);
596-
return ERR_PTR(err);
597-
}
598-
599492
i915_gem_context_clear_bannable(ctx);
600493
ctx->sched.priority = I915_USER_PRIORITY(prio);
601494

@@ -609,12 +502,6 @@ static void init_contexts(struct drm_i915_private *i915)
609502
mutex_init(&i915->contexts.mutex);
610503
INIT_LIST_HEAD(&i915->contexts.list);
611504

612-
/* Using the simple ida interface, the max is limited by sizeof(int) */
613-
BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
614-
BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
615-
ida_init(&i915->contexts.hw_ida);
616-
INIT_LIST_HEAD(&i915->contexts.hw_id_list);
617-
618505
INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
619506
init_llist_head(&i915->contexts.free_list);
620507
}
@@ -634,15 +521,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
634521
DRM_ERROR("Failed to create default global context\n");
635522
return PTR_ERR(ctx);
636523
}
637-
/*
638-
* For easy recognisability, we want the kernel context to be 0 and then
639-
* all user contexts will have non-zero hw_id. Kernel contexts are
640-
* permanently pinned, so that we never suffer a stall and can
641-
* use them from any allocation context (e.g. for evicting other
642-
* contexts and from inside the shrinker).
643-
*/
644-
GEM_BUG_ON(ctx->hw_id);
645-
GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
646524
dev_priv->kernel_context = ctx;
647525

648526
DRM_DEBUG_DRIVER("%s context support initialized\n",
@@ -656,10 +534,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
656534
lockdep_assert_held(&i915->drm.struct_mutex);
657535

658536
destroy_kernel_context(&i915->kernel_context);
659-
660-
/* Must free all deferred contexts (via flush_workqueue) first */
661-
GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
662-
ida_destroy(&i915->contexts.hw_ida);
663537
}
664538

665539
static int context_idr_cleanup(int id, void *p, void *data)
@@ -2316,33 +2190,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
23162190
return ret;
23172191
}
23182192

2319-
int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
2320-
{
2321-
struct drm_i915_private *i915 = ctx->i915;
2322-
int err = 0;
2323-
2324-
mutex_lock(&i915->contexts.mutex);
2325-
2326-
GEM_BUG_ON(i915_gem_context_is_closed(ctx));
2327-
2328-
if (list_empty(&ctx->hw_id_link)) {
2329-
GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
2330-
2331-
err = assign_hw_id(i915, &ctx->hw_id);
2332-
if (err)
2333-
goto out_unlock;
2334-
2335-
list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
2336-
}
2337-
2338-
GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
2339-
atomic_inc(&ctx->hw_id_pin_count);
2340-
2341-
out_unlock:
2342-
mutex_unlock(&i915->contexts.mutex);
2343-
return err;
2344-
}
2345-
23462193
/* GEM context-engines iterator: for_each_gem_engine() */
23472194
struct intel_context *
23482195
i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)

drivers/gpu/drm/i915/gem/i915_gem_context.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -112,21 +112,6 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
112112
clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
113113
}
114114

115-
int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
116-
static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
117-
{
118-
if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
119-
return 0;
120-
121-
return __i915_gem_context_pin_hw_id(ctx);
122-
}
123-
124-
static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
125-
{
126-
GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
127-
atomic_dec(&ctx->hw_id_pin_count);
128-
}
129-
130115
static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
131116
{
132117
return !ctx->file_priv;

drivers/gpu/drm/i915/gem/i915_gem_context_types.h

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -147,24 +147,6 @@ struct i915_gem_context {
147147
#define CONTEXT_FORCE_SINGLE_SUBMISSION 2
148148
#define CONTEXT_USER_ENGINES 3
149149

150-
/**
151-
* @hw_id: - unique identifier for the context
152-
*
153-
* The hardware needs to uniquely identify the context for a few
154-
* functions like fault reporting, PASID, scheduling. The
155-
* &drm_i915_private.context_hw_ida is used to assign a unique
156-
* id for the lifetime of the context.
157-
*
158-
* @hw_id_pin_count: - number of times this context had been pinned
159-
* for use (should be, at most, once per engine).
160-
*
161-
* @hw_id_link: - all contexts with an assigned id are tracked
162-
* for possible repossession.
163-
*/
164-
unsigned int hw_id;
165-
atomic_t hw_id_pin_count;
166-
struct list_head hw_id_link;
167-
168150
struct mutex mutex;
169151

170152
struct i915_sched_attr sched;

drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -660,9 +660,9 @@ static int igt_ctx_exec(void *arg)
660660

661661
err = gpu_fill(ce, obj, dw);
662662
if (err) {
663-
pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
663+
pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
664664
ndwords, dw, max_dwords(obj),
665-
engine->name, ctx->hw_id,
665+
engine->name,
666666
yesno(!!ctx->vm), err);
667667
intel_context_put(ce);
668668
kernel_context_close(ctx);
@@ -798,9 +798,9 @@ static int igt_shared_ctx_exec(void *arg)
798798

799799
err = gpu_fill(ce, obj, dw);
800800
if (err) {
801-
pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
801+
pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
802802
ndwords, dw, max_dwords(obj),
803-
engine->name, ctx->hw_id,
803+
engine->name,
804804
yesno(!!ctx->vm), err);
805805
intel_context_put(ce);
806806
kernel_context_close(ctx);
@@ -1382,10 +1382,9 @@ static int igt_ctx_readonly(void *arg)
13821382

13831383
err = gpu_fill(ce, obj, dw);
13841384
if (err) {
1385-
pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
1385+
pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
13861386
ndwords, dw, max_dwords(obj),
1387-
ce->engine->name, ctx->hw_id,
1388-
yesno(!!ctx->vm), err);
1387+
ce->engine->name, yesno(!!ctx->vm), err);
13891388
i915_gem_context_unlock_engines(ctx);
13901389
goto out_unlock;
13911390
}

drivers/gpu/drm/i915/gem/selftests/mock_context.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915,
1313
{
1414
struct i915_gem_context *ctx;
1515
struct i915_gem_engines *e;
16-
int ret;
1716

1817
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1918
if (!ctx)
@@ -30,13 +29,8 @@ mock_context(struct drm_i915_private *i915,
3029
RCU_INIT_POINTER(ctx->engines, e);
3130

3231
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
33-
INIT_LIST_HEAD(&ctx->hw_id_link);
3432
mutex_init(&ctx->mutex);
3533

36-
ret = i915_gem_context_pin_hw_id(ctx);
37-
if (ret < 0)
38-
goto err_engines;
39-
4034
if (name) {
4135
struct i915_ppgtt *ppgtt;
4236

@@ -54,8 +48,6 @@ mock_context(struct drm_i915_private *i915,
5448

5549
return ctx;
5650

57-
err_engines:
58-
free_engines(rcu_access_pointer(ctx->engines));
5951
err_free:
6052
kfree(ctx);
6153
return NULL;

drivers/gpu/drm/i915/gt/intel_context_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ struct intel_context {
5858

5959
u32 *lrc_reg_state;
6060
u64 lrc_desc;
61+
u32 tag; /* cookie passed to HW to track this context on submission */
6162

6263
unsigned int active_count; /* protected by timeline->mutex */
6364

drivers/gpu/drm/i915/gt/intel_engine_types.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,10 +303,12 @@ struct intel_engine_cs {
303303
u8 uabi_class;
304304
u8 uabi_instance;
305305

306+
u32 uabi_capabilities;
306307
u32 context_size;
307308
u32 mmio_base;
308309

309-
u32 uabi_capabilities;
310+
unsigned int context_tag;
311+
#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
310312

311313
struct rb_node uabi_node;
312314

0 commit comments

Comments
 (0)