Commit 6f2f7c83 authored by Dave Airlie

Merge tag 'drm-intel-gt-next-2021-10-21' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

UAPI Changes:

- Expose multi-LRC submission interface

  Similar to the bonded submission interface, but simplified.
  Comes with a GuC-only implementation for now. See the kerneldoc
  for more details.

  Userspace changes: https://github.com/intel/media-driver/pull/1252

- Expose logical engine instance to user

  Needed by the multi-LRC submission interface for GuC; see the query sketch below.

  Userspace changes: https://github.com/intel/media-driver/pull/1252
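
  A rough illustration of how userspace might read this logical instance,
  using the existing two-pass DRM_I915_QUERY_ENGINE_INFO query. This is a
  minimal sketch, not part of the series: the logical_instance field and the
  I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE flag are assumed from the uAPI as
  merged (see include/uapi/drm/i915_drm.h), and error handling is trimmed.

/* Illustrative sketch only: print the logical instance of each engine. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void print_logical_instances(int drm_fd)
{
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_ENGINE_INFO,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};
	struct drm_i915_query_engine_info *info;
	uint32_t i;

	/* First pass: the kernel fills item.length with the required size. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0)
		return;

	info = calloc(1, item.length);
	if (!info)
		return;
	item.data_ptr = (uintptr_t)info;

	/* Second pass: the kernel copies out the engine array. */
	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) == 0) {
		for (i = 0; i < info->num_engines; i++) {
			const struct drm_i915_engine_info *e = &info->engines[i];

			/* Assumed field/flag names from this series. */
			if (e->flags & I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE)
				printf("class %u instance %u -> logical %u\n",
				       (unsigned int)e->engine.engine_class,
				       (unsigned int)e->engine.engine_instance,
				       (unsigned int)e->logical_instance);
		}
	}
	free(info);
}

  The class/instance pairs reported here are the same ones the parallel
  submission extension consumes; the logical instance is what determines
  whether a set of placements is logically contiguous.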

Driver Changes:

- Fix blank screen booting crashes when CONFIG_CC_OPTIMIZE_FOR_SIZE=y (Hugh)
- Add support for multi-LRC submission in the GuC backend (Matt B)
- Add extra cache flushing before making pages userspace visible (Matt A, Thomas)
- Mark internal GPU object pages dirty so they will be flushed properly (Matt A)

- Move remaining debugfs interfaces i915_wedged/i915_forcewake_user into gt (Andi)
- Replace the unconditional clflushes with drm_clflush_virt_range() (Ville)
- Remove IS_ACTIVE macro completely (Lucas)
- Improve kerneldocs for cache_dirty (Matt A)

- Add missing includes (Lucas)
- Selftest improvements (Matt R, Ran, Matt A)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YXFmLKoq8Fg9JxSd@jlahtine-mobl.ger.corp.intel.com
parents 94ff371e ab5d964c
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2021 Intel Corporation
*/
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
/**
* struct drm_i915_context_engines_parallel_submit - Configure engine for
* parallel submission.
*
* Setup a slot in the context engine map to allow multiple BBs to be submitted
* in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
* in parallel. Multiple hardware contexts are created internally in the i915 to
* run these BBs. Once a slot is configured for N BBs, only N BBs can be
* submitted in each execbuf IOCTL and this is implicit behavior, e.g. the user
* doesn't tell the execbuf IOCTL there are N BBs; the execbuf IOCTL knows how
* many BBs there are based on the slot's configuration. The N BBs are the last
* N buffer objects, or the first N if I915_EXEC_BATCH_FIRST is set.
*
* The default placement behavior is to create implicit bonds between each
* context if each context maps to more than one physical engine (e.g. the
* context is a virtual engine). We also only allow contexts of the same engine
* class, and these contexts must be in logically contiguous order. Examples of
* the placement behavior are described below. Lastly, the default is to not
* allow BBs to be preempted mid-BB; instead, coordinated preemption is inserted
* on all hardware contexts between each set of BBs. Flags may be added in the
* future to change both of these default behaviors.
*
* Returns -EINVAL if the hardware context placement configuration is invalid
* or if the placement configuration isn't supported on the platform /
* submission interface.
* Returns -ENODEV if the extension isn't supported on the platform /
* submission interface.
*
* .. code-block:: none
*
* Example 1 pseudo code:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=1,
* engines=CS[0],CS[1])
*
* Results in the following valid placement:
* CS[0], CS[1]
*
* Example 2 pseudo code:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[2],CS[1],CS[3])
*
* Results in the following valid placements:
* CS[0], CS[1]
* CS[2], CS[3]
*
* This can also be thought of as 2 virtual engines described by a 2-D array
* in the engines field, with bonds placed between each index of the
* virtual engines, e.g. CS[0] is bonded to CS[1] and CS[2] is bonded to
* CS[3].
* VE[0] = CS[0], CS[2]
* VE[1] = CS[1], CS[3]
*
* Example 3 pseudo code:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[1],CS[1],CS[3])
*
* Results in the following valid and invalid placements:
* CS[0], CS[1]
* CS[1], CS[3] - Not logically contiguous, returns -EINVAL
*/
struct drm_i915_context_engines_parallel_submit {
/**
* @base: base user extension.
*/
struct i915_user_extension base;
/**
* @engine_index: slot for parallel engine
*/
__u16 engine_index;
/**
* @width: number of contexts per parallel engine
*/
__u16 width;
/**
* @num_siblings: number of siblings per context
*/
__u16 num_siblings;
/**
* @mbz16: reserved for future use; must be zero
*/
__u16 mbz16;
/**
* @flags: all undefined flags must be zero; currently no flags are defined
*/
__u64 flags;
/**
* @mbz64: reserved for future use; must be zero
*/
__u64 mbz64[3];
/**
* @engines: 2-d array of engine instances to configure parallel engine
*
* length = width (i) * num_siblings (j)
* index = j + i * num_siblings
*/
struct i915_engine_class_instance engines[0];
} __packed;
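
A minimal userspace sketch of Example 1 above (one slot, width=2,
num_siblings=1), assuming the declaration above plus the standard
I915_CONTEXT_PARAM_ENGINES / I915_CONTEXT_CREATE_EXT_SETPARAM context-create
path. The two wrapper structs simply mirror the uAPI layouts with fixed-size
trailing arrays (the header's I915_DEFINE_CONTEXT_PARAM_ENGINES-style macros
can be used instead), the engine class/instance values are placeholders, and
error handling is omitted.

/* Minimal sketch of Example 1: slot 0 configured for two BBs per execbuf,
 * each with a single placement. The struct definitions below only mirror
 * the uAPI layouts; the engine class/instance values are placeholders for
 * two logically contiguous engines of one class.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

struct parallel_ext {
	struct i915_user_extension base;
	__u16 engine_index;
	__u16 width;
	__u16 num_siblings;
	__u16 mbz16;
	__u64 flags;
	__u64 mbz64[3];
	struct i915_engine_class_instance engines[2];
} __attribute__((packed));

struct engines_param {
	__u64 extensions;
	struct i915_engine_class_instance engines[1];
} __attribute__((packed));

static uint32_t create_parallel_context(int drm_fd)
{
	struct parallel_ext parallel = {
		.base.name = I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT,
		.engine_index = 0,	/* slot 0 in the engine map */
		.width = 2,		/* two BBs per execbuf */
		.num_siblings = 1,	/* one placement per BB */
		.engines = {		/* engines[j + i * num_siblings] */
			{ I915_ENGINE_CLASS_VIDEO, 0 },	/* placement for BB 0 */
			{ I915_ENGINE_CLASS_VIDEO, 1 },	/* placement for BB 1 */
		},
		/* mbz16, flags and mbz64[] stay zero via designated init. */
	};
	struct engines_param engines = {
		.extensions = (uintptr_t)&parallel,
		/* Slot 0 starts out invalid; the extension fills it in. */
		.engines = { { I915_ENGINE_CLASS_INVALID,
			       I915_ENGINE_CLASS_INVALID_NONE } },
	};
	struct drm_i915_gem_context_create_ext_setparam p_engines = {
		.base.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
		.param = {
			.param = I915_CONTEXT_PARAM_ENGINES,
			.size = sizeof(engines),
			.value = (uintptr_t)&engines,
		},
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = (uintptr_t)&p_engines,
	};

	ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);
	return create.ctx_id;
}

Once the slot is configured this way, each execbuf on slot 0 must carry
exactly two batch buffers, per the implicit-width behavior described in the
kerneldoc above.
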
...@@ -135,8 +135,8 @@ Add I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT and ...@@ -135,8 +135,8 @@ Add I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT and
drm_i915_context_engines_parallel_submit to the uAPI to implement this drm_i915_context_engines_parallel_submit to the uAPI to implement this
extension. extension.
.. kernel-doc:: Documentation/gpu/rfc/i915_parallel_execbuf.h .. kernel-doc:: include/uapi/drm/i915_drm.h
:functions: drm_i915_context_engines_parallel_submit :functions: i915_context_engines_parallel_submit
Extend execbuf2 IOCTL to support submitting N BBs in a single IOCTL Extend execbuf2 IOCTL to support submitting N BBs in a single IOCTL
------------------------------------------------------------------- -------------------------------------------------------------------
......
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
* Copyright © 2014-2016 Intel Corporation * Copyright © 2014-2016 Intel Corporation
*/ */
#include <linux/dma-fence-array.h>
#include "gt/intel_engine.h" #include "gt/intel_engine.h"
#include "i915_gem_ioctls.h" #include "i915_gem_ioctls.h"
...@@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id) ...@@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id)
} }
static __always_inline unsigned int static __always_inline unsigned int
__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id)) __busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id))
{ {
const struct i915_request *rq; const struct i915_request *rq;
...@@ -46,29 +48,60 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id)) ...@@ -46,29 +48,60 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
* to eventually flush us, but to minimise latency just ask the * to eventually flush us, but to minimise latency just ask the
* hardware. * hardware.
* *
* Note we only report on the status of native fences. * Note we only report on the status of native fences and we currently
* have two native fences:
*
* 1. A composite fence (dma_fence_array) constructed of i915 requests
* created during a parallel submission. In this case we deconstruct the
* composite fence into individual i915 requests and check the status of
* each request.
*
* 2. A single i915 request.
*/ */
if (!dma_fence_is_i915(fence)) if (dma_fence_is_array(fence)) {
struct dma_fence_array *array = to_dma_fence_array(fence);
struct dma_fence **child = array->fences;
unsigned int nchild = array->num_fences;
do {
struct dma_fence *current_fence = *child++;
/* Not an i915 fence, can't be busy per above */
if (!dma_fence_is_i915(current_fence) ||
!test_bit(I915_FENCE_FLAG_COMPOSITE,
&current_fence->flags)) {
return 0;
}
rq = to_request(current_fence);
if (!i915_request_completed(rq))
return flag(rq->engine->uabi_class);
} while (--nchild);
/* All requests in array complete, not busy */
return 0; return 0;
} else {
if (!dma_fence_is_i915(fence))
return 0;
/* opencode to_request() in order to avoid const warnings */ rq = to_request(fence);
rq = container_of(fence, const struct i915_request, fence); if (i915_request_completed(rq))
if (i915_request_completed(rq)) return 0;
return 0;
/* Beware type-expansion follies! */ /* Beware type-expansion follies! */
BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class)); BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
return flag(rq->engine->uabi_class); return flag(rq->engine->uabi_class);
}
} }
static __always_inline unsigned int static __always_inline unsigned int
busy_check_reader(const struct dma_fence *fence) busy_check_reader(struct dma_fence *fence)
{ {
return __busy_set_if_active(fence, __busy_read_flag); return __busy_set_if_active(fence, __busy_read_flag);
} }
static __always_inline unsigned int static __always_inline unsigned int
busy_check_writer(const struct dma_fence *fence) busy_check_writer(struct dma_fence *fence)
{ {
if (!fence) if (!fence)
return 0; return 0;
......
...@@ -556,9 +556,147 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) ...@@ -556,9 +556,147 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data)
return 0; return 0;
} }
static int
set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
void *data)
{
struct i915_context_engines_parallel_submit __user *ext =
container_of_user(base, typeof(*ext), base);
const struct set_proto_ctx_engines *set = data;
struct drm_i915_private *i915 = set->i915;
u64 flags;
int err = 0, n, i, j;
u16 slot, width, num_siblings;
struct intel_engine_cs **siblings = NULL;
intel_engine_mask_t prev_mask;
/* FIXME: This is NIY for execlists */
if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
return -ENODEV;
if (get_user(slot, &ext->engine_index))
return -EFAULT;
if (get_user(width, &ext->width))
return -EFAULT;
if (get_user(num_siblings, &ext->num_siblings))
return -EFAULT;
if (slot >= set->num_engines) {
drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
slot, set->num_engines);
return -EINVAL;
}
if (set->engines[slot].type != I915_GEM_ENGINE_TYPE_INVALID) {
drm_dbg(&i915->drm,
"Invalid placement[%d], already occupied\n", slot);
return -EINVAL;
}
if (get_user(flags, &ext->flags))
return -EFAULT;
if (flags) {
drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags);
return -EINVAL;
}
for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
err = check_user_mbz(&ext->mbz64[n]);
if (err)
return err;
}
if (width < 2) {
drm_dbg(&i915->drm, "Width (%d) < 2\n", width);
return -EINVAL;
}
if (num_siblings < 1) {
drm_dbg(&i915->drm, "Number siblings (%d) < 1\n",
num_siblings);
return -EINVAL;
}
siblings = kmalloc_array(num_siblings * width,
sizeof(*siblings),
GFP_KERNEL);
if (!siblings)
return -ENOMEM;
/* Create contexts / engines */
for (i = 0; i < width; ++i) {
intel_engine_mask_t current_mask = 0;
struct i915_engine_class_instance prev_engine;
for (j = 0; j < num_siblings; ++j) {
struct i915_engine_class_instance ci;
n = i * num_siblings + j;
if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
err = -EFAULT;
goto out_err;
}
siblings[n] =
intel_engine_lookup_user(i915, ci.engine_class,
ci.engine_instance);
if (!siblings[n]) {
drm_dbg(&i915->drm,
"Invalid sibling[%d]: { class:%d, inst:%d }\n",
n, ci.engine_class, ci.engine_instance);
err = -EINVAL;
goto out_err;
}
if (n) {
if (prev_engine.engine_class !=
ci.engine_class) {
drm_dbg(&i915->drm,
"Mismatched class %d, %d\n",
prev_engine.engine_class,
ci.engine_class);
err = -EINVAL;
goto out_err;
}
}
prev_engine = ci;
current_mask |= siblings[n]->logical_mask;
}
if (i > 0) {
if (current_mask != prev_mask << 1) {
drm_dbg(&i915->drm,
"Non contiguous logical mask 0x%x, 0x%x\n",
prev_mask, current_mask);
err = -EINVAL;
goto out_err;
}
}
prev_mask = current_mask;
}
set->engines[slot].type = I915_GEM_ENGINE_TYPE_PARALLEL;
set->engines[slot].num_siblings = num_siblings;
set->engines[slot].width = width;
set->engines[slot].siblings = siblings;
return 0;
out_err:
kfree(siblings);
return err;
}
static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = { static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = {
[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance, [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance,
[I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond, [I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond,
[I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT] =
set_proto_ctx_engines_parallel_submit,
}; };
static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv, static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv,
...@@ -794,6 +932,7 @@ static int intel_context_set_gem(struct intel_context *ce, ...@@ -794,6 +932,7 @@ static int intel_context_set_gem(struct intel_context *ce,
GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); GEM_BUG_ON(rcu_access_pointer(ce->gem_context));
RCU_INIT_POINTER(ce->gem_context, ctx); RCU_INIT_POINTER(ce->gem_context, ctx);
GEM_BUG_ON(intel_context_is_pinned(ce));
ce->ring_size = SZ_16K; ce->ring_size = SZ_16K;
i915_vm_put(ce->vm); i915_vm_put(ce->vm);
...@@ -818,6 +957,25 @@ static int intel_context_set_gem(struct intel_context *ce, ...@@ -818,6 +957,25 @@ static int intel_context_set_gem(struct intel_context *ce,
return ret; return ret;
} }
static void __unpin_engines(struct i915_gem_engines *e, unsigned int count)
{
while (count--) {
struct intel_context *ce = e->engines[count], *child;
if (!ce || !test_bit(CONTEXT_PERMA_PIN, &ce->flags))
continue;
for_each_child(ce, child)
intel_context_unpin(child);
intel_context_unpin(ce);
}
}
static void unpin_engines(struct i915_gem_engines *e)
{
__unpin_engines(e, e->num_engines);
}
static void __free_engines(struct i915_gem_engines *e, unsigned int count) static void __free_engines(struct i915_gem_engines *e, unsigned int count)
{ {
while (count--) { while (count--) {
...@@ -933,6 +1091,40 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx, ...@@ -933,6 +1091,40 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx,
return err; return err;
} }
static int perma_pin_contexts(struct intel_context *ce)
{
struct intel_context *child;
int i = 0, j = 0, ret;
GEM_BUG_ON(!intel_context_is_parent(ce));
ret = intel_context_pin(ce);
if (unlikely(ret))
return ret;
for_each_child(ce, child) {
ret = intel_context_pin(child);
if (unlikely(ret))
goto unwind;
++i;
}
set_bit(CONTEXT_PERMA_PIN, &ce->flags);
return 0;
unwind:
intel_context_unpin(ce);
for_each_child(ce, child) {
if (j++ < i)
intel_context_unpin(child);
else
break;
}
return ret;
}
static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
unsigned int num_engines, unsigned int num_engines,
struct i915_gem_proto_engine *pe) struct i915_gem_proto_engine *pe)
...@@ -946,7 +1138,7 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, ...@@ -946,7 +1138,7 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
e->num_engines = num_engines; e->num_engines = num_engines;
for (n = 0; n < num_engines; n++) { for (n = 0; n < num_engines; n++) {
struct intel_context *ce; struct intel_context *ce, *child;
int ret; int ret;
switch (pe[n].type) { switch (pe[n].type) {
...@@ -956,7 +1148,13 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, ...@@ -956,7 +1148,13 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
case I915_GEM_ENGINE_TYPE_BALANCED: case I915_GEM_ENGINE_TYPE_BALANCED:
ce = intel_engine_create_virtual(pe[n].siblings, ce = intel_engine_create_virtual(pe[n].siblings,
pe[n].num_siblings); pe[n].num_siblings, 0);
break;
case I915_GEM_ENGINE_TYPE_PARALLEL:
ce = intel_engine_create_parallel(pe[n].siblings,
pe[n].num_siblings,
pe[n].width);
break; break;
case I915_GEM_ENGINE_TYPE_INVALID: case I915_GEM_ENGINE_TYPE_INVALID:
...@@ -977,6 +1175,30 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, ...@@ -977,6 +1175,30 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
err = ERR_PTR(ret); err = ERR_PTR(ret);
goto free_engines; goto free_engines;
} }
for_each_child(ce, child) {
ret = intel_context_set_gem(child, ctx, pe->sseu);
if (ret) {
err = ERR_PTR(ret);
goto free_engines;
}
}
/*
* XXX: Must be done after calling intel_context_set_gem as that
* function changes the ring size. The ring is allocated when
* the context is pinned. If the ring size is changed after
* allocation, we have a ring size mismatch which will cause
* the context to hang. Presumably with a bit of reordering we
* could move the perma-pin step to the backend function
* intel_engine_create_parallel.
*/
if (pe[n].type == I915_GEM_ENGINE_TYPE_PARALLEL) {
ret = perma_pin_contexts(ce);
if (ret) {
err = ERR_PTR(ret);
goto free_engines;
}
}
} }
return e; return e;
...@@ -1219,6 +1441,7 @@ static void context_close(struct i915_gem_context *ctx) ...@@ -1219,6 +1441,7 @@ static void context_close(struct i915_gem_context *ctx)
/* Flush any concurrent set_engines() */ /* Flush any concurrent set_engines() */
mutex_lock(&ctx->engines_mutex); mutex_lock(&ctx->engines_mutex);
unpin_engines(__context_engines_static(ctx));
engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1)); engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1));
i915_gem_context_set_closed(ctx); i915_gem_context_set_closed(ctx);
mutex_unlock(&ctx->engines_mutex); mutex_unlock(&ctx->engines_mutex);
......
...@@ -78,13 +78,16 @@ enum i915_gem_engine_type { ...@@ -78,13 +78,16 @@ enum i915_gem_engine_type {
/** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */ /** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */
I915_GEM_ENGINE_TYPE_BALANCED, I915_GEM_ENGINE_TYPE_BALANCED,
/** @I915_GEM_ENGINE_TYPE_PARALLEL: A parallel engine set */
I915_GEM_ENGINE_TYPE_PARALLEL,
}; };
/** /**
* struct i915_gem_proto_engine - prototype engine * struct i915_gem_proto_engine - prototype engine
* *
* This struct describes an engine that a context may contain. Engines * This struct describes an engine that a context may contain. Engines
* have three types: * have four types:
* *
* - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they * - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they
* show up as a NULL in i915_gem_engines::engines[i] and any attempt to * show up as a NULL in i915_gem_engines::engines[i] and any attempt to
...@@ -97,6 +100,10 @@ enum i915_gem_engine_type { ...@@ -97,6 +100,10 @@ enum i915_gem_engine_type {
* *
* - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described * - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described
* i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings. * i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings.
*
* - I915_GEM_ENGINE_TYPE_PARALLEL: A parallel submission engine set, described
* i915_gem_proto_engine::width, i915_gem_proto_engine::num_siblings, and
* i915_gem_proto_engine::siblings.
*/ */
struct i915_gem_proto_engine { struct i915_gem_proto_engine {
/** @type: Type of this engine */ /** @type: Type of this engine */
...@@ -105,10 +112,13 @@ struct i915_gem_proto_engine { ...@@ -105,10 +112,13 @@ struct i915_gem_proto_engine {
/** @engine: Engine, for physical */ /** @engine: Engine, for physical */
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
/** @num_siblings: Number of balanced siblings */ /** @num_siblings: Number of balanced or parallel siblings */
unsigned int num_siblings; unsigned int num_siblings;
/** @siblings: Balanced siblings */ /** @width: Width of each sibling */
unsigned int width;
/** @siblings: Balanced siblings or num_siblings * width for parallel */
struct intel_engine_cs **siblings; struct intel_engine_cs **siblings;
/** @sseu: Client-set SSEU parameters */ /** @sseu: Client-set SSEU parameters */
......
...@@ -232,6 +232,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags) ...@@ -232,6 +232,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
{ {
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct sg_table *pages; struct sg_table *pages;
unsigned int sg_page_sizes; unsigned int sg_page_sizes;
...@@ -242,8 +243,11 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) ...@@ -242,8 +243,11 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
if (IS_ERR(pages)) if (IS_ERR(pages))
return PTR_ERR(pages); return PTR_ERR(pages);
sg_page_sizes = i915_sg_dma_sizes(pages->sgl); /* XXX: consider doing a vmap flush or something */
if (!HAS_LLC(i915) || i915_gem_object_can_bypass_llc(obj))
wbinvd_on_all_cpus();
sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
__i915_gem_object_set_pages(obj, pages, sg_page_sizes); __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
return 0; return 0;
...@@ -301,7 +305,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, ...@@ -301,7 +305,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
} }
drm_gem_private_object_init(dev, &obj->base, dma_buf->size); drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class, 0); i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class,
I915_BO_ALLOC_USER);
obj->base.import_attach = attach; obj->base.import_attach = attach;
obj->base.resv = dma_buf->resv; obj->base.resv = dma_buf->resv;
......
...@@ -134,6 +134,8 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, ...@@ -134,6 +134,8 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
internal_free_pages(pages); internal_free_pages(pages);
obj->mm.dirty = false; obj->mm.dirty = false;
__start_cpu_write(obj);
} }
static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
......
...@@ -128,6 +128,32 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, ...@@ -128,6 +128,32 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE); !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
} }
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
/*
* This is purely from a security perspective, so we simply don't care
* about non-userspace objects being able to bypass the LLC.
*/
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
/*
* EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
* possible for userspace to bypass the GTT caching bits set by the
* kernel, as per the given object cache_level. This is troublesome
* since the heavy flush we apply when first gathering the pages is
* skipped if the kernel thinks the object is coherent with the GPU. As
* a result it might be possible to bypass the cache and read the
* contents of the page directly, which could be stale data. If it's
* just a case of userspace shooting themselves in the foot then so be
* it, but since i915 takes the stance of always zeroing memory before
* handing it to userspace, we need to prevent this.
*/
return IS_JSL_EHL(i915);
}
static void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) static void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
{ {
struct drm_i915_gem_object *obj = to_intel_bo(gem); struct drm_i915_gem_object *obj = to_intel_bo(gem);
......
...@@ -514,6 +514,7 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) ...@@ -514,6 +514,7 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level); unsigned int cache_level);
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj); void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
......
...@@ -427,6 +427,33 @@ struct drm_i915_gem_object { ...@@ -427,6 +427,33 @@ struct drm_i915_gem_object {
* can freely bypass the CPU cache when touching the pages with the GPU, * can freely bypass the CPU cache when touching the pages with the GPU,
* where the kernel is completely unaware. On such platform we need * where the kernel is completely unaware. On such platform we need
* apply the sledgehammer-on-acquire regardless of the @cache_coherent. * apply the sledgehammer-on-acquire regardless of the @cache_coherent.
*
* Special care is taken on non-LLC platforms, to prevent potential
* information leak. The driver currently ensures:
*
* 1. All userspace objects, by default, have @cache_level set as
* I915_CACHE_NONE. The only exception is userptr objects, where we
* instead force I915_CACHE_LLC, but we also don't allow userspace to
* ever change the @cache_level for such objects. Another special case
* is dma-buf, which doesn't rely on @cache_dirty, but there we
* always do a forced flush when acquiring the pages, if there is a
* chance that the pages can be read directly from main memory with
* the GPU.
*
* 2. All I915_CACHE_NONE objects have @cache_dirty initially true.
*
* 3. All swapped-out objects (i.e. shmem) have @cache_dirty set to
* true.
*
* 4. The @cache_dirty is never freely reset before the initial
* flush, even if userspace adjusts the @cache_level through the
* i915_gem_set_caching_ioctl.
*
* 5. All @cache_dirty objects (including swapped-in) are initially
* flushed with a synchronous call to drm_clflush_sg in
* __i915_gem_object_set_pages. The @cache_dirty can be freely reset
* at this point. All further asynchronous clflushes are never security
* critical, i.e. userspace is free to race against itself.
*/ */
unsigned int cache_dirty:1; unsigned int cache_dirty:1;
......
...@@ -182,22 +182,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) ...@@ -182,22 +182,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
if (i915_gem_object_needs_bit17_swizzle(obj)) if (i915_gem_object_needs_bit17_swizzle(obj))
i915_gem_object_do_bit_17_swizzle(obj, st); i915_gem_object_do_bit_17_swizzle(obj, st);
/* if (i915_gem_object_can_bypass_llc(obj))
* EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
* possible for userspace to bypass the GTT caching bits set by the
* kernel, as per the given object cache_level. This is troublesome
* since the heavy flush we apply when first gathering the pages is
* skipped if the kernel thinks the object is coherent with the GPU. As
* a result it might be possible to bypass the cache and read the
* contents of the page directly, which could be stale data. If it's
* just a case of userspace shooting themselves in the foot then so be
* it, but since i915 takes the stance of always zeroing memory before
* handing it to userspace, we need to prevent this.
*
* By setting cache_dirty here we make the clflush in set_pages
* unconditional on such platforms.
*/
if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
obj->cache_dirty = true; obj->cache_dirty = true;
__i915_gem_object_set_pages(obj, st, sg_page_sizes); __i915_gem_object_set_pages(obj, st, sg_page_sizes);
...@@ -301,6 +286,8 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, ...@@ -301,6 +286,8 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
struct sg_table *pages, struct sg_table *pages,
bool needs_clflush) bool needs_clflush)
{ {
struct drm_i915_private *i915 = to_i915(obj->base.dev);
GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
if (obj->mm.madv == I915_MADV_DONTNEED) if (obj->mm.madv == I915_MADV_DONTNEED)
...@@ -312,6 +299,16 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, ...@@ -312,6 +299,16 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
drm_clflush_sg(pages); drm_clflush_sg(pages);
__start_cpu_write(obj); __start_cpu_write(obj);
/*
* On non-LLC platforms, force the flush-on-acquire if this is ever
* swapped-in. Our async flush path is not trustworthy enough yet (and
* happens in the wrong order), and with some tricks it's conceivable
* for userspace to change the cache-level to I915_CACHE_NONE after the
* pages are swapped-in, and since execbuf binds the object before doing
* the async flush, we have a race window.
*/
if (!HAS_LLC(i915))
obj->cache_dirty = true;
} }
void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages) void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)
......
...@@ -165,8 +165,11 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) ...@@ -165,8 +165,11 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
goto err; goto err;
} }
sg_page_sizes = i915_sg_dma_sizes(st->sgl); WARN_ON_ONCE(!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE));
if (i915_gem_object_can_bypass_llc(obj))
obj->cache_dirty = true;
sg_page_sizes = i915_sg_dma_sizes(st->sgl);
__i915_gem_object_set_pages(obj, st, sg_page_sizes); __i915_gem_object_set_pages(obj, st, sg_page_sizes);
return 0; return 0;
...@@ -546,7 +549,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, ...@@ -546,7 +549,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
return -ENOMEM; return -ENOMEM;
drm_gem_private_object_init(dev, &obj->base, args->user_size); drm_gem_private_object_init(dev, &obj->base, args->user_size);
i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, 0); i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class,
I915_BO_ALLOC_USER);
obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE; obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE;
obj->read_domains = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->write_domain = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU;
......
...@@ -136,6 +136,8 @@ static void put_huge_pages(struct drm_i915_gem_object *obj, ...@@ -136,6 +136,8 @@ static void put_huge_pages(struct drm_i915_gem_object *obj,
huge_pages_free_pages(pages); huge_pages_free_pages(pages);
obj->mm.dirty = false; obj->mm.dirty = false;
__start_cpu_write(obj);
} }
static const struct drm_i915_gem_object_ops huge_page_ops = { static const struct drm_i915_gem_object_ops huge_page_ops = {
...@@ -152,6 +154,7 @@ huge_pages_object(struct drm_i915_private *i915, ...@@ -152,6 +154,7 @@ huge_pages_object(struct drm_i915_private *i915,
{ {
static struct lock_class_key lock_class; static struct lock_class_key lock_class;
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj;
unsigned int cache_level;
GEM_BUG_ON(!size); GEM_BUG_ON(!size);
GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask))));
...@@ -173,7 +176,9 @@ huge_pages_object(struct drm_i915_private *i915, ...@@ -173,7 +176,9 @@ huge_pages_object(struct drm_i915_private *i915,
obj->write_domain = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU;
obj->cache_level = I915_CACHE_NONE;
cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
i915_gem_object_set_cache_coherency(obj, cache_level);
obj->mm.page_mask = page_mask; obj->mm.page_mask = page_mask;
......
...@@ -17,13 +17,20 @@ ...@@ -17,13 +17,20 @@
#include "huge_gem_object.h" #include "huge_gem_object.h"
#include "mock_context.h" #include "mock_context.h"
enum client_tiling {
CLIENT_TILING_LINEAR,
CLIENT_TILING_X,
CLIENT_TILING_Y,
CLIENT_NUM_TILING_TYPES
};
#define WIDTH 512 #define WIDTH 512
#define HEIGHT 32 #define HEIGHT 32
struct blit_buffer { struct blit_buffer {
struct i915_vma *vma; struct i915_vma *vma;
u32 start_val; u32 start_val;
u32 tiling; enum client_tiling tiling;
}; };
struct tiled_blits { struct tiled_blits {
...@@ -53,9 +60,9 @@ static int prepare_blit(const struct tiled_blits *t, ...@@ -53,9 +60,9 @@ static int prepare_blit(const struct tiled_blits *t,
*cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(BCS_SWCTRL); *cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
cmd = (BCS_SRC_Y | BCS_DST_Y) << 16; cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
if (src->tiling == I915_TILING_Y) if (src->tiling == CLIENT_TILING_Y)
cmd |= BCS_SRC_Y; cmd |= BCS_SRC_Y;
if (dst->tiling == I915_TILING_Y) if (dst->tiling == CLIENT_TILING_Y)
cmd |= BCS_DST_Y; cmd |= BCS_DST_Y;
*cs++ = cmd; *cs++ = cmd;
...@@ -172,7 +179,7 @@ static int tiled_blits_create_buffers(struct tiled_blits *t, ...@@ -172,7 +179,7 @@ static int tiled_blits_create_buffers(struct tiled_blits *t,
t->buffers[i].vma = vma; t->buffers[i].vma = vma;
t->buffers[i].tiling = t->buffers[i].tiling =
i915_prandom_u32_max_state(I915_TILING_Y + 1, prng); i915_prandom_u32_max_state(CLIENT_TILING_Y + 1, prng);
} }
return 0; return 0;
...@@ -197,17 +204,17 @@ static u64 swizzle_bit(unsigned int bit, u64 offset) ...@@ -197,17 +204,17 @@ static u64 swizzle_bit(unsigned int bit, u64 offset)
static u64 tiled_offset(const struct intel_gt *gt, static u64 tiled_offset(const struct intel_gt *gt,
u64 v, u64 v,
unsigned int stride, unsigned int stride,
unsigned int tiling) enum client_tiling tiling)
{ {
unsigned int swizzle; unsigned int swizzle;
u64 x, y; u64 x, y;
if (tiling == I915_TILING_NONE) if (tiling == CLIENT_TILING_LINEAR)
return v; return v;
y = div64_u64_rem(v, stride, &x); y = div64_u64_rem(v, stride, &x);
if (tiling == I915_TILING_X) { if (tiling == CLIENT_TILING_X) {
v = div64_u64_rem(y, 8, &y) * stride * 8; v = div64_u64_rem(y, 8, &y) * stride * 8;
v += y * 512; v += y * 512;
v += div64_u64_rem(x, 512, &x) << 12; v += div64_u64_rem(x, 512, &x) << 12;
...@@ -244,12 +251,12 @@ static u64 tiled_offset(const struct intel_gt *gt, ...@@ -244,12 +251,12 @@ static u64 tiled_offset(const struct intel_gt *gt,
return v; return v;
} }
static const char *repr_tiling(int tiling) static const char *repr_tiling(enum client_tiling tiling)
{ {
switch (tiling) { switch (tiling) {
case I915_TILING_NONE: return "linear"; case CLIENT_TILING_LINEAR: return "linear";
case I915_TILING_X: return "X"; case CLIENT_TILING_X: return "X";
case I915_TILING_Y: return "Y"; case CLIENT_TILING_Y: return "Y";
default: return "unknown"; default: return "unknown";
} }
} }
......
...@@ -240,6 +240,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce, ...@@ -240,6 +240,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
if (err) if (err)
goto err_post_unpin; goto err_post_unpin;
intel_engine_pm_might_get(ce->engine);
if (unlikely(intel_context_is_closed(ce))) { if (unlikely(intel_context_is_closed(ce))) {
err = -ENOENT; err = -ENOENT;
goto err_unlock; goto err_unlock;
...@@ -362,8 +364,8 @@ static int __intel_context_active(struct i915_active *active) ...@@ -362,8 +364,8 @@ static int __intel_context_active(struct i915_active *active)
return 0; return 0;
} }
static int sw_fence_dummy_notify(struct i915_sw_fence *sf, static int __i915_sw_fence_call
enum i915_sw_fence_notify state) sw_fence_dummy_notify(struct i915_sw_fence *sf, enum i915_sw_fence_notify state)
{ {
return NOTIFY_DONE; return NOTIFY_DONE;
} }
...@@ -399,6 +401,10 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ...@@ -399,6 +401,10 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
ce->guc_id.id = GUC_INVALID_LRC_ID; ce->guc_id.id = GUC_INVALID_LRC_ID;
INIT_LIST_HEAD(&ce->guc_id.link); INIT_LIST_HEAD(&ce->guc_id.link);
INIT_LIST_HEAD(&ce->destroyed_link);
INIT_LIST_HEAD(&ce->parallel.child_list);
/* /*
* Initialize fence to be complete as this is expected to be complete * Initialize fence to be complete as this is expected to be complete
* unless there is a pending schedule disable outstanding. * unless there is a pending schedule disable outstanding.
...@@ -413,10 +419,17 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ...@@ -413,10 +419,17 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
void intel_context_fini(struct intel_context *ce) void intel_context_fini(struct intel_context *ce)
{ {
struct intel_context *child, *next;
if (ce->timeline) if (ce->timeline)
intel_timeline_put(ce->timeline); intel_timeline_put(ce->timeline);
i915_vm_put(ce->vm); i915_vm_put(ce->vm);
/* Need to put the creation ref for the children */
if (intel_context_is_parent(ce))
for_each_child_safe(ce, child, next)
intel_context_put(child);
mutex_destroy(&ce->pin_mutex); mutex_destroy(&ce->pin_mutex);
i915_active_fini(&ce->active); i915_active_fini(&ce->active);
i915_sw_fence_fini(&ce->guc_state.blocked); i915_sw_fence_fini(&ce->guc_state.blocked);
...@@ -515,24 +528,53 @@ struct i915_request *intel_context_create_request(struct intel_context *ce) ...@@ -515,24 +528,53 @@ struct i915_request *intel_context_create_request(struct intel_context *ce)
struct i915_request *intel_context_find_active_request(struct intel_context *ce) struct i915_request *intel_context_find_active_request(struct intel_context *ce)
{ {
struct intel_context *parent = intel_context_to_parent(ce);
struct i915_request *rq, *active = NULL; struct i915_request *rq, *active = NULL;
unsigned long flags; unsigned long flags;
GEM_BUG_ON(!intel_engine_uses_guc(ce->engine)); GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));
spin_lock_irqsave(&ce->guc_state.lock, flags); /*
list_for_each_entry_reverse(rq, &ce->guc_state.requests, * We search the parent list to find an active request on the submitted
* context. The parent list contains the requests for all the contexts
* in the relationship so we have to do a compare of each request's
* context.
*/
spin_lock_irqsave(&parent->guc_state.lock, flags);
list_for_each_entry_reverse(rq, &parent->guc_state.requests,
sched.link) { sched.link) {
if (rq->context != ce)
continue;
if (i915_request_completed(rq)) if (i915_request_completed(rq))
break; break;
active = rq; active = rq;
} }
spin_unlock_irqrestore(&ce->guc_state.lock, flags); spin_unlock_irqrestore(&parent->guc_state.lock, flags);
return active; return active;
} }
void intel_context_bind_parent_child(struct intel_context *parent,
struct intel_context *child)
{
/*
* It is the caller's responsibility to validate that this function is used
* correctly, but we use GEM_BUG_ON here to ensure that they do.
*/
GEM_BUG_ON(!intel_engine_uses_guc(parent->engine));
GEM_BUG_ON(intel_context_is_pinned(parent));
GEM_BUG_ON(intel_context_is_child(parent));
GEM_BUG_ON(intel_context_is_pinned(child));
GEM_BUG_ON(intel_context_is_child(child));
GEM_BUG_ON(intel_context_is_parent(child));
parent->parallel.child_index = parent->parallel.number_children++;
list_add_tail(&child->parallel.child_link,
&parent->parallel.child_list);
child->parallel.parent = parent;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c" #include "selftest_context.c"
#endif #endif
...@@ -44,6 +44,54 @@ void intel_context_free(struct intel_context *ce); ...@@ -44,6 +44,54 @@ void intel_context_free(struct intel_context *ce);
int intel_context_reconfigure_sseu(struct intel_context *ce, int intel_context_reconfigure_sseu(struct intel_context *ce,
const struct intel_sseu sseu); const struct intel_sseu sseu);
#define PARENT_SCRATCH_SIZE PAGE_SIZE
static inline bool intel_context_is_child(struct intel_context *ce)
{
return !!ce->parallel.parent;
}
static inline bool intel_context_is_parent(struct intel_context *ce)
{
return !!ce->parallel.number_children;
}
static inline bool intel_context_is_pinned(struct intel_context *ce);
static inline struct intel_context *
intel_context_to_parent(struct intel_context *ce)
{
if (intel_context_is_child(ce)) {
/*
* The parent holds ref count to the child so it is always safe
* for the parent to access the child, but the child has a
* pointer to the parent without a ref. To ensure this is safe
* the child should only access the parent pointer while the
* parent is pinned.
*/
GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
return ce->parallel.parent;
} else {
return ce;
}
}
static inline bool intel_context_is_parallel(struct intel_context *ce)
{
return intel_context_is_child(ce) || intel_context_is_parent(ce);
}
void intel_context_bind_parent_child(struct intel_context *parent,
struct intel_context *child);
#define for_each_child(parent, ce)\
list_for_each_entry(ce, &(parent)->parallel.child_list,\
parallel.child_link)
#define for_each_child_safe(parent, ce, cn)\
list_for_each_entry_safe(ce, cn, &(parent)->parallel.child_list,\
parallel.child_link)
/** /**
* intel_context_lock_pinned - Stablises the 'pinned' status of the HW context * intel_context_lock_pinned - Stablises the 'pinned' status of the HW context
* @ce - the context * @ce - the context
...@@ -193,7 +241,13 @@ intel_context_timeline_lock(struct intel_context *ce) ...@@ -193,7 +241,13 @@ intel_context_timeline_lock(struct intel_context *ce)
struct intel_timeline *tl = ce->timeline; struct intel_timeline *tl = ce->timeline;
int err; int err;
err = mutex_lock_interruptible(&tl->mutex); if (intel_context_is_parent(ce))
err = mutex_lock_interruptible_nested(&tl->mutex, 0);
else if (intel_context_is_child(ce))
err = mutex_lock_interruptible_nested(&tl->mutex,
ce->parallel.child_index + 1);
else
err = mutex_lock_interruptible(&tl->mutex);
if (err) if (err)
return ERR_PTR(err); return ERR_PTR(err);
......
...@@ -55,9 +55,13 @@ struct intel_context_ops { ...@@ -55,9 +55,13 @@ struct intel_context_ops {
void (*reset)(struct intel_context *ce); void (*reset)(struct intel_context *ce);
void (*destroy)(struct kref *kref); void (*destroy)(struct kref *kref);
/* virtual engine/context interface */ /* virtual/parallel engine/context interface */
struct intel_context *(*create_virtual)(struct intel_engine_cs **engine, struct intel_context *(*create_virtual)(struct intel_engine_cs **engine,
unsigned int count); unsigned int count,
unsigned long flags);
struct intel_context *(*create_parallel)(struct intel_engine_cs **engines,
unsigned int num_siblings,
unsigned int width);
struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine, struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine,
unsigned int sibling); unsigned int sibling);
}; };
...@@ -113,6 +117,7 @@ struct intel_context { ...@@ -113,6 +117,7 @@ struct intel_context {
#define CONTEXT_NOPREEMPT 8 #define CONTEXT_NOPREEMPT 8
#define CONTEXT_LRCA_DIRTY 9 #define CONTEXT_LRCA_DIRTY 9
#define CONTEXT_GUC_INIT 10 #define CONTEXT_GUC_INIT 10
#define CONTEXT_PERMA_PIN 11
struct { struct {
u64 timeout_us; u64 timeout_us;
...@@ -197,22 +202,80 @@ struct intel_context { ...@@ -197,22 +202,80 @@ struct intel_context {
struct { struct {
/** /**
* @id: handle which is used to uniquely identify this context * @id: handle which is used to uniquely identify this context
* with the GuC, protected by guc->contexts_lock * with the GuC, protected by guc->submission_state.lock
*/ */
u16 id; u16 id;
/** /**
* @ref: the number of references to the guc_id, when * @ref: the number of references to the guc_id, when
* transitioning in and out of zero protected by * transitioning in and out of zero protected by
* guc->contexts_lock * guc->submission_state.lock
*/ */
atomic_t ref; atomic_t ref;
/** /**
* @link: in guc->guc_id_list when the guc_id has no refs but is * @link: in guc->guc_id_list when the guc_id has no refs but is
* still valid, protected by guc->contexts_lock * still valid, protected by guc->submission_state.lock
*/ */
struct list_head link; struct list_head link;
} guc_id; } guc_id;
/**
* @destroyed_link: link in guc->submission_state.destroyed_contexts, in
* list when context is pending to be destroyed (deregistered with the
* GuC), protected by guc->submission_state.lock
*/
struct list_head destroyed_link;
/** @parallel: sub-structure for parallel submission members */
struct {
union {
/**
* @child_list: parent's list of child
* contexts; no protection needed as the list is immutable
* after context creation
*/
struct list_head child_list;
/**
* @child_link: child's link into parent's list of
* children
*/
struct list_head child_link;
};
/** @parent: pointer to parent if child */
struct intel_context *parent;
/**
* @last_rq: last request submitted on a parallel context, used
* to insert submit fences between requests in the parallel
* context
*/
struct i915_request *last_rq;
/**
* @fence_context: fence context composite fence when doing
* parallel submission
*/
u64 fence_context;
/**
* @seqno: seqno for composite fence when doing parallel
* submission
*/
u32 seqno;
/** @number_children: number of children if parent */
u8 number_children;
/** @child_index: index into child_list if child */
u8 child_index;
/** @guc: GuC specific members for parallel submission */
struct {
/** @wqi_head: head pointer in work queue */
u16 wqi_head;
/** @wqi_tail: tail pointer in work queue */
u16 wqi_tail;
/**
* @parent_page: page in context state (ce->state) used
* by parent for work queue, process descriptor
*/
u8 parent_page;
} guc;
} parallel;
#ifdef CONFIG_DRM_I915_SELFTEST #ifdef CONFIG_DRM_I915_SELFTEST
/** /**
* @drop_schedule_enable: Force drop of schedule enable G2H for selftest * @drop_schedule_enable: Force drop of schedule enable G2H for selftest
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#ifndef _INTEL_RINGBUFFER_H_ #ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_ #define _INTEL_RINGBUFFER_H_
#include <asm/cacheflush.h>
#include <drm/drm_util.h> #include <drm/drm_util.h>
#include <linux/hashtable.h> #include <linux/hashtable.h>
...@@ -281,9 +282,19 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) ...@@ -281,9 +282,19 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
return intel_engine_has_preemption(engine); return intel_engine_has_preemption(engine);
} }
#define FORCE_VIRTUAL BIT(0)
struct intel_context * struct intel_context *
intel_engine_create_virtual(struct intel_engine_cs **siblings, intel_engine_create_virtual(struct intel_engine_cs **siblings,
unsigned int count); unsigned int count, unsigned long flags);
static inline struct intel_context *
intel_engine_create_parallel(struct intel_engine_cs **engines,
unsigned int num_engines,
unsigned int width)
{
GEM_BUG_ON(!engines[0]->cops->create_parallel);
return engines[0]->cops->create_parallel(engines, num_engines, width);
}
static inline bool static inline bool
intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine) intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
......
...@@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir) ...@@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
GEM_DEBUG_WARN_ON(iir); GEM_DEBUG_WARN_ON(iir);
} }
static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
u8 logical_instance)
{ {
const struct engine_info *info = &intel_engines[id]; const struct engine_info *info = &intel_engines[id];
struct drm_i915_private *i915 = gt->i915; struct drm_i915_private *i915 = gt->i915;
...@@ -335,6 +336,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) ...@@ -335,6 +336,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->class = info->class; engine->class = info->class;
engine->instance = info->instance; engine->instance = info->instance;
engine->logical_mask = BIT(logical_instance);
__sprint_engine_name(engine); __sprint_engine_name(engine);
engine->props.heartbeat_interval_ms = engine->props.heartbeat_interval_ms =
...@@ -588,6 +590,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) ...@@ -588,6 +590,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
return info->engine_mask; return info->engine_mask;
} }
static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
u8 class, const u8 *map, u8 num_instances)
{
int i, j;
u8 current_logical_id = 0;
for (j = 0; j < num_instances; ++j) {
for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
if (!HAS_ENGINE(gt, i) ||
intel_engines[i].class != class)
continue;
if (intel_engines[i].instance == map[j]) {
logical_ids[intel_engines[i].instance] =
current_logical_id++;
break;
}
}
}
}
static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
{
int i;
u8 map[MAX_ENGINE_INSTANCE + 1];
for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
map[i] = i;
populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
}
/** /**
* intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
* @gt: pointer to struct intel_gt * @gt: pointer to struct intel_gt
...@@ -599,7 +632,8 @@ int intel_engines_init_mmio(struct intel_gt *gt) ...@@ -599,7 +632,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915; struct drm_i915_private *i915 = gt->i915;
const unsigned int engine_mask = init_engine_mask(gt); const unsigned int engine_mask = init_engine_mask(gt);
unsigned int mask = 0; unsigned int mask = 0;
unsigned int i; unsigned int i, class;
u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
int err; int err;
drm_WARN_ON(&i915->drm, engine_mask == 0); drm_WARN_ON(&i915->drm, engine_mask == 0);
...@@ -609,15 +643,23 @@ int intel_engines_init_mmio(struct intel_gt *gt) ...@@ -609,15 +643,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
if (i915_inject_probe_failure(i915)) if (i915_inject_probe_failure(i915))
return -ENODEV; return -ENODEV;
for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
if (!HAS_ENGINE(gt, i)) setup_logical_ids(gt, logical_ids, class);
continue;
err = intel_engine_setup(gt, i); for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
if (err) u8 instance = intel_engines[i].instance;
goto cleanup;
if (intel_engines[i].class != class ||
!HAS_ENGINE(gt, i))
continue;
mask |= BIT(i); err = intel_engine_setup(gt, i,
logical_ids[instance]);
if (err)
goto cleanup;
mask |= BIT(i);
}
} }
/* /*
...@@ -1911,16 +1953,16 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) ...@@ -1911,16 +1953,16 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
struct intel_context * struct intel_context *
intel_engine_create_virtual(struct intel_engine_cs **siblings, intel_engine_create_virtual(struct intel_engine_cs **siblings,
unsigned int count) unsigned int count, unsigned long flags)
{ {
if (count == 0) if (count == 0)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
if (count == 1) if (count == 1 && !(flags & FORCE_VIRTUAL))
return intel_context_create(siblings[0]); return intel_context_create(siblings[0]);
GEM_BUG_ON(!siblings[0]->cops->create_virtual); GEM_BUG_ON(!siblings[0]->cops->create_virtual);
return siblings[0]->cops->create_virtual(siblings, count); return siblings[0]->cops->create_virtual(siblings, count, flags);
} }
struct i915_request * struct i915_request *
......
...@@ -162,6 +162,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) ...@@ -162,6 +162,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
unsigned long flags; unsigned long flags;
bool result = true; bool result = true;
/*
* This is execlist specific behaviour intended to ensure the GPU is
* idle by switching to a known 'safe' context. With GuC submission, the
* same idle guarantee is achieved by other means (disabling
* scheduling). Further, switching to a 'safe' context has no effect
* with GuC submission as the scheduler can just switch back again.
*
* FIXME: Move this backend scheduler specific behaviour into the
* scheduler backend.
*/
if (intel_engine_uses_guc(engine))
return true;
/* GPU is pointing to the void, as good as in the kernel context. */ /* GPU is pointing to the void, as good as in the kernel context. */
if (intel_gt_is_wedged(engine->gt)) if (intel_gt_is_wedged(engine->gt))
return true; return true;
......
...@@ -6,9 +6,11 @@ ...@@ -6,9 +6,11 @@
#ifndef INTEL_ENGINE_PM_H #ifndef INTEL_ENGINE_PM_H
#define INTEL_ENGINE_PM_H #define INTEL_ENGINE_PM_H
#include "i915_drv.h"
#include "i915_request.h" #include "i915_request.h"
#include "intel_engine_types.h" #include "intel_engine_types.h"
#include "intel_wakeref.h" #include "intel_wakeref.h"
#include "intel_gt_pm.h"
static inline bool static inline bool
intel_engine_pm_is_awake(const struct intel_engine_cs *engine) intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
...@@ -16,6 +18,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs *engine) ...@@ -16,6 +18,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
return intel_wakeref_is_active(&engine->wakeref); return intel_wakeref_is_active(&engine->wakeref);
} }
static inline void __intel_engine_pm_get(struct intel_engine_cs *engine)
{
__intel_wakeref_get(&engine->wakeref);
}
static inline void intel_engine_pm_get(struct intel_engine_cs *engine) static inline void intel_engine_pm_get(struct intel_engine_cs *engine)
{ {
intel_wakeref_get(&engine->wakeref); intel_wakeref_get(&engine->wakeref);
...@@ -26,6 +33,21 @@ static inline bool intel_engine_pm_get_if_awake(struct intel_engine_cs *engine) ...@@ -26,6 +33,21 @@ static inline bool intel_engine_pm_get_if_awake(struct intel_engine_cs *engine)
return intel_wakeref_get_if_active(&engine->wakeref); return intel_wakeref_get_if_active(&engine->wakeref);
} }
static inline void intel_engine_pm_might_get(struct intel_engine_cs *engine)
{
if (!intel_engine_is_virtual(engine)) {
intel_wakeref_might_get(&engine->wakeref);
} else {
struct intel_gt *gt = engine->gt;
struct intel_engine_cs *tengine;
intel_engine_mask_t tmp, mask = engine->mask;
for_each_engine_masked(tengine, gt, mask, tmp)
intel_wakeref_might_get(&tengine->wakeref);
}
intel_gt_pm_might_get(engine->gt);
}
static inline void intel_engine_pm_put(struct intel_engine_cs *engine) static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
{ {
intel_wakeref_put(&engine->wakeref); intel_wakeref_put(&engine->wakeref);
...@@ -47,6 +69,21 @@ static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) ...@@ -47,6 +69,21 @@ static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
intel_wakeref_unlock_wait(&engine->wakeref); intel_wakeref_unlock_wait(&engine->wakeref);
} }
static inline void intel_engine_pm_might_put(struct intel_engine_cs *engine)
{
if (!intel_engine_is_virtual(engine)) {
intel_wakeref_might_put(&engine->wakeref);
} else {
struct intel_gt *gt = engine->gt;
struct intel_engine_cs *tengine;
intel_engine_mask_t tmp, mask = engine->mask;
for_each_engine_masked(tengine, gt, mask, tmp)
intel_wakeref_might_put(&tengine->wakeref);
}
intel_gt_pm_might_put(engine->gt);
}
static inline struct i915_request * static inline struct i915_request *
intel_engine_create_kernel_request(struct intel_engine_cs *engine) intel_engine_create_kernel_request(struct intel_engine_cs *engine)
{ {
......
...@@ -269,6 +269,13 @@ struct intel_engine_cs { ...@@ -269,6 +269,13 @@ struct intel_engine_cs {
unsigned int guc_id; unsigned int guc_id;
intel_engine_mask_t mask; intel_engine_mask_t mask;
/**
* @logical_mask: logical mask of engine, reported to user space via
* query IOCTL and used to communicate with the GuC in logical space.
* The logical instance of a physical engine can change based on product
* and fusing.
*/
intel_engine_mask_t logical_mask;
u8 class; u8 class;
u8 instance; u8 instance;
......
...@@ -201,7 +201,8 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) ...@@ -201,7 +201,8 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
} }
static struct intel_context *
-execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
+execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
+unsigned long flags);
static struct i915_request *
__active_request(const struct intel_timeline * const tl,
@@ -3784,7 +3785,8 @@ static void virtual_submit_request(struct i915_request *rq)
}
static struct intel_context *
-execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
+execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
+unsigned long flags)
{
struct virtual_engine *ve;
unsigned int n;
@@ -3877,6 +3879,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
ve->siblings[ve->num_siblings++] = sibling;
ve->base.mask |= sibling->mask;
ve->base.logical_mask |= sibling->logical_mask;
/*
* All physical engines must be compatible for their emission
...
@@ -13,6 +13,59 @@
#include "pxp/intel_pxp_debugfs.h"
#include "uc/intel_uc_debugfs.h"
int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val)
{
int ret = intel_gt_terminally_wedged(gt);
switch (ret) {
case -EIO:
*val = 1;
return 0;
case 0:
*val = 0;
return 0;
default:
return ret;
}
}
int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val)
{
/* Flush any previous reset before applying for a new one */
wait_event(gt->reset.queue,
!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
intel_gt_handle_error(gt, val, I915_ERROR_CAPTURE,
"Manually reset engine mask to %llx", val);
return 0;
}
/*
* keep the interface clean where the first parameter
* is a 'struct intel_gt *' instead of 'void *'
*/
static int __intel_gt_debugfs_reset_show(void *data, u64 *val)
{
return intel_gt_debugfs_reset_show(data, val);
}
static int __intel_gt_debugfs_reset_store(void *data, u64 val)
{
return intel_gt_debugfs_reset_store(data, val);
}
DEFINE_SIMPLE_ATTRIBUTE(reset_fops, __intel_gt_debugfs_reset_show,
__intel_gt_debugfs_reset_store, "%llu\n");
static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
static const struct intel_gt_debugfs_file files[] = {
{ "reset", &reset_fops, NULL },
};
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
}
void intel_gt_debugfs_register(struct intel_gt *gt)
{
struct dentry *root;
@@ -24,6 +77,8 @@ void intel_gt_debugfs_register(struct intel_gt *gt)
if (IS_ERR(root))
return;
gt_debugfs_register(gt, root);
intel_gt_engines_debugfs_register(gt, root);
intel_gt_pm_debugfs_register(gt, root);
intel_sseu_debugfs_register(gt, root);
...
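The new per-gt "reset" entry takes over from the old top-level i915_wedged file: the value written is the engine mask handed to intel_gt_handle_error() above. A minimal userspace sketch follows; the debugfs path and the chosen mask are illustrative assumptions, not part of this change.

/*
 * Illustrative userspace sketch (not part of this series): trigger a manual
 * reset through the new per-gt "reset" debugfs file. The path assumes the
 * default debugfs mount and card 0; adjust for your system.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/dri/0/gt/reset", O_WRONLY);

	if (fd < 0) {
		perror("open gt/reset");
		return 1;
	}
	/* The written value is the engine mask passed to intel_gt_handle_error() */
	if (write(fd, "1", 1) < 0)
		perror("write gt/reset");
	close(fd);
	return 0;
}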
@@ -35,4 +35,8 @@ void intel_gt_debugfs_register_files(struct dentry *root,
const struct intel_gt_debugfs_file *files,
unsigned long count, void *data);
/* functions that need to be accessed by the upper level non-gt interfaces */
int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val);
int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val);
#endif /* INTEL_GT_DEBUGFS_H */
@@ -31,6 +31,11 @@ static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
return intel_wakeref_get_if_active(&gt->wakeref);
}
static inline void intel_gt_pm_might_get(struct intel_gt *gt)
{
intel_wakeref_might_get(&gt->wakeref);
}
static inline void intel_gt_pm_put(struct intel_gt *gt)
{
intel_wakeref_put(&gt->wakeref);
@@ -41,6 +46,15 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt)
intel_wakeref_put_async(&gt->wakeref);
}
static inline void intel_gt_pm_might_put(struct intel_gt *gt)
{
intel_wakeref_might_put(&gt->wakeref);
}
#define with_intel_gt_pm(gt, tmp) \
for (tmp = 1, intel_gt_pm_get(gt); tmp; \
intel_gt_pm_put(gt), tmp = 0)
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);
...
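The with_intel_gt_pm() helper added above is just a get/put pair wrapped in a for-loop so the GT wakeref is held for the scope of the following statement. A minimal caller sketch, where EXAMPLE_REG and the helper function are placeholders and not part of the patch:

/*
 * Illustrative sketch only: hold a GT wakeref across a register read using
 * the new with_intel_gt_pm() scope helper. EXAMPLE_REG is a placeholder
 * offset, not a real i915 register definition.
 */
#include "gt/intel_gt_pm.h"
#include "intel_uncore.h"

#define EXAMPLE_REG _MMIO(0x1234) /* placeholder */

static u32 example_read_with_gt_pm(struct intel_gt *gt)
{
	u32 val = 0;
	int tmp;

	with_intel_gt_pm(gt, tmp)
		val = intel_uncore_read(gt->uncore, EXAMPLE_REG);

	return val;
}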
@@ -20,6 +20,46 @@
#include "intel_uncore.h"
#include "vlv_sideband.h"
int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
{
atomic_inc(&gt->user_wakeref);
intel_gt_pm_get(gt);
if (GRAPHICS_VER(gt->i915) >= 6)
intel_uncore_forcewake_user_get(gt->uncore);
return 0;
}
int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
{
if (GRAPHICS_VER(gt->i915) >= 6)
intel_uncore_forcewake_user_put(gt->uncore);
intel_gt_pm_put(gt);
atomic_dec(&gt->user_wakeref);
return 0;
}
static int forcewake_user_open(struct inode *inode, struct file *file)
{
struct intel_gt *gt = inode->i_private;
return intel_gt_pm_debugfs_forcewake_user_open(gt);
}
static int forcewake_user_release(struct inode *inode, struct file *file)
{
struct intel_gt *gt = inode->i_private;
return intel_gt_pm_debugfs_forcewake_user_release(gt);
}
static const struct file_operations forcewake_user_fops = {
.owner = THIS_MODULE,
.open = forcewake_user_open,
.release = forcewake_user_release,
};
static int fw_domains_show(struct seq_file *m, void *data)
{
struct intel_gt *gt = m->private;
@@ -628,6 +668,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
{ "drpc", &drpc_fops, NULL },
{ "frequency", &frequency_fops, NULL },
{ "forcewake", &fw_domains_fops, NULL },
{ "forcewake_user", &forcewake_user_fops, NULL},
{ "llc", &llc_fops, llc_eval }, { "llc", &llc_fops, llc_eval },
{ "rps_boost", &rps_boost_fops, rps_eval }, { "rps_boost", &rps_boost_fops, rps_eval },
}; };
......
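Since forcewake_user now lives under the per-gt debugfs directory, tools that used the old i915_forcewake_user file need the new path. The sketch below is an illustration with an assumed path for card 0; it shows that forcewake (and the GT wakeref) is held exactly as long as the file stays open.

/*
 * Illustrative userspace sketch: hold forcewake while the file descriptor is
 * open. The debugfs path is an assumption for card 0.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/dri/0/gt/forcewake_user", O_RDONLY);

	if (fd < 0) {
		perror("open forcewake_user");
		return 1;
	}
	/* ... registers can be inspected here without RC6 getting in the way ... */
	sleep(1);
	close(fd); /* release drops forcewake and the GT wakeref */
	return 0;
}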
@@ -13,4 +13,8 @@ struct drm_printer;
void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root);
void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *m);
/* functions that need to be accessed by the upper level non-gt interfaces */
int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt);
int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt);
#endif /* INTEL_GT_PM_DEBUGFS_H */
@@ -3,6 +3,7 @@
* Copyright © 2019 Intel Corporation
*/
#include <asm/tsc.h>
#include <linux/cpufreq.h>
#include "i915_drv.h"
...
@@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
context_size += PAGE_SIZE;
}
if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
context_size += PARENT_SCRATCH_SIZE;
}
obj = i915_gem_object_create_lmem(engine->i915, context_size,
I915_BO_ALLOC_PM_VOLATILE);
if (IS_ERR(obj))
...
@@ -292,7 +292,7 @@ static void xcs_sanitize(struct intel_engine_cs *engine)
sanitize_hwsp(engine);
/* And scrub the dirty cachelines for the HWSP */
-clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);
intel_engine_reset_pinned_contexts(engine);
}
...
@@ -64,7 +64,7 @@ intel_timeline_pin_map(struct intel_timeline *timeline)
timeline->hwsp_map = vaddr;
timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
-clflush(vaddr + ofs);
+drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);
return 0;
}
@@ -225,7 +225,7 @@ void intel_timeline_reset_seqno(const struct intel_timeline *tl)
memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
WRITE_ONCE(*hwsp_seqno, tl->seqno);
-clflush(hwsp_seqno);
+drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
}
void intel_timeline_enter(struct intel_timeline *tl)
...
@@ -3733,7 +3733,7 @@ static int nop_virtual_engine(struct intel_gt *gt,
GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
for (n = 0; n < nctx; n++) {
-ve[n] = intel_engine_create_virtual(siblings, nsibling);
+ve[n] = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve[n])) {
err = PTR_ERR(ve[n]);
nctx = n;
@@ -3929,7 +3929,7 @@ static int mask_virtual_engine(struct intel_gt *gt,
* restrict it to our desired engine within the virtual engine.
*/
-ve = intel_engine_create_virtual(siblings, nsibling);
+ve = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_close;
@@ -4060,7 +4060,7 @@ static int slicein_virtual_engine(struct intel_gt *gt,
i915_request_add(rq);
}
-ce = intel_engine_create_virtual(siblings, nsibling);
+ce = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
@@ -4112,7 +4112,7 @@ static int sliceout_virtual_engine(struct intel_gt *gt,
/* XXX We do not handle oversubscription and fairness with normal rq */
for (n = 0; n < nsibling; n++) {
-ce = intel_engine_create_virtual(siblings, nsibling);
+ce = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
@@ -4214,7 +4214,7 @@ static int preserved_virtual_engine(struct intel_gt *gt,
if (err)
goto out_scratch;
-ve = intel_engine_create_virtual(siblings, nsibling);
+ve = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_scratch;
@@ -4354,7 +4354,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
if (igt_spinner_init(&spin, gt))
return -ENOMEM;
-ve = intel_engine_create_virtual(siblings, nsibling);
+ve = intel_engine_create_virtual(siblings, nsibling, 0);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_spin;
...
@@ -142,6 +142,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
INTEL_GUC_ACTION_LIMIT
};
...
@@ -756,3 +756,32 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p)
}
}
}
void intel_guc_write_barrier(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
/*
* Ensure intel_uncore_write_fw can be used rather than
* intel_uncore_write.
*/
GEM_BUG_ON(guc->send_regs.fw_domains);
/*
* This register is used by the i915 and GuC for MMIO based
* communication. Once we are in this code CTBs are the only
* method the i915 uses to communicate with the GuC so it is
* safe to write to this register (a value of 0 is NOP for MMIO
* communication). If we ever start mixing CTBs and MMIOs a new
* register will have to be chosen. This function is also used
* to enforce ordering of a work queue item write and an update
* to the process descriptor. When a work queue is being used,
* CTBs are also the only mechanism of communication.
*/
intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
} else {
/* wmb() sufficient for a barrier if in smem */
wmb();
}
}
@@ -46,6 +46,15 @@ struct intel_guc {
* submitted until the stalled request is processed.
*/
struct i915_request *stalled_request;
/**
* @submission_stall_reason: reason why submission is stalled
*/
enum {
STALL_NONE,
STALL_REGISTER_CONTEXT,
STALL_MOVE_LRC_TAIL,
STALL_ADD_REQUEST,
} submission_stall_reason;
/* intel_guc_recv interrupt related state */
/** @irq_lock: protects GuC irq state */
@@ -71,16 +80,41 @@
} interrupts;
/**
-* @contexts_lock: protects guc_ids, guc_id_list, ce->guc_id.id, and
-* ce->guc_id.ref when transitioning in and out of zero
-*/
-spinlock_t contexts_lock;
-/** @guc_ids: used to allocate unique ce->guc_id.id values */
-struct ida guc_ids;
-/**
-* @guc_id_list: list of intel_context with valid guc_ids but no refs
-*/
-struct list_head guc_id_list;
+* @submission_state: sub-structure for submission state protected by
+* single lock
+*/
+struct {
+/**
+* @lock: protects everything in submission_state,
+* ce->guc_id.id, and ce->guc_id.ref when transitioning in and
+* out of zero
+*/
+spinlock_t lock;
+/**
+* @guc_ids: used to allocate new guc_ids, single-lrc
+*/
+struct ida guc_ids;
+/**
+* @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+*/
+unsigned long *guc_ids_bitmap;
+/**
+* @guc_id_list: list of intel_context with valid guc_ids but no
+* refs
+*/
+struct list_head guc_id_list;
+/**
+* @destroyed_contexts: list of contexts waiting to be destroyed
+* (deregistered with the GuC)
+*/
+struct list_head destroyed_contexts;
+/**
+* @destroyed_worker: worker to deregister contexts, need as we
+* need to take a GT PM reference and can't from destroy
+* function as it might be in an atomic context (no sleeping)
+*/
+struct work_struct destroyed_worker;
+} submission_state;
/**
* @submission_supported: tracks whether we support GuC submission on
@@ -342,4 +376,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc);
void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
void intel_guc_write_barrier(struct intel_guc *guc);
#endif
@@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt,
for_each_engine(engine, gt, id) {
u8 guc_class = engine_class_to_guc_class(engine->class);
-system_info->mapping_table[guc_class][engine->instance] =
+system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
engine->instance;
}
}
...
@@ -383,28 +383,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct)
return ++ct->requests.last_fence;
}
static void write_barrier(struct intel_guc_ct *ct)
{
struct intel_guc *guc = ct_to_guc(ct);
struct intel_gt *gt = guc_to_gt(guc);
if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
GEM_BUG_ON(guc->send_regs.fw_domains);
/*
* This register is used by the i915 and GuC for MMIO based
* communication. Once we are in this code CTBs are the only
* method the i915 uses to communicate with the GuC so it is
* safe to write to this register (a value of 0 is NOP for MMIO
* communication). If we ever start mixing CTBs and MMIOs a new
* register will have to be chosen.
*/
intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
} else {
/* wmb() sufficient for a barrier if in smem */
wmb();
}
}
static int ct_write(struct intel_guc_ct *ct,
const u32 *action,
u32 len /* in dwords */,
@@ -474,7 +452,7 @@ static int ct_write(struct intel_guc_ct *ct,
* make sure H2G buffer update and LRC tail update (if this triggering a
* submission) are visible before updating the descriptor tail
*/
-write_barrier(ct);
+intel_guc_write_barrier(ct_to_guc(ct));
/* update local copies */
ctb->tail = tail;
...
@@ -52,27 +52,27 @@
#define GUC_DOORBELL_INVALID 256
-#define GUC_WQ_SIZE (PAGE_SIZE * 2)
-/* Work queue item header definitions */
+/*
+* Work queue item header definitions
+*
+* Work queue is circular buffer used to submit complex (multi-lrc) submissions
+* to the GuC. A work queue item is an entry in the circular buffer.
+*/
#define WQ_STATUS_ACTIVE 1
#define WQ_STATUS_SUSPENDED 2
#define WQ_STATUS_CMD_ERROR 3
#define WQ_STATUS_ENGINE_ID_NOT_USED 4
#define WQ_STATUS_SUSPENDED_FROM_RESET 5
-#define WQ_TYPE_SHIFT 0
-#define WQ_TYPE_BATCH_BUF (0x1 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_PSEUDO (0x2 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_INORDER (0x3 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_NOOP (0x4 << WQ_TYPE_SHIFT)
-#define WQ_TARGET_SHIFT 10
-#define WQ_LEN_SHIFT 16
-#define WQ_NO_WCFLUSH_WAIT (1 << 27)
-#define WQ_PRESENT_WORKLOAD (1 << 28)
-#define WQ_RING_TAIL_SHIFT 20
-#define WQ_RING_TAIL_MAX 0x7FF /* 2^11 QWords */
-#define WQ_RING_TAIL_MASK (WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT)
+#define WQ_TYPE_BATCH_BUF 0x1
+#define WQ_TYPE_PSEUDO 0x2
+#define WQ_TYPE_INORDER 0x3
+#define WQ_TYPE_NOOP 0x4
+#define WQ_TYPE_MULTI_LRC 0x5
+#define WQ_TYPE_MASK GENMASK(7, 0)
+#define WQ_LEN_MASK GENMASK(26, 16)
+#define WQ_GUC_ID_MASK GENMASK(15, 0)
+#define WQ_RING_TAIL_MASK GENMASK(28, 18)
#define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0)
#define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1)
@@ -186,7 +186,7 @@ struct guc_process_desc {
u32 wq_status;
u32 engine_presence;
u32 priority;
-u32 reserved[30];
+u32 reserved[36];
} __packed;
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
...
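With the shift-based definitions replaced by masks, work queue item dwords are assembled with FIELD_PREP() against these fields. The helper below is an illustrative sketch of packing a multi-LRC item header under the new definitions, not the driver's actual submission code.

/*
 * Illustrative sketch only: build the first dword of a multi-LRC work queue
 * item from the new mask-based definitions. wq_item_header() is a made-up
 * helper, not the actual GuC submission code.
 */
#include <linux/bitfield.h>

static u32 wq_item_header(u32 len_dw)
{
	return FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
	       FIELD_PREP(WQ_LEN_MASK, len_dw);
}

FIELD_PREP() verifies at compile time that the value fits its mask, which is the main win over the open-coded shifts being removed here.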
@@ -124,7 +124,9 @@ query_engine_info(struct drm_i915_private *i915,
for_each_uabi_engine(engine, i915) {
info.engine.engine_class = engine->uabi_class;
info.engine.engine_instance = engine->uabi_instance;
info.flags = I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE;
info.capabilities = engine->uabi_capabilities;
info.logical_instance = ilog2(engine->logical_mask);
if (copy_to_user(info_ptr, &info, sizeof(info)))
return -EFAULT;
...
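With I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE set, userspace can read the logical instance through the usual two-pass engine-info query. The sketch below is illustrative: the helper name and the trimmed error handling are assumptions, and the uAPI structures are used as defined in the i915 headers.

/*
 * Illustrative userspace sketch: read the new logical_instance field via
 * DRM_I915_QUERY_ENGINE_INFO. Helper name and error handling are assumptions.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void print_logical_instances(int drm_fd)
{
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_ENGINE_INFO,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};
	struct drm_i915_query_engine_info *info;
	unsigned int i;

	/* First pass fills item.length with the required buffer size */
	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0)
		return;

	info = calloc(1, item.length);
	if (!info)
		return;
	item.data_ptr = (uintptr_t)info;

	/* Second pass fills the buffer */
	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) == 0) {
		for (i = 0; i < info->num_engines; i++) {
			const struct drm_i915_engine_info *e = &info->engines[i];

			if (e->flags & I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE)
				printf("class %u instance %u -> logical %u\n",
				       e->engine.engine_class,
				       e->engine.engine_instance,
				       e->logical_instance);
		}
	}
	free(info);
}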
@@ -57,9 +57,16 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma)
int __must_check __i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq);
-int __must_check i915_vma_move_to_active(struct i915_vma *vma,
-struct i915_request *rq,
-unsigned int flags);
+int __must_check _i915_vma_move_to_active(struct i915_vma *vma,
+struct i915_request *rq,
+struct dma_fence *fence,
+unsigned int flags);
static inline int __must_check
i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq,
unsigned int flags)
{
return _i915_vma_move_to_active(vma, rq, &rq->fence, flags);
}
#define __i915_vma_flags(v) ((unsigned long *)&(v)->flags.counter)
...