drm/i915/guc: Connect UAPI to GuC multi-lrc interface

Introduce 'set parallel submit' extension to connect UAPI to GuC
multi-lrc interface. Kernel doc in new uAPI should explain it all.

IGT: https://patchwork.freedesktop.org/patch/447008/?series=93071&rev=1
media UMD: https://github.com/intel/media-driver/pull/1252

v2:
 (Daniel Vetter)
  - Add IGT link and placeholder for media UMD link
v3:
 (Kernel test robot)
  - Fix warning in unpin engines call
 (John Harrison)
  - Reword a bunch of the kernel doc
v4:
 (John Harrison)
  - Add comment why perma-pin is done after setting gem context
  - Update some comments / docs for proto contexts
v5:
 (John Harrison)
  - Rework perma-pin comment
  - Add BUG_ON if context is pinned when setting gem context

Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211014172005.27155-17-matthew.brost@intel.com
This commit is contained in:
Matthew Brost
2021-10-14 10:19:56 -07:00
committed by John Harrison
parent d38a929449
commit e5e32171a2
9 changed files with 505 additions and 31 deletions

View File

@@ -1824,6 +1824,7 @@ struct drm_i915_gem_context_param {
* Extensions:
* i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
* i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
* i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)
*/
#define I915_CONTEXT_PARAM_ENGINES 0xa
@@ -2098,6 +2099,135 @@ struct i915_context_engines_bond {
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
/**
* struct i915_context_engines_parallel_submit - Configure engine for
* parallel submission.
*
* Setup a slot in the context engine map to allow multiple BBs to be submitted
* in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
* in parallel. Multiple hardware contexts are created internally in the i915 to
* run these BBs. Once a slot is configured for N BBs, only N BBs can be
* submitted in each execbuf IOCTL, and this is implicit behavior, i.e. the user
* doesn't tell the execbuf IOCTL there are N BBs; the execbuf IOCTL knows how
* many BBs there are based on the slot's configuration. The N BBs are the last
* N buffer objects, or the first N if I915_EXEC_BATCH_FIRST is set.
*
* The default placement behavior is to create implicit bonds between each
* context if each context maps to more than 1 physical engine (e.g. context is
* a virtual engine). Also we only allow contexts of same engine class and these
* contexts must be in logically contiguous order. Examples of the placement
* behavior are described below. Lastly, the default is to not allow BBs to be
* preempted mid-batch. Rather insert coordinated preemption points on all
* hardware contexts between each set of BBs. Flags could be added in the future
* to change both of these default behaviors.
*
* Returns -EINVAL if hardware context placement configuration is invalid or if
* the placement configuration isn't supported on the platform / submission
* interface.
* Returns -ENODEV if extension isn't supported on the platform / submission
* interface.
*
* .. code-block:: none
*
* Examples syntax:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
*
* Example 1 pseudo code:
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=1,
* engines=CS[0],CS[1])
*
* Results in the following valid placement:
* CS[0], CS[1]
*
* Example 2 pseudo code:
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[2],CS[1],CS[3])
*
* Results in the following valid placements:
* CS[0], CS[1]
* CS[2], CS[3]
*
* This can be thought of as two virtual engines, each containing two
* engines thereby making a 2D array. However, there are bonds tying the
* entries together and placing restrictions on how they can be scheduled.
* Specifically, the scheduler can choose only vertical columns from the 2D
* array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the
* scheduler wants to submit to CS[0], it must also choose CS[1] and vice
* versa. Likewise, using CS[2] requires also using CS[3].
* VE[0] = CS[0], CS[2]
* VE[1] = CS[1], CS[3]
*
* Example 3 pseudo code:
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[1],CS[1],CS[3])
*
* Results in the following valid and invalid placements:
* CS[0], CS[1]
* CS[1], CS[3] - Not logically contiguous, return -EINVAL
*/
struct i915_context_engines_parallel_submit {
	/**
	 * @base: base user extension.
	 */
	struct i915_user_extension base;

	/**
	 * @engine_index: slot for parallel engine
	 */
	__u16 engine_index;

	/**
	 * @width: number of contexts per parallel engine or in other words the
	 * number of batches in each submission
	 */
	__u16 width;

	/**
	 * @num_siblings: number of siblings per context or in other words the
	 * number of possible placements for each submission
	 */
	__u16 num_siblings;

	/**
	 * @mbz16: reserved for future use; must be zero
	 */
	__u16 mbz16;

	/**
	 * @flags: all undefined flags must be zero; currently no flags are
	 * defined
	 */
	__u64 flags;

	/**
	 * @mbz64: reserved for future use; must be zero
	 */
	__u64 mbz64[3];

	/**
	 * @engines: 2-d array of engine instances to configure parallel engine
	 *
	 * length = width (i) * num_siblings (j)
	 * index = j + i * num_siblings
	 *
	 * Declared as a flexible array member; the number of entries is
	 * determined by @width and @num_siblings, not by the type.
	 */
	struct i915_engine_class_instance engines[];

/*
 * The packed attribute is spelled out, as on the other packed structs in this
 * header, rather than using the kernel-internal __packed shorthand. Outside
 * the kernel __packed is not a defined macro, so "} __packed;" would be parsed
 * as a variable declaration and the struct would silently lose the attribute.
 */
} __attribute__((packed));
/*
 * I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT() - declare name__ as an
 * anonymous packed struct with the same fields, in the same order, as
 * struct i915_context_engines_parallel_submit, but with the trailing
 * engines array given a fixed size of N__ entries.
 *
 * N__ is the total number of engine entries; per the documentation of
 * struct i915_context_engines_parallel_submit that is width * num_siblings.
 */
#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \
struct i915_user_extension base; \
__u16 engine_index; \
__u16 width; \
__u16 num_siblings; \
__u16 mbz16; \
__u64 flags; \
__u64 mbz64[3]; \
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
/**
* DOC: Context Engine Map uAPI
*
@@ -2157,6 +2287,7 @@ struct i915_context_param_engines {
__u64 extensions; /* linked chain of extension blocks, 0 terminates */
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
struct i915_engine_class_instance engines[0];
} __attribute__((packed));