Commit a78313bb authored by Dave Airlie
Browse files

Merge tag 'drm-intel-gt-next-2024-06-12' of...

Merge tag 'drm-intel-gt-next-2024-06-12' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-next

UAPI Changes:

- Support replaying GPU hangs with captured context image (Tvrtko Ursulin)

Driver Changes:

Fixes/improvements/new stuff:

- Automate CCS Mode setting during engine resets [gt] (Andi Shyti)
- Revert "drm/i915: Remove extra multi-gt pm-references" (Janusz Krzysztofik)
- Fix HAS_REGION() usage in intel_gt_probe_lmem() (Ville Syrjälä)
- Disarm breadcrumbs if engines are already idle [gt] (Chris Wilson)
- Shadow default engine context image in the context (Tvrtko Ursulin)
- Support replaying GPU hangs with captured context image (Tvrtko Ursulin)
- avoid FIELD_PREP warning [guc] (Arnd Bergmann)
- Fix CCS id's calculation for CCS mode setting [gt] (Andi Shyti)
- Increase FLR timeout from 3s to 9s (Andi Shyti)
- Update workaround 14018575942 [mtl] (Angus Chen)

Future platform enablement:

- Enable w/a 16021333562 for DG2, MTL and ARL [guc] (John Harrison)

Miscellaneous:

- Pass the region ID rather than a bitmask to HAS_REGION() (Ville Syrjälä)
- Remove counter productive REGION_* wrappers (Ville Syrjälä)
- Fix typo [gem/i915_gem_ttm_move] (Deming Wang)
- Delete the live_hearbeat_fast selftest [gt] (Krzysztof Niemiec)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tursulin@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Zmmazub+U9ewH9ts@linux
parents 365aa9f5 79655e86
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -16,6 +16,23 @@ config DRM_I915_WERROR

	  If in doubt, say "N".

# Build-time gate for the debug-only GPU hang replay userspace API.
# Only selectable with DRM_I915 and EXPERT, and off by default. As the help
# text notes, the API must additionally be activated at boot time through the
# enable_debug_only_api module parameter.
config DRM_I915_REPLAY_GPU_HANGS_API
	bool "Enable GPU hang replay userspace API"
	depends on DRM_I915
	depends on EXPERT
	default n
	help
	  Choose this option if you want to enable special and unstable
	  userspace API used for replaying GPU hangs on a running system.

	  This API is intended to be used by userspace graphics stack developers
	  and provides no stability guarantees.

	  The API needs to be activated at boot time using the
	  enable_debug_only_api module parameter.

	  If in doubt, say "N".

config DRM_I915_DEBUG
	bool "Enable additional driver debugging"
	depends on DRM_I915
+113 −0
Original line number Diff line number Diff line
@@ -78,6 +78,7 @@
#include "gt/intel_engine_user.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_ring.h"
#include "gt/shmem_utils.h"

#include "pxp/intel_pxp.h"

@@ -957,6 +958,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
	case I915_CONTEXT_PARAM_BAN_PERIOD:
	case I915_CONTEXT_PARAM_RINGSIZE:
	case I915_CONTEXT_PARAM_CONTEXT_IMAGE:
	default:
		ret = -EINVAL;
		break;
@@ -2104,6 +2106,95 @@ static int get_protected(struct i915_gem_context *ctx,
	return 0;
}

/*
 * set_context_image - install a userspace supplied context image on an engine
 * @ctx: GEM context whose engine is being configured
 * @args: setparam arguments; args->value points at a
 *        struct i915_gem_context_param_context_image in user memory
 *
 * Only available when CONFIG_DRM_I915_REPLAY_GPU_HANGS_API is built in and
 * the enable_debug_only_api parameter is set. The image is copied from
 * userspace, wrapped in a shmem file and stored as the default state of the
 * selected intel_context. On success args->size is set to the size of the
 * parameter structure.
 *
 * Return: 0 on success, -EINVAL for a disabled API or malformed arguments,
 * -EFAULT if a user copy fails, -ENOMEM on allocation failure, -EBUSY if the
 * context is already allocated or already carries its own state, or the
 * error reported by lookup_user_engine() / shmem_create_from_data().
 */
static int set_context_image(struct i915_gem_context *ctx,
			     struct drm_i915_gem_context_param *args)
{
	struct i915_gem_context_param_context_image user;
	unsigned long lookup_flags = 0;
	struct intel_context *ce;
	struct file *image_file;
	void *image;
	int err;

	/* Both the build-time and the run-time gate must be open. */
	if (!IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API) ||
	    !ctx->i915->params.enable_debug_only_api)
		return -EINVAL;

	if (args->size < sizeof(user))
		return -EINVAL;

	if (copy_from_user(&user, u64_to_user_ptr(args->value), sizeof(user)))
		return -EFAULT;

	/* Reject non-zero reserved fields and unknown flags. */
	if (user.mbz || (user.flags & ~(I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX)))
		return -EINVAL;

	if (user.flags & I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX)
		lookup_flags = LOOKUP_USER_INDEX;

	ce = lookup_user_engine(ctx, lookup_flags, &user.engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	/* The supplied image must cover the engine's full context size. */
	err = -EINVAL;
	if (user.size < ce->engine->context_size)
		goto out_ce;

	/*
	 * The check below is inherently racy. As this is a debug only API,
	 * userspace which configures a context while concurrently using it
	 * from another thread simply risks running without the image it has
	 * just tried to apply.
	 */
	err = -EBUSY;
	if (drm_WARN_ON_ONCE(&ctx->i915->drm,
			     test_bit(CONTEXT_ALLOC_BIT, &ce->flags)))
		goto out_ce;

	err = -ENOMEM;
	image = kmalloc(ce->engine->context_size, GFP_KERNEL);
	if (!image)
		goto out_ce;

	err = -EFAULT;
	if (copy_from_user(image, u64_to_user_ptr(user.image),
			   ce->engine->context_size))
		goto out_state;

	image_file = shmem_create_from_data(ce->engine->name, image,
					    ce->engine->context_size);
	if (IS_ERR(image_file)) {
		err = PTR_ERR(image_file);
		goto out_state;
	}

	/* A true return means the own-state flag was already set. */
	if (intel_context_set_own_state(ce)) {
		fput(image_file);
		err = -EBUSY;
		goto out_state;
	}

	ce->default_state = image_file;
	args->size = sizeof(user);
	err = 0;

out_state:
	/* The temporary kernel copy is released on success and failure alike. */
	kfree(image);
out_ce:
	intel_context_put(ce);
	return err;
}

static int ctx_setparam(struct drm_i915_file_private *fpriv,
			struct i915_gem_context *ctx,
			struct drm_i915_gem_context_param *args)
@@ -2156,6 +2247,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
		ret = set_persistence(ctx, args);
		break;

	case I915_CONTEXT_PARAM_CONTEXT_IMAGE:
		ret = set_context_image(ctx, args);
		break;

	case I915_CONTEXT_PARAM_PROTECTED_CONTENT:
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
	case I915_CONTEXT_PARAM_BAN_PERIOD:
@@ -2500,6 +2595,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
	case I915_CONTEXT_PARAM_BAN_PERIOD:
	case I915_CONTEXT_PARAM_ENGINES:
	case I915_CONTEXT_PARAM_RINGSIZE:
	case I915_CONTEXT_PARAM_CONTEXT_IMAGE:
	default:
		ret = -EINVAL;
		break;
@@ -2612,5 +2708,22 @@ int __init i915_gem_context_module_init(void)
	if (!slab_luts)
		return -ENOMEM;

	if (IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API)) {
		pr_notice("**************************************************************\n");
		pr_notice("**     NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE     **\n");
		pr_notice("**                                                          **\n");
		if (i915_modparams.enable_debug_only_api)
			pr_notice("** i915.enable_debug_only_api is intended to be set         **\n");
		else
			pr_notice("** CONFIG_DRM_I915_REPLAY_GPU_HANGS_API builds are intended **\n");
		pr_notice("** for specific userspace graphics stack developers only!   **\n");
		pr_notice("**                                                          **\n");
		pr_notice("** If you are seeing this message please report this to the **\n");
		pr_notice("** provider of your kernel build.                           **\n");
		pr_notice("**                                                          **\n");
		pr_notice("**     NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE     **\n");
		pr_notice("**************************************************************\n");
	}

	return 0;
}
+1 −1
Original line number Diff line number Diff line
@@ -155,7 +155,7 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
 * @bo: The ttm buffer object.
 *
 * This function prepares an object for move by removing all GPU bindings,
 * removing all CPU mapings and finally releasing the pages sg-table.
 * removing all CPU mappings and finally releasing the pages sg-table.
 *
 * Return: 0 if successful, negative error code on error.
 */
+2 −0
Original line number Diff line number Diff line
@@ -27,6 +27,8 @@ static void rcu_context_free(struct rcu_head *rcu)
	struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);

	trace_intel_context_free(ce);
	if (intel_context_has_own_state(ce))
		fput(ce->default_state);
	kmem_cache_free(slab_ce, ce);
}

+22 −0
Original line number Diff line number Diff line
@@ -375,6 +375,28 @@ intel_context_clear_nopreempt(struct intel_context *ce)
	clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
}

#if IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API)
static inline bool intel_context_has_own_state(const struct intel_context *ce)
{
	return test_bit(CONTEXT_OWN_STATE, &ce->flags);
}

static inline bool intel_context_set_own_state(struct intel_context *ce)
{
	return test_and_set_bit(CONTEXT_OWN_STATE, &ce->flags);
}
#else
static inline bool intel_context_has_own_state(const struct intel_context *ce)
{
	return false;
}

static inline bool intel_context_set_own_state(struct intel_context *ce)
{
	return true;
}
#endif

u64 intel_context_get_total_runtime_ns(struct intel_context *ce);
u64 intel_context_get_avg_runtime_ns(struct intel_context *ce);

Loading