Commit 8f74c70b authored by Christian König, committed by Alex Deucher
Browse files

drm/amdgpu: block CE CS if not explicitly allowed by module option



The Constant Engine found on gfx6-gfx10 HW has been a notorious source of
problems.

RADV never used it in the first place, radeonsi only used it for a few
releases around 2017 for gfx6-gfx9 before dropping support for it as
well.

While investigating another problem I recently found that submitting
to the CE seems to have been completely broken on gfx9 for quite a while.

Since nobody complained about that problem it most likely means that
nobody is using any of the affected radeonsi versions on current Linux
kernels any more.

So, to potentially phase out support for the CE and eliminate another
source of problems, block submitting CE IBs unless CE submission is
enabled again using a debug flag.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Timur Kristóf <timur.kristof@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 5d55ed19
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1290,6 +1290,7 @@ struct amdgpu_device {
	bool                            debug_disable_gpu_ring_reset;
	bool                            debug_vm_userptr;
	bool                            debug_disable_ce_logs;
	bool                            debug_enable_ce_cs;

	/* Protection for the following isolation structure */
	struct mutex                    enforce_isolation_mutex;
+6 −0
Original line number Diff line number Diff line
@@ -364,6 +364,12 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
	if (p->uf_bo && ring->funcs->no_user_fence)
		return -EINVAL;

	if (!p->adev->debug_enable_ce_cs &&
	    chunk_ib->flags & AMDGPU_IB_FLAG_CE) {
		dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n");
		return -EINVAL;
	}

	if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
	    chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
		if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
+7 −1
Original line number Diff line number Diff line
@@ -144,7 +144,8 @@ enum AMDGPU_DEBUG_MASK {
	AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
	AMDGPU_DEBUG_SMU_POOL = BIT(7),
	AMDGPU_DEBUG_VM_USERPTR = BIT(8),
	AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9)
	AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9),
	AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10)
};

unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -2289,6 +2290,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
		pr_info("debug: disable kernel logs of correctable errors\n");
		adev->debug_disable_ce_logs = true;
	}

	if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) {
		pr_info("debug: allowing command submission to CE engine\n");
		adev->debug_enable_ce_cs = true;
	}
}

static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)