Commit 85150626 authored by Victor Lu, committed by Alex Deucher
Browse files

drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v5)



amdgpu_kiq_wreg() and amdgpu_kiq_rreg() are hardcoded to use the KIQ of MEC engine 0.

Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, and define W/RREG32_XCC
and amdgpu_device_xcc_wreg/rreg, which pass the new xcc_id parameter through.

Using amdgpu_sriov_runtime to determine whether to access via kiq or
RLC is sufficient for now.

v5: add condition in amdgpu_device_xcc_w/rreg, remove trace func call

v4: avoid using amdgpu_sriov_w/rreg

v3: use W/RREG32_XCC to handle non-kiq case

v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters
    of amdgpu_device_wreg/rreg

Signed-off-by: Victor Lu <victorchengchi.lu@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 76d2da18
Loading
Loading
Loading
Loading
+11 −2
Original line number Diff line number Diff line
@@ -1159,11 +1159,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags);
u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
				    u64 reg_addr);
uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
				uint32_t reg, uint32_t acc_flags,
				uint32_t xcc_id);
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags);
void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
				     u64 reg_addr, u32 reg_data);
void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t v,
			    uint32_t acc_flags,
			    uint32_t xcc_id);
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v, uint32_t xcc_id);
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
@@ -1204,8 +1211,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)

#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)

#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
@@ -1215,6 +1222,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
/*
 * Extract the value of FIELD from register value v: isolate the field's bits
 * with FIELD##_MASK, then shift them down by FIELD##_SHIFT. This is the
 * inverse of REG_SET(); the previous body was a copy of REG_SET() and shifted
 * left instead of extracting.
 */
#define REG_GET(FIELD, v) (((v) & FIELD##_MASK) >> FIELD##_SHIFT)
/* XCC-aware 32 bit accessors: inst is forwarded as the xcc_id argument, acc_flags is 0. */
#define RREG32_XCC(reg, inst) \
	amdgpu_device_xcc_rreg(adev, (reg), 0, (inst))
#define WREG32_XCC(reg, v, inst) \
	amdgpu_device_xcc_wreg(adev, (reg), (v), 0, (inst))
#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
+1 −1
Original line number Diff line number Diff line
@@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
	hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI);

	for (reg = hqd_base; reg <= hqd_end; reg++)
		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
		WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);


	/* Activate doorbell logic before triggering WPTR poll. */
+1 −1
Original line number Diff line number Diff line
@@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,

	for (reg = hqd_base;
	     reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
		WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);


	/* Activate doorbell logic before triggering WPTR poll. */
+87 −2
Original line number Diff line number Diff line
@@ -73,6 +73,7 @@
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"
#include "amdgpu_virt.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
@@ -472,7 +473,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			ret = amdgpu_kiq_rreg(adev, reg, 0);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
@@ -509,6 +510,49 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
	BUG();
}


/**
 * amdgpu_device_xcc_rreg - XCC-aware read of a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 * @xcc_id: xcc accelerated compute core id
 *
 * Offsets outside the mapped MMIO range are read through adev->pcie_rreg.
 * Inside the range the read is routed, in this order of preference:
 * through amdgpu_virt_rlcg_reg_rw() for an SR-IOV VF that is not in runtime
 * mode (when RLCG access is supported and an access flag is found), through
 * amdgpu_kiq_rreg() with @xcc_id in SR-IOV runtime mode (unless
 * AMDGPU_REGS_NO_KIQ is set or the reset domain semaphore cannot be taken),
 * or as a plain readl() on the MMIO mapping.
 *
 * Returns the 32 bit value from the offset specified, or 0 when hardware
 * access is skipped.
 */
uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
				uint32_t reg, uint32_t acc_flags,
				uint32_t xcc_id)
{
	uint32_t rlcg_flag;
	uint32_t val;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	/* Not covered by the MMIO mapping: use the indirect accessor (byte offset). */
	if ((reg * 4) >= adev->rmmio_size)
		return adev->pcie_rreg(adev, reg * 4);

	/* SR-IOV VF outside of runtime: go through the RLCG interface. */
	if (amdgpu_sriov_vf(adev) &&
	    !amdgpu_sriov_runtime(adev) &&
	    adev->gfx.rlc.rlcg_reg_access_supported &&
	    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
						 GC_HWIP, false,
						 &rlcg_flag))
		return amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);

	/*
	 * SR-IOV runtime: use the KIQ, but only while the reset domain
	 * semaphore can be held for reading; the trylock must stay last in
	 * the condition so it is only taken when the KIQ path is chosen.
	 */
	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
	    amdgpu_sriov_runtime(adev) &&
	    down_read_trylock(&adev->reset_domain->sem)) {
		val = amdgpu_kiq_rreg(adev, reg, xcc_id);
		up_read(&adev->reset_domain->sem);
		return val;
	}

	/* Default: direct MMIO read. */
	return readl(((void __iomem *)adev->rmmio) + (reg * 4));
}

/*
 * MMIO register write with bytes helper functions
 * @offset:bytes offset from MMIO start
@@ -556,7 +600,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v);
			amdgpu_kiq_wreg(adev, reg, v, 0);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
@@ -597,6 +641,47 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
	}
}

/**
 * amdgpu_device_xcc_wreg - XCC-aware write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 * @xcc_id: xcc accelerated compute core id
 *
 * Offsets outside the mapped MMIO range are written through adev->pcie_wreg.
 * Inside the range the write is routed, in this order of preference:
 * through amdgpu_virt_rlcg_reg_rw() for an SR-IOV VF that is not in runtime
 * mode (when RLCG access is supported and an access flag is found), through
 * amdgpu_kiq_wreg() with @xcc_id in SR-IOV runtime mode (unless
 * AMDGPU_REGS_NO_KIQ is set or the reset domain semaphore cannot be taken),
 * or as a plain writel() on the MMIO mapping.
 *
 * Nothing is written when hardware access is skipped.
 */
void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags, uint32_t xcc_id)
{
	uint32_t rlcg_flag;

	if (amdgpu_device_skip_hw_access(adev))
		return;

	/* Not covered by the MMIO mapping: use the indirect accessor (byte offset). */
	if ((reg * 4) >= adev->rmmio_size) {
		adev->pcie_wreg(adev, reg * 4, v);
		return;
	}

	/* SR-IOV VF outside of runtime: go through the RLCG interface. */
	if (amdgpu_sriov_vf(adev) &&
	    !amdgpu_sriov_runtime(adev) &&
	    adev->gfx.rlc.rlcg_reg_access_supported &&
	    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
						 GC_HWIP, true,
						 &rlcg_flag)) {
		amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
		return;
	}

	/*
	 * SR-IOV runtime: use the KIQ, but only while the reset domain
	 * semaphore can be held for reading; the trylock must stay last in
	 * the condition so it is only taken when the KIQ path is chosen.
	 */
	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
	    amdgpu_sriov_runtime(adev) &&
	    down_read_trylock(&adev->reset_domain->sem)) {
		amdgpu_kiq_wreg(adev, reg, v, xcc_id);
		up_read(&adev->reset_domain->sem);
		return;
	}

	/* Default: direct MMIO write. */
	writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
+4 −4
Original line number Diff line number Diff line
@@ -931,12 +931,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
		func(adev, ras_error_status, i);
}

uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0, value = 0;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;

	if (amdgpu_device_skip_hw_access(adev))
@@ -999,12 +999,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
	return ~0;
}

void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_wreg);
Loading