drm/msm/adreno: Introduce A8x GPU Support

A8x is the next generation of Adreno GPUs, featuring a significant
hardware design change. A major update to the design is the introduction
of Slice architecture. Slices are sort of mini-GPUs within the GPU which
are more independent in processing Graphics and compute workloads. Also,
in addition to the BV and BR pipe we saw in A7x, CP has more concurrency
with additional pipes.

From a software interface perspective, these changes have a significant
impact on the KMD side. First, the GPU register space has been extensively
reorganized. Second, to avoid  a register space explosion caused by the
new slice architecture and additional pipes, many registers are now
virtualized, instead of duplicated as in A7x. KMD must configure an
aperture register with the appropriate slice and pipe ID before accessing
these virtualized registers.

Signed-off-by: Akhil P Oommen <akhilpo@oss.qualcomm.com>
Patchwork: https://patchwork.freedesktop.org/patch/689019/
Message-ID: <20251118-kaana-gpu-support-v4-14-86eeb8e93fb6@oss.qualcomm.com>
Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
This commit is contained in:
Akhil P Oommen
2025-11-18 14:20:41 +05:30
committed by Rob Clark
parent 06cfbca0e1
commit 288a932008
7 changed files with 1320 additions and 33 deletions

View File

@@ -24,6 +24,7 @@ adreno-y := \
adreno/a6xx_gmu.o \
adreno/a6xx_hfi.o \
adreno/a6xx_preempt.o \
adreno/a8xx_gpu.o \
adreno-$(CONFIG_DEBUG_FS) += adreno/a5xx_debugfs.o \

View File

@@ -1174,6 +1174,9 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
return ret;
}
/* Read the slice info on A8x GPUs */
a8xx_gpu_get_slice_info(gpu);
/* Set the bus quota to a reasonable value for boot */
a6xx_gmu_set_initial_bw(gpu, gmu);

View File

@@ -157,7 +157,7 @@ static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
}
}
static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
@@ -245,14 +245,21 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
}
if (!sysprof) {
if (!adreno_is_a7xx(adreno_gpu)) {
if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
/* Turn off protected mode to write to special registers */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 0);
}
OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
OUT_RING(ring, 1);
if (adreno_is_a8xx(adreno_gpu)) {
OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
OUT_RING(ring, 1);
OUT_PKT4(ring, REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
OUT_RING(ring, 1);
} else {
OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
OUT_RING(ring, 1);
}
}
/* Execute the table update */
@@ -281,7 +288,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
* to make sure BV doesn't race ahead while BR is still switching
* pagetables.
*/
if (adreno_is_a7xx(&a6xx_gpu->base)) {
if (adreno_is_a7xx(&a6xx_gpu->base) || adreno_is_a8xx(&a6xx_gpu->base)) {
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
}
@@ -295,20 +302,22 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
OUT_RING(ring, CACHE_INVALIDATE);
if (!sysprof) {
u32 reg_status = adreno_is_a8xx(adreno_gpu) ?
REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS :
REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
/*
* Wait for SRAM clear after the pgtable update, so the
* two can happen in parallel:
*/
OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(
REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status));
OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0));
OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
if (!adreno_is_a7xx(adreno_gpu)) {
if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
/* Re-enable protected mode: */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 1);
@@ -446,6 +455,7 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct msm_ringbuffer *ring = submit->ring;
u32 rbbm_perfctr_cp0, cp_always_on_counter;
unsigned int i, ibs = 0;
adreno_check_and_reenable_stall(adreno_gpu);
@@ -466,10 +476,16 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
if (gpu->nr_rings > 1)
a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);
get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
rbmemptr_stats(ring, index, cpcycles_start));
get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
rbmemptr_stats(ring, index, alwayson_start));
if (adreno_is_a8xx(adreno_gpu)) {
rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0);
cp_always_on_counter = REG_A8XX_CP_ALWAYS_ON_COUNTER;
} else {
rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0);
cp_always_on_counter = REG_A6XX_CP_ALWAYS_ON_COUNTER;
}
get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_start));
get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_start));
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
OUT_RING(ring, CP_SET_THREAD_BOTH);
@@ -516,14 +532,17 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_RING(ring, 0x00e); /* IB1LIST end */
}
get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
rbmemptr_stats(ring, index, cpcycles_end));
get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
rbmemptr_stats(ring, index, alwayson_end));
get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_end));
get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_end));
/* Write the fence to the scratch register */
OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
OUT_RING(ring, submit->seqno);
if (adreno_is_a8xx(adreno_gpu)) {
OUT_PKT4(ring, REG_A8XX_CP_SCRATCH_GLOBAL(2), 1);
OUT_RING(ring, submit->seqno);
} else {
OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
OUT_RING(ring, submit->seqno);
}
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
OUT_RING(ring, CP_SET_THREAD_BR);
@@ -723,8 +742,11 @@ static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
/* Copy the data into the internal struct to drop the const qualifier (temporarily) */
*cfg = *common_cfg;
cfg->ubwc_swizzle = 0x6;
cfg->highest_bank_bit = 15;
/* Use common config as is for A8x */
if (!adreno_is_a8xx(gpu)) {
cfg->ubwc_swizzle = 0x6;
cfg->highest_bank_bit = 15;
}
if (adreno_is_a610(gpu)) {
cfg->highest_bank_bit = 13;
@@ -1013,7 +1035,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
return false;
/* A7xx is safe! */
if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu))
if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || adreno_is_a8xx(adreno_gpu))
return true;
/*
@@ -1127,7 +1149,7 @@ static int a6xx_ucode_load(struct msm_gpu *gpu)
return 0;
}
static int a6xx_zap_shader_init(struct msm_gpu *gpu)
int a6xx_zap_shader_init(struct msm_gpu *gpu)
{
static bool loaded;
int ret;
@@ -2089,7 +2111,7 @@ static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu)
u32 fuse_val;
int ret;
if (adreno_is_a750(adreno_gpu)) {
if (adreno_is_a750(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) {
/*
* Assume that if qcom scm isn't available, that whatever
* replacement allows writing the fuse register ourselves.
@@ -2115,9 +2137,9 @@ static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu)
return ret;
/*
* On a750 raytracing may be disabled by the firmware, find out
* whether that's the case. The scm call above sets the fuse
* register.
* On A7XX_GEN3 and newer, raytracing may be disabled by the
* firmware, find out whether that's the case. The scm call
* above sets the fuse register.
*/
fuse_val = a6xx_llc_read(a6xx_gpu,
REG_A7XX_CX_MISC_SW_FUSE_VALUE);
@@ -2178,7 +2200,7 @@ void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_
void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
{
/* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */
if (adreno_is_a610(to_adreno_gpu(gpu)))
if (adreno_is_a610(to_adreno_gpu(gpu)) || adreno_is_a8xx(to_adreno_gpu(gpu)))
return;
gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);
@@ -2209,7 +2231,12 @@ static int a6xx_gmu_pm_resume(struct msm_gpu *gpu)
msm_devfreq_resume(gpu);
adreno_is_a7xx(adreno_gpu) ? a7xx_llc_activate(a6xx_gpu) : a6xx_llc_activate(a6xx_gpu);
if (adreno_is_a8xx(adreno_gpu))
a8xx_llc_activate(a6xx_gpu);
else if (adreno_is_a7xx(adreno_gpu))
a7xx_llc_activate(a6xx_gpu);
else
a6xx_llc_activate(a6xx_gpu);
return ret;
}
@@ -2589,10 +2616,8 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
adreno_gpu->base.hw_apriv =
!!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
/* gpu->info only gets assigned in adreno_gpu_init() */
is_a7xx = config->info->family == ADRENO_7XX_GEN1 ||
config->info->family == ADRENO_7XX_GEN2 ||
config->info->family == ADRENO_7XX_GEN3;
/* gpu->info only gets assigned in adreno_gpu_init(). A8x is included intentionally */
is_a7xx = config->info->family >= ADRENO_7XX_GEN1;
a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);
@@ -2630,7 +2655,7 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
return ERR_PTR(ret);
}
if (adreno_is_a7xx(adreno_gpu)) {
if (adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) {
ret = a7xx_cx_mem_init(a6xx_gpu);
if (ret) {
a6xx_destroy(&(a6xx_gpu->base.base));
@@ -2754,3 +2779,30 @@ const struct adreno_gpu_funcs a7xx_gpu_funcs = {
.bus_halt = a6xx_bus_clear_pending_transactions,
.mmu_fault_handler = a6xx_fault_handler,
};
const struct adreno_gpu_funcs a8xx_gpu_funcs = {
.base = {
.get_param = adreno_get_param,
.set_param = adreno_set_param,
.hw_init = a8xx_hw_init,
.ucode_load = a6xx_ucode_load,
.pm_suspend = a6xx_gmu_pm_suspend,
.pm_resume = a6xx_gmu_pm_resume,
.recover = a8xx_recover,
.submit = a7xx_submit,
.active_ring = a6xx_active_ring,
.irq = a8xx_irq,
.destroy = a6xx_destroy,
.gpu_busy = a8xx_gpu_busy,
.gpu_get_freq = a6xx_gmu_get_freq,
.gpu_set_freq = a6xx_gpu_set_freq,
.create_vm = a6xx_create_vm,
.create_private_vm = a6xx_create_private_vm,
.get_rptr = a6xx_get_rptr,
.progress = a8xx_progress,
},
.init = a6xx_gpu_init,
.get_timestamp = a8xx_gmu_get_timestamp,
.bus_halt = a8xx_bus_clear_pending_transactions,
.mmu_fault_handler = a8xx_fault_handler,
};

View File

@@ -46,6 +46,8 @@ struct a6xx_info {
const struct adreno_protect *protect;
const struct adreno_reglist_list *pwrup_reglist;
const struct adreno_reglist_list *ifpc_reglist;
const struct adreno_reglist_pipe *nonctxt_reglist;
u32 max_slices;
u32 gmu_chipid;
u32 gmu_cgc_mode;
u32 prim_fifo_threshold;
@@ -101,6 +103,11 @@ struct a6xx_gpu {
void *htw_llc_slice;
bool have_mmu500;
bool hung;
u32 cached_aperture;
spinlock_t aperture_lock;
u32 slice_mask;
};
#define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base)
@@ -302,5 +309,19 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state);
void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off);
void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert);
int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b);
void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
int a6xx_zap_shader_init(struct msm_gpu *gpu);
void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off);
int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data);
void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value);
u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate);
int a8xx_gpu_feature_probe(struct msm_gpu *gpu);
void a8xx_gpu_get_slice_info(struct msm_gpu *gpu);
int a8xx_hw_init(struct msm_gpu *gpu);
irqreturn_t a8xx_irq(struct msm_gpu *gpu);
void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu);
bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
void a8xx_recover(struct msm_gpu *gpu);
#endif /* __A6XX_GPU_H__ */

File diff suppressed because it is too large Load Diff

View File

@@ -88,6 +88,13 @@ struct adreno_reglist {
u32 value;
};
/* Reglist with pipe information */
struct adreno_reglist_pipe {
u32 offset;
u32 value;
u32 pipe;
};
struct adreno_speedbin {
uint16_t fuse;
uint16_t speedbin;

View File

@@ -60,6 +60,7 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
<reg32 offset="0x1f400" name="GMU_ICACHE_CONFIG"/>
<reg32 offset="0x1f401" name="GMU_DCACHE_CONFIG"/>
<reg32 offset="0x1f40f" name="GMU_SYS_BUS_CONFIG"/>
<reg32 offset="0x1f50b" name="GMU_MRC_GBIF_QOS_CTRL"/>
<reg32 offset="0x1f800" name="GMU_CM3_SYSRESET"/>
<reg32 offset="0x1f801" name="GMU_CM3_BOOT_CONFIG"/>
<reg32 offset="0x1f81a" name="GMU_CM3_FW_BUSY"/>