Commit 27442758 authored by Dave Airlie
Browse files

Merge tag 'amd-drm-next-6.7-2023-10-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-6.7-2023-10-13:

amdgpu:
- DC replay fixes
- Misc code cleanups and spelling fixes
- Documentation updates
- RAS EEPROM Updates
- FRU EEPROM Updates
- IP discovery updates
- SR-IOV fixes
- RAS updates
- DC PQ fixes
- SMU 13.0.6 updates
- GC 11.5 Support
- NBIO 7.11 Support
- GMC 11 Updates
- Reset fixes
- SMU 11.5 Updates
- SMU 13.0 OD support
- Use flexible arrays for bo list handling
- W=1 Fixes
- SubVP fixes
- DPIA fixes
- DCN 3.5 Support
- Devcoredump fixes
- VPE 6.1 support
- VCN 4.0 Updates
- S/G display fixes
- DML fixes
- DML2 Support
- MST fixes
- VRR fixes
- Enable seamless boot in more cases
- Enable content type property for HDMI
- OLED fixes
- Rework and clean up GPUVM TLB flushing
- DC ODM fixes
- DP 2.x fixes
- AGP aperture fixes
- SDMA firmware loading cleanups
- Cyan Skillfish GPU clock counter fix
- GC 11 GART fix
- Cache GPU fault info for userspace queries
- DC cursor check fixes
- eDP fixes
- DC FP handling fixes
- Variable sized array fixes
- SMU 13.0.x fixes
- IB start and size alignment fixes for VCN
- SMU 14 Support
- Suspend and resume sequence rework
- vkms fix

amdkfd:
- GC 11 fixes
- GC 10 fixes
- Doorbell fixes
- CWSR fixes
- SVM fixes
- Clean up GC info enumeration
- Rework memory limit handling
- Coherent memory handling fixes
- Use partial migrations in GPU faults
- TLB flush fixes
- DMA unmap fixes
- GC 9.4.3 fixes
- SQ interrupt fix
- GTT mapping fix
- GC 11.5 Support

radeon:
- Misc code cleanups
- W=1 Fixes
- Fix possible buffer overflow
- Fix possible NULL pointer dereference

UAPI:
- Add EXT_COHERENT memory allocation flags.  These allow for system scope atomics.
  Proposed userspace: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88
- Add support for the new VPE engine.  This is a memory-to-memory copy engine with advanced scaling, CSC, and color management features.
  Proposed mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25713
- Add INFO IOCTL interface to query GPU faults
  Proposed Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238
  Proposed libdrm MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/298



Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231013175758.1735031-1-alexander.deucher@amd.com
parents 08057253 cd905115
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -26,12 +26,30 @@ serial_number
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
   :doc: serial_number

fru_id
------

.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
   :doc: fru_id

manufacturer
------------

.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
   :doc: manufacturer

unique_id
---------

.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
   :doc: unique_id

board_info
----------

.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
   :doc: board_info

Accelerated Processing Units (APU) Info
---------------------------------------

+30 −0
Original line number Diff line number Diff line
@@ -64,6 +64,36 @@ gpu_metrics
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
   :doc: gpu_metrics

fan_curve
---------

.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
   :doc: fan_curve

acoustic_limit_rpm_threshold
----------------------------

.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
   :doc: acoustic_limit_rpm_threshold

acoustic_target_rpm_threshold
-----------------------------

.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
   :doc: acoustic_target_rpm_threshold

fan_target_temperature
----------------------

.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
   :doc: fan_target_temperature

fan_minimum_pwm
---------------

.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
   :doc: fan_minimum_pwm

GFXOFF
======

+18 −4
Original line number Diff line number Diff line
@@ -98,7 +98,7 @@ amdgpu-y += \
	vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
	nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
	sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
	nbio_v7_9.o aqua_vanjaram.o
	nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o

# add DF block
amdgpu-y += \
@@ -113,11 +113,12 @@ amdgpu-y += \
	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
	gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
	mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
	mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o
	mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o mmhub_v3_3.o \
	gfxhub_v11_5_0.o

# add UMC block
amdgpu-y += \
	umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o
	umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o

# add IH block
amdgpu-y += \
@@ -205,14 +206,27 @@ amdgpu-y += \
	vcn_v3_0.o \
	vcn_v4_0.o \
	vcn_v4_0_3.o \
	vcn_v4_0_5.o \
	amdgpu_jpeg.o \
	jpeg_v1_0.o \
	jpeg_v2_0.o \
	jpeg_v2_5.o \
	jpeg_v3_0.o \
	jpeg_v4_0.o \
	jpeg_v4_0_3.o
	jpeg_v4_0_3.o \
	jpeg_v4_0_5.o

# add VPE block
amdgpu-y += \
	amdgpu_vpe.o \
	vpe_v6_1.o

# add UMSCH block
amdgpu-y += \
	amdgpu_umsch_mm.o \
	umsch_mm_v4_0.o

#
# add ATHUB block
amdgpu-y += \
	athub_v1_0.o \
+20 −19
Original line number Diff line number Diff line
@@ -35,7 +35,7 @@ static bool aldebaran_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;

	if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
	if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
	     adev->gmc.xgmi.connected_to_cpu))
		return true;

@@ -48,27 +48,24 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
{
	struct amdgpu_reset_handler *handler;
	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
	int i;

	if (reset_context->method == AMD_RESET_METHOD_NONE) {
		if (aldebaran_is_mode2_default(reset_ctl))
			reset_context->method = AMD_RESET_METHOD_MODE2;
		else
			reset_context->method = amdgpu_asic_reset_method(adev);
	}

	if (reset_context->method != AMD_RESET_METHOD_NONE) {
		dev_dbg(adev->dev, "Getting reset handler for method %d\n",
			reset_context->method);
		list_for_each_entry(handler, &reset_ctl->reset_handlers,
				     handler_list) {
		for_each_handler(i, handler, reset_ctl) {
			if (handler->reset_method == reset_context->method)
				return handler;
		}
	}

	if (aldebaran_is_mode2_default(reset_ctl)) {
		list_for_each_entry(handler, &reset_ctl->reset_handlers,
				     handler_list) {
			if (handler->reset_method == AMD_RESET_METHOD_MODE2) {
				reset_context->method = AMD_RESET_METHOD_MODE2;
				return handler;
			}
		}
	}

	dev_dbg(adev->dev, "Reset handler not found!\n");

	return NULL;
@@ -124,9 +121,9 @@ static void aldebaran_async_reset(struct work_struct *work)
	struct amdgpu_reset_control *reset_ctl =
		container_of(work, struct amdgpu_reset_control, reset_work);
	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
	int i;

	list_for_each_entry(handler, &reset_ctl->reset_handlers,
			     handler_list) {
	for_each_handler(i, handler, reset_ctl)	{
		if (handler->reset_method == reset_ctl->active_reset) {
			dev_dbg(adev->dev, "Resetting device\n");
			handler->do_reset(adev);
@@ -157,7 +154,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
	if (reset_device_list == NULL)
		return -EINVAL;

	if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
	if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
	    reset_context->hive == NULL) {
		/* Wrong context, return error */
		return -EINVAL;
@@ -338,7 +335,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
	if (reset_device_list == NULL)
		return -EINVAL;

	if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] ==
	if (amdgpu_ip_version(reset_context->reset_req_dev, MP1_HWIP, 0) ==
		    IP_VERSION(13, 0, 2) &&
	    reset_context->hive == NULL) {
		/* Wrong context, return error */
@@ -395,6 +392,11 @@ static struct amdgpu_reset_handler aldebaran_mode2_handler = {
	.do_reset		= aldebaran_mode2_reset,
};

/*
 * Table of reset handlers for Aldebaran. Only the mode2 handler is listed;
 * the remaining AMDGPU_RESET_MAX_HANDLERS - 1 slots are left
 * zero-initialized (NULL) by the partial initializer.
 */
static struct amdgpu_reset_handler *aldebaran_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
	[0] = &aldebaran_mode2_handler,
};

int aldebaran_reset_init(struct amdgpu_device *adev)
{
	struct amdgpu_reset_control *reset_ctl;
@@ -408,10 +410,9 @@ int aldebaran_reset_init(struct amdgpu_device *adev)
	reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
	reset_ctl->get_reset_handler = aldebaran_get_reset_handler;

	INIT_LIST_HEAD(&reset_ctl->reset_handlers);
	INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
	/* Only mode2 is handled through reset control now */
	amdgpu_reset_add_handler(reset_ctl, &aldebaran_mode2_handler);
	reset_ctl->reset_handlers = &aldebaran_rst_handlers;

	adev->reset_cntl = reset_ctl;

+60 −18
Original line number Diff line number Diff line
@@ -79,6 +79,8 @@
#include "amdgpu_vce.h"
#include "amdgpu_vcn.h"
#include "amdgpu_jpeg.h"
#include "amdgpu_vpe.h"
#include "amdgpu_umsch_mm.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gfx.h"
#include "amdgpu_sdma.h"
@@ -242,6 +244,8 @@ extern int amdgpu_num_kcq;
#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
extern int amdgpu_vcnfw_log;
extern int amdgpu_sg_display;
extern int amdgpu_umsch_mm;
extern int amdgpu_seamless;

extern int amdgpu_user_partt_mode;

@@ -623,6 +627,9 @@ typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t);
typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);

typedef uint64_t (*amdgpu_rreg64_ext_t)(struct amdgpu_device*, uint64_t);
typedef void (*amdgpu_wreg64_ext_t)(struct amdgpu_device*, uint64_t, uint64_t);

typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);

@@ -654,6 +661,7 @@ enum amd_hw_ip_block_type {
	JPEG_HWIP = VCN_HWIP,
	VCN1_HWIP,
	VCE_HWIP,
	VPE_HWIP,
	DF_HWIP,
	DCE_HWIP,
	OSSSYS_HWIP,
@@ -673,10 +681,15 @@ enum amd_hw_ip_block_type {
#define HWIP_MAX_INSTANCE	44

#define HW_ID_MAX		300
#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))
#define IP_VERSION_MAJ(ver) ((ver) >> 16)
#define IP_VERSION_MIN(ver) (((ver) >> 8) & 0xFF)
#define IP_VERSION_REV(ver) ((ver) & 0xFF)
/*
 * Packed IP version encoding.
 *
 * IP_VERSION_FULL() places its arguments at these bit positions:
 *   major     << 24
 *   minor     << 16
 *   revision  << 8
 *   variant   << 4
 *   subrev    << 0
 * The arguments are not masked or range-checked; callers are expected to
 * pass values that fit their field (8 bits for minor/revision, 4 bits for
 * variant/subrev, as implied by the extractor masks below).
 */
#define IP_VERSION_FULL(mj, mn, rv, var, srev) \
	(((mj) << 24) | ((mn) << 16) | ((rv) << 8) | ((var) << 4) | (srev))
/* Major/minor/revision only: variant and sub-revision are encoded as 0. */
#define IP_VERSION(mj, mn, rv)		IP_VERSION_FULL(mj, mn, rv, 0, 0)
/* Field extractors for a value built with IP_VERSION_FULL()/IP_VERSION(). */
#define IP_VERSION_MAJ(ver)		((ver) >> 24)
#define IP_VERSION_MIN(ver)		(((ver) >> 16) & 0xFF)
#define IP_VERSION_REV(ver)		(((ver) >> 8) & 0xFF)
#define IP_VERSION_VARIANT(ver)		(((ver) >> 4) & 0xF)
#define IP_VERSION_SUBREV(ver)		((ver) & 0xF)
/* Drops the low 8 bits (variant + sub-revision), leaving major/minor/rev. */
#define IP_VERSION_MAJ_MIN_REV(ver)	((ver) >> 8)

struct amdgpu_ip_map_info {
	/* Map of logical to actual dev instances/mask */
@@ -757,8 +770,8 @@ struct amdgpu_mqd {

#define AMDGPU_RESET_MAGIC_NUM 64
#define AMDGPU_MAX_DF_PERFMONS 4
#define AMDGPU_PRODUCT_NAME_LEN 64
struct amdgpu_reset_domain;
struct amdgpu_fru_info;

/*
 * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise.
@@ -826,6 +839,8 @@ struct amdgpu_device {
	amdgpu_wreg_ext_t		pcie_wreg_ext;
	amdgpu_rreg64_t			pcie_rreg64;
	amdgpu_wreg64_t			pcie_wreg64;
	amdgpu_rreg64_ext_t			pcie_rreg64_ext;
	amdgpu_wreg64_ext_t			pcie_wreg64_ext;
	/* protects concurrent UVD register access */
	spinlock_t uvd_ctx_idx_lock;
	amdgpu_rreg_t			uvd_ctx_rreg;
@@ -946,6 +961,13 @@ struct amdgpu_device {
	/* jpeg */
	struct amdgpu_jpeg		jpeg;

	/* vpe */
	struct amdgpu_vpe		vpe;

	/* umsch */
	struct amdgpu_umsch_mm		umsch_mm;
	bool				enable_umsch_mm;

	/* firmwares */
	struct amdgpu_firmware		firmware;

@@ -1033,11 +1055,7 @@ struct amdgpu_device {

	bool                            ucode_sysfs_en;

	/* Chip product information */
	char				product_number[20];
	char				product_name[AMDGPU_PRODUCT_NAME_LEN];
	char				serial[20];

	struct amdgpu_fru_info		*fru_info;
	atomic_t			throttling_logging_enabled;
	struct ratelimit_state		throttling_logging_rs;
	uint32_t                        ras_hw_enabled;
@@ -1067,11 +1085,6 @@ struct amdgpu_device {
	uint32_t                        *reset_dump_reg_list;
	uint32_t			*reset_dump_reg_value;
	int                             num_regs;
#ifdef CONFIG_DEV_COREDUMP
	struct amdgpu_task_info         reset_task_info;
	bool                            reset_vram_lost;
	struct timespec64               reset_time;
#endif

	bool                            scpm_enabled;
	uint32_t                        scpm_status;
@@ -1082,7 +1095,30 @@ struct amdgpu_device {
	bool                            dc_enabled;
	/* Mask of active clusters */
	uint32_t			aid_mask;

	/* Debug */
	bool                            debug_vm;
	bool                            debug_largebar;
	bool                            debug_disable_soft_recovery;
};

/*
 * Return the version recorded in adev->ip_versions for instance @inst of
 * hardware IP @ip, with the low eight bits cleared.
 *
 * Clearing those bits removes the variant and sub-revision fields, so the
 * result contains only the major/minor/revision part and can be compared
 * directly against IP_VERSION(mj, mn, rv).
 */
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
					 uint8_t ip, uint8_t inst)
{
	const uint32_t maj_min_rev_mask = ~0xFFU;
	uint32_t stored_version = adev->ip_versions[ip][inst];

	return stored_version & maj_min_rev_mask;
}

#ifdef CONFIG_DEV_COREDUMP
/*
 * Reset-related bookkeeping grouped together with a device pointer.
 * Only compiled when CONFIG_DEV_COREDUMP is enabled.
 * NOTE(review): presumably filled in at GPU reset time and consumed by the
 * devcoredump path - confirm against the users of this struct.
 */
struct amdgpu_coredump_info {
	/* Device this record refers to. */
	struct amdgpu_device		*adev;
	/* Task information associated with the reset. */
	struct amdgpu_task_info         reset_task_info;
	/* Timestamp associated with the reset. */
	struct timespec64               reset_time;
	/* Flag recording whether VRAM was lost across the reset. */
	bool                            reset_vram_lost;
};
#endif

static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
{
@@ -1134,10 +1170,14 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 reg_addr);
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 reg_addr);
u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
				  u64 reg_addr);
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 reg_addr, u32 reg_data);
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 reg_addr, u64 reg_data);
void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
				   u64 reg_addr, u64 reg_data);
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev);
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
@@ -1180,6 +1220,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v))
#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
#define RREG64_PCIE_EXT(reg) adev->pcie_rreg64_ext(adev, (reg))
#define WREG64_PCIE_EXT(reg, v) adev->pcie_wreg64_ext(adev, (reg), (v))
#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
#define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
@@ -1275,15 +1317,13 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
	((adev)->asic_funcs->update_umd_stable_pstate ? (adev)->asic_funcs->update_umd_stable_pstate((adev), (enter)) : 0)
#define amdgpu_asic_query_video_codecs(adev, e, c) (adev)->asic_funcs->query_video_codecs((adev), (e), (c))

#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));
#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter))

/*
 * BIT_MASK_UPPER(i) - unsigned long mask with bit @i and all higher bits set.
 * Evaluates to 0 when @i >= BITS_PER_LONG, so the shift count never reaches
 * the width of the type.
 */
#define BIT_MASK_UPPER(i) ((i) >= BITS_PER_LONG ? 0 : ~0UL << (i))

/*
 * for_each_inst(i, inst_mask) - execute the loop body once for every set bit
 * in @inst_mask, lowest bit first, with @i holding the zero-based bit index.
 *
 * ffs() returns a one-based bit position, or 0 when no bit is set. The
 * post-decrement in the loop condition both detects that 0 (loop ends) and
 * converts the position to a zero-based index for the body. The step
 * expression masks off every bit up to and including @i before searching
 * for the next one.
 *
 * Both macro arguments are parenthesized in the expansion so a compound
 * expression (e.g. "a | b") can safely be passed as @inst_mask; without the
 * parentheses the '&' in the step expression would bind to only part of it.
 * @inst_mask is re-evaluated on every iteration, so it must be free of side
 * effects.
 */
#define for_each_inst(i, inst_mask)                      \
	for ((i) = ffs(inst_mask); (i)-- != 0;           \
	     (i) = ffs((inst_mask) & BIT_MASK_UPPER((i) + 1)))

#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))

/* Common functions */
bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
@@ -1293,6 +1333,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev);
bool amdgpu_device_pcie_dynamic_switching_supported(void);
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
bool amdgpu_device_aspm_support_quirk(void);
@@ -1367,6 +1408,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
void amdgpu_driver_release_kms(struct drm_device *dev);

int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
int amdgpu_device_prepare(struct drm_device *dev);
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
Loading