Commit 2ba9f676 authored by Linus Torvalds
Browse files

Merge tag 'drm-next-2024-11-29' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
 "Merge window fixes, mostly amdgpu and xe, with a few other minor ones,
  all looks fairly normal.

  i915:
   - hdcp: Fix when the first read and write are retried

  xe:
   - Wake up waiters after wait condition set to true
   - Mark the preempt fence workqueue as reclaim
   - Update xe2 graphics name string
   - Fix a couple of guc submit races
   - Fix pat index usage in migrate
   - Ensure non-cached migrate pagetable bo mappings
   - Take a PM ref in the delayed snapshot capture worker

  amdgpu:
   - SMU 13.0.6 fixes
   - XGMI fixes
   - SMU 13.0.7 fixes
   - Misc code cleanups
   - Plane refcount fixes
   - DCN 4.0.1 fixes
   - DC power fixes
   - DTO fixes
   - NBIO 7.11 fixes
   - SMU 14.0.x fixes
   - Reset fixes
   - Enable DC on LoongArch
   - Sysfs hotplug warning fix
   - Misc small fixes
   - VCN 4.0.3 fix
   - Slab usage fix
   - Jpeg delayed work fix

  amdkfd:
   - wptr handling fixes

  radeon:
   - Use ttm_bo_move_null()
   - Constify struct pci_device_id
   - Fix spurious hotplug
   - HPD fix

  rockchip:
   - fix 32-bit build"

* tag 'drm-next-2024-11-29' of https://gitlab.freedesktop.org/drm/kernel: (48 commits)
  drm/xe: Take PM ref in delayed snapshot capture worker
  drm/xe/migrate: use XE_BO_FLAG_PAGETABLE
  drm/xe/migrate: fix pat index usage
  drm/xe/guc_submit: fix race around suspend_pending
  drm/xe/guc_submit: fix race around pending_disable
  drm/xe: Update xe2_graphics name string
  drm/rockchip: avoid 64-bit division
  Revert "drm/radeon: Delay Connector detecting when HPD singals is unstable"
  drm/amdgpu/jpeg: cancel the jpeg worker
  drm/amdgpu: fix usage slab after free
  drm/amdgpu/vcn: reset fw_shared when VCPU buffers corrupted on vcn v4.0.3
  drm/amdgpu: Fix sysfs warning when hotplugging
  drm/amdgpu: Add sysfs interface for vcn reset mask
  drm/amdgpu/gmc7: fix wait_for_idle callers
  drm/amd/pm: Remove arcturus min power limit
  drm/amd/pm: skip setting the power source on smu v14.0.2/3
  drm/amd/pm: disable pcie speed switching on Intel platform for smu v14.0.2/3
  drm/amdkfd: Use the correct wptr size
  drm/xe: Mark preempt fence workqueue as reclaim
  drm/xe/ufence: Wake up waiters after setting ufence->signalled
  ...
parents 517363b4 9794b89c
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -330,6 +330,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
	}

	list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
		amdgpu_set_init_level(tmp_adev,
				AMDGPU_INIT_LEVEL_RESET_RECOVERY);
		dev_info(tmp_adev->dev,
			 "GPU reset succeeded, trying to resume\n");
		r = aldebaran_mode2_restore_ip(tmp_adev);
@@ -375,6 +377,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
							tmp_adev);

		if (!r) {
			amdgpu_set_init_level(tmp_adev,
					      AMDGPU_INIT_LEVEL_DEFAULT);
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);

			r = amdgpu_ib_ring_tests(tmp_adev);
+1 −0
Original line number Diff line number Diff line
@@ -839,6 +839,7 @@ struct amdgpu_mqd {
/*
 * Identifiers for the device init levels that can be selected with
 * amdgpu_set_init_level().
 */
enum amdgpu_init_lvl_id {
	/* Default level; restored once IP init after a reset has completed. */
	AMDGPU_INIT_LEVEL_DEFAULT,
	/* Minimal set of blocks needed before an XGMI hive can be reset. */
	AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
	/*
	 * Level held while a device is being re-initialized after a GPU reset
	 * (not used for reset-on-init, which stays at the default level).
	 */
	AMDGPU_INIT_LEVEL_RESET_RECOVERY,
};

struct amdgpu_init_level {
+24 −5
Original line number Diff line number Diff line
@@ -156,6 +156,11 @@ struct amdgpu_init_level amdgpu_init_default = {
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};

/*
 * Init level descriptor selected for AMDGPU_INIT_LEVEL_RESET_RECOVERY.
 * The hw-init IP block mask is AMDGPU_IP_BLK_MASK_ALL, i.e. no IP block is
 * masked out at this level.
 */
struct amdgpu_init_level amdgpu_init_recovery = {
	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};

/*
 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
 * is used for cases like reset on initialization where the entire hive needs to
@@ -182,6 +187,9 @@ void amdgpu_set_init_level(struct amdgpu_device *adev,
	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
		adev->init_lvl = &amdgpu_init_minimal_xgmi;
		break;
	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
		adev->init_lvl = &amdgpu_init_recovery;
		break;
	case AMDGPU_INIT_LEVEL_DEFAULT:
		fallthrough;
	default:
@@ -3250,7 +3258,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
		return r;
	}

	if (!amdgpu_in_reset(adev))
	if (!amdgpu_reset_in_recovery(adev))
		amdgpu_ras_set_error_query_ready(adev, true);

	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
@@ -4669,8 +4677,8 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
	int idx;
	bool px;

	amdgpu_fence_driver_sw_fini(adev);
	amdgpu_device_ip_fini(adev);
	amdgpu_fence_driver_sw_fini(adev);
	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
	adev->accel_working = false;
	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
@@ -5419,7 +5427,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
	struct list_head *device_list_handle;
	bool full_reset, vram_lost = false;
	struct amdgpu_device *tmp_adev;
	int r;
	int r, init_level;

	device_list_handle = reset_context->reset_device_list;

@@ -5428,10 +5436,18 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)

	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);

	/**
	 * If it's reset on init, it's default init level, otherwise keep level
	 * as recovery level.
	 */
	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
			init_level = AMDGPU_INIT_LEVEL_DEFAULT;
	else
			init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;

	r = 0;
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		/* After reset, it's default init level */
		amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
		amdgpu_set_init_level(tmp_adev, init_level);
		if (full_reset) {
			/* post card */
			amdgpu_ras_set_fed(tmp_adev, false);
@@ -5518,6 +5534,9 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)

out:
		if (!r) {
			/* IP init is complete now, set level as default */
			amdgpu_set_init_level(tmp_adev,
					      AMDGPU_INIT_LEVEL_DEFAULT);
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
			r = amdgpu_ib_ring_tests(tmp_adev);
			if (r) {
+5 −3
Original line number Diff line number Diff line
@@ -1778,10 +1778,12 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)

/*
 * Run the gfx sysfs teardown helpers (xcp, isolation shader, reset mask)
 * for @adev.
 *
 * Nothing is done when the device kobject has no sysfs directory entry
 * (kobj.sd is NULL).
 */
void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
{
	/* No sysfs directory for this device: skip every removal helper. */
	if (!adev->dev->kobj.sd)
		return;

	amdgpu_gfx_sysfs_xcp_fini(adev);
	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
	amdgpu_gfx_sysfs_reset_mask_fini(adev);
}

int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
				      unsigned int cleaner_shader_size)
+4 −2
Original line number Diff line number Diff line
@@ -447,6 +447,8 @@ int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev)

/*
 * Remove the jpeg_reset_mask device attribute file of @adev.
 *
 * The file is only removed when the device kobject still has a sysfs
 * directory entry (kobj.sd is non-NULL) and num_jpeg_inst is non-zero.
 */
void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev)
{
	/* kobj.sd is tested first, exactly as in the nested form. */
	if (!adev->dev->kobj.sd || !adev->jpeg.num_jpeg_inst)
		return;

	device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
}
Loading