Commit 994aeacb authored by Linus Torvalds
Browse files

Merge tag 'drm-next-2024-09-28' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
 "Regular fixes for the week to end the merge window, i915 and xe have a
  few each, amdgpu makes up most of it with a bunch of SR-IOV related
  fixes amongst others.

  i915:
   - Fix BMG support to UHBR13.5
   - Two PSR fixes
   - Fix colorimetry detection for DP

  xe:
   - Fix macro for checking minimum GuC version
   - Fix CCS offset calculation for some BMG SKUs
   - Fix locking on memory usage reporting via fdinfo and BO destroy
   - Fix GPU page fault handler on a closed VM
   - Fix overflow in oa batch buffer

  amdgpu:
   - MES 12 fix
   - KFD fence sync fix
   - SR-IOV fixes
   - VCN 4.0.6 fix
   - SDMA 7.x fix
   - Bump driver version to note cleared VRAM support
   - SWSMU fix
   - CU occupancy logic fix
   - SDMA queue fix"

* tag 'drm-next-2024-09-28' of https://gitlab.freedesktop.org/drm/kernel: (79 commits)
  drm/amd/pm: update workload mask after the setting
  drm/amdgpu: bump driver version for cleared VRAM
  drm/amdgpu: fix vbios fetching for SR-IOV
  drm/amdgpu: fix PTE copy corruption for sdma 7
  drm/amdkfd: Add SDMA queue quantum support for GFX12
  drm/amdgpu/vcn: enable AV1 on both instances
  drm/amdkfd: Fix CU occupancy for GFX 9.4.3
  drm/amdkfd: Update logic for CU occupancy calculations
  drm/amdgpu: skip coredump after job timeout in SRIOV
  drm/amdgpu: sync to KFD fences before clearing PTEs
  drm/amdgpu/mes12: set enable_level_process_quantum_check
  drm/i915/dp: Fix colorimetry detection
  drm/amdgpu/mes12: reduce timeout
  drm/amdgpu/mes11: reduce timeout
  drm/amdgpu: use GEM references instead of TTMs v2
  drm/amd/display: Allow backlight to go below `AMDGPU_DM_DEFAULT_MIN_BACKLIGHT`
  drm/amd/display: Fix kdoc entry for 'tps' in 'dc_process_dmub_dpia_set_tps_notification'
  drm/amdgpu: update golden regs for gfx12
  drm/amdgpu: clean up vbios fetching code
  drm/amd/display: handle nulled pipe context in DCE110's set_drr()
  ...
parents 894b3c35 e7268dd9
Loading
Loading
Loading
Loading
+0 −4
Original line number Diff line number Diff line
@@ -1083,10 +1083,6 @@ struct amdgpu_device {

	struct amdgpu_virt	virt;

	/* link all shadow bo */
	struct list_head                shadow_list;
	struct mutex                    shadow_list_lock;

	/* record hw reset is performed */
	bool has_hw_reset;
	u8				reset_magic[AMDGPU_RESET_MAGIC_NUM];
+1 −1
Original line number Diff line number Diff line
@@ -511,7 +511,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
		return -EINVAL;
	}

	/* udpate aca bank to aca source error_cache first */
	/* update aca bank to aca source error_cache first */
	ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
	if (ret)
		return ret;
+45 −63
Original line number Diff line number Diff line
@@ -950,28 +950,30 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
 * @inst: xcc's instance number on a multi-XCC setup
 */
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
		int *wave_cnt, int *vmid, uint32_t inst)
		struct kfd_cu_occupancy *queue_cnt, uint32_t inst)
{
	int pipe_idx;
	int queue_slot;
	unsigned int reg_val;

	unsigned int wave_cnt;
	/*
	 * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID
	 * parameters to read out waves in flight. If there are non-zero
	 * waves in flight, also record the queue's doorbell offset.
	 */
	*vmid = 0xFF;
	*wave_cnt = 0;
	pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
	queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
	soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
	reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
			 queue_slot);
	*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
	if (*wave_cnt != 0)
		*vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) &
			 CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
	soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst));
	reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
				  mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot);
	wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
	if (wave_cnt != 0) {
		queue_cnt->wave_cnt += wave_cnt;
		queue_cnt->doorbell_off =
			(RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) &
			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >>
			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
	}
}

/**
@@ -981,9 +983,8 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
 * or more queues running and submitting waves to compute units.
 *
 * @adev: Handle of device from which to get number of waves in flight
 * @pasid: Identifies the process for which this query call is invoked
 * @pasid_wave_cnt: Output parameter updated with number of waves in flight that
 *                  belong to process with given pasid
 * @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset
 *		  for comparison later.
 * @max_waves_per_cu: Output parameter updated with maximum number of waves
 *                    possible per Compute Unit
 * @inst: xcc's instance number on a multi-XCC setup
@@ -1011,34 +1012,28 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
 *    number of waves that are in flight for the queue at specified index. The
 *    index ranges from 0 to 7.
 *
 *    If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID
 *    of the wave(s).
 *    If non-zero waves are in flight, store the corresponding doorbell offset
 *    of the queue, along with the wave count.
 *
 *    Determine if VMID from above step maps to pasid provided as parameter. If
 *    it matches agrregate the wave count. That the VMID will not match pasid is
 *    a normal condition i.e. a device is expected to support multiple queues
 *    from multiple proceses.
 *    Determine if the queue belongs to the process by comparing the doorbell
 *    offset against the process's queues. If it matches, aggregate the wave
 *    count for the process.
 *
 *  Reading registers referenced above involves programming GRBM appropriately
 */
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
		int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst)
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
				 struct kfd_cu_occupancy *cu_occupancy,
				 int *max_waves_per_cu, uint32_t inst)
{
	int qidx;
	int vmid;
	int se_idx;
	int sh_idx;
	int se_cnt;
	int sh_cnt;
	int wave_cnt;
	int queue_map;
	int pasid_tmp;
	int max_queue_cnt;
	int vmid_wave_cnt = 0;
	DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);

	lock_spi_csq_mutexes(adev);
	soc15_grbm_select(adev, 1, 0, 0, 0, inst);
	soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst));

	/*
	 * Iterate through the shader engines and arrays of the device
@@ -1048,13 +1043,10 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
			  AMDGPU_MAX_QUEUES);
	max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
			adev->gfx.mec.num_queue_per_pipe;
	sh_cnt = adev->gfx.config.max_sh_per_se;
	se_cnt = adev->gfx.config.max_shader_engines;
	for (se_idx = 0; se_idx < se_cnt; se_idx++) {
		for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {

			amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst);
			queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
		amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst);
		queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS);

		/*
		 * Assumption: queue map encodes following schema: four
@@ -1063,7 +1055,6 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
		 * and must be verified for newer device families
		 */
		for (qidx = 0; qidx < max_queue_cnt; qidx++) {

			/* Skip queues that are not associated with
			 * compute functions
			 */
@@ -1074,25 +1065,16 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
				continue;

			/* Get number of waves in flight and aggregate them */
				get_wave_count(adev, qidx, &wave_cnt, &vmid,
			get_wave_count(adev, qidx, &cu_occupancy[qidx],
					inst);
				if (wave_cnt != 0) {
					pasid_tmp =
					  RREG32(SOC15_REG_OFFSET(OSSSYS, inst,
						 mmIH_VMID_0_LUT) + vmid);
					if (pasid_tmp == pasid)
						vmid_wave_cnt += wave_cnt;
				}
			}
		}
	}

	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
	soc15_grbm_select(adev, 0, 0, 0, 0, inst);
	soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
	unlock_spi_csq_mutexes(adev);

	/* Update the output parameters and return */
	*pasid_wave_cnt = vmid_wave_cnt;
	*max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
				adev->gfx.cu_info.max_waves_per_simd;
}
+3 −2
Original line number Diff line number Diff line
@@ -52,8 +52,9 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid);
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
			uint32_t vmid, uint64_t page_table_base);
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
		int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst);
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
				 struct kfd_cu_occupancy *cu_occupancy,
				 int *max_waves_per_cu, uint32_t inst);
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
		uint32_t inst);
+1 −1
Original line number Diff line number Diff line
@@ -1499,7 +1499,7 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
		}
	}

	ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
	ret = amdgpu_bo_pin(bo, domain);
	if (ret)
		pr_err("Error in Pinning BO to domain: %d\n", domain);

Loading