Merge tag 'drm-next-2024-09-28' of https://gitlab.freedesktop.org/drm/kernel (994aeacb) · Commits · git / linux-nf

drivers/gpu/drm/amd/amdgpu/amdgpu.h

+0 −4

Original line number	Diff line number	Diff line
		@@ -1083,10 +1083,6 @@ struct amdgpu_device {

		struct amdgpu_virt virt;

		/* link all shadow bo */
		struct list_head shadow_list;
		struct mutex shadow_list_lock;

		/* record hw reset is performed */
		bool has_hw_reset;
		u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];

drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -511,7 +511,7 @@ static int __aca_get_error_data(struct amdgpu_device adev, struct aca_handle h
		return -EINVAL;
		}

		/* udpate aca bank to aca source error_cache first */
		/* update aca bank to aca source error_cache first */
		ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
		if (ret)
		return ret;

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c

+45 −63

Original line number	Diff line number	Diff line
		@@ -950,28 +950,30 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
		* @inst: xcc's instance number on a multi-XCC setup
		*/
		static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
		int wave_cnt, int vmid, uint32_t inst)
		struct kfd_cu_occupancy *queue_cnt, uint32_t inst)
		{
		int pipe_idx;
		int queue_slot;
		unsigned int reg_val;

		unsigned int wave_cnt;
		/*
		* Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID
		* parameters to read out waves in flight. Get VMID if there are
		* non-zero waves in flight.
		*/
		*vmid = 0xFF;
		*wave_cnt = 0;
		pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
		queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
		soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
		reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
		queue_slot);
		*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
		if (*wave_cnt != 0)
		*vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) &
		CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
		soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst));
		reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
		mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot);
		wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
		if (wave_cnt != 0) {
		queue_cnt->wave_cnt += wave_cnt;
		queue_cnt->doorbell_off =
		(RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) &
		CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >>
		CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
		}
		}

		/**
		@@ -981,9 +983,8 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
		* or more queues running and submitting waves to compute units.
		*
		* @adev: Handle of device from which to get number of waves in flight
		* @pasid: Identifies the process for which this query call is invoked
		* @pasid_wave_cnt: Output parameter updated with number of waves in flight that
		* belong to process with given pasid
		* @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset
		* for comparison later.
		* @max_waves_per_cu: Output parameter updated with maximum number of waves
		* possible per Compute Unit
		* @inst: xcc's instance number on a multi-XCC setup
		@@ -1011,34 +1012,28 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
		* number of waves that are in flight for the queue at specified index. The
		* index ranges from 0 to 7.
		*
		* If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID
		* of the wave(s).
		* If non-zero waves are in flight, store the corresponding doorbell offset
		* of the queue, along with the wave count.
		*
		* Determine if VMID from above step maps to pasid provided as parameter. If
		* it matches agrregate the wave count. That the VMID will not match pasid is
		* a normal condition i.e. a device is expected to support multiple queues
		* from multiple proceses.
		* Determine if the queue belongs to the process by comparing the doorbell
		* offset against the process's queues. If it matches, aggregate the wave
		* count for the process.
		*
		* Reading registers referenced above involves programming GRBM appropriately
		*/
		void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
		int pasid_wave_cnt, int max_waves_per_cu, uint32_t inst)
		void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
		struct kfd_cu_occupancy *cu_occupancy,
		int *max_waves_per_cu, uint32_t inst)
		{
		int qidx;
		int vmid;
		int se_idx;
		int sh_idx;
		int se_cnt;
		int sh_cnt;
		int wave_cnt;
		int queue_map;
		int pasid_tmp;
		int max_queue_cnt;
		int vmid_wave_cnt = 0;
		DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);

		lock_spi_csq_mutexes(adev);
		soc15_grbm_select(adev, 1, 0, 0, 0, inst);
		soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst));

		/*
		* Iterate through the shader engines and arrays of the device
		@@ -1048,13 +1043,10 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
		AMDGPU_MAX_QUEUES);
		max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
		adev->gfx.mec.num_queue_per_pipe;
		sh_cnt = adev->gfx.config.max_sh_per_se;
		se_cnt = adev->gfx.config.max_shader_engines;
		for (se_idx = 0; se_idx < se_cnt; se_idx++) {
		for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {

		amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst);
		queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
		amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst);
		queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS);

		/*
		* Assumption: queue map encodes following schema: four
		@@ -1063,7 +1055,6 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
		* and must be verified for newer device families
		*/
		for (qidx = 0; qidx < max_queue_cnt; qidx++) {

		/* Skip qeueus that are not associated with
		* compute functions
		*/
		@@ -1074,25 +1065,16 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
		continue;

		/* Get number of waves in flight and aggregate them */
		get_wave_count(adev, qidx, &wave_cnt, &vmid,
		get_wave_count(adev, qidx, &cu_occupancy[qidx],
		inst);
		if (wave_cnt != 0) {
		pasid_tmp =
		RREG32(SOC15_REG_OFFSET(OSSSYS, inst,
		mmIH_VMID_0_LUT) + vmid);
		if (pasid_tmp == pasid)
		vmid_wave_cnt += wave_cnt;
		}
		}
		}
		}

		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
		soc15_grbm_select(adev, 0, 0, 0, 0, inst);
		soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
		unlock_spi_csq_mutexes(adev);

		/* Update the output parameters and return */
		*pasid_wave_cnt = vmid_wave_cnt;
		max_waves_per_cu = adev->gfx.cu_info.simd_per_cu
		adev->gfx.cu_info.max_waves_per_simd;
		}

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h

+3 −2

Original line number	Diff line number	Diff line
		@@ -52,8 +52,9 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
		uint8_t vmid, uint16_t *p_pasid);
		void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
		uint32_t vmid, uint64_t page_table_base);
		void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
		int pasid_wave_cnt, int max_waves_per_cu, uint32_t inst);
		void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
		struct kfd_cu_occupancy *cu_occupancy,
		int *max_waves_per_cu, uint32_t inst);
		void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
		uint32_t inst);

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -1499,7 +1499,7 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
		}
		}

		ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
		ret = amdgpu_bo_pin(bo, domain);
		if (ret)
		pr_err("Error in Pinning BO to domain: %d\n", domain);