Commit 53503556 authored by Yifan Zhang, committed by Alex Deucher
Browse files

amd/amdkfd: correct mem limit calculation for small APUs



Current mem limit check leaks some GTT memory (reserved_for_pt +
reserved_for_ras + adev->vram_pin_size) for small APUs.

Since carveout VRAM is tunable on APUs, there are three cases
regarding the carveout VRAM size relative to GTT:

1. 0 < carveout < gtt
   apu_prefer_gtt = true, is_app_apu = false

2. carveout > gtt / 2
   apu_prefer_gtt = false, is_app_apu = false

3. 0 = carveout
   apu_prefer_gtt = true, is_app_apu = true

It doesn't make sense to check the limitation below in case 1
(default case, small carveout) because the values in the
expression below are a mix of carveout and gtt quantities.

adev->kfd.vram_used[xcp_id] + vram_needed >
    vram_size - reserved_for_pt - reserved_for_ras -
    atomic64_read(&adev->vram_pin_size)

gtt: kfd.vram_used, vram_needed, vram_size
carveout: reserved_for_pt, reserved_for_ras, adev->vram_pin_size

In case 1, vram allocations will go to the gtt domain, so skip the vram
check since the ttm_mem_limit check already covers this allocation.

Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit fa7c99f0)
parent ce42a3b5
Loading
Loading
Loading
Loading
+32 −12
Original line number Diff line number Diff line
@@ -213,19 +213,35 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
	spin_lock(&kfd_mem_limit.mem_limit_lock);

	if (kfd_mem_limit.system_mem_used + system_mem_needed >
	    kfd_mem_limit.max_system_mem_limit)
	    kfd_mem_limit.max_system_mem_limit) {
		pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
		if (!no_system_mem_limit) {
			ret = -ENOMEM;
			goto release;
		}
	}

	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
	     kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
	     kfd_mem_limit.max_ttm_mem_limit) ||
	    (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
	     vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) {
	if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
		kfd_mem_limit.max_ttm_mem_limit) {
		ret = -ENOMEM;
		goto release;
	}

	/* If is_app_apu is false and apu_prefer_gtt is true, it is an APU with
	 * carveout < gtt. In that case, VRAM allocations go to the gtt domain, so
	 * skip the VRAM check since the ttm_mem_limit check already covers them.
	 */

	if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) {
		uint64_t vram_available =
			vram_size - reserved_for_pt - reserved_for_ras -
			atomic64_read(&adev->vram_pin_size);
		if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) {
			ret = -ENOMEM;
			goto release;
		}
	}

	/* Update memory accounting by decreasing available system
	 * memory, TTM memory and GPU memory as computed above
	 */
@@ -1626,6 +1642,10 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
	uint64_t vram_available, system_mem_available, ttm_mem_available;

	spin_lock(&kfd_mem_limit.mem_limit_lock);
	if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu)
		vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
			- adev->kfd.vram_used_aligned[xcp_id];
	else
		vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
			- adev->kfd.vram_used_aligned[xcp_id]
			- atomic64_read(&adev->vram_pin_size)