Merge tag 'drm-misc-next-2025-01-06' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-next

drm-misc-next for 6.14:

UAPI Changes:
- Clarify drm memory stats documentation

Cross-subsystem Changes:

Core Changes:
 - sched: Documentation fixes

Driver Changes:
 - amdgpu: Track BO memory stats at runtime
 - amdxdna: Various fixes
 - hisilicon: New HIBMC driver
 - bridges:
   - Provide default implementation of atomic_check for HDMI bridges
   - it6505: HDCP improvements, MCCS support

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Maxime Ripard <mripard@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250106-augmented-kakapo-of-action-0cf000@houat
This commit is contained in:
Dave Airlie
2025-01-09 15:48:33 +10:00
79 changed files with 2516 additions and 606 deletions

View File

@@ -36,6 +36,7 @@
#include <drm/ttm/ttm_tt.h>
#include <drm/drm_exec.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_gmc.h"
@@ -310,6 +311,111 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
spin_unlock(&vm->status_lock);
}
/**
 * amdgpu_vm_update_shared - helper to update shared memory stat
 * @base: base structure for tracking BO usage in a VM
 *
 * Takes the vm status_lock and updates the shared memory stat. If the basic
 * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called
 * as well.
 */
static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
{
	struct amdgpu_bo *bo = base->bo;
	struct amdgpu_vm *vm = base->vm;
	uint32_t memtype = amdgpu_bo_mem_stats_placement(bo);
	uint64_t bo_size = amdgpu_bo_size(bo);
	bool now_shared;

	spin_lock(&vm->status_lock);
	now_shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
	if (now_shared != base->shared) {
		base->shared = now_shared;
		/* Move the BO's size between the shared and private buckets. */
		if (now_shared) {
			vm->stats[memtype].drm.shared += bo_size;
			vm->stats[memtype].drm.private -= bo_size;
		} else {
			vm->stats[memtype].drm.shared -= bo_size;
			vm->stats[memtype].drm.private += bo_size;
		}
	}
	spin_unlock(&vm->status_lock);
}
/**
 * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared
 * @bo: amdgpu buffer object
 *
 * Walks every VM mapping of @bo and refreshes its shared/private accounting,
 * updating the per VM stats where needed.
 */
void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
{
	struct amdgpu_vm_bo_base *cursor = bo->vm_bo;

	while (cursor) {
		amdgpu_vm_update_shared(cursor);
		cursor = cursor->next;
	}
}
/**
 * amdgpu_vm_update_stats_locked - helper to update normal memory stat
 * @base: base structure for tracking BO usage in a VM
 * @res: the ttm_resource to use for the purpose of accounting, may or may not
 *       be bo->tbo.resource
 * @sign: if we should add (+1) or subtract (-1) from the stat
 *
 * Caller need to have the vm status_lock held. Useful for when multiple update
 * need to happen at the same time.
 */
static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
					  struct ttm_resource *res, int sign)
{
	struct amdgpu_bo *bo = base->bo;
	struct amdgpu_vm *vm = base->vm;
	uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
	int64_t size = sign * amdgpu_bo_size(bo);

	/* For drm-total- and drm-shared-, BO are accounted by their preferred
	 * placement, see also amdgpu_bo_mem_stats_placement.
	 */
	if (base->shared)
		vm->stats[bo_memtype].drm.shared += size;
	else
		vm->stats[bo_memtype].drm.private += size;

	/* Residency-based stats only apply to placements we track. */
	if (!res || res->mem_type >= __AMDGPU_PL_NUM)
		return;

	vm->stats[res->mem_type].drm.resident += size;

	/* BO only count as purgeable if it is resident,
	 * since otherwise there's nothing to purge.
	 */
	if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
		vm->stats[res->mem_type].drm.purgeable += size;

	/* Resting in a non-preferred domain counts as evicted. */
	if (!(amdgpu_mem_type_to_domain(res->mem_type) & bo->preferred_domains))
		vm->stats[bo_memtype].evicted += size;
}
/**
 * amdgpu_vm_update_stats - helper to update normal memory stat
 * @base: base structure for tracking BO usage in a VM
 * @res: the ttm_resource to use for the purpose of accounting, may or may not
 *       be bo->tbo.resource
 * @sign: if we should add (+1) or subtract (-1) from the stat
 *
 * Updates the basic memory stat when bo is added/deleted/moved. Grabs the vm
 * status_lock around the locked helper.
 */
void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
			    struct ttm_resource *res, int sign)
{
	spin_lock(&base->vm->status_lock);
	amdgpu_vm_update_stats_locked(base, res, sign);
	spin_unlock(&base->vm->status_lock);
}
/**
* amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
*
@@ -333,6 +439,11 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
base->next = bo->vm_bo;
bo->vm_bo = base;
spin_lock(&vm->status_lock);
base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
spin_unlock(&vm->status_lock);
if (!amdgpu_vm_is_bo_always_valid(vm, bo))
return;
@@ -1083,53 +1194,11 @@ error_free:
return r;
}
/*
 * amdgpu_vm_bo_get_memory - fold one bo_va's BO into the caller's stats
 *
 * Accounts bo_va's BO via amdgpu_bo_get_memory(). BOs that are per-VM
 * always-valid are protected by the VM reservation; for everything else we
 * trylock the BO's reservation first, and simply skip BOs we cannot lock
 * (they may be mid-move, so their placement is unstable anyway).
 */
static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va,
				    struct amdgpu_mem_stats *stats,
				    unsigned int size)
{
	struct amdgpu_vm *vm = bo_va->base.vm;
	struct amdgpu_bo *bo = bo_va->base.bo;

	/* A bo_va may outlive its BO; nothing to account then. */
	if (!bo)
		return;

	/*
	 * For now ignore BOs which are currently locked and potentially
	 * changing their location.
	 */
	if (!amdgpu_vm_is_bo_always_valid(vm, bo) &&
	    !dma_resv_trylock(bo->tbo.base.resv))
		return;

	amdgpu_bo_get_memory(bo, stats, size);
	/* Only drop the lock if we were the ones who took it above. */
	if (!amdgpu_vm_is_bo_always_valid(vm, bo))
		dma_resv_unlock(bo->tbo.base.resv);
}
/*
 * amdgpu_vm_get_memory - report this VM's memory statistics
 *
 * NOTE(review): this span appears to be diff-extraction residue — it
 * interleaves the OLD signature/body (the "*stats, unsigned int size"
 * parameters and the six per-list walks via amdgpu_vm_bo_get_memory) with
 * the NEW ones (the fixed-size stats array parameter and the memcpy of the
 * runtime-maintained vm->stats). As written it is not valid C; reconcile to
 * one version (presumably the array + memcpy form) before building.
 */
void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
			  /* NOTE(review): old parameters, likely removed: */
			  struct amdgpu_mem_stats *stats,
			  unsigned int size)
			  /* NOTE(review): new parameter, likely the keeper: */
			  struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
{
	/* NOTE(review): old body below — superseded by the memcpy. */
	struct amdgpu_bo_va *bo_va, *tmp;

	spin_lock(&vm->status_lock);
	list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
		amdgpu_vm_bo_get_memory(bo_va, stats, size);

	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
		amdgpu_vm_bo_get_memory(bo_va, stats, size);

	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
		amdgpu_vm_bo_get_memory(bo_va, stats, size);

	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status)
		amdgpu_vm_bo_get_memory(bo_va, stats, size);

	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status)
		amdgpu_vm_bo_get_memory(bo_va, stats, size);

	list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status)
		amdgpu_vm_bo_get_memory(bo_va, stats, size);
	/* NOTE(review): new body — snapshot the runtime-tracked stats. */
	memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
	spin_unlock(&vm->status_lock);
}
@@ -2075,6 +2144,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
if (*base != &bo_va->base)
continue;
amdgpu_vm_update_stats(*base, bo->tbo.resource, -1);
*base = bo_va->base.next;
break;
}
@@ -2143,14 +2213,12 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
/**
* amdgpu_vm_bo_invalidate - mark the bo as invalid
*
* @adev: amdgpu_device pointer
* @bo: amdgpu buffer object
* @evicted: is the BO evicted
*
* Mark @bo as invalid.
*/
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
struct amdgpu_bo *bo, bool evicted)
void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
{
struct amdgpu_vm_bo_base *bo_base;
@@ -2175,6 +2243,32 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
}
}
/**
 * amdgpu_vm_bo_move - handle BO move
 *
 * @bo: amdgpu buffer object
 * @new_mem: the new placement of the BO move
 * @evicted: is the BO evicted
 *
 * Update the memory stats for the new placement and mark @bo as invalid.
 */
void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
		       bool evicted)
{
	struct amdgpu_vm_bo_base *base;

	for (base = bo->vm_bo; base; base = base->next) {
		struct amdgpu_vm *vm = base->vm;

		spin_lock(&vm->status_lock);
		/* Retire the stats of the old placement, add the new one. */
		amdgpu_vm_update_stats_locked(base, bo->tbo.resource, -1);
		amdgpu_vm_update_stats_locked(base, new_mem, +1);
		spin_unlock(&vm->status_lock);
	}

	amdgpu_vm_bo_invalidate(bo, evicted);
}
/**
* amdgpu_vm_get_block_size - calculate VM page table size as power of two
*
@@ -2594,6 +2688,16 @@ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->is_compute_context = false;
}
/**
 * amdgpu_vm_stats_is_zero - check that all memory stats of a VM are zero
 * @vm: the VM to check
 *
 * Used on teardown to detect accounting leaks: every placement bucket must
 * have zeroed DRM stats and a zero evicted counter once all BOs are gone.
 *
 * Returns:
 * true if every stat bucket is zero, false otherwise.
 */
static bool amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
{
	/* A predicate should be bool; the condition is also De Morgan'd so
	 * each leak source reads directly instead of via double negation.
	 */
	for (int i = 0; i < __AMDGPU_PL_NUM; ++i) {
		if (!drm_memory_stats_is_zero(&vm->stats[i].drm) ||
		    vm->stats[i].evicted != 0)
			return false;
	}

	return true;
}
/**
* amdgpu_vm_fini - tear down a vm instance
*
@@ -2617,7 +2721,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
root = amdgpu_bo_ref(vm->root.bo);
amdgpu_bo_reserve(root, true);
amdgpu_vm_put_task_info(vm->task_info);
amdgpu_vm_set_pasid(adev, vm, 0);
dma_fence_wait(vm->last_unlocked, false);
dma_fence_put(vm->last_unlocked);
@@ -2666,6 +2769,16 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
if (!amdgpu_vm_stats_is_zero(vm)) {
struct amdgpu_task_info *ti = vm->task_info;
dev_warn(adev->dev,
"VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n",
ti->process_name, ti->pid, ti->task_name, ti->tgid);
}
amdgpu_vm_put_task_info(vm->task_info);
}
/**