Commit 9ccde17d authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge tag 'amd-drm-next-6.7-2023-11-03' of https://gitlab.freedesktop.org/agd5f/linux into drm-next



amd-drm-next-6.7-2023-11-03:

amdgpu:
- Fix RAS support check
- RAS fixes
- MES fixes
- SMU13 fixes
- Contiguous memory allocation fix
- BACO fixes
- GPU reset fixes
- Min power limit fixes
- GFX11 fixes
- USB4/TB hotplug fixes
- ARM regression fix
- GFX9.4.3 fixes
- KASAN/KCSAN stack size check fixes
- SR-IOV fixes
- SMU14 fixes
- PSP13 fixes
- Display blend fixes
- Flexible array size fixes

amdkfd:
- GPUVM fix

radeon:
- Flexible array size fixes

Signed-off-by: default avatarDave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231103173203.4912-1-alexander.deucher@amd.com
parents f056cb96 6d5e0032
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -363,9 +363,6 @@ struct amdgpu_ip_block_version {
	const struct amd_ip_funcs *funcs;
};

#define HW_REV(_Major, _Minor, _Rev) \
	((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev)))

struct amdgpu_ip_block {
	struct amdgpu_ip_block_status status;
	const struct amdgpu_ip_block_version *version;
+48 −31
Original line number Diff line number Diff line
@@ -425,6 +425,32 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
	return ret;
}

static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
					       uint32_t domain,
					       struct dma_fence *fence)
{
	int ret = amdgpu_bo_reserve(bo, false);

	if (ret)
		return ret;

	ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
	if (ret)
		goto unreserve_out;

	ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
	if (ret)
		goto unreserve_out;

	dma_resv_add_fence(bo->tbo.base.resv, fence,
			   DMA_RESV_USAGE_BOOKKEEP);

unreserve_out:
	amdgpu_bo_unreserve(bo);

	return ret;
}

static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
{
	return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
@@ -1784,6 +1810,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		}
		bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
		bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
	} else {
		mutex_lock(&avm->process_info->lock);
		if (avm->process_info->eviction_fence &&
		    !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
			ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain,
				&avm->process_info->eviction_fence->base);
		mutex_unlock(&avm->process_info->lock);
		if (ret)
			goto err_validate_bo;
	}

	if (offset)
@@ -1793,6 +1828,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(

allocate_init_user_pages_failed:
err_pin_bo:
err_validate_bo:
	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
	drm_vma_node_revoke(&gobj->vma_node, drm_priv);
err_node_allow:
@@ -1866,10 +1902,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
	if (unlikely(ret))
		return ret;

	/* The eviction fence should be removed by the last unmap.
	 * TODO: Log an error condition if the bo still has the eviction fence
	 * attached
	 */
	amdgpu_amdkfd_remove_eviction_fence(mem->bo,
					process_info->eviction_fence);
	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
@@ -1998,19 +2030,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
	if (unlikely(ret))
		goto out_unreserve;

	if (mem->mapped_to_gpu_memory == 0 &&
	    !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
		/* Validate BO only once. The eviction fence gets added to BO
		 * the first time it is mapped. Validate will wait for all
		 * background evictions to complete.
		 */
		ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
		if (ret) {
			pr_debug("Validate failed\n");
			goto out_unreserve;
		}
	}

	list_for_each_entry(entry, &mem->attachments, list) {
		if (entry->bo_va->base.vm != avm || entry->is_mapped)
			continue;
@@ -2037,10 +2056,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
			 mem->mapped_to_gpu_memory);
	}

	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
		dma_resv_add_fence(bo->tbo.base.resv,
				   &avm->process_info->eviction_fence->base,
				   DMA_RESV_USAGE_BOOKKEEP);
	ret = unreserve_bo_and_vms(&ctx, false, false);

	goto out;
@@ -2074,7 +2089,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
{
	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
	struct amdkfd_process_info *process_info = avm->process_info;
	unsigned long bo_size = mem->bo->tbo.base.size;
	struct kfd_mem_attachment *entry;
	struct bo_vm_reservation_context ctx;
@@ -2115,15 +2129,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
			 mem->mapped_to_gpu_memory);
	}

	/* If BO is unmapped from all VMs, unfence it. It can be evicted if
	 * required.
	 */
	if (mem->mapped_to_gpu_memory == 0 &&
	    !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
	    !mem->bo->tbo.pin_count)
		amdgpu_amdkfd_remove_eviction_fence(mem->bo,
						process_info->eviction_fence);

unreserve_out:
	unreserve_bo_and_vms(&ctx, false, false);
out:
@@ -2351,8 +2356,20 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
	amdgpu_sync_create(&(*mem)->sync);
	(*mem)->is_imported = true;

	mutex_lock(&avm->process_info->lock);
	if (avm->process_info->eviction_fence &&
	    !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
		ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain,
				&avm->process_info->eviction_fence->base);
	mutex_unlock(&avm->process_info->lock);
	if (ret)
		goto err_remove_mem;

	return 0;

err_remove_mem:
	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
	drm_vma_node_revoke(&obj->vma_node, drm_priv);
err_free_mem:
	kfree(*mem);
err_put_obj:
+5 −0
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@
#include "amdgpu.h"
#include "atom.h"

#include <linux/device.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/acpi.h>
@@ -287,6 +288,10 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
	if (adev->flags & AMD_IS_APU)
		return false;

	/* ATRM is for on-platform devices only */
	if (dev_is_removable(&adev->pdev->dev))
		return false;

	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
		dhandle = ACPI_HANDLE(&pdev->dev);
		if (!dhandle)
+6 −1
Original line number Diff line number Diff line
@@ -1116,6 +1116,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
			return r;
	}

	/* FIXME: In theory this loop shouldn't be needed any more when
	 * amdgpu_vm_handle_moved handles all moved BOs that are reserved
	 * with p->ticket. But removing it caused test regressions, so I'm
	 * leaving it here for now.
	 */
	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		bo_va = e->bo_va;
		if (bo_va == NULL)
@@ -1130,7 +1135,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
			return r;
	}

	r = amdgpu_vm_handle_moved(adev, vm);
	r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket);
	if (r)
		return r;

+23 −15
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/device.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
@@ -1073,6 +1074,8 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
		amdgpu_psp_wait_for_bootloader(adev);
		ret = amdgpu_atomfirmware_asic_init(adev, true);
		/* TODO: check the return val and stop device initialization if boot fails */
		amdgpu_psp_query_boot_status(adev);
		return ret;
	} else {
		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -2223,7 +2226,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	struct drm_device *dev = adev_to_drm(adev);
	struct pci_dev *parent;
	int i, r;
	bool total;
@@ -2294,7 +2296,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
	    (amdgpu_is_atpx_hybrid() ||
	     amdgpu_has_atpx_dgpu_power_cntl()) &&
	    ((adev->flags & AMD_IS_APU) == 0) &&
	    !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
	    !dev_is_removable(&adev->pdev->dev))
		adev->flags |= AMD_IS_PX;

	if (!(adev->flags & AMD_IS_APU)) {
@@ -3962,6 +3964,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
				}
			}
		} else {
			switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
			case IP_VERSION(13, 0, 0):
			case IP_VERSION(13, 0, 7):
			case IP_VERSION(13, 0, 10):
				r = psp_gpu_reset(adev);
				break;
			default:
				tmp = amdgpu_reset_method;
				/* It should do a default reset when loading or reloading the driver,
				 * regardless of the module parameter reset_method.
@@ -3969,6 +3978,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
				r = amdgpu_asic_reset(adev);
				amdgpu_reset_method = tmp;
				break;
			}

			if (r) {
				dev_err(adev->dev, "asic reset on init failed\n");
				goto failed;
@@ -4132,7 +4144,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,

	px = amdgpu_device_supports_px(ddev);

	if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
	if (px || (!dev_is_removable(&adev->pdev->dev) &&
				apple_gmux_detect(NULL, NULL)))
		vga_switcheroo_register_client(adev->pdev,
					       &amdgpu_switcheroo_ops, px);
@@ -4282,7 +4294,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)

	px = amdgpu_device_supports_px(adev_to_drm(adev));

	if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
	if (px || (!dev_is_removable(&adev->pdev->dev) &&
				apple_gmux_detect(NULL, NULL)))
		vga_switcheroo_unregister_client(adev->pdev);

@@ -5566,10 +5578,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			drm_sched_start(&ring->sched, true);
		}

		if (adev->enable_mes &&
		    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))
			amdgpu_mes_self_test(tmp_adev);

		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));

Loading