drm/amdgpu: Add support for CPERs on virtualization

Add support for CPERs on VFs.

VFs do not receive PMFW messages directly; as such, they need to
query them from the host. To avoid hitting the host event guard,
CPER queries need to be rate limited. CPER queries share the same
RAS telemetry buffer as the error count query, so a mutex protecting
the shared buffer was added as well.

For readability, amdgpu_detect_virtualization() was refactored
into multiple individual functions.

Signed-off-by: Tony Yi <Tony.Yi@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Tony Yi
2025-02-26 17:03:10 -05:00
committed by Alex Deucher
parent ca17c8e149
commit a91d91b600
5 changed files with 195 additions and 13 deletions

View File

@@ -578,12 +578,32 @@ out:
return result;
}
/*
 * Read handler installed through amdgpu_debugfs_virt_ring_fops.
 *
 * A request whose file offset or byte count is not a multiple of four is
 * rejected with -EINVAL. When the ring stored in the inode's private data
 * is of type AMDGPU_RING_TYPE_CPER, amdgpu_virt_req_ras_cper_dump() is
 * called first (presumably to fetch fresh CPER records from the host --
 * confirm against that helper); its return value is not checked. The
 * request is then forwarded unchanged to amdgpu_debugfs_ring_read().
 *
 * Returns -EINVAL on a misaligned request, otherwise whatever
 * amdgpu_debugfs_ring_read() returns.
 */
static ssize_t amdgpu_debugfs_virt_ring_read(struct file *f, char __user *buf,
					     size_t size, loff_t *pos)
{
	struct amdgpu_ring *target = file_inode(f)->i_private;

	/* Both the offset and the length must be dword aligned. */
	if ((*pos & 3) != 0 || (size & 3) != 0)
		return -EINVAL;

	switch (target->funcs->type) {
	case AMDGPU_RING_TYPE_CPER:
		/* Only the CPER ring triggers a dump request before reading. */
		amdgpu_virt_req_ras_cper_dump(target->adev, false);
		break;
	default:
		break;
	}

	return amdgpu_debugfs_ring_read(f, buf, size, pos);
}
/* File operations table that routes reads to amdgpu_debugfs_ring_read(). */
static const struct file_operations amdgpu_debugfs_ring_fops = {
	.owner  = THIS_MODULE,
	.llseek = default_llseek,
	.read   = amdgpu_debugfs_ring_read,
};
/*
 * File operations table that routes reads to
 * amdgpu_debugfs_virt_ring_read() instead of amdgpu_debugfs_ring_read().
 */
static const struct file_operations amdgpu_debugfs_virt_ring_fops = {
	.owner  = THIS_MODULE,
	.llseek = default_llseek,
	.read   = amdgpu_debugfs_virt_ring_read,
};
static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
@@ -671,9 +691,14 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
char name[32];
sprintf(name, "amdgpu_ring_%s", ring->name);
debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
&amdgpu_debugfs_ring_fops,
ring->ring_size + 12);
if (amdgpu_sriov_vf(adev))
debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
&amdgpu_debugfs_virt_ring_fops,
ring->ring_size + 12);
else
debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
&amdgpu_debugfs_ring_fops,
ring->ring_size + 12);
if (ring->mqd_obj) {
sprintf(name, "amdgpu_mqd_%s", ring->name);