Merge tag 'drm-misc-next-2022-02-23' of git://anongit.freedesktop.org/drm/drm-misc into drm-next

drm-misc-next for v5.18:

UAPI Changes:

Cross-subsystem Changes:
- Split out panel-lvds and lvds dt bindings .
- Put yes/no on/off disabled/enabled strings in linux/string_helpers.h
  and use it in drivers and tomoyo.
- Clarify dma_fence_chain and dma_fence_array should never include eachother.
- Flatten chains in syncobj's.
- Don't double add in fbdev/defio when page is already enlisted.
- Don't sort deferred-I/O pages by default in fbdev.

Core Changes:
- Fix missing pm_runtime_put_sync in bridge.
- Set modifier support to only linear fb modifier if drivers don't
  advertise support.
- As a result, we remove allow_fb_modifiers.
- Add missing clear for EDID Deep Color Modes in drm_reset_display_info.
- Assorted documentation updates.
- Warn once in drm_clflush if there is no arch support.
- Add missing select for dp helper in drm_panel_edp.
- Assorted small fixes.
- Improve fb-helper's clipping handling.
- Don't dump shmem mmaps in a core dump.
- Add accounting to ttm resource manager, and use it in amdgpu.
- Allow querying the detected eDP panel through debugfs.
- Add helpers for xrgb8888 to 8 and 1 bits gray.
- Improve drm's buddy allocator.
- Add selftests for the buddy allocator.

Driver Changes:
- Add support for nomodeset to a lot of drm drivers.
- Use drm_module_*_driver in a lot of drm drivers.
- Assorted small fixes to bridge/lt9611, v3d, vc4, vmwgfx, mxsfb, nouveau,
  bridge/dw-hdmi, panfrost, lima, ingenic, sprd, bridge/anx7625, ti-sn65dsi86.
- Add bridge/it6505.
- Create DP and DVI-I connectors in ast.
- Assorted nouveau backlight fixes.
- Rework amdgpu reset handling.
- Add dt bindings for ingenic,jz4780-dw-hdmi.
- Support reading edid through aux channel in ingenic.
- Add a drm driver for Solomon SSD130x OLED displays.
- Add simple support for sharp LQ140M1JW46.
- Add more panels to nt35560.

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/686ec871-e77f-c230-22e5-9e3bb80f064a@linux.intel.com
This commit is contained in:
Dave Airlie
2022-02-25 05:30:17 +10:00
202 changed files with 8584 additions and 1856 deletions

View File

@@ -426,10 +426,10 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
* the lock.
*/
if (in_task()) {
if (down_read_trylock(&adev->reset_sem))
up_read(&adev->reset_sem);
if (down_read_trylock(&adev->reset_domain->sem))
up_read(&adev->reset_domain->sem);
else
lockdep_assert_held(&adev->reset_sem);
lockdep_assert_held(&adev->reset_domain->sem);
}
#endif
return false;
@@ -455,9 +455,9 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
if ((reg * 4) < adev->rmmio_size) {
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
down_read_trylock(&adev->reset_sem)) {
down_read_trylock(&adev->reset_domain->sem)) {
ret = amdgpu_kiq_rreg(adev, reg);
up_read(&adev->reset_sem);
up_read(&adev->reset_domain->sem);
} else {
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
}
@@ -540,9 +540,9 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
if ((reg * 4) < adev->rmmio_size) {
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
down_read_trylock(&adev->reset_sem)) {
down_read_trylock(&adev->reset_domain->sem)) {
amdgpu_kiq_wreg(adev, reg, v);
up_read(&adev->reset_sem);
up_read(&adev->reset_domain->sem);
} else {
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
}
@@ -2331,6 +2331,49 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
return r;
}
static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
{
long timeout;
int r, i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
/* No need to setup the GPU scheduler for rings that don't need it */
if (!ring || ring->no_scheduler)
continue;
switch (ring->funcs->type) {
case AMDGPU_RING_TYPE_GFX:
timeout = adev->gfx_timeout;
break;
case AMDGPU_RING_TYPE_COMPUTE:
timeout = adev->compute_timeout;
break;
case AMDGPU_RING_TYPE_SDMA:
timeout = adev->sdma_timeout;
break;
default:
timeout = adev->video_timeout;
break;
}
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
ring->num_hw_submission, amdgpu_job_hang_limit,
timeout, adev->reset_domain->wq,
ring->sched_score, ring->name,
adev->dev);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
ring->name);
return r;
}
}
return 0;
}
/**
* amdgpu_device_ip_init - run init for hardware IPs
*
@@ -2442,8 +2485,28 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
goto init_failed;
if (adev->gmc.xgmi.num_physical_nodes > 1)
amdgpu_xgmi_add_device(adev);
/**
* In case of XGMI grab extra reference for reset domain for this device
*/
if (adev->gmc.xgmi.num_physical_nodes > 1) {
if (amdgpu_xgmi_add_device(adev) == 0) {
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
if (!hive->reset_domain ||
!amdgpu_reset_get_reset_domain(hive->reset_domain)) {
r = -ENOENT;
goto init_failed;
}
/* Drop the early temporary reset domain we created for device */
amdgpu_reset_put_reset_domain(adev->reset_domain);
adev->reset_domain = hive->reset_domain;
}
}
r = amdgpu_device_init_schedulers(adev);
if (r)
goto init_failed;
/* Don't init kfd if whole hive need to be reset during init */
if (!adev->gmc.xgmi.pending_reset)
@@ -3543,8 +3606,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
atomic_set(&adev->in_gpu_reset, 0);
init_rwsem(&adev->reset_sem);
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
mutex_init(&adev->pm.stable_pstate_ctx_lock);
@@ -3630,6 +3691,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
/*
* Reset domain needs to be present early, before XGMI hive discovered
* (if any) and intitialized to use reset sem and in_gpu reset flag
* early on during init.
*/
adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE ,"amdgpu-reset-dev");
if (!adev->reset_domain)
return -ENOMEM;
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
@@ -4006,6 +4076,9 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
if (adev->mman.discovery_bin)
amdgpu_discovery_fini(adev);
amdgpu_reset_put_reset_domain(adev->reset_domain);
adev->reset_domain = NULL;
kfree(adev->pci_state);
}
@@ -4817,17 +4890,8 @@ end:
return r;
}
static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive)
static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
{
if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
return false;
if (hive) {
down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
} else {
down_write(&adev->reset_sem);
}
switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_MODE1:
@@ -4840,56 +4904,12 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
adev->mp1_state = PP_MP1_STATE_NONE;
break;
}
return true;
}
static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
amdgpu_vf_error_trans_all(adev);
adev->mp1_state = PP_MP1_STATE_NONE;
atomic_set(&adev->in_gpu_reset, 0);
up_write(&adev->reset_sem);
}
/*
* to lockup a list of amdgpu devices in a hive safely, if not a hive
* with multiple nodes, it will be similar as amdgpu_device_lock_adev.
*
* unlock won't require roll back.
*/
static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
{
struct amdgpu_device *tmp_adev = NULL;
if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
if (!hive) {
dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
return -ENODEV;
}
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
if (!amdgpu_device_lock_adev(tmp_adev, hive))
goto roll_back;
}
} else if (!amdgpu_device_lock_adev(adev, hive))
return -EAGAIN;
return 0;
roll_back:
if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
/*
* if the lockup iteration break in the middle of a hive,
* it may means there may has a race issue,
* or a hive device locked up independently.
* we may be in trouble and may not, so will try to roll back
* the lock and give out a warnning.
*/
dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock");
list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) {
amdgpu_device_unlock_adev(tmp_adev);
}
}
return -EAGAIN;
}
static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
@@ -5023,7 +5043,7 @@ retry:
}
/**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
* amdgpu_device_gpu_recover_imp - reset the asic and recover scheduler
*
* @adev: amdgpu_device pointer
* @job: which job trigger hang
@@ -5033,7 +5053,7 @@ retry:
* Returns 0 for success or an error on failure.
*/
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
struct list_head device_list, *device_list_handle = NULL;
@@ -5067,26 +5087,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
dev_info(adev->dev, "GPU %s begin!\n",
need_emergency_restart ? "jobs stop":"reset");
/*
* Here we trylock to avoid chain of resets executing from
* either trigger by jobs on different adevs in XGMI hive or jobs on
* different schedulers for same device while this TO handler is running.
* We always reset all schedulers for device and all devices for XGMI
* hive so that should take care of them too.
*/
if (!amdgpu_sriov_vf(adev))
hive = amdgpu_get_xgmi_hive(adev);
if (hive) {
if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
job ? job->base.id : -1, hive->hive_id);
amdgpu_put_xgmi_hive(hive);
if (job && job->vm)
drm_sched_increase_karma(&job->base);
return 0;
}
if (hive)
mutex_lock(&hive->hive_lock);
}
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
@@ -5094,22 +5098,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
reset_context.hive = hive;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
/*
* lock the device before we try to operate the linked list
* if didn't get the device lock, don't touch the linked list since
* others may iterating it.
*/
r = amdgpu_device_lock_hive_adev(adev, hive);
if (r) {
dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
job ? job->base.id : -1);
/* even we skipped this reset, still need to set the job to guilty */
if (job && job->vm)
drm_sched_increase_karma(&job->base);
goto skip_recovery;
}
/*
* Build list of devices to reset.
* In case we are in XGMI hive mode, resort the device list
@@ -5127,8 +5115,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
device_list_handle = &device_list;
}
/* We need to lock reset domain only once both for XGMI and single device */
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list);
amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
/* block all schedulers and reset given job's ring */
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
amdgpu_device_set_mp1_state(tmp_adev);
/*
* Try to put the audio codec into suspend state
* before gpu reset started.
@@ -5280,21 +5276,55 @@ skip_sched_resume:
if (audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev);
amdgpu_device_unlock_adev(tmp_adev);
amdgpu_device_unset_mp1_state(tmp_adev);
}
skip_recovery:
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list);
amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
if (hive) {
atomic_set(&hive->in_reset, 0);
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);
}
if (r && r != -EAGAIN)
if (r)
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
return r;
}
struct amdgpu_recover_work_struct {
struct work_struct base;
struct amdgpu_device *adev;
struct amdgpu_job *job;
int ret;
};
static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
{
struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
}
/*
* Serialize gpu recover into reset domain single threaded wq
*/
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base))
return -EAGAIN;
flush_work(&work.base);
return work.ret;
}
/**
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
*
@@ -5482,20 +5512,6 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
return 0;
}
static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
{
int i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->sched.thread)
continue;
cancel_delayed_work_sync(&ring->sched.work_tdr);
}
}
/**
* amdgpu_pci_error_detected - Called when a PCI error is detected.
* @pdev: PCI device struct
@@ -5526,14 +5542,11 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
/* Fatal error, prepare for slot reset */
case pci_channel_io_frozen:
/*
* Cancel and wait for all TDRs in progress if failing to
* set adev->in_gpu_reset in amdgpu_device_lock_adev
*
* Locking adev->reset_sem will prevent any external access
* Locking adev->reset_domain->sem will prevent any external access
* to GPU during PCI error recovery
*/
while (!amdgpu_device_lock_adev(adev, NULL))
amdgpu_cancel_all_tdr(adev);
amdgpu_device_lock_reset_domain(adev->reset_domain);
amdgpu_device_set_mp1_state(adev);
/*
* Block any work scheduling as we do for regular GPU reset
@@ -5640,7 +5653,8 @@ out:
DRM_INFO("PCIe error recovery succeeded\n");
} else {
DRM_ERROR("PCIe error recovery failed, err:%d", r);
amdgpu_device_unlock_adev(adev);
amdgpu_device_unset_mp1_state(adev);
amdgpu_device_unlock_reset_domain(adev->reset_domain);
}
return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
@@ -5677,7 +5691,8 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
drm_sched_start(&ring->sched, true);
}
amdgpu_device_unlock_adev(adev);
amdgpu_device_unset_mp1_state(adev);
amdgpu_device_unlock_reset_domain(adev->reset_domain);
}
bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
@@ -5754,6 +5769,11 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
amdgpu_asic_invalidate_hdp(adev, ring);
}
int amdgpu_in_reset(struct amdgpu_device *adev)
{
return atomic_read(&adev->reset_domain->in_gpu_reset);
}
/**
* amdgpu_device_halt() - bring hardware to some kind of halt state
*