Merge tag 'drm-intel-gt-next-2022-02-17' of git://anongit.freedesktop.org/drm/drm-intel into drm-intel-next

UAPI Changes:

- Weak parallel submission support for execlists

  Minimal implementation of the parallel submission support for
  execlists backend that was previously only implemented for GuC.
  Support one sibling non-virtual engine.

Core Changes:

- Two backmerges of drm/drm-next for header file renames/changes and
  i915_regs reorganization

Driver Changes:

- Add new DG2 subplatform: DG2-G12 (Matt R)
- Add new DG2 workarounds (Matt R, Ram, Bruce)
- Handle pre-programmed WOPCM registers for DG2+ (Daniele)
- Update guc shim control programming on XeHP SDV+ (Daniele)
- Add RPL-S C0/D0 stepping information (Anusha)
- Improve GuC ADS initialization to work on ARM64 on dGFX (Lucas)

- Fix KMD and GuC race on accessing PMU busyness (Umesh)
- Use PM timestamp instead of RING TIMESTAMP for reference in PMU with GuC (Umesh)
- Report error on invalid reset notification from GuC (John)
- Avoid WARN splat by holding RPM wakelock during PXP unbind (Juston)
- Fixes to parallel submission implementation (Matt B.)
- Improve GuC loading status check/error reports (John)
- Tweak TTM LRU priority hint selection (Matt A.)
- Align the plane_vma to min_page_size of stolen mem (Ram)

- Introduce vma resources and implement async unbinding (Thomas)
- Use struct vma_resource instead of struct vma_snapshot (Thomas)
- Return some TTM accel move errors instead of trying memcpy move (Thomas)
- Fix a race between vma / object destruction and unbinding (Thomas)
- Remove short-term pins from execbuf (Maarten)
- Update to GuC version 69.0.3 (John, Michal Wa.)
- Improvements to GT reset paths in GuC backend (Matt B.)
- Use shrinker_release_pages instead of writeback in shmem object hooks (Matt A., Tvrtko)
- Use trylock instead of blocking lock when freeing GEM objects (Maarten)
- Allocate intel_engine_coredump_alloc with ALLOW_FAIL (Matt B.)
- Fixes to object unmapping and purging (Matt A)
- Check for wedged device in GuC backend (John)
- Avoid lockdep splat by locking dpt_obj around set_cache_level (Maarten)
- Allow dead vm to unbind vma's without lock (Maarten)
- s/engine->i915/i915/ for DG2 engine workarounds (Matt R)

- Use to_gt() helper for GGTT accesses (Michal Wi.)
- Selftest improvements (Matt B., Thomas, Ram)
- Coding style and compiler warning fixes (Matt B., Jasmine, Andi, Colin, Gustavo, Dan)

From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Yg4i2aCZvvee5Eai@jlahtine-mobl.ger.corp.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Fixed conflicts while applying, using the fixups/drm-intel-gt-next.patch
from drm-rerere's 1f2b1742abdd ("2022y-02m-23d-16h-07m-57s UTC: drm-tip
rerere cache update")]
This commit is contained in:
Rodrigo Vivi
2022-02-23 14:19:43 -05:00
334 changed files with 8923 additions and 4047 deletions

View File

@@ -32,7 +32,6 @@
#include "i915_gem_ioctls.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
#include "i915_vma_snapshot.h"
struct eb_vma {
struct i915_vma *vma;
@@ -444,7 +443,7 @@ eb_pin_vma(struct i915_execbuffer *eb,
else
pin_flags = entry->offset & PIN_OFFSET_MASK;
pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE;
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
pin_flags |= PIN_GLOBAL;
@@ -462,17 +461,15 @@ eb_pin_vma(struct i915_execbuffer *eb,
entry->pad_to_size,
entry->alignment,
eb_pin_flags(entry, ev->flags) |
PIN_USER | PIN_NOEVICT);
PIN_USER | PIN_NOEVICT | PIN_VALIDATE);
if (unlikely(err))
return err;
}
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
err = i915_vma_pin_fence(vma);
if (unlikely(err)) {
i915_vma_unpin(vma);
if (unlikely(err))
return err;
}
if (vma->fence)
ev->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -488,13 +485,9 @@ eb_pin_vma(struct i915_execbuffer *eb,
static inline void
eb_unreserve_vma(struct eb_vma *ev)
{
if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
return;
if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
__i915_vma_unpin_fence(ev->vma);
__i915_vma_unpin(ev->vma);
ev->flags &= ~__EXEC_OBJECT_RESERVED;
}
@@ -676,10 +669,8 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
err = i915_vma_pin_fence(vma);
if (unlikely(err)) {
i915_vma_unpin(vma);
if (unlikely(err))
return err;
}
if (vma->fence)
ev->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -691,85 +682,95 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
return 0;
}
static int eb_reserve(struct i915_execbuffer *eb)
static bool eb_unbind(struct i915_execbuffer *eb, bool force)
{
const unsigned int count = eb->buffer_count;
unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
unsigned int i;
struct list_head last;
bool unpinned = false;
/* Resort *all* the objects into priority order */
INIT_LIST_HEAD(&eb->unbound);
INIT_LIST_HEAD(&last);
for (i = 0; i < count; i++) {
struct eb_vma *ev = &eb->vma[i];
unsigned int flags = ev->flags;
if (!force && flags & EXEC_OBJECT_PINNED &&
flags & __EXEC_OBJECT_HAS_PIN)
continue;
unpinned = true;
eb_unreserve_vma(ev);
if (flags & EXEC_OBJECT_PINNED)
/* Pinned must have their slot */
list_add(&ev->bind_link, &eb->unbound);
else if (flags & __EXEC_OBJECT_NEEDS_MAP)
/* Map require the lowest 256MiB (aperture) */
list_add_tail(&ev->bind_link, &eb->unbound);
else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
/* Prioritise 4GiB region for restricted bo */
list_add(&ev->bind_link, &last);
else
list_add_tail(&ev->bind_link, &last);
}
list_splice_tail(&last, &eb->unbound);
return unpinned;
}
static int eb_reserve(struct i915_execbuffer *eb)
{
struct eb_vma *ev;
unsigned int i, pass;
unsigned int pass;
int err = 0;
bool unpinned;
/*
* Attempt to pin all of the buffers into the GTT.
* This is done in 3 phases:
* This is done in 2 phases:
*
* 1a. Unbind all objects that do not match the GTT constraints for
* the execbuffer (fenceable, mappable, alignment etc).
* 1b. Increment pin count for already bound objects.
* 2. Bind new objects.
* 3. Decrement pin count.
* 1. Unbind all objects that do not match the GTT constraints for
* the execbuffer (fenceable, mappable, alignment etc).
* 2. Bind new objects.
*
* This avoid unnecessary unbinding of later objects in order to make
* room for the earlier objects *unless* we need to defragment.
*
* Defragmenting is skipped if all objects are pinned at a fixed location.
*/
pass = 0;
do {
for (pass = 0; pass <= 2; pass++) {
int pin_flags = PIN_USER | PIN_VALIDATE;
if (pass == 0)
pin_flags |= PIN_NONBLOCK;
if (pass >= 1)
unpinned = eb_unbind(eb, pass == 2);
if (pass == 2) {
err = mutex_lock_interruptible(&eb->context->vm->mutex);
if (!err) {
err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
mutex_unlock(&eb->context->vm->mutex);
}
if (err)
return err;
}
list_for_each_entry(ev, &eb->unbound, bind_link) {
err = eb_reserve_vma(eb, ev, pin_flags);
if (err)
break;
}
if (err != -ENOSPC)
return err;
/* Resort *all* the objects into priority order */
INIT_LIST_HEAD(&eb->unbound);
INIT_LIST_HEAD(&last);
for (i = 0; i < count; i++) {
unsigned int flags;
ev = &eb->vma[i];
flags = ev->flags;
if (flags & EXEC_OBJECT_PINNED &&
flags & __EXEC_OBJECT_HAS_PIN)
continue;
eb_unreserve_vma(ev);
if (flags & EXEC_OBJECT_PINNED)
/* Pinned must have their slot */
list_add(&ev->bind_link, &eb->unbound);
else if (flags & __EXEC_OBJECT_NEEDS_MAP)
/* Map require the lowest 256MiB (aperture) */
list_add_tail(&ev->bind_link, &eb->unbound);
else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
/* Prioritise 4GiB region for restricted bo */
list_add(&ev->bind_link, &last);
else
list_add_tail(&ev->bind_link, &last);
}
list_splice_tail(&last, &eb->unbound);
switch (pass++) {
case 0:
break;
}
case 1:
/* Too fragmented, unbind everything and retry */
mutex_lock(&eb->context->vm->mutex);
err = i915_gem_evict_vm(eb->context->vm);
mutex_unlock(&eb->context->vm->mutex);
if (err)
return err;
break;
default:
return -ENOSPC;
}
pin_flags = PIN_USER;
} while (1);
return err;
}
static int eb_select_context(struct i915_execbuffer *eb)
@@ -1098,7 +1099,7 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
{
struct drm_i915_private *i915 =
container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
return &i915->ggtt;
return to_gt(i915)->ggtt;
}
static void reloc_cache_unmap(struct reloc_cache *cache)
@@ -1217,10 +1218,11 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
return vaddr;
}
static void *reloc_iomap(struct drm_i915_gem_object *obj,
static void *reloc_iomap(struct i915_vma *batch,
struct i915_execbuffer *eb,
unsigned long page)
{
struct drm_i915_gem_object *obj = batch->obj;
struct reloc_cache *cache = &eb->reloc_cache;
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
unsigned long offset;
@@ -1230,7 +1232,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
intel_gt_flush_ggtt_writes(ggtt->vm.gt);
io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
} else {
struct i915_vma *vma;
struct i915_vma *vma = ERR_PTR(-ENODEV);
int err;
if (i915_gem_object_is_tiled(obj))
@@ -1243,10 +1245,23 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
if (err)
return ERR_PTR(err);
vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
PIN_MAPPABLE |
PIN_NONBLOCK /* NOWARN */ |
PIN_NOEVICT);
/*
* i915_gem_object_ggtt_pin_ww may attempt to remove the batch
* VMA from the object list because we no longer pin.
*
* Only attempt to pin the batch buffer to ggtt if the current batch
* is not inside ggtt, or the batch buffer is not misplaced.
*/
if (!i915_is_ggtt(batch->vm)) {
vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
PIN_MAPPABLE |
PIN_NONBLOCK /* NOWARN */ |
PIN_NOEVICT);
} else if (i915_vma_is_map_and_fenceable(batch)) {
__i915_vma_pin(batch);
vma = batch;
}
if (vma == ERR_PTR(-EDEADLK))
return vma;
@@ -1284,7 +1299,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
return vaddr;
}
static void *reloc_vaddr(struct drm_i915_gem_object *obj,
static void *reloc_vaddr(struct i915_vma *vma,
struct i915_execbuffer *eb,
unsigned long page)
{
@@ -1296,9 +1311,9 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
} else {
vaddr = NULL;
if ((cache->vaddr & KMAP) == 0)
vaddr = reloc_iomap(obj, eb, page);
vaddr = reloc_iomap(vma, eb, page);
if (!vaddr)
vaddr = reloc_kmap(obj, cache, page);
vaddr = reloc_kmap(vma->obj, cache, page);
}
return vaddr;
@@ -1339,7 +1354,7 @@ relocate_entry(struct i915_vma *vma,
void *vaddr;
repeat:
vaddr = reloc_vaddr(vma->obj, eb,
vaddr = reloc_vaddr(vma, eb,
offset >> PAGE_SHIFT);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -1414,7 +1429,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
mutex_lock(&vma->vm->mutex);
err = i915_vma_bind(target->vma,
target->vma->obj->cache_level,
PIN_GLOBAL, NULL);
PIN_GLOBAL, NULL, NULL);
mutex_unlock(&vma->vm->mutex);
reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
if (err)
@@ -1944,7 +1959,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
unsigned int i = count, j;
struct i915_vma_snapshot *vsnap;
while (i--) {
struct eb_vma *ev = &eb->vma[i];
@@ -1954,11 +1968,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
if (!(flags & EXEC_OBJECT_CAPTURE))
continue;
vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
if (!vsnap)
continue;
i915_vma_snapshot_init(vsnap, vma, "user");
for_each_batch_create_order(eb, j) {
struct i915_capture_list *capture;
@@ -1967,10 +1976,9 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
continue;
capture->next = eb->capture_lists[j];
capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
capture->vma_res = i915_vma_resource_get(vma->resource);
eb->capture_lists[j] = capture;
}
i915_vma_snapshot_put(vsnap);
}
}
@@ -2201,7 +2209,7 @@ shadow_batch_pin(struct i915_execbuffer *eb,
if (IS_ERR(vma))
return vma;
err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE);
if (err)
return ERR_PTR(err);
@@ -2215,7 +2223,7 @@ static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i9
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
if (eb->batch_flags & I915_DISPATCH_SECURE)
return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, PIN_VALIDATE);
return NULL;
}
@@ -2266,13 +2274,12 @@ static int eb_parse(struct i915_execbuffer *eb)
err = i915_gem_object_lock(pool->obj, &eb->ww);
if (err)
goto err;
return err;
shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
if (IS_ERR(shadow)) {
err = PTR_ERR(shadow);
goto err;
}
if (IS_ERR(shadow))
return PTR_ERR(shadow);
intel_gt_buffer_pool_mark_used(pool);
i915_gem_object_set_readonly(shadow->obj);
shadow->private = pool;
@@ -2284,25 +2291,21 @@ static int eb_parse(struct i915_execbuffer *eb)
shadow = shadow_batch_pin(eb, pool->obj,
&eb->gt->ggtt->vm,
PIN_GLOBAL);
if (IS_ERR(shadow)) {
err = PTR_ERR(shadow);
shadow = trampoline;
goto err_shadow;
}
if (IS_ERR(shadow))
return PTR_ERR(shadow);
shadow->private = pool;
eb->batch_flags |= I915_DISPATCH_SECURE;
}
batch = eb_dispatch_secure(eb, shadow);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto err_trampoline;
}
if (IS_ERR(batch))
return PTR_ERR(batch);
err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
if (err)
goto err_trampoline;
return err;
err = intel_engine_cmd_parser(eb->context->engine,
eb->batches[0]->vma,
@@ -2310,7 +2313,7 @@ static int eb_parse(struct i915_execbuffer *eb)
eb->batch_len[0],
shadow, trampoline);
if (err)
goto err_unpin_batch;
return err;
eb->batches[0] = &eb->vma[eb->buffer_count++];
eb->batches[0]->vma = i915_vma_get(shadow);
@@ -2329,17 +2332,6 @@ secure_batch:
eb->batches[0]->vma = i915_vma_get(batch);
}
return 0;
err_unpin_batch:
if (batch)
i915_vma_unpin(batch);
err_trampoline:
if (trampoline)
i915_vma_unpin(trampoline);
err_shadow:
i915_vma_unpin(shadow);
err:
return err;
}
static int eb_request_submit(struct i915_execbuffer *eb,
@@ -2508,9 +2500,14 @@ static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
timeout) < 0) {
i915_request_put(rq);
tl = intel_context_timeline_lock(ce);
/*
* Error path, cannot use intel_context_timeline_lock as
* that is user interruptable and this clean up step
* must be done.
*/
mutex_lock(&ce->timeline->mutex);
intel_context_exit(ce);
intel_context_timeline_unlock(tl);
mutex_unlock(&ce->timeline->mutex);
if (nonblock)
return -EWOULDBLOCK;
@@ -3273,9 +3270,8 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
* _onstack interface.
*/
if (eb->batches[i]->vma)
i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot,
eb->batches[i]->vma,
"batch");
eb->requests[i]->batch_res =
i915_vma_resource_get(eb->batches[i]->vma->resource);
if (eb->batch_pool) {
GEM_BUG_ON(intel_context_is_parallel(eb->context));
intel_gt_buffer_pool_mark_active(eb->batch_pool,
@@ -3460,8 +3456,6 @@ err_request:
err_vma:
eb_release_vmas(&eb, true);
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
WARN_ON(err == -EDEADLK);
i915_gem_ww_ctx_fini(&eb.ww);