Commit 01570b44 authored by Matthew Auld's avatar Matthew Auld
Browse files

drm/xe/bmg: implement Wa_16023588340



This involves enabling L2 caching of host-side memory accesses to VRAM
through the CPU BAR. The main fallout here is with display, since VRAM
writes from the CPU can now be cached in the GPU L2, and display is never
coherent with caches, so it needs various manual flushing. In the case of
FBC, we disable it (in a later patch) due to complications in getting it
to work correctly.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Vinod Govindapillai <vinod.govindapillai@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240703124338.208220-3-matthew.auld@intel.com
parent 3078d9c8
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \

uses_generated_oob := \
	$(obj)/xe_ggtt.o \
	$(obj)/xe_device.o \
	$(obj)/xe_gsc.o \
	$(obj)/xe_gt.o \
	$(obj)/xe_guc.o \
	$(obj)/xe_guc_ads.o \
	$(obj)/xe_guc_pc.o \
	$(obj)/xe_migrate.o \
	$(obj)/xe_pat.o \
	$(obj)/xe_ring_ops.o \
	$(obj)/xe_vm.o \
	$(obj)/xe_wa.o \
+8 −0
Original line number Diff line number Diff line
@@ -7,6 +7,8 @@
#include "intel_display_types.h"
#include "intel_dsb_buffer.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_device_types.h"
#include "xe_gt.h"

u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
@@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)

/*
 * Store the 32-bit value @val at dword index @idx (byte offset idx * 4) of the
 * mapping that backs @dsb_buf, then ask the owning device for an L2 flush.
 *
 * The flush call is issued after every single write; xe_device_l2_flush()
 * itself returns early when Wa_16023588340 does not apply.
 */
void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
{
	struct xe_bo *bo = dsb_buf->vma->bo;

	iosys_map_wr(&bo->vmap, idx * 4, u32, val);

	/* The CPU write above may be held in a cache; flush it out. */
	xe_device_l2_flush(bo->tile->xe);
}

u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
@@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)

/*
 * Fill @size bytes of the mapping that backs @dsb_buf with @val, starting at
 * dword index @idx (byte offset idx * 4), then ask the owning device for an
 * L2 flush.
 *
 * An out-of-range @idx only triggers a WARN; the fill is still performed.
 *
 * NOTE(review): the bound is computed as (buf_size - size); if buf_size is
 * unsigned and @size exceeds it, the subtraction wraps and the WARN will not
 * fire - confirm callers never pass such a size.
 */
void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
{
	struct xe_bo *bo = dsb_buf->vma->bo;
	size_t last_idx = (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf);

	WARN_ON(idx > last_idx);

	iosys_map_memset(&bo->vmap, idx * 4, val, size);

	/* The CPU writes above may be held in a cache; flush them out. */
	xe_device_l2_flush(bo->tile->xe);
}

bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
+3 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#include "intel_fb.h"
#include "intel_fb_pin.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_pm.h"
@@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
	if (ret)
		goto err_unpin;

	/* Ensure DPT writes are flushed */
	xe_device_l2_flush(xe);
	return vma;

err_unpin:
+8 −0
Original line number Diff line number Diff line
@@ -80,6 +80,9 @@
#define   LE_CACHEABILITY_MASK			REG_GENMASK(1, 0)
#define   LE_CACHEABILITY(value)		REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)

#define XE2_GAMREQSTRM_CTRL			XE_REG(0x4194)
#define   CG_DIS_CNTLBUS			REG_BIT(6)

#define CCS_AUX_INV				XE_REG(0x4208)

#define VD0_AUX_INV				XE_REG(0x4218)
@@ -374,6 +377,11 @@

#define XEHPC_L3CLOS_MASK(i)			XE_REG_MCR(0xb194 + (i) * 8)

/*
 * Used by xe_device_l2_flush(): it writes 0x1 to this register and then polls
 * until bit 0 reads back as zero, reporting "Global invalidation timeout" if
 * that does not happen in time.
 */
#define XE2_GLOBAL_INVAL			XE_REG(0xb404)

#define SCRATCH1LPFC				XE_REG(0xb474)
#define   EN_L3_RW_CCS_CACHE_FLUSH		REG_BIT(0)

#define XE2LPM_L3SQCREG5			XE_REG_MCR(0xb658)

#define XE2_TDF_CTRL				XE_REG(0xb418)
+30 −0
Original line number Diff line number Diff line
@@ -54,6 +54,9 @@
#include "xe_vm.h"
#include "xe_vram.h"
#include "xe_wait_user_fence.h"
#include "xe_wa.h"

#include <generated/xe_wa_oob.h>

static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
@@ -788,6 +791,11 @@ void xe_device_td_flush(struct xe_device *xe)
	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
		return;

	if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
		xe_device_l2_flush(xe);
		return;
	}

	for_each_gt(gt, xe, id) {
		if (xe_gt_is_media_type(gt))
			continue;
@@ -811,6 +819,28 @@ void xe_device_td_flush(struct xe_device *xe)
	}
}

/**
 * xe_device_l2_flush() - Trigger a global invalidation for Wa_16023588340
 * @xe: The device
 *
 * Does nothing unless Wa_16023588340 is active on the root MMIO GT. Otherwise
 * takes the GT forcewake domain, writes 0x1 to XE2_GLOBAL_INVAL and waits for
 * bit 0 of that register to read back as zero before releasing forcewake.
 *
 * Failures are not reported to the caller: if forcewake cannot be acquired the
 * function returns without touching the register, and a wait timeout is only
 * logged (once).
 */
void xe_device_l2_flush(struct xe_device *xe)
{
	struct xe_gt *root_gt = xe_root_mmio_gt(xe);

	if (!XE_WA(root_gt, 16023588340))
		return;

	/* Best effort: skip the flush entirely if forcewake is unavailable. */
	if (xe_force_wake_get(gt_to_fw(root_gt), XE_FW_GT))
		return;

	xe_mmio_write32(root_gt, XE2_GLOBAL_INVAL, 0x1);

	/*
	 * Wait for bit 0 to clear. The timeout of 150 is presumably in
	 * microseconds - TODO confirm against xe_mmio_wait32().
	 */
	if (xe_mmio_wait32(root_gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true))
		xe_gt_err_once(root_gt, "Global invalidation timeout\n");

	xe_force_wake_put(gt_to_fw(root_gt), XE_FW_GT);
}

u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
	return xe_device_has_flat_ccs(xe) ?
Loading