Commit a9f905ae authored by Ashutosh Dixit
Browse files

drm/xe/oa/uapi: Initialize OA units



Initialize the OA unit data structs for each gt during device probe. Also
assign OA units to hardware engines.

v2: Remove XE_OA_UNIT_OAG/XE_OA_UNIT_OAM_SAMEDIA_0 enum (Umesh)
    Change mtl_oa_base to 0x13000 (Umesh)
v3: Switch to drmm_ functions and other cleanups (Michal)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-5-ashutosh.dixit@intel.com
parent 67977882
Loading
Loading
Loading
Loading
+92 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2023 Intel Corporation
 */

#ifndef __XE_OA_REGS__
#define __XE_OA_REGS__

/* RPM_CONFIG1 register and its GT_NOA_ENABLE bit (bit 9) */
#define RPM_CONFIG1			XE_REG(0xd04)
#define   GT_NOA_ENABLE			REG_BIT(9)

/*
 * EU_PERF_CNTL0..6. The entries are listed in ascending MMIO offset order,
 * which is why the register indices appear interleaved (0, 4, 1, 5, 2, 6, 3).
 */
#define EU_PERF_CNTL0			XE_REG(0xe458)
#define EU_PERF_CNTL4			XE_REG(0xe45c)
#define EU_PERF_CNTL1			XE_REG(0xe558)
#define EU_PERF_CNTL5			XE_REG(0xe55c)
#define EU_PERF_CNTL2			XE_REG(0xe658)
#define EU_PERF_CNTL6			XE_REG(0xe65c)
#define EU_PERF_CNTL3			XE_REG(0xe758)

/* NOTE(review): presumably the OA TLB invalidation control register - confirm */
#define OA_TLB_INV_CR			XE_REG(0xceec)

/* OAR unit: registers at fixed MMIO offsets, plus one base-relative register */
#define OAR_OACONTROL			XE_REG(0x2960)
#define  OAR_OACONTROL_COUNTER_SEL_MASK	REG_GENMASK(3, 1)
#define  OAR_OACONTROL_COUNTER_ENABLE	REG_BIT(0)

/* Located at offset 0x360 from the caller-supplied @base */
#define OACTXCONTROL(base) XE_REG((base) + 0x360)
#define OAR_OASTATUS			XE_REG(0x2968)
/*
 * NOTE(review): by indentation this bit reads as a field of OAR_OASTATUS, but
 * its name carries no OAR_OASTATUS_ prefix - confirm which register owns it.
 */
#define  OA_COUNTER_RESUME		REG_BIT(0)

/* OAG unit: all registers are at fixed MMIO offsets (no per-unit base) */
#define OAG_OAGLBCTXCTRL		XE_REG(0x2b28)
#define  OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK	REG_GENMASK(7, 2)
#define  OAG_OAGLBCTXCTRL_TIMER_ENABLE		REG_BIT(1)
#define  OAG_OAGLBCTXCTRL_COUNTER_RESUME	REG_BIT(0)

/* Head/tail pointer fields occupy bits 31:6; bits 5:0 are outside the masks */
#define OAG_OAHEADPTR				XE_REG(0xdb00)
#define  OAG_OAHEADPTR_MASK			REG_GENMASK(31, 6)
#define OAG_OATAILPTR				XE_REG(0xdb04)
#define  OAG_OATAILPTR_MASK			REG_GENMASK(31, 6)

/*
 * OA buffer register. The size field (bits 5:3) is an encoded value: field
 * value v selects a buffer of 128K << v, i.e. 0 -> 128K up to 7 -> 16M.
 */
#define OAG_OABUFFER		XE_REG(0xdb08)
#define  OABUFFER_SIZE_MASK	REG_GENMASK(5, 3)
#define  OABUFFER_SIZE_128K	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0)
#define  OABUFFER_SIZE_256K	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1)
#define  OABUFFER_SIZE_512K	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2)
#define  OABUFFER_SIZE_1M	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3)
#define  OABUFFER_SIZE_2M	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4)
#define  OABUFFER_SIZE_4M	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5)
#define  OABUFFER_SIZE_8M	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6)
#define  OABUFFER_SIZE_16M	REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7)
#define  OAG_OABUFFER_MEMORY_SELECT		REG_BIT(0) /* 0: PPGTT, 1: GGTT */

#define OAG_OACONTROL				XE_REG(0xdaf4)
#define  OAG_OACONTROL_OA_CCS_SELECT_MASK	REG_GENMASK(18, 16)
#define  OAG_OACONTROL_OA_COUNTER_SEL_MASK	REG_GENMASK(4, 2)
#define  OAG_OACONTROL_OA_COUNTER_ENABLE	REG_BIT(0)
/* Common to all OA units (single-bit fields, bits 9 and 8 of the control register) */
#define  OA_OACONTROL_REPORT_BC_MASK		REG_GENMASK(9, 9)
#define  OA_OACONTROL_COUNTER_SIZE_MASK		REG_GENMASK(8, 8)

/* Unlike the other OAG registers, this one is declared with XE_REG_OPTION_MASKED */
#define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED)
#define  OAG_OA_DEBUG_INCLUDE_CLK_RATIO			REG_BIT(6)
#define  OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS		REG_BIT(5)
#define  OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS	REG_BIT(1)

/* Status bits; the names carry no OAG_ prefix */
#define OAG_OASTATUS			XE_REG(0xdafc)
#define  OASTATUS_MMIO_TRG_Q_FULL	REG_BIT(6)
#define  OASTATUS_COUNTER_OVERFLOW	REG_BIT(2)
#define  OASTATUS_BUFFER_OVERFLOW	REG_BIT(1)
#define  OASTATUS_REPORT_LOST		REG_BIT(0)
/*
 * OAM unit
 *
 * The *_OFFSET values below are relative to the MMIO base of an OAM unit; they
 * are not usable as registers on their own. Use the OAM_*(base) accessors
 * further down, which add the unit's base to the offset.
 */
#define OAM_HEAD_POINTER_OFFSET			(0x1a0)
#define OAM_TAIL_POINTER_OFFSET			(0x1a4)
#define OAM_BUFFER_OFFSET			(0x1a8)
#define OAM_CONTEXT_CONTROL_OFFSET		(0x1bc)
#define OAM_CONTROL_OFFSET			(0x194)
#define  OAM_CONTROL_COUNTER_SEL_MASK		REG_GENMASK(3, 1)
#define OAM_DEBUG_OFFSET			(0x198)
#define OAM_STATUS_OFFSET			(0x19c)
#define OAM_MMIO_TRG_OFFSET			(0x1d0)

/* Register accessors: @base is the MMIO base of the OAM unit being addressed */
#define OAM_HEAD_POINTER(base)			XE_REG((base) + OAM_HEAD_POINTER_OFFSET)
#define OAM_TAIL_POINTER(base)			XE_REG((base) + OAM_TAIL_POINTER_OFFSET)
#define OAM_BUFFER(base)			XE_REG((base) + OAM_BUFFER_OFFSET)
#define OAM_CONTEXT_CONTROL(base)		XE_REG((base) + OAM_CONTEXT_CONTROL_OFFSET)
#define OAM_CONTROL(base)			XE_REG((base) + OAM_CONTROL_OFFSET)
#define OAM_DEBUG(base)				XE_REG((base) + OAM_DEBUG_OFFSET)
#define OAM_STATUS(base)			XE_REG((base) + OAM_STATUS_OFFSET)
#define OAM_MMIO_TRG(base)			XE_REG((base) + OAM_MMIO_TRG_OFFSET)

#endif
+4 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include "xe_gt_sriov_vf_types.h"
#include "xe_hw_engine_types.h"
#include "xe_hw_fence_types.h"
#include "xe_oa.h"
#include "xe_reg_sr_types.h"
#include "xe_sa_types.h"
#include "xe_uc_types.h"
@@ -387,6 +388,9 @@ struct xe_gt {
		 */
		u8 instances_per_class[XE_ENGINE_CLASS_MAX];
	} user_engines;

	/** @oa: oa perf counter subsystem per gt info */
	struct xe_oa_gt oa;
};

#endif
+2 −0
Original line number Diff line number Diff line
@@ -148,6 +148,8 @@ struct xe_hw_engine {
	enum xe_hw_engine_id engine_id;
	/** @eclass: pointer to per hw engine class interface */
	struct xe_hw_engine_class_intf *eclass;
	/** @oa_unit: oa unit for this hw engine */
	struct xe_oa_unit *oa_unit;
};

/**
+156 −0
Original line number Diff line number Diff line
@@ -3,13 +3,20 @@
 * Copyright © 2023-2024 Intel Corporation
 */

#include <drm/drm_managed.h>
#include <drm/xe_drm.h>

#include "regs/xe_oa_regs.h"
#include "xe_assert.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_oa.h"

#define XE_OA_UNIT_INVALID U32_MAX

#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x

static const struct xe_oa_format oa_formats[] = {
@@ -34,6 +41,142 @@ static const struct xe_oa_format oa_formats[] = {
	[XE_OA_FORMAT_PEC36u64_G1_4_G2_32]	= { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 },
};

/*
 * Number of OA units on @gt. The count is currently a fixed one per gt and
 * does not depend on @gt; the value sizes the per-gt oa_unit array.
 */
static u32 num_oa_units_per_gt(struct xe_gt *gt)
{
	const u32 units_per_gt = 1;

	return units_per_gt;
}

static u32 __hwe_oam_unit(struct xe_hw_engine *hwe)
{
	if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) {
		/*
		 * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
		 * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA
		 */
		xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA);

		return 0;
	}

	return XE_OA_UNIT_INVALID;
}

/*
 * Map a hardware engine to the index of its OA unit in the gt's oa_unit
 * array, based on the engine class. Returns XE_OA_UNIT_INVALID for engine
 * classes that have no OA unit.
 */
static u32 __hwe_oa_unit(struct xe_hw_engine *hwe)
{
	/* Render and compute engines use unit 0 */
	if (hwe->class == XE_ENGINE_CLASS_RENDER ||
	    hwe->class == XE_ENGINE_CLASS_COMPUTE)
		return 0;

	/* Video engines are served by the OAM unit, when there is one */
	if (hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE ||
	    hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE)
		return __hwe_oam_unit(hwe);

	return XE_OA_UNIT_INVALID;
}

/*
 * Build the register set of an OAM unit whose registers live at MMIO offset
 * @base. Every register is @base plus the matching OAM_*_OFFSET.
 */
static struct xe_oa_regs __oam_regs(u32 base)
{
	struct xe_oa_regs regs = {
		.base				= base,
		.oa_head_ptr			= OAM_HEAD_POINTER(base),
		.oa_tail_ptr			= OAM_TAIL_POINTER(base),
		.oa_buffer			= OAM_BUFFER(base),
		.oa_ctx_ctrl			= OAM_CONTEXT_CONTROL(base),
		.oa_ctrl			= OAM_CONTROL(base),
		.oa_debug			= OAM_DEBUG(base),
		.oa_status			= OAM_STATUS(base),
		.oa_ctrl_counter_select_mask	= OAM_CONTROL_COUNTER_SEL_MASK,
	};

	return regs;
}

static struct xe_oa_regs __oag_regs(void)
{
	return (struct xe_oa_regs) {
		0,
		OAG_OAHEADPTR,
		OAG_OATAILPTR,
		OAG_OABUFFER,
		OAG_OAGLBCTXCTRL,
		OAG_OACONTROL,
		OAG_OA_DEBUG,
		OAG_OASTATUS,
		OAG_OACONTROL_OA_COUNTER_SEL_MASK,
	};
}

static void __xe_oa_init_oa_units(struct xe_gt *gt)
{
	const u32 mtl_oa_base[] = { 0x13000 };
	int i, num_units = gt->oa.num_oa_units;

	for (i = 0; i < num_units; i++) {
		struct xe_oa_unit *u = &gt->oa.oa_unit[i];

		if (gt->info.type != XE_GT_TYPE_MEDIA) {
			u->regs = __oag_regs();
			u->type = DRM_XE_OA_UNIT_TYPE_OAG;
		} else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
			u->regs = __oam_regs(mtl_oa_base[i]);
			u->type = DRM_XE_OA_UNIT_TYPE_OAM;
		}

		/* Set oa_unit_ids now to ensure ids remain contiguous */
		u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++;
	}
}

/*
 * Set up the OA state of one gt: the stream lock, the array of OA units and
 * the engine -> OA unit association.
 *
 * Returns 0 on success, -ENOMEM if the unit array cannot be allocated, or the
 * error reported by drmm_mutex_init(). Both the lock and the array are drm
 * managed, so nothing needs to be undone by the caller on failure.
 */
static int xe_oa_init_gt(struct xe_gt *gt)
{
	u32 num_oa_units = num_oa_units_per_gt(gt);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	struct xe_oa_unit *u;
	int ret;

	/*
	 * drmm_mutex_init() can fail; do it first and propagate the error so
	 * that a failure is reported before any gt or engine state is touched
	 * and before any OA unit ids are consumed.
	 */
	ret = drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->oa.gt_lock);
	if (ret)
		return ret;

	u = drmm_kcalloc(&gt_to_xe(gt)->drm, num_oa_units, sizeof(*u), GFP_KERNEL);
	if (!u)
		return -ENOMEM;

	for_each_hw_engine(hwe, gt, id) {
		u32 index = __hwe_oa_unit(hwe);

		/* Engines without an OA unit (XE_OA_UNIT_INVALID) keep NULL */
		hwe->oa_unit = NULL;
		if (index < num_oa_units) {
			u[index].num_engines++;
			hwe->oa_unit = &u[index];
		}
	}

	/*
	 * Fused off engines can result in oa_unit's with num_engines == 0. These units
	 * will appear in OA unit query, but no perf streams can be opened on them.
	 */
	gt->oa.num_oa_units = num_oa_units;
	gt->oa.oa_unit = u;

	__xe_oa_init_oa_units(gt);

	return 0;
}

/*
 * Initialize the OA units of every gt of the device. Stops at the first gt
 * that fails and returns its error code; returns 0 when all gts succeed.
 */
static int xe_oa_init_oa_units(struct xe_oa *oa)
{
	struct xe_gt *gt;
	int err = 0;
	int gt_id;

	for_each_gt(gt, oa->xe, gt_id) {
		err = xe_oa_init_gt(gt);
		if (err)
			break;
	}

	return err;
}

static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format)
{
	__set_bit(format, oa->format_mask);
@@ -87,6 +230,7 @@ static void xe_oa_init_supported_formats(struct xe_oa *oa)
int xe_oa_init(struct xe_device *xe)
{
	struct xe_oa *oa = &xe->oa;
	int ret;

	/* Support OA only with GuC submission and Gen12+ */
	if (XE_WARN_ON(!xe_device_uc_enabled(xe)) || XE_WARN_ON(GRAPHICS_VER(xe) < 12))
@@ -95,8 +239,17 @@ int xe_oa_init(struct xe_device *xe)
	oa->xe = xe;
	oa->oa_formats = oa_formats;

	ret = xe_oa_init_oa_units(oa);
	if (ret) {
		drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret));
		goto exit;
	}

	xe_oa_init_supported_formats(oa);
	return 0;
exit:
	oa->xe = NULL;
	return ret;
}

/**
@@ -107,5 +260,8 @@ void xe_oa_fini(struct xe_device *xe)
{
	struct xe_oa *oa = &xe->oa;

	if (!oa->xe)
		return;

	oa->xe = NULL;
}
+54 −0
Original line number Diff line number Diff line
@@ -7,8 +7,12 @@
#define _XE_OA_TYPES_H_

#include <linux/bitops.h>
#include <linux/mutex.h>
#include <linux/types.h>

#include <drm/xe_drm.h>
#include "regs/xe_reg_defs.h"

enum xe_oa_report_header {
	HDR_32_BIT = 0,
	HDR_64_BIT,
@@ -67,6 +71,53 @@ struct xe_oa_format {
	u16 bc_report;
};

/**
 * struct xe_oa_regs - Registers for each OA unit
 *
 * One instance describes the register set of a single OA unit. Instances are
 * built by __oag_regs() for OAG units and by __oam_regs() for OAM units.
 */
struct xe_oa_regs {
	/** @base: MMIO base the registers are relative to (0 for OAG) */
	u32 base;
	/** @oa_head_ptr: OA buffer head pointer register */
	struct xe_reg oa_head_ptr;
	/** @oa_tail_ptr: OA buffer tail pointer register */
	struct xe_reg oa_tail_ptr;
	/** @oa_buffer: OA buffer register */
	struct xe_reg oa_buffer;
	/** @oa_ctx_ctrl: context control register (OAG_OAGLBCTXCTRL for OAG) */
	struct xe_reg oa_ctx_ctrl;
	/** @oa_ctrl: OA control register */
	struct xe_reg oa_ctrl;
	/** @oa_debug: OA debug register */
	struct xe_reg oa_debug;
	/** @oa_status: OA status register */
	struct xe_reg oa_status;
	/** @oa_ctrl_counter_select_mask: counter select field mask within @oa_ctrl */
	u32 oa_ctrl_counter_select_mask;
};

/**
 * struct xe_oa_unit - Hardware OA unit
 *
 * Units are allocated per gt as a zeroed array during OA initialization and
 * are referenced from the hardware engines they serve.
 */
struct xe_oa_unit {
	/**
	 * @oa_unit_id: identifier for the OA unit, taken from a device-wide
	 * counter so that ids stay contiguous across gt's
	 */
	u16 oa_unit_id;

	/** @type: Type of OA unit - OAM, OAG etc. */
	enum drm_xe_oa_unit_type type;

	/** @regs: OA registers for programming the OA unit */
	struct xe_oa_regs regs;

	/**
	 * @num_engines: number of engines attached to this OA unit. Can be 0
	 * when the engines are fused off.
	 */
	u32 num_engines;

	/** @exclusive_stream: The stream currently using the OA unit */
	struct xe_oa_stream *exclusive_stream;
};

/**
 * struct xe_oa_gt - OA per-gt information
 */
struct xe_oa_gt {
	/**
	 * @gt_lock: lock protecting create/destroy OA streams. Set up as a drm
	 * managed mutex during OA initialization.
	 */
	struct mutex gt_lock;

	/** @num_oa_units: number of oa units for each gt, i.e. entries in @oa_unit */
	u32 num_oa_units;

	/** @oa_unit: array of oa_units (drm managed allocation of @num_oa_units entries) */
	struct xe_oa_unit *oa_unit;
};

/**
 * struct xe_oa - OA device level information
 */
@@ -79,5 +130,8 @@ struct xe_oa {

	/** @format_mask: tracks valid OA formats for a platform */
	unsigned long format_mask[BITS_TO_LONGS(__XE_OA_FORMAT_MAX)];

	/** @oa_unit_ids: tracks oa unit ids assigned across gt's */
	u16 oa_unit_ids;
};
#endif
Loading