Commit 95b49479 authored by Ira Weiny's avatar Ira Weiny Committed by Dan Williams
Browse files

cxl/mem: Trace Memory Module Event Record



CXL rev 3.0 section 8.2.9.2.1.3 defines the Memory Module Event Record.

Determine if the event read is memory module record and if so trace the
record.

Reviewed-by: default avatarDan Williams <dan.j.williams@intel.com>
Reviewed-by: default avatarJonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: default avatarIra Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-5-2316a5c8f7d8@intel.com


Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
parent 2d6c1e6d
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -734,6 +734,14 @@ static const uuid_t dram_event_uuid =
	UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab,
		  0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24);

/*
 * Memory Module Event Record
 * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
 */
static const uuid_t mem_mod_event_uuid =
	UUID_INIT(0xfe927475, 0xdd59, 0x4339,
		  0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);

static void cxl_event_trace_record(const struct device *dev,
				   enum cxl_event_log_type type,
				   struct cxl_event_record_raw *record)
@@ -749,6 +757,11 @@ static void cxl_event_trace_record(const struct device *dev,
		struct cxl_event_dram *rec = (struct cxl_event_dram *)record;

		trace_cxl_dram(dev, type, rec);
	} else if (uuid_equal(id, &mem_mod_event_uuid)) {
		struct cxl_event_mem_module *rec =
				(struct cxl_event_mem_module *)record;

		trace_cxl_memory_module(dev, type, rec);
	} else {
		/* For unknown record types print just the header */
		trace_cxl_generic_event(dev, type, record);
+143 −0
Original line number Diff line number Diff line
@@ -438,6 +438,149 @@ TRACE_EVENT(cxl_dram,
	)
);

/*
 * Memory Module Event Record - MMER
 *
 * CXL res 3.0 section 8.2.9.2.1.3; Table 8-45
 */
#define CXL_MMER_HEALTH_STATUS_CHANGE		0x00
#define CXL_MMER_MEDIA_STATUS_CHANGE		0x01
#define CXL_MMER_LIFE_USED_CHANGE		0x02
#define CXL_MMER_TEMP_CHANGE			0x03
#define CXL_MMER_DATA_PATH_ERROR		0x04
#define CXL_MMER_LSA_ERROR			0x05
#define show_dev_evt_type(type)	__print_symbolic(type,			   \
	{ CXL_MMER_HEALTH_STATUS_CHANGE,	"Health Status Change"	}, \
	{ CXL_MMER_MEDIA_STATUS_CHANGE,		"Media Status Change"	}, \
	{ CXL_MMER_LIFE_USED_CHANGE,		"Life Used Change"	}, \
	{ CXL_MMER_TEMP_CHANGE,			"Temperature Change"	}, \
	{ CXL_MMER_DATA_PATH_ERROR,		"Data Path Error"	}, \
	{ CXL_MMER_LSA_ERROR,			"LSA Error"		}  \
)

/*
 * Device Health Information - DHI
 *
 * CXL res 3.0 section 8.2.9.8.3.1; Table 8-100
 */
#define CXL_DHI_HS_MAINTENANCE_NEEDED				BIT(0)
#define CXL_DHI_HS_PERFORMANCE_DEGRADED				BIT(1)
#define CXL_DHI_HS_HW_REPLACEMENT_NEEDED			BIT(2)
#define show_health_status_flags(flags)	__print_flags(flags, "|",	   \
	{ CXL_DHI_HS_MAINTENANCE_NEEDED,	"MAINTENANCE_NEEDED"	}, \
	{ CXL_DHI_HS_PERFORMANCE_DEGRADED,	"PERFORMANCE_DEGRADED"	}, \
	{ CXL_DHI_HS_HW_REPLACEMENT_NEEDED,	"REPLACEMENT_NEEDED"	}  \
)

#define CXL_DHI_MS_NORMAL							0x00
#define CXL_DHI_MS_NOT_READY							0x01
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOST					0x02
#define CXL_DHI_MS_ALL_DATA_LOST						0x03
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS			0x04
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN			0x05
#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT				0x06
#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS				0x07
#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN				0x08
#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT					0x09
#define show_media_status(ms)	__print_symbolic(ms,			   \
	{ CXL_DHI_MS_NORMAL,						   \
		"Normal"						}, \
	{ CXL_DHI_MS_NOT_READY,						   \
		"Not Ready"						}, \
	{ CXL_DHI_MS_WRITE_PERSISTENCY_LOST,				   \
		"Write Persistency Lost"				}, \
	{ CXL_DHI_MS_ALL_DATA_LOST,					   \
		"All Data Lost"						}, \
	{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS,		   \
		"Write Persistency Loss in the Event of Power Loss"	}, \
	{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN,		   \
		"Write Persistency Loss in Event of Shutdown"		}, \
	{ CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT,			   \
		"Write Persistency Loss Imminent"			}, \
	{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS,		   \
		"All Data Loss in Event of Power Loss"			}, \
	{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN,		   \
		"All Data loss in the Event of Shutdown"		}, \
	{ CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT,			   \
		"All Data Loss Imminent"				}  \
)

#define CXL_DHI_AS_NORMAL		0x0
#define CXL_DHI_AS_WARNING		0x1
#define CXL_DHI_AS_CRITICAL		0x2
#define show_two_bit_status(as) __print_symbolic(as,	   \
	{ CXL_DHI_AS_NORMAL,		"Normal"	}, \
	{ CXL_DHI_AS_WARNING,		"Warning"	}, \
	{ CXL_DHI_AS_CRITICAL,		"Critical"	}  \
)
#define show_one_bit_status(as) __print_symbolic(as,	   \
	{ CXL_DHI_AS_NORMAL,		"Normal"	}, \
	{ CXL_DHI_AS_WARNING,		"Warning"	}  \
)

#define CXL_DHI_AS_LIFE_USED(as)			(as & 0x3)
#define CXL_DHI_AS_DEV_TEMP(as)				((as & 0xC) >> 2)
#define CXL_DHI_AS_COR_VOL_ERR_CNT(as)			((as & 0x10) >> 4)
#define CXL_DHI_AS_COR_PER_ERR_CNT(as)			((as & 0x20) >> 5)

TRACE_EVENT(cxl_memory_module,

	TP_PROTO(const struct device *dev, enum cxl_event_log_type log,
		 struct cxl_event_mem_module *rec),

	TP_ARGS(dev, log, rec),

	TP_STRUCT__entry(
		CXL_EVT_TP_entry

		/* Memory Module Event */
		__field(u8, event_type)

		/* Device Health Info */
		__field(u8, health_status)
		__field(u8, media_status)
		__field(u8, life_used)
		__field(u32, dirty_shutdown_cnt)
		__field(u32, cor_vol_err_cnt)
		__field(u32, cor_per_err_cnt)
		__field(s16, device_temp)
		__field(u8, add_status)
	),

	TP_fast_assign(
		CXL_EVT_TP_fast_assign(dev, log, rec->hdr);

		/* Memory Module Event */
		__entry->event_type = rec->event_type;

		/* Device Health Info */
		__entry->health_status = rec->info.health_status;
		__entry->media_status = rec->info.media_status;
		__entry->life_used = rec->info.life_used;
		__entry->dirty_shutdown_cnt = get_unaligned_le32(rec->info.dirty_shutdown_cnt);
		__entry->cor_vol_err_cnt = get_unaligned_le32(rec->info.cor_vol_err_cnt);
		__entry->cor_per_err_cnt = get_unaligned_le32(rec->info.cor_per_err_cnt);
		__entry->device_temp = get_unaligned_le16(rec->info.device_temp);
		__entry->add_status = rec->info.add_status;
	),

	CXL_EVT_TP_printk("event_type='%s' health_status='%s' media_status='%s' " \
		"as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \
		"as_cor_per_err_cnt=%s life_used=%u device_temp=%d " \
		"dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u",
		show_dev_evt_type(__entry->event_type),
		show_health_status_flags(__entry->health_status),
		show_media_status(__entry->media_status),
		show_two_bit_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)),
		show_two_bit_status(CXL_DHI_AS_DEV_TEMP(__entry->add_status)),
		show_one_bit_status(CXL_DHI_AS_COR_VOL_ERR_CNT(__entry->add_status)),
		show_one_bit_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)),
		__entry->life_used, __entry->device_temp,
		__entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt,
		__entry->cor_per_err_cnt
	)
);

#endif /* _CXL_EVENTS_H */

#define TRACE_INCLUDE_FILE trace
+26 −0
Original line number Diff line number Diff line
@@ -486,6 +486,32 @@ struct cxl_event_dram {
	u8 reserved[0x17];
} __packed;

/*
 * Get Health Info Record
 * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
 */
struct cxl_get_health_info {
	u8 health_status;
	u8 media_status;
	u8 add_status;
	u8 life_used;
	u8 device_temp[2];
	u8 dirty_shutdown_cnt[4];
	u8 cor_vol_err_cnt[4];
	u8 cor_per_err_cnt[4];
} __packed;

/*
 * Memory Module Event Record
 * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
 */
struct cxl_event_mem_module {
	struct cxl_event_record_hdr hdr;
	u8 event_type;
	struct cxl_get_health_info info;
	u8 reserved[0x3d];
} __packed;

struct cxl_mbox_get_partition_info {
	__le64 active_volatile_cap;
	__le64 active_persistent_cap;