Commit 532d7f6b authored by Shashank Gupta, committed by Herbert Xu
Browse files

crypto: qat - add error counters



Introduce a RAS counters interface for counting QAT-specific device
errors and expose the counters through the newly created qat_ras sysfs
attribute group.

This adds the following attributes:

- errors_correctable: number of correctable errors
- errors_nonfatal: number of uncorrectable non fatal errors
- errors_fatal: number of uncorrectable fatal errors
- reset_error_counters: resets all counters

These counters are initialized during device bring up and cleared
during device shutdown and are applicable only to QAT GEN4 devices.

Signed-off-by: Shashank Gupta <shashank.gupta@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Tero Kristo <tero.kristo@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 22289dc9
Loading
Loading
Loading
Loading
+41 −0
Original line number Diff line number Diff line
What:		/sys/bus/pci/devices/<BDF>/qat_ras/errors_correctable
Date:		January 2024
KernelVersion:	6.7
Contact:	qat-linux@intel.com
Description:	(RO) Reports the number of correctable errors detected by the device.

		This attribute is only available for qat_4xxx devices.

What:		/sys/bus/pci/devices/<BDF>/qat_ras/errors_nonfatal
Date:		January 2024
KernelVersion:	6.7
Contact:	qat-linux@intel.com
Description:	(RO) Reports the number of non fatal errors detected by the device.

		This attribute is only available for qat_4xxx devices.

What:		/sys/bus/pci/devices/<BDF>/qat_ras/errors_fatal
Date:		January 2024
KernelVersion:	6.7
Contact:	qat-linux@intel.com
Description:	(RO) Reports the number of fatal errors detected by the device.

		This attribute is only available for qat_4xxx devices.

What:		/sys/bus/pci/devices/<BDF>/qat_ras/reset_error_counters
Date:		January 2024
KernelVersion:	6.7
Contact:	qat-linux@intel.com
Description:	(WO) Write to reset all error counters of a device.

		The following example shows how to reset the counters::

			# echo 1 > /sys/bus/pci/devices/<BDF>/qat_ras/reset_error_counters
			# cat /sys/bus/pci/devices/<BDF>/qat_ras/errors_correctable
			0
			# cat /sys/bus/pci/devices/<BDF>/qat_ras/errors_nonfatal
			0
			# cat /sys/bus/pci/devices/<BDF>/qat_ras/errors_fatal
			0

		This attribute is only available for qat_4xxx devices.
+1 −0
Original line number Diff line number Diff line
@@ -418,6 +418,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
		goto out_err;
	}

	accel_dev->ras_errors.enabled = true;
	adf_dbgfs_init(accel_dev);

	ret = adf_dev_up(accel_dev, true);
+1 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ intel_qat-objs := adf_cfg.o \
	adf_admin.o \
	adf_hw_arbiter.o \
	adf_sysfs.o \
	adf_sysfs_ras_counters.o \
	adf_gen2_hw_data.o \
	adf_gen2_config.o \
	adf_gen4_hw_data.o \
+14 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#include <linux/list.h>
#include <linux/io.h>
#include <linux/ratelimit.h>
#include <linux/types.h>
#include "adf_cfg_common.h"
#include "adf_pfvf_msg.h"

@@ -81,6 +82,18 @@ enum dev_sku_info {
	DEV_SKU_UNKNOWN,
};

/*
 * RAS error classes counted per accelerator device. Each enumerator is
 * used as an index into adf_error_counters.counter[].
 */
enum ras_errors {
	ADF_RAS_CORR = 0,	/* correctable errors */
	ADF_RAS_UNCORR,		/* uncorrectable, non-fatal errors */
	ADF_RAS_FATAL,		/* uncorrectable, fatal errors */
	ADF_RAS_ERRORS,		/* number of classes above; sizes the counter array */
};

/*
 * Per-device RAS error counters, embedded in struct adf_accel_dev as
 * ras_errors.
 *
 * @counter: one atomic counter per error class; the array holds
 *           ADF_RAS_ERRORS entries, one for each preceding value of
 *           enum ras_errors.
 * @enabled: set to true by adf_probe() before the device is brought up.
 *           NOTE(review): presumably marks that this device supports the
 *           RAS counters - confirm against adf_sysfs_ras_counters.c.
 */
struct adf_error_counters {
	atomic_t counter[ADF_RAS_ERRORS];
	bool enabled;
};

static inline const char *get_sku_info(enum dev_sku_info info)
{
	switch (info) {
@@ -360,6 +373,7 @@ struct adf_accel_dev {
			u8 pf_compat_ver;
		} vf;
	};
	struct adf_error_counters ras_errors;
	struct mutex state_lock; /* protect state of the device */
	bool is_vf;
	u32 accel_id;
+3 −0
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#include "adf_common_drv.h"
#include "adf_dbgfs.h"
#include "adf_heartbeat.h"
#include "adf_sysfs_ras_counters.h"

static LIST_HEAD(service_table);
static DEFINE_MUTEX(service_lock);
@@ -242,6 +243,7 @@ static int adf_dev_start(struct adf_accel_dev *accel_dev)
	set_bit(ADF_STATUS_COMP_ALGS_REGISTERED, &accel_dev->status);

	adf_dbgfs_add(accel_dev);
	adf_sysfs_start_ras(accel_dev);

	return 0;
}
@@ -268,6 +270,7 @@ static void adf_dev_stop(struct adf_accel_dev *accel_dev)
		return;

	adf_dbgfs_rm(accel_dev);
	adf_sysfs_stop_ras(accel_dev);

	clear_bit(ADF_STATUS_STARTING, &accel_dev->status);
	clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
Loading