Unverified Commit 91b2c42c authored by Francois Dugast, committed by Rodrigo Vivi
Browse files

drm/xe: Use fault injection infrastructure to find issues at probe time



The kernel fault injection infrastructure is used to test proper error
handling during probe. The return code of the functions using
ALLOW_ERROR_INJECTION() can be conditionally modified at runtime by
tuning some debugfs entries. This requires CONFIG_FUNCTION_ERROR_INJECTION
(among others).

One way to use fault injection at probe time by making each of those
functions fail one at a time is:

    FAILTYPE=fail_function
    DEVICE="0000:00:08.0" # depends on the system
    ERRNO=-12 # -ENOMEM, can depend on the function

    echo N > /sys/kernel/debug/$FAILTYPE/task-filter
    echo 100 > /sys/kernel/debug/$FAILTYPE/probability
    echo 0 > /sys/kernel/debug/$FAILTYPE/interval
    echo -1 > /sys/kernel/debug/$FAILTYPE/times
    echo 0 > /sys/kernel/debug/$FAILTYPE/space
    echo 1 > /sys/kernel/debug/$FAILTYPE/verbose

    modprobe xe
    echo $DEVICE > /sys/bus/pci/drivers/xe/unbind

    grep -oP "^.* \[xe\]" /sys/kernel/debug/$FAILTYPE/injectable | \
    cut -d ' ' -f 1 | while read -r FUNCTION ; do
        echo "Injecting fault in $FUNCTION"
        echo "" > /sys/kernel/debug/$FAILTYPE/inject
        echo $FUNCTION > /sys/kernel/debug/$FAILTYPE/inject
        printf %#x $ERRNO > /sys/kernel/debug/$FAILTYPE/$FUNCTION/retval
        echo $DEVICE > /sys/bus/pci/drivers/xe/bind
    done

    rmmod xe

It will also be integrated into IGT for systematic execution by CI.

v2: Wrappers are not needed in the cases covered by this patch, so
    remove them and use ALLOW_ERROR_INJECTION() directly.

v3: Document the use of fault injection at probe time in xe_pci_probe
    and refer to it where ALLOW_ERROR_INJECTION() is used.

Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240927151207.399354-1-francois.dugast@intel.com


Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent 11bfc4a2
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
#include "xe_device.h"

#include <linux/delay.h>
#include <linux/fault-inject.h>
#include <linux/units.h>

#include <drm/drm_aperture.h>
@@ -382,6 +383,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
err:
	return ERR_PTR(err);
}
ALLOW_ERROR_INJECTION(xe_device_create, ERRNO); /* See xe_pci_probe() */

static bool xe_driver_flr_disabled(struct xe_device *xe)
{
@@ -550,6 +552,7 @@ static int wait_for_lmem_ready(struct xe_device *xe)

	return 0;
}
ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */

static void update_device_info(struct xe_device *xe)
{
+2 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@

#include "xe_ggtt.h"

#include <linux/fault-inject.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sizes.h>

@@ -264,6 +265,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)

	return 0;
}
ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */

static void xe_ggtt_invalidate(struct xe_ggtt *ggtt);

+3 −0
Original line number Diff line number Diff line
@@ -5,6 +5,8 @@

#include "xe_guc_ads.h"

#include <linux/fault-inject.h>

#include <drm/drm_managed.h>

#include <generated/xe_wa_oob.h>
@@ -418,6 +420,7 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)

	return 0;
}
ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */

/**
 * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
+2 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#include <linux/bitfield.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/fault-inject.h>

#include <kunit/static_stub.h>

@@ -209,6 +210,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
	ct->state = XE_GUC_CT_STATE_DISABLED;
	return 0;
}
ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */

#define desc_read(xe_, guc_ctb__, field_)			\
	xe_map_rd_field(xe_, &guc_ctb__->desc, 0,		\
+3 −0
Original line number Diff line number Diff line
@@ -5,6 +5,8 @@

#include "xe_guc_log.h"

#include <linux/fault-inject.h>

#include <drm/drm_managed.h>

#include "xe_bo.h"
@@ -96,3 +98,4 @@ int xe_guc_log_init(struct xe_guc_log *log)

	return 0;
}
ALLOW_ERROR_INJECTION(xe_guc_log_init, ERRNO); /* See xe_pci_probe() */
Loading