Commit b4517670 authored by Jakub Kicinski
Browse files

Merge branch 'selftests-net-page_poll-allocation-error-injection'

Jakub Kicinski says:

====================
selftests: net: page_pool allocation error injection

Add a test for exercising driver memory allocation failure paths.
page pool is a bit tricky to inject errors into at the page allocator
level because of the bulk alloc and recycling, so add explicit error
injection support "in front" of the caches.

Add a test to exercise that using only the standard APIs.
This is the first useful test for the new tests with an endpoint.
There's no point testing netdevsim here, so this is also the first
HW-only test in Python.

I'm not super happy with the traffic generation using iperf3,
my initial approach was to use mausezahn. But it turned out to be
5x slower in terms of PPS. Hopefully this is good enough for now.

v1: https://lore.kernel.org/all/20240426232400.624864-1-kuba@kernel.org/
====================

Link: https://lore.kernel.org/r/20240429144426.743476-1-kuba@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents f581bcf0 9da271f8
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@
 *	Copyright (C) 2016 Red Hat, Inc.
 */

#include <linux/error-injection.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -550,6 +551,7 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
	return page;
}
EXPORT_SYMBOL(page_pool_alloc_pages);
ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL);

/* Calculate distance between two u32 values, valid if distance is below 2^(31)
 *  https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
+1 −1
Original line number Diff line number Diff line
@@ -119,7 +119,7 @@ TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug

# Networking tests want the net/lib target, include it automatically
ifneq ($(filter net drivers/net,$(TARGETS)),)
ifneq ($(filter net drivers/net drivers/net/hw,$(TARGETS)),)
ifeq ($(filter net/lib,$(TARGETS)),)
	INSTALL_DEP_TARGETS := net/lib
endif
+2 −0
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ TEST_PROGS = \
	hw_stats_l3.sh \
	hw_stats_l3_gre.sh \
	loopback.sh \
	pp_alloc_fail.py \
	#

TEST_FILES := \
@@ -16,6 +17,7 @@ TEST_FILES := \
	#

TEST_INCLUDES := \
	$(wildcard lib/py/*.py ../lib/py/*.py) \
	../../../net/lib.sh \
	../../../net/forwarding/lib.sh \
	../../../net/forwarding/ipip_lib.sh \
+16 −0
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0

import sys
from pathlib import Path

KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()

# Pull in the shared selftest helpers from the kernel source tree.  If they
# cannot be found the test is reported as skipped and the process exits
# with status 4.
try:
    sys.path.append(KSFT_DIR.as_posix())
    from net.lib.py import *
    from drivers.net.lib.py import *
except ModuleNotFoundError as e:
    # ksft_pr() and ktap_result() are not defined in this file; they can only
    # have come from the star imports above.  If the very first import is the
    # one that failed they do not exist, and calling them would replace the
    # intended SKIP with a NameError.
    if "ksft_pr" in globals() and "ktap_result" in globals():
        ksft_pr("Failed importing `net` library from kernel sources")
        ksft_pr(str(e))
        ktap_result(True, comment="SKIP")
    else:
        # Fallback: emit the diagnostics and a skipped result by hand.
        # NOTE(review): mirrors KTAP conventions ("#" diagnostic lines and an
        # "ok ... # SKIP" result line) - confirm against the helpers' output.
        print("# Failed importing `net` library from kernel sources")
        print("# " + str(e))
        print("ok 1 # SKIP")
    sys.exit(4)
+129 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0

import time
import os
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import KsftSkipEx, KsftFailEx
from lib.py import NetdevFamily, NlError
from lib.py import NetDrvEpEnv
from lib.py import cmd, tool, GenerateTraffic


def _write_fail_config(config):
    for key, value in config.items():
        with open("/sys/kernel/debug/fail_function/" + key, "w") as fp:
            fp.write(str(value) + "\n")


def _enable_pp_allocation_fail():
    """Turn on error injection for page_pool_alloc_pages.

    Registers page_pool_alloc_pages with the fail_function debugfs interface
    (unless an entry for it already exists) and then writes the injection
    settings: verbose=0, interval=511, probability=100, times=-1.

    Raises:
        KsftSkipEx: if /sys/kernel/debug/fail_function does not exist.
    """
    fail_dir = "/sys/kernel/debug/fail_function"

    if not os.path.exists(fail_dir):
        raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")

    # Only register the function when it has no per-function entry yet.
    already_registered = os.path.exists(fail_dir + "/page_pool_alloc_pages")
    if not already_registered:
        with open(fail_dir + "/inject", "w") as inject_file:
            inject_file.write("page_pool_alloc_pages\n")

    _write_fail_config(dict(verbose=0, interval=511, probability=100, times=-1))


def _disable_pp_allocation_fail():
    """Turn error injection for page_pool_alloc_pages back off.

    Does nothing when /sys/kernel/debug/fail_function is absent.  Otherwise
    writes an empty line to the ``inject`` file if a page_pool_alloc_pages
    entry exists, and then sets ``probability`` and ``times`` to 0.
    """
    fail_dir = "/sys/kernel/debug/fail_function"

    if os.path.exists(fail_dir):
        if os.path.exists(fail_dir + "/page_pool_alloc_pages"):
            with open(fail_dir + "/inject", "w") as inject_file:
                inject_file.write("\n")

        _write_fail_config(dict(probability=0, times=0))


def test_pp_alloc(cfg, netdevnl):
    """Check that traffic keeps flowing while page pool allocations fail.

    Steps, in order:
      1. Skip unless the qstats dump for cfg.ifindex has 'rx-alloc-fail'.
      2. Start traffic and verify it flows.
      3. Enable error injection and sample the stats 3 seconds apart; skip
         if 'rx-alloc-fail' grew by less than 100 in that window.
      4. Verify traffic again, try to resize the Rx ring with ethtool -G
         (double it if that fits under 'rx-max', otherwise halve it), and
         verify traffic once more.
      5. Always disable injection, stop the traffic and, if the ring was
         resized, restore its original size.

    Raises:
        KsftSkipEx: stats unsupported, or failures not being injected.
        KsftFailEx: fewer than 15000 Rx packets were seen within one second.
    """
    def get_stats():
        dump = netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)
        return dump[0]

    def check_traffic_flowing():
        before = get_stats()
        time.sleep(1)
        after = get_stats()
        rx_delta = after['rx-packets'] - before['rx-packets']
        if rx_delta < 15000:
            raise KsftFailEx("Traffic seems low:", rx_delta)

    try:
        stats = get_stats()
    except NlError as e:
        # Error -95 is treated as "no stats available" rather than a failure.
        if e.nl_msg.error != -95:
            raise
        stats = {}
    if 'rx-alloc-fail' not in stats:
        raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats")

    ring_changed = False
    traffic = None
    try:
        traffic = GenerateTraffic(cfg)

        check_traffic_flowing()

        _enable_pp_allocation_fail()

        before = get_stats()
        time.sleep(3)
        after = get_stats()

        fail_delta = after['rx-alloc-fail'] - before['rx-alloc-fail']
        if fail_delta < 1:
            raise KsftSkipEx("Allocation failures not increasing")
        if fail_delta < 100:
            raise KsftSkipEx("Allocation increasing too slowly", fail_delta,
                             "packets:", after['rx-packets'] - before['rx-packets'])

        # Basic failures are fine, try to wobble some settings to catch extra failures
        check_traffic_flowing()
        ring = tool("ethtool", "-g " + cfg.ifname, json=True)[0]
        new_rx = None
        if 'rx' in ring:
            if ring["rx"] * 2 <= ring["rx-max"]:
                new_rx = ring['rx'] * 2
            else:
                new_rx = ring['rx'] // 2

        if not new_rx:
            ksft_pr("ethtool -G change retval: did not try")
        else:
            resize = cmd(f"ethtool -G {cfg.ifname} rx {new_rx}", fail=False)
            ring_changed = resize.ret == 0
            if ring_changed:
                ksft_pr("ethtool -G change retval: success")
            else:
                ksft_pr("ethtool -G change retval: did not succeed", new_rx)

        time.sleep(0.1)
        check_traffic_flowing()
    finally:
        # Cleanup order matters: stop injecting before touching the ring again.
        _disable_pp_allocation_fail()
        if traffic:
            traffic.stop()
        time.sleep(0.1)
        if ring_changed:
            cmd(f"ethtool -G {cfg.ifname} rx {ring['rx']}")


def main() -> None:
    """Run test_pp_alloc inside a NetDrvEpEnv, then exit via ksft_exit()."""
    family = NetdevFamily()
    with NetDrvEpEnv(__file__, nsim_test=False) as env:
        test_args = (env, family)
        ksft_run([test_pp_alloc], args=test_args)
    ksft_exit()


# Only run the test when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Loading