Commit df548e62 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'selftests-drv-net-iou-zcrx-improve-stability-and-make-the-large-chunk-test-work'

Jakub Kicinski says:

====================
selftests: drv-net: iou-zcrx: improve stability and make the large chunk test work

The iou-zcrx test hasn't been passing in NIPA, I assumed it's because
we're missing iouring changes, but it's still failing after the merge
window. Turns out there was a bug in the implementation which was fixed
separately via the iouring tree. With that out of the way the tests
are passing but flaky. Patch 1 deals with the flakiness.

While looking at this I also noticed that the large chunk test isn't
running at all. So fix and enable it (patches 2 and 3).
====================

Link: https://patch.msgid.link/20260227171305.2848240-1-kuba@kernel.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 8341c989 c7b22841
Loading
Loading
Loading
Loading
+31 −26
Original line number Diff line number Diff line
@@ -2,14 +2,27 @@
# SPDX-License-Identifier: GPL-2.0

import re
import time
from os import path
from lib.py import ksft_run, ksft_exit, KsftSkipEx, ksft_variants, KsftNamedVariant
from lib.py import NetDrvEpEnv
from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
from lib.py import EthtoolFamily
from lib.py import EthtoolFamily, NetdevFamily

SKIP_CODE = 42


def mp_clear_wait(cfg):
    """Wait for io_uring memory providers to clear from all device queues."""
    deadline = time.time() + 5
    while time.time() < deadline:
        queues = cfg.netnl.queue_get({'ifindex': cfg.ifindex}, dump=True)
        if not any('io-uring' in q for q in queues):
            return
        time.sleep(0.1)
    raise TimeoutError("Timed out waiting for memory provider to clear")


def create_rss_ctx(cfg):
    output = ethtool(f"-X {cfg.ifname} context new start {cfg.target} equal 1").stdout
    values = re.search(r'New RSS context is (\d+)', output).group(1)
@@ -46,6 +59,7 @@ def single(cfg):
                                'tcp-data-split': 'unknown',
                                'hds-thresh': hds_thresh,
                                'rx': rx_rings})
    defer(mp_clear_wait, cfg)

    cfg.target = channels - 1
    ethtool(f"-X {cfg.ifname} equal {cfg.target}")
@@ -73,6 +87,7 @@ def rss(cfg):
                                'tcp-data-split': 'unknown',
                                'hds-thresh': hds_thresh,
                                'rx': rx_rings})
    defer(mp_clear_wait, cfg)

    cfg.target = channels - 1
    ethtool(f"-X {cfg.ifname} equal {cfg.target}")
@@ -120,36 +135,24 @@ def test_zcrx_large_chunks(cfg) -> None:

    cfg.require_ipver('6')

    combined_chans = _get_combined_channels(cfg)
    if combined_chans < 2:
        raise KsftSkipEx('at least 2 combined channels required')
    (rx_ring, hds_thresh) = _get_current_settings(cfg)
    port = rand_port()

    ethtool(f"-G {cfg.ifname} tcp-data-split on")
    defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")

    ethtool(f"-G {cfg.ifname} hds-thresh 0")
    defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")

    ethtool(f"-G {cfg.ifname} rx 64")
    defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
    hp_file = "/proc/sys/vm/nr_hugepages"
    with open(hp_file, 'r+', encoding='utf-8') as f:
        nr_hugepages = int(f.read().strip())
        if nr_hugepages < 64:
            f.seek(0)
            f.write("64")
            defer(lambda: open(hp_file, 'w', encoding='utf-8').write(str(nr_hugepages)))

    ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
    defer(ethtool, f"-X {cfg.ifname} default")

    flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
    defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")

    rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -x 2"
    tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
    single(cfg)
    rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -x 2"
    tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840"

    probe = cmd(rx_cmd + " -d", fail=False)
    if probe.ret == SKIP_CODE:
        raise KsftSkipEx(probe.stdout)
        raise KsftSkipEx(probe.stdout.strip())

    with bkg(rx_cmd, exit_wait=True):
        wait_port_listen(port, proto="tcp")
        wait_port_listen(cfg.port, proto="tcp")
        cmd(tx_cmd, host=cfg.remote)


@@ -159,8 +162,10 @@ def main() -> None:
        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)

        cfg.ethnl = EthtoolFamily()
        cfg.netnl = NetdevFamily()
        cfg.port = rand_port()
        ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot], args=(cfg, ))
        ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot,
                                        test_zcrx_large_chunks], args=(cfg, ))
    ksft_exit()