Commit 7dae8ffb authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

selftests: drv-net: gro: add a test for GRO depth

Reuse the long sequence test to max out the GRO contexts.
Repeat for a single queue, 8 queues, and default number
of queues but flow steering to just one.

The SW GRO's capacity should be around 64 per queue
(8 buckets, up to 8 skbs in a chain).

Link: https://patch.msgid.link/20260318033819.1469350-7-kuba@kernel.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent ff1cb3ad
Loading
Loading
Loading
Loading
+181 −20
Original line number Diff line number Diff line
@@ -35,11 +35,18 @@ Test cases:
  - large_rem: Large packet remainder handling
"""

import glob
import os
import re
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import NetDrvEpEnv, KsftXfailEx
from lib.py import NetdevFamily, EthtoolFamily
from lib.py import bkg, cmd, defer, ethtool, ip
from lib.py import ksft_variants
from lib.py import ksft_variants, KsftNamedVariant


# gro.c uses hardcoded DPORT=8000
GRO_DPORT = 8000


def _resolve_dmac(cfg, ipver):
@@ -113,6 +120,98 @@ def _set_ethtool_feat(dev, current, feats, host=None):
        ksft_pr(eth_cmd)


def _get_queue_stats(cfg, queue_id):
    """Get stats for a specific Rx queue."""
    cfg.wait_hw_stats_settle()
    data = cfg.netnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]},
                                dump=True)
    for q in data:
        if q.get('queue-type') == 'rx' and q.get('queue-id') == queue_id:
            return q
    return {}


def _setup_isolated_queue(cfg):
    """Set up an isolated queue for testing using ntuple filter.

    Remove queue 1 from the default RSS context and steer test traffic to it.
    """
    test_queue = 1

    qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
    if qcnt < 2:
        raise KsftXfailEx(f"Need at least 2 queues, have {qcnt}")

    # Remove queue 1 from default RSS context by setting its weight to 0
    weights = ["1"] * qcnt
    weights[test_queue] = "0"
    ethtool(f"-X {cfg.ifname} weight " + " ".join(weights))
    defer(ethtool, f"-X {cfg.ifname} default")

    # Set up ntuple filter to steer our test traffic to the isolated queue
    flow  = f"flow-type tcp{cfg.addr_ipver} "
    flow += f"dst-ip {cfg.addr} dst-port {GRO_DPORT} action {test_queue}"
    output = ethtool(f"-N {cfg.ifname} {flow}").stdout
    ntuple_id = int(output.split()[-1])
    defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")

    return test_queue


def _setup_queue_count(cfg, num_queues):
    """Configure the NIC to use a specific number of queues."""
    channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
    ch_max = channels.get('combined-max', 0)
    qcnt = channels['combined-count']

    if ch_max < num_queues:
        raise KsftXfailEx(f"Need at least {num_queues} queues, max={ch_max}")

    defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
    ethtool(f"-L {cfg.ifname} combined {num_queues}")


def _run_gro_bin(cfg, test_name, protocol=None, num_flows=None,
                 order_check=False, verbose=False, fail=False):
    """Run gro binary with given test and return the process result."""
    if not hasattr(cfg, "bin_remote"):
        cfg.bin_local = cfg.net_lib_dir / "gro"
        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)

    if protocol is None:
        ipver = cfg.addr_ipver
        protocol = f"ipv{ipver}"
    else:
        ipver = "6" if protocol[-1] == "6" else "4"

    dmac = _resolve_dmac(cfg, ipver)

    base_args = [
        f"--{protocol}",
        f"--dmac {dmac}",
        f"--smac {cfg.remote_dev['address']}",
        f"--daddr {cfg.addr_v[ipver]}",
        f"--saddr {cfg.remote_addr_v[ipver]}",
        f"--test {test_name}",
    ]
    if num_flows:
        base_args.append(f"--num-flows {num_flows}")
    if order_check:
        base_args.append("--order-check")
    if verbose:
        base_args.append("--verbose")

    args = " ".join(base_args)

    rx_cmd = f"{cfg.bin_local} {args} --rx --iface {cfg.ifname}"
    tx_cmd = f"{cfg.bin_remote} {args} --iface {cfg.remote_ifname}"

    with bkg(rx_cmd, ksft_ready=True, exit_wait=True, fail=fail) as rx_proc:
        cmd(tx_cmd, host=cfg.remote)

    return rx_proc


def _setup(cfg, mode, test_name):
    """ Setup hardware loopback mode for GRO testing. """

@@ -233,30 +332,14 @@ def test(cfg, mode, protocol, test_name):

    _setup(cfg, mode, test_name)

    base_cmd_args = [
        f"--{protocol}",
        f"--dmac {_resolve_dmac(cfg, ipver)}",
        f"--smac {cfg.remote_dev['address']}",
        f"--daddr {cfg.addr_v[ipver]}",
        f"--saddr {cfg.remote_addr_v[ipver]}",
        f"--test {test_name}",
        "--verbose"
    ]
    base_args = " ".join(base_cmd_args)

    # Each test is run 6 times to deflake, because given the receive timing,
    # not all packets that should coalesce will be considered in the same flow
    # on every try.
    max_retries = 6
    for attempt in range(max_retries):
        rx_cmd = f"{cfg.bin_local} {base_args} --rx --iface {cfg.ifname}"
        tx_cmd = f"{cfg.bin_remote} {base_args} --iface {cfg.remote_ifname}"

        fail_now = attempt >= max_retries - 1

        with bkg(rx_cmd, ksft_ready=True, exit_wait=True,
                 fail=fail_now) as rx_proc:
            cmd(tx_cmd, host=cfg.remote)
        rx_proc = _run_gro_bin(cfg, test_name, protocol=protocol,
                               verbose=True, fail=fail_now)

        if rx_proc.ret == 0:
            return
@@ -270,11 +353,89 @@ def test(cfg, mode, protocol, test_name):
        ksft_pr(f"Attempt {attempt + 1}/{max_retries} failed, retrying...")


def _capacity_variants():
    """Generate variants for capacity test: mode x queue setup."""
    setups = [
        ("isolated", _setup_isolated_queue),
        ("1q", lambda cfg: _setup_queue_count(cfg, 1)),
        ("8q", lambda cfg: _setup_queue_count(cfg, 8)),
    ]
    for mode in ["sw", "hw", "lro"]:
        for name, func in setups:
            yield KsftNamedVariant(f"{mode}_{name}", mode, func)


@ksft_variants(_capacity_variants())
def test_gro_capacity(cfg, mode, setup_func):
    """
    Probe GRO capacity.

    Start with 8 flows and increase by 2x on each successful run.
    Retry up to 3 times on failure.

    Variants combine mode (sw, hw, lro) with queue setup:
      - isolated: Use a single queue isolated from RSS
      - 1q: Configure NIC to use 1 queue
      - 8q: Configure NIC to use 8 queues
    """
    max_retries = 3

    _setup(cfg, mode, "capacity")
    queue_id = setup_func(cfg)

    num_flows = 8
    while True:
        success = False
        for attempt in range(max_retries):
            if queue_id is not None:
                stats_before = _get_queue_stats(cfg, queue_id)

            rx_proc = _run_gro_bin(cfg, "capacity", num_flows=num_flows)
            output = rx_proc.stdout

            if queue_id is not None:
                stats_after = _get_queue_stats(cfg, queue_id)
                qstat_pkts = (stats_after.get('rx-packets', 0) -
                              stats_before.get('rx-packets', 0))
                gro_pkts = (stats_after.get('rx-hw-gro-packets', 0) -
                            stats_before.get('rx-hw-gro-packets', 0))
                qstat_str = f" qstat={qstat_pkts} hw-gro={gro_pkts}"
            else:
                qstat_str = ""

            # Parse and print STATS line
            match = re.search(
                r'STATS: received=(\d+) wire=(\d+) coalesced=(\d+)', output)
            if match:
                received = int(match.group(1))
                wire = int(match.group(2))
                coalesced = int(match.group(3))
                status = "PASS" if received == num_flows else "MISS"
                ksft_pr(f"flows={num_flows} attempt={attempt + 1} "
                        f"received={received} wire={wire} "
                        f"coalesced={coalesced}{qstat_str} [{status}]")
                if received == num_flows:
                    success = True
                    break
            else:
                ksft_pr(rx_proc)
                ksft_pr(f"flows={num_flows} attempt={attempt + 1}"
                        f"{qstat_str} [FAIL - can't parse stats]")

        if not success:
            ksft_pr(f"Stopped at {num_flows} flows")
            break

        num_flows *= 2


def main() -> None:
    """ Ksft boiler plate main """

    with NetDrvEpEnv(__file__) as cfg:
        ksft_run(cases=[test], args=(cfg,))
        cfg.ethnl = EthtoolFamily()
        cfg.netnl = NetdevFamily()
        ksft_run(cases=[test, test_gro_capacity], args=(cfg,))
    ksft_exit()