Commit 31eae6d9 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

selftests: drv-net: test drivers sleeping in ndo_get_stats64



Most of our tests use rtnetlink to read device stats, so they
don't expose the drivers much to paths in which device stats
are read under RCU. Add tests which hammer profcs reads to
make sure drivers:
 - don't sleep while reporting stats,
 - can handle parallel reads,
 - can handle device going down while reading.

Set ifname on the env class in NetDrvEnv, we already do that
in NetDrvEpEnv.

  KTAP version 1
  1..7
  ok 1 stats.check_pause
  ok 2 stats.check_fec
  ok 3 stats.pkt_byte_sum
  ok 4 stats.qstat_by_ifindex
  ok 5 stats.check_down
  ok 6 stats.procfs_hammer
  # completed up/down cycles: 6
  ok 7 stats.procfs_downup_hammer
  # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0

Reviewed-by: default avatarPetr Machata <petrm@nvidia.com>
Reviewed-by: default avatarWillem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20250107022932.2087744-1-kuba@kernel.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 7bf1659b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -48,6 +48,7 @@ class NetDrvEnv:
        else:
            self._ns = NetdevSimDev(**kwargs)
            self.dev = self._ns.nsims[0].dev
        self.ifname = self.dev['ifname']
        self.ifindex = self.dev['ifindex']

    def __enter__(self):
+91 −3
Original line number Diff line number Diff line
@@ -2,12 +2,15 @@
# SPDX-License-Identifier: GPL-2.0

import errno
import subprocess
import time
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx
from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
from lib.py import KsftSkipEx, KsftXfailEx
from lib.py import ksft_disruptive
from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
from lib.py import NetDrvEnv
from lib.py import ip, defer
from lib.py import cmd, ip, defer

ethnl = EthtoolFamily()
netfam = NetdevFamily()
@@ -174,10 +177,95 @@ def check_down(cfg) -> None:
    netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)


def __run_inf_loop(body):
    body = body.strip()
    if body[-1] != ';':
        body += ';'

    return subprocess.Popen(f"while true; do {body} done", shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)


def __stats_increase_sanely(old, new) -> None:
    for k in old.keys():
        ksft_ge(new[k], old[k])
        ksft_lt(new[k] - old[k], 1 << 31, comment="likely wrapping error")


def procfs_hammer(cfg) -> None:
    """
    Reading stats via procfs only holds the RCU lock, which is not an exclusive
    lock, make sure drivers can handle parallel reads of stats.
    """
    one = __run_inf_loop("cat /proc/net/dev")
    defer(one.kill)
    two = __run_inf_loop("cat /proc/net/dev")
    defer(two.kill)

    time.sleep(1)
    # Make sure the processes are running
    ksft_is(one.poll(), None)
    ksft_is(two.poll(), None)

    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    time.sleep(2)
    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    __stats_increase_sanely(rtstat1, rtstat2)
    # defers will kill the loops


@ksft_disruptive
def procfs_downup_hammer(cfg) -> None:
    """
    Reading stats via procfs only holds the RCU lock, drivers often try
    to sleep when reading the stats, or don't protect against races.
    """
    # Max out the queues, we'll flip between max and 1
    channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
    if channels['combined-count'] == 0:
        rx_type = 'rx'
    else:
        rx_type = 'combined'
    cur_queue_cnt = channels[f'{rx_type}-count']
    max_queue_cnt = channels[f'{rx_type}-max']

    cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
    defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")

    # Real test stats
    stats = __run_inf_loop("cat /proc/net/dev")
    defer(stats.kill)

    ipset = f"ip link set dev {cfg.ifname}"
    defer(ip, f"link set dev {cfg.ifname} up")
    # The "echo -n 1" lets us count iterations below
    updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \
             f"ethtool -L {cfg.ifname} {rx_type} 1; " + \
             f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \
              "echo -n 1"
    updown = __run_inf_loop(updown)
    kill_updown = defer(updown.kill)

    time.sleep(1)
    # Make sure the processes are running
    ksft_is(stats.poll(), None)
    ksft_is(updown.poll(), None)

    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    # We're looking for crashes, give it extra time
    time.sleep(9)
    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    __stats_increase_sanely(rtstat1, rtstat2)

    kill_updown.exec()
    stdout, _ = updown.communicate(timeout=5)
    ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8')))


def main() -> None:
    with NetDrvEnv(__file__, queue_count=100) as cfg:
        ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
                  check_down],
                  check_down, procfs_hammer, procfs_downup_hammer],
                 args=(cfg, ))
    ksft_exit()

+5 −0
Original line number Diff line number Diff line
@@ -71,6 +71,11 @@ def ksft_in(a, b, comment=""):
        _fail("Check failed", a, "not in", b, comment)


def ksft_is(a, b, comment=""):
    if a is not b:
        _fail("Check failed", a, "is not", b, comment)


def ksft_ge(a, b, comment=""):
    if a < b:
        _fail("Check failed", a, "<", b, comment)