Commit 5bd6252b authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'netem-bug-fixes'

Stephen Hemminger says:

====================
netem: bug fixes

These bugs were found when doing AI-assisted review of sch_netem.c
during investigation of the packet duplication recursion problem
addressed in Jamal's series.

The fixes cover:

 - probability gaps in the 4-state Markov loss model
 - queue limit not accounting for reordered packets
 - PRNG reseeded on every tc change, breaking reproducibility
 - slot configuration not validated (inverted ranges, negative
   delays, negative limits)
 - slot delay arithmetic overflow for ranges above ~2.1 seconds
 - negative latency and jitter wrapping to huge time_to_send
   values via u64 arithmetic
====================

Link: https://patch.msgid.link/20260418032027.900913-1-stephen@networkplumber.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 35eaa6d8 90be9fed
Loading
Loading
Loading
Loading
+64 −12
Original line number Diff line number Diff line
@@ -227,10 +227,10 @@ static bool loss_4state(struct netem_sched_data *q)
		if (rnd < clg->a4) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
		} else if (rnd < clg->a1 + clg->a4) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		} else if (clg->a1 + clg->a4 < rnd) {
		} else {
			clg->state = TX_IN_GAP_PERIOD;
		}

@@ -247,9 +247,9 @@ static bool loss_4state(struct netem_sched_data *q)
	case LOST_IN_BURST_PERIOD:
		if (rnd < clg->a3)
			clg->state = TX_IN_BURST_PERIOD;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
		else if (rnd < clg->a2 + clg->a3) {
			clg->state = TX_IN_GAP_PERIOD;
		} else if (clg->a2 + clg->a3 < rnd) {
		} else {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		}
@@ -524,7 +524,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				1 << get_random_u32_below(8);
	}

	if (unlikely(q->t_len >= sch->limit)) {
	if (unlikely(sch->q.qlen >= sch->limit)) {
		/* re-link segs, so that qdisc_drop_all() frees them all */
		skb->next = segs;
		qdisc_drop_all(skb, sch, to_free);
@@ -659,9 +659,8 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)

	if (!q->slot_dist)
		next_delay = q->slot_config.min_delay +
				(get_random_u32() *
				 (q->slot_config.max_delay -
				  q->slot_config.min_delay) >> 32);
			mul_u64_u32_shr(q->slot_config.max_delay - q->slot_config.min_delay,
					get_random_u32(), 32);
	else
		next_delay = tabledist(q->slot_config.dist_delay,
				       (s32)(q->slot_config.dist_jitter),
@@ -827,6 +826,39 @@ static int get_dist_table(struct disttable **tbl, const struct nlattr *attr)
	return 0;
}

static int validate_time(const struct nlattr *attr, const char *name,
			 struct netlink_ext_ack *extack)
{
	if (nla_get_s64(attr) < 0) {
		NL_SET_ERR_MSG_ATTR_FMT(extack, attr, "negative %s", name);
		return -EINVAL;
	}
	return 0;
}

static int validate_slot(const struct nlattr *attr, struct netlink_ext_ack *extack)
{
	const struct tc_netem_slot *c = nla_data(attr);

	if (c->min_delay < 0 || c->max_delay < 0) {
		NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot delay");
		return -EINVAL;
	}
	if (c->min_delay > c->max_delay) {
		NL_SET_ERR_MSG_ATTR(extack, attr, "slot min delay greater than max delay");
		return -EINVAL;
	}
	if (c->dist_delay < 0 || c->dist_jitter < 0) {
		NL_SET_ERR_MSG_ATTR(extack, attr, "negative dist delay");
		return -EINVAL;
	}
	if (c->max_packets < 0 || c->max_bytes < 0) {
		NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot limit");
		return -EINVAL;
	}
	return 0;
}

static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_slot *c = nla_data(attr);
@@ -1040,6 +1072,24 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
			goto table_free;
	}

	if (tb[TCA_NETEM_SLOT]) {
		ret = validate_slot(tb[TCA_NETEM_SLOT], extack);
		if (ret)
			goto table_free;
	}

	if (tb[TCA_NETEM_LATENCY64]) {
		ret = validate_time(tb[TCA_NETEM_LATENCY64], "latency", extack);
		if (ret)
			goto table_free;
	}

	if (tb[TCA_NETEM_JITTER64]) {
		ret = validate_time(tb[TCA_NETEM_JITTER64], "jitter", extack);
		if (ret)
			goto table_free;
	}

	sch_tree_lock(sch);
	/* backup q->clg and q->loss_model */
	old_clg = q->clg;
@@ -1112,11 +1162,10 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
	/* capping jitter to the range acceptable by tabledist() */
	q->jitter = min_t(s64, abs(q->jitter), INT_MAX);

	if (tb[TCA_NETEM_PRNG_SEED])
	if (tb[TCA_NETEM_PRNG_SEED]) {
		q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]);
	else
		q->prng.seed = get_random_u64();
		prandom_seed_state(&q->prng.prng_state, q->prng.seed);
	}

unlock:
	sch_tree_unlock(sch);
@@ -1139,6 +1188,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt,
		return -EINVAL;

	q->loss_model = CLG_RANDOM;
	q->prng.seed = get_random_u64();
	prandom_seed_state(&q->prng.prng_state, q->prng.seed);

	ret = netem_change(sch, opt, extack);
	if (ret)
		pr_info("netem: change failed\n");