Commit 566e8f10 authored by Carolina Jubran, committed by Jakub Kicinski
Browse files

devlink: Extend devlink rate API with traffic classes bandwidth management



Introduce support for specifying relative bandwidth shares between
traffic classes (TC) in the devlink-rate API. This new option allows
users to allocate bandwidth across multiple traffic classes in a
single command.

This feature provides more granular control over traffic management,
especially for scenarios requiring Enhanced Transmission Selection.

Users can now define a relative bandwidth share for each traffic class.
For example, assigning share values of 20 to TC0 (TCP/UDP) and 80 to TC5
(RoCE) will result in TC0 receiving 20% and TC5 receiving 80% of the
total bandwidth. The actual percentage each class receives depends on
the ratio of its share value to the sum of all shares.

Example:
DEV=pci/0000:08:00.0

$ devlink port function rate add $DEV/vfs_group tx_share 10Gbit \
  tx_max 50Gbit tc-bw 0:20 1:0 2:0 3:0 4:0 5:80 6:0 7:0

$ devlink port function rate set $DEV/vfs_group \
  tc-bw 0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0

Example usage with ynl:

./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \
  --do rate-set --json '{
  "bus-name": "pci",
  "dev-name": "0000:08:00.0",
  "port-index": 1,
  "rate-tc-bws": [
    {"rate-tc-index": 0, "rate-tc-bw": 50},
    {"rate-tc-index": 1, "rate-tc-bw": 50},
    {"rate-tc-index": 2, "rate-tc-bw": 0},
    {"rate-tc-index": 3, "rate-tc-bw": 0},
    {"rate-tc-index": 4, "rate-tc-bw": 0},
    {"rate-tc-index": 5, "rate-tc-bw": 0},
    {"rate-tc-index": 6, "rate-tc-bw": 0},
    {"rate-tc-index": 7, "rate-tc-bw": 0}
  ]
}'

./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \
  --do rate-get --json '{
  "bus-name": "pci",
  "dev-name": "0000:08:00.0",
  "port-index": 1
}'

output for rate-get:
{'bus-name': 'pci',
 'dev-name': '0000:08:00.0',
 'port-index': 1,
 'rate-tc-bws': [{'rate-tc-bw': 50, 'rate-tc-index': 0},
                 {'rate-tc-bw': 50, 'rate-tc-index': 1},
                 {'rate-tc-bw': 0, 'rate-tc-index': 2},
                 {'rate-tc-bw': 0, 'rate-tc-index': 3},
                 {'rate-tc-bw': 0, 'rate-tc-index': 4},
                 {'rate-tc-bw': 0, 'rate-tc-index': 5},
                 {'rate-tc-bw': 0, 'rate-tc-index': 6},
                 {'rate-tc-bw': 0, 'rate-tc-index': 7}],
 'rate-tx-max': 0,
 'rate-tx-priority': 0,
 'rate-tx-share': 0,
 'rate-tx-weight': 0,
 'rate-type': 'leaf'}

Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20250629142138.361537-3-mbloch@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 42401c42
Loading
Loading
Loading
Loading
+31 −1
Original line number Diff line number Diff line
@@ -224,6 +224,10 @@ definitions:
        value: 10
      -
        name: binary
  -
    name: rate-tc-index-max
    type: const
    value: 7

attribute-sets:
  -
@@ -844,7 +848,23 @@ attribute-sets:
      -
        name: region-direct
        type: flag

      -
        name: rate-tc-bws
        type: nest
        multi-attr: true
        nested-attributes: dl-rate-tc-bws
      -
        name: rate-tc-index
        type: u8
        checks:
          max: rate-tc-index-max
      -
        name: rate-tc-bw
        type: u32
        doc: |
             Specifies the bandwidth share assigned to the Traffic Class.
             The bandwidth for the traffic class is determined
             in proportion to the sum of the shares of all configured classes.
  -
    name: dl-dev-stats
    subset-of: devlink
@@ -1249,6 +1269,14 @@ attribute-sets:
      -
        name: flash
        type: flag
  -
    name: dl-rate-tc-bws
    subset-of: devlink
    attributes:
      -
        name: rate-tc-index
      -
        name: rate-tc-bw

operations:
  enum-model: directional
@@ -2176,6 +2204,7 @@ operations:
            - rate-tx-priority
            - rate-tx-weight
            - rate-parent-node-name
            - rate-tc-bws

    -
      name: rate-new
@@ -2196,6 +2225,7 @@ operations:
            - rate-tx-priority
            - rate-tx-weight
            - rate-parent-node-name
            - rate-tc-bws

    -
      name: rate-del
+8 −0
Original line number Diff line number Diff line
@@ -418,6 +418,14 @@ API allows to configure following rate object's parameters:
  to all node children limits. ``tx_max`` is an upper limit for children.
  ``tx_share`` is a total bandwidth distributed among children.

``tc_bw``
  Allows users to set the bandwidth allocation per traffic class on rate
  objects. This enables fine-grained QoS configurations by assigning a relative
  share value to each traffic class. The bandwidth is distributed in proportion
  to the share value for each class, relative to the sum of all shares.
  When applied to a non-leaf node, ``tc_bw`` determines how bandwidth is shared
  among its child elements.

``tx_priority`` and ``tx_weight`` can be used simultaneously. In that case
nodes with the same priority form a WFQ subgroup in the sibling group
and arbitration among them is based on assigned weights.
+8 −0
Original line number Diff line number Diff line
@@ -118,6 +118,8 @@ struct devlink_rate {

	u32 tx_priority;
	u32 tx_weight;

	u32 tc_bw[DEVLINK_RATE_TCS_MAX];
};

struct devlink_port {
@@ -1486,6 +1488,9 @@ struct devlink_ops {
					 u32 tx_priority, struct netlink_ext_ack *extack);
	int (*rate_leaf_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv,
				       u32 tx_weight, struct netlink_ext_ack *extack);
	int (*rate_leaf_tc_bw_set)(struct devlink_rate *devlink_rate,
				   void *priv, u32 *tc_bw,
				   struct netlink_ext_ack *extack);
	int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv,
				      u64 tx_share, struct netlink_ext_ack *extack);
	int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
@@ -1494,6 +1499,9 @@ struct devlink_ops {
					 u32 tx_priority, struct netlink_ext_ack *extack);
	int (*rate_node_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv,
				       u32 tx_weight, struct netlink_ext_ack *extack);
	int (*rate_node_tc_bw_set)(struct devlink_rate *devlink_rate,
				   void *priv, u32 *tc_bw,
				   struct netlink_ext_ack *extack);
	int (*rate_node_new)(struct devlink_rate *rate_node, void **priv,
			     struct netlink_ext_ack *extack);
	int (*rate_node_del)(struct devlink_rate *rate_node, void *priv,
+9 −0
Original line number Diff line number Diff line
@@ -221,6 +221,11 @@ enum devlink_port_flavour {
				      */
};

/* IEEE 802.1Qaz standard supported values.
 *
 * DEVLINK_RATE_TCS_MAX is the number of traffic classes that can be
 * configured; DEVLINK_RATE_TC_INDEX_MAX is derived from it and is the highest
 * valid zero-based traffic class index.
 */

#define DEVLINK_RATE_TCS_MAX 8
#define DEVLINK_RATE_TC_INDEX_MAX (DEVLINK_RATE_TCS_MAX - 1)

enum devlink_rate_type {
	DEVLINK_RATE_TYPE_LEAF,
	DEVLINK_RATE_TYPE_NODE,
@@ -629,6 +634,10 @@ enum devlink_attr {

	DEVLINK_ATTR_REGION_DIRECT,		/* flag */

	DEVLINK_ATTR_RATE_TC_BWS,		/* nested */
	DEVLINK_ATTR_RATE_TC_INDEX,		/* u8 */
	DEVLINK_ATTR_RATE_TC_BW,		/* u32 */

	/* Add new attributes above here, update the spec in
	 * Documentation/netlink/specs/devlink.yaml and re-generate
	 * net/devlink/netlink_gen.c.
+11 −4
Original line number Diff line number Diff line
@@ -45,6 +45,11 @@ const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_
	[DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(15),
};

/* Policy for the attributes nested inside DEVLINK_ATTR_RATE_TC_BWS:
 * a u8 traffic class index, rejected if above DEVLINK_RATE_TC_INDEX_MAX,
 * and a u32 bandwidth share for that traffic class.
 */
const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_ATTR_RATE_TC_BW + 1] = {
	[DEVLINK_ATTR_RATE_TC_INDEX] = NLA_POLICY_MAX(NLA_U8, DEVLINK_RATE_TC_INDEX_MAX),
	[DEVLINK_ATTR_RATE_TC_BW] = { .type = NLA_U32, },
};

const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = {
	[DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG, },
};
@@ -523,7 +528,7 @@ static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_
};

/* DEVLINK_CMD_RATE_SET - do */
static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = {
static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = {
	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
	[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
@@ -532,10 +537,11 @@ static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_W
	[DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, },
	[DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, },
	[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, },
	[DEVLINK_ATTR_RATE_TC_BWS] = NLA_POLICY_NESTED(devlink_dl_rate_tc_bws_nl_policy),
};

/* DEVLINK_CMD_RATE_NEW - do */
static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = {
static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = {
	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
	[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
@@ -544,6 +550,7 @@ static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_W
	[DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, },
	[DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, },
	[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, },
	[DEVLINK_ATTR_RATE_TC_BWS] = NLA_POLICY_NESTED(devlink_dl_rate_tc_bws_nl_policy),
};

/* DEVLINK_CMD_RATE_DEL - do */
@@ -1191,7 +1198,7 @@ const struct genl_split_ops devlink_nl_ops[74] = {
		.doit		= devlink_nl_rate_set_doit,
		.post_doit	= devlink_nl_post_doit,
		.policy		= devlink_rate_set_nl_policy,
		.maxattr	= DEVLINK_ATTR_RATE_TX_WEIGHT,
		.maxattr	= DEVLINK_ATTR_RATE_TC_BWS,
		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
	},
	{
@@ -1201,7 +1208,7 @@ const struct genl_split_ops devlink_nl_ops[74] = {
		.doit		= devlink_nl_rate_new_doit,
		.post_doit	= devlink_nl_post_doit,
		.policy		= devlink_rate_new_nl_policy,
		.maxattr	= DEVLINK_ATTR_RATE_TX_WEIGHT,
		.maxattr	= DEVLINK_ATTR_RATE_TC_BWS,
		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
	},
	{
Loading