Commit c0518571 authored by David S. Miller
Browse files

Merge branch 'mlxsw-lag-table-allocation'



Petr Machata says:

====================
mlxsw: Move allocation of LAG table to the driver

PGT is an in-HW table that maps addresses to sets of ports. Then when some
HW process needs a set of ports as an argument, instead of embedding the
actual set in the dynamic configuration, what gets configured is the
address referencing the set. The HW then works with the appropriate PGT
entry.

Within the PGT is placed a LAG table. That is a contiguous block of PGT
memory where each entry describes which ports are members of the
corresponding LAG port.

The PGT is split into two parts: one managed by the FW, and one managed by
the driver. Historically, the FW part also included the LAG table, an
arrangement referred to as FW LAG mode. Giving the responsibility for
placement of the LAG table to the driver, referred to as SW LAG mode, makes
the whole system more flexible. The FW currently supports both FW and SW LAG
modes. To shed complexity, the FW should in the future only support SW LAG
mode.

Hence this patchset, where support for placement of LAG is added to mlxsw.

There are FW versions out there that do not support SW LAG mode, and on
Spectrum-1 in particular, there is no plan to support it at all. mlxsw will
therefore have to support both modes of operation.

Another aspect is that at least on Spectrum-1, there are FW versions out
there that claim to support driver-placed LAG table, but then reject or
ignore configurations enabling the same. The driver thus has to have a say
in whether an attempt to configure SW LAG mode should even be done.

The feature is therefore expressed in terms of "does the driver prefer SW
LAG mode?", and "what LAG mode did the PCI module manage to configure the FW
with?". This is unlike current flood mode configuration, where the driver
can give a strict value, and that's what gets configured. But it gives the
driver a chance to determine whether SW LAG mode should be enabled at
all.

The "does the driver prefer SW LAG mode?" bit is expressed as a boolean
lag_mode_prefer_sw. The reason for this is largely another feature that
will be introduced in a follow-up patchset: support for CFF flood mode. The
driver currently requires that the FW be configured with what is called
controlled flood mode. But on capable systems, CFF would be preferred. So
there are two values in flight: the preferred flood mode, and the fallback.
This could be expressed with an array of flood modes ordered by preference,
but that looks like overkill in comparison. This flag/value model is
then reused for LAG mode as well, except the fallback value is absent and
implied to be FW, because there are no other values to choose from.

The patchset progresses as follows:

- Patches #1 to #5 adjust reg.h and cmd.h with new register fields,
  constants and remarks.

- Patches #6 and #7 add the ability to request SW LAG mode and to query the
  LAG mode that was actually negotiated. This is where the above-mentioned
  lag_mode_prefer_sw flag is added.

- Patches #8 and #9 generalize PGT allocations to make it possible to
  allocate the LAG table, which is done in patch #10.

- In patch #11, toggle lag_mode_prefer_sw on Spectrum-2 and above, which
  makes the newly-added code live.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents fd533a7a b46c1f3f
Loading
Loading
Loading
Loading
+35 −8
Original line number Diff line number Diff line
@@ -276,6 +276,12 @@ MLXSW_ITEM32(cmd_mbox, query_fw, fw_month, 0x14, 8, 8);
 */
MLXSW_ITEM32(cmd_mbox, query_fw, fw_day, 0x14, 0, 8);

/* cmd_mbox_query_fw_lag_mode_support
 * 0: CONFIG_PROFILE.lag_mode is not supported by FW
 * 1: CONFIG_PROFILE.lag_mode is supported by FW
 */
MLXSW_ITEM32(cmd_mbox, query_fw, lag_mode_support, 0x18, 1, 1);

/* cmd_mbox_query_fw_clr_int_base_offset
 * Clear Interrupt register's offset from clr_int_bar register
 * in PCI address space.
@@ -659,42 +665,48 @@ MLXSW_ITEM32(cmd_mbox, config_profile,
 */
MLXSW_ITEM32(cmd_mbox, config_profile, set_ar_sec, 0x0C, 15, 1);

/* cmd_mbox_config_set_ubridge
/* cmd_mbox_config_profile_set_ubridge
 * Capability bit. Setting a bit to 1 configures the profile
 * according to the mailbox contents.
 */
MLXSW_ITEM32(cmd_mbox, config_profile, set_ubridge, 0x0C, 22, 1);

/* cmd_mbox_config_set_kvd_linear_size
/* cmd_mbox_config_profile_set_kvd_linear_size
 * Capability bit. Setting a bit to 1 configures the profile
 * according to the mailbox contents.
 */
MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_linear_size, 0x0C, 24, 1);

/* cmd_mbox_config_set_kvd_hash_single_size
/* cmd_mbox_config_profile_set_kvd_hash_single_size
 * Capability bit. Setting a bit to 1 configures the profile
 * according to the mailbox contents.
 */
MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1);

/* cmd_mbox_config_set_kvd_hash_double_size
/* cmd_mbox_config_profile_set_kvd_hash_double_size
 * Capability bit. Setting a bit to 1 configures the profile
 * according to the mailbox contents.
 */
MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1);

/* cmd_mbox_config_set_cqe_version
/* cmd_mbox_config_profile_set_cqe_version
 * Capability bit. Setting a bit to 1 configures the profile
 * according to the mailbox contents.
 */
MLXSW_ITEM32(cmd_mbox, config_profile, set_cqe_version, 0x08, 0, 1);

/* cmd_mbox_config_set_cqe_time_stamp_type
/* cmd_mbox_config_profile_set_cqe_time_stamp_type
 * Capability bit. Setting a bit to 1 configures the profile
 * according to the mailbox contents.
 */
MLXSW_ITEM32(cmd_mbox, config_profile, set_cqe_time_stamp_type, 0x08, 2, 1);

/* cmd_mbox_config_profile_set_lag_mode
 * Capability bit. Setting a bit to 1 configures the lag_mode
 * according to the mailbox contents.
 */
MLXSW_ITEM32(cmd_mbox, config_profile, set_lag_mode, 0x08, 7, 1);

/* cmd_mbox_config_profile_max_vepa_channels
 * Maximum number of VEPA channels per port (0 through 16)
 * 0 - multi-channel VEPA is disabled
@@ -840,6 +852,21 @@ MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1);
 */
MLXSW_ITEM32(cmd_mbox, config_profile, ubridge, 0x50, 4, 1);

/* Selects which side is responsible for the LAG table inside the PGT.
 * The numeric values are spelled out because they are written verbatim
 * into the one-bit CONFIG_PROFILE lag_mode mailbox field.
 */
enum mlxsw_cmd_mbox_config_profile_lag_mode {
	/* The LAG table within the PGT is managed by the FW. */
	MLXSW_CMD_MBOX_CONFIG_PROFILE_LAG_MODE_FW = 0,
	/* The LAG table within the PGT is managed by SW. */
	MLXSW_CMD_MBOX_CONFIG_PROFILE_LAG_MODE_SW = 1,
};

/* cmd_mbox_config_profile_lag_mode
 * LAG mode
 * Configured if set_lag_mode is set
 * Supported from Spectrum-2 and above.
 * Supported only when ubridge = 1
 */
MLXSW_ITEM32(cmd_mbox, config_profile, lag_mode, 0x50, 3, 1);

/* cmd_mbox_config_kvd_linear_size
 * KVD Linear Size
 * Valid for Spectrum only
@@ -847,7 +874,7 @@ MLXSW_ITEM32(cmd_mbox, config_profile, ubridge, 0x50, 4, 1);
 */
MLXSW_ITEM32(cmd_mbox, config_profile, kvd_linear_size, 0x54, 0, 24);

/* cmd_mbox_config_kvd_hash_single_size
/* cmd_mbox_config_profile_kvd_hash_single_size
 * KVD Hash single-entries size
 * Valid for Spectrum only
 * Allowed values are 128*N where N=0 or higher
@@ -856,7 +883,7 @@ MLXSW_ITEM32(cmd_mbox, config_profile, kvd_linear_size, 0x54, 0, 24);
 */
MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_single_size, 0x58, 0, 24);

/* cmd_mbox_config_kvd_hash_double_size
/* cmd_mbox_config_profile_kvd_hash_double_size
 * KVD Hash double-entries size (units of single-size entries)
 * Valid for Spectrum only
 * Allowed values are 128*N where N=0 or higher
+7 −0
Original line number Diff line number Diff line
@@ -204,6 +204,13 @@ int mlxsw_core_max_lag(struct mlxsw_core *mlxsw_core, u16 *p_max_lag)
}
EXPORT_SYMBOL(mlxsw_core_max_lag);

/* Report the LAG mode by delegating to the lag_mode callback of the
 * underlying bus, handing it the bus-private context stored in the core.
 */
enum mlxsw_cmd_mbox_config_profile_lag_mode
mlxsw_core_lag_mode(struct mlxsw_core *mlxsw_core)
{
	void *bus_priv = mlxsw_core->bus_priv;

	return mlxsw_core->bus->lag_mode(bus_priv);
}
EXPORT_SYMBOL(mlxsw_core_lag_mode);

void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core)
{
	return mlxsw_core->driver_priv;
+4 −0
Original line number Diff line number Diff line
@@ -36,6 +36,8 @@ struct mlxsw_fw_rev;
unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core);

int mlxsw_core_max_lag(struct mlxsw_core *mlxsw_core, u16 *p_max_lag);
enum mlxsw_cmd_mbox_config_profile_lag_mode
mlxsw_core_lag_mode(struct mlxsw_core *mlxsw_core);

void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core);

@@ -335,6 +337,7 @@ struct mlxsw_config_profile {
	u8	kvd_hash_single_parts;
	u8	kvd_hash_double_parts;
	u8	cqe_time_stamp_type;
	bool	lag_mode_prefer_sw;
	struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT];
};

@@ -485,6 +488,7 @@ struct mlxsw_bus {
	u32 (*read_frc_l)(void *bus_priv);
	u32 (*read_utc_sec)(void *bus_priv);
	u32 (*read_utc_nsec)(void *bus_priv);
	enum mlxsw_cmd_mbox_config_profile_lag_mode (*lag_mode)(void *bus_priv);
	u8 features;
};

+25 −3
Original line number Diff line number Diff line
@@ -105,6 +105,8 @@ struct mlxsw_pci {
	u64 free_running_clock_offset;
	u64 utc_sec_offset;
	u64 utc_nsec_offset;
	bool lag_mode_support;
	enum mlxsw_cmd_mbox_config_profile_lag_mode lag_mode;
	struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT];
	u32 doorbell_offset;
	struct mlxsw_core *core;
@@ -1312,6 +1314,16 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
					profile->cqe_time_stamp_type);
	}

	if (profile->lag_mode_prefer_sw && mlxsw_pci->lag_mode_support) {
		enum mlxsw_cmd_mbox_config_profile_lag_mode lag_mode =
			MLXSW_CMD_MBOX_CONFIG_PROFILE_LAG_MODE_SW;

		mlxsw_cmd_mbox_config_profile_set_lag_mode_set(mbox, 1);
		mlxsw_cmd_mbox_config_profile_lag_mode_set(mbox, lag_mode);
		mlxsw_pci->lag_mode = lag_mode;
	} else {
		mlxsw_pci->lag_mode = MLXSW_CMD_MBOX_CONFIG_PROFILE_LAG_MODE_FW;
	}
	return mlxsw_cmd_config_profile_set(mlxsw_pci->core, mbox);
}

@@ -1587,6 +1599,8 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
	mlxsw_pci->utc_nsec_offset =
		mlxsw_cmd_mbox_query_fw_utc_nsec_offset_get(mbox);

	mlxsw_pci->lag_mode_support =
		mlxsw_cmd_mbox_query_fw_lag_mode_support_get(mbox);
	num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox);
	err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages);
	if (err)
@@ -1619,9 +1633,8 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
	if (err)
		goto err_config_profile;

	/* Some resources depend on unified bridge model, which is configured
	 * as part of config_profile. Query the resources again to get correct
	 * values.
	/* Some resources depend on details of config_profile, such as unified
	 * bridge model. Query the resources again to get correct values.
	 */
	err = mlxsw_core_resources_query(mlxsw_core, mbox, res);
	if (err)
@@ -1896,6 +1909,14 @@ static u32 mlxsw_pci_read_utc_nsec(void *bus_priv)
	return mlxsw_pci_read32_off(mlxsw_pci, mlxsw_pci->utc_nsec_offset);
}

static enum mlxsw_cmd_mbox_config_profile_lag_mode
mlxsw_pci_lag_mode(void *bus_priv)
{
	struct mlxsw_pci *mlxsw_pci = bus_priv;

	return mlxsw_pci->lag_mode;
}

static const struct mlxsw_bus mlxsw_pci_bus = {
	.kind			= "pci",
	.init			= mlxsw_pci_init,
@@ -1907,6 +1928,7 @@ static const struct mlxsw_bus mlxsw_pci_bus = {
	.read_frc_l		= mlxsw_pci_read_frc_l,
	.read_utc_sec		= mlxsw_pci_read_utc_sec,
	.read_utc_nsec		= mlxsw_pci_read_utc_nsec,
	.lag_mode		= mlxsw_pci_lag_mode,
	.features		= MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET,
};

+7 −7
Original line number Diff line number Diff line
@@ -38,18 +38,18 @@ static const struct mlxsw_reg_info mlxsw_reg_##_name = { \

MLXSW_REG_DEFINE(sgcr, MLXSW_REG_SGCR_ID, MLXSW_REG_SGCR_LEN);

/* reg_sgcr_llb
 * Link Local Broadcast (Default=0)
 * When set, all Link Local packets (224.0.0.X) will be treated as broadcast
 * packets and ignore the IGMP snooping entries.
/* reg_sgcr_lag_lookup_pgt_base
 * Base address used for lookup in PGT table
 * Supported when CONFIG_PROFILE.lag_mode = 1
 * Note: when IGCR.ddd_lag_mode=0, the address shall be aligned to 8 entries.
 * Access: RW
 */
MLXSW_ITEM32(reg, sgcr, llb, 0x04, 0, 1);
MLXSW_ITEM32(reg, sgcr, lag_lookup_pgt_base, 0x0C, 0, 16);

static inline void mlxsw_reg_sgcr_pack(char *payload, bool llb)
static inline void mlxsw_reg_sgcr_pack(char *payload, u16 lag_lookup_pgt_base)
{
	MLXSW_REG_ZERO(sgcr, payload);
	mlxsw_reg_sgcr_llb_set(payload, !!llb);
	mlxsw_reg_sgcr_lag_lookup_pgt_base_set(payload, lag_lookup_pgt_base);
}

/* SPAD - Switch Physical Address Register
Loading