Commit 416e53e3 authored by Florian Westphal
Browse files

netfilter: nft_set_pipapo_avx2: split lookup function in two parts



Split the main avx2 lookup function into a helper.

This is a preparatory patch: a follow-up change will use the new helper
from the insertion path where possible, which greatly improves insertion
performance when AVX2 is supported.

Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
parent d11b2640
Loading
Loading
Loading
Loading
+77 −49
Original line number Diff line number Diff line
@@ -1133,56 +1133,35 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns
}

/**
 * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
 * @net:	Network namespace
 * @set:	nftables API set representation
 * @key:	nftables API element representation containing key data
 * pipapo_get_avx2() - Lookup function for AVX2 implementation
 * @m:		Storage containing the set elements
 * @data:	Key data to be matched against existing elements
 * @genmask:	If set, check that element is active in given genmask
 * @tstamp:	Timestamp to check for expired elements
 *
 * For more details, see DOC: Theory of Operation in nft_set_pipapo.c.
 *
 * This implementation exploits the repetitive characteristic of the algorithm
 * to provide a fast, vectorised version using the AVX2 SIMD instruction set.
 *
 * Return: true on match, false otherwise.
 * The caller must check that the FPU is usable.
 * This function must be called with BH disabled.
 *
 * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise.
 */
const struct nft_set_ext *
nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
		       const u32 *key)
static struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m,
					       const u8 *data, u8 genmask,
					       u64 tstamp)
{
	struct nft_pipapo *priv = nft_set_priv(set);
	const struct nft_set_ext *ext = NULL;
	struct nft_pipapo_scratch *scratch;
	u8 genmask = nft_genmask_cur(net);
	const struct nft_pipapo_match *m;
	const struct nft_pipapo_field *f;
	const u8 *rp = (const u8 *)key;
	unsigned long *res, *fill;
	bool map_index;
	int i;

	local_bh_disable();

	if (unlikely(!irq_fpu_usable())) {
		ext = nft_pipapo_lookup(net, set, key);

		local_bh_enable();
		return ext;
	}

	m = rcu_dereference(priv->match);

	/* Note that we don't need a valid MXCSR state for any of the
	 * operations we use here, so pass 0 as mask and spare a LDMXCSR
	 * instruction.
	 */
	kernel_fpu_begin_mask(0);

	scratch = *raw_cpu_ptr(m->scratch);
	if (unlikely(!scratch)) {
		kernel_fpu_end();
		local_bh_enable();
	if (unlikely(!scratch))
		return NULL;
	}

	map_index = scratch->map_index;

@@ -1191,6 +1170,12 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,

	pipapo_resmap_init_avx2(m, res);

	/* Note that we don't need a valid MXCSR state for any of the
	 * operations we use here, so pass 0 as mask and spare a LDMXCSR
	 * instruction.
	 */
	kernel_fpu_begin_mask(0);

	nft_pipapo_avx2_prepare();

next_match:
@@ -1200,7 +1185,7 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,

#define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n)				\
		(ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f,	\
							 ret, rp,	\
							 ret, data,	\
							 first, last))

		if (likely(f->bb == 8)) {
@@ -1216,7 +1201,7 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
				NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
			} else {
				ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
								  ret, rp,
								  ret, data,
								  first, last);
			}
		} else {
@@ -1232,7 +1217,7 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
				NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
			} else {
				ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f,
								  ret, rp,
								  ret, data,
								  first, last);
			}
		}
@@ -1240,29 +1225,72 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,

#undef NFT_SET_PIPAPO_AVX2_LOOKUP

		if (ret < 0)
			goto out;
		if (ret < 0) {
			scratch->map_index = map_index;
			kernel_fpu_end();
			return NULL;
		}

		if (last) {
			const struct nft_set_ext *e = &f->mt[ret].e->ext;
			struct nft_pipapo_elem *e;

			if (unlikely(nft_set_elem_expired(e) ||
				     !nft_set_elem_active(e, genmask)))
			e = f->mt[ret].e;
			if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) ||
				     !nft_set_elem_active(&e->ext, genmask)))
				goto next_match;

			ext = e;
			goto out;
			scratch->map_index = map_index;
			kernel_fpu_end();
			return e;
		}

		map_index = !map_index;
		swap(res, fill);
		rp += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
		data += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
	}

out:
	if (i % 2)
		scratch->map_index = !map_index;
	kernel_fpu_end();
	local_bh_enable();
	return NULL;
}

/**
 * nft_pipapo_avx2_lookup() - Data path entry point for the AVX2 lookup
 * @net:	Network namespace
 * @set:	nftables API set representation
 * @key:	nftables API element representation containing key data
 *
 * Bottom halves are disabled for the whole duration of the lookup.  If the
 * FPU cannot be used in the current context, the request is handed over to
 * nft_pipapo_lookup().  Otherwise the match data currently published in the
 * set's private area is searched with pipapo_get_avx2(), using the current
 * generation mask and the current 64-bit jiffies value as timestamp.
 *
 * Return: nftables API extension pointer or NULL if no match.
 */
const struct nft_set_ext *
nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
		       const u32 *key)
{
	const struct nft_set_ext *ext = NULL;
	const struct nft_pipapo_match *match;
	const struct nft_pipapo_elem *elem;
	struct nft_pipapo *priv;
	u8 genmask;

	priv = nft_set_priv(set);
	genmask = nft_genmask_cur(net);

	local_bh_disable();

	if (likely(irq_fpu_usable())) {
		match = rcu_dereference(priv->match);

		elem = pipapo_get_avx2(match, (const u8 *)key, genmask,
				       get_jiffies_64());
		if (elem)
			ext = &elem->ext;
	} else {
		/* No FPU available here: use the generic implementation. */
		ext = nft_pipapo_lookup(net, set, key);
	}

	local_bh_enable();

	return ext;
}