Commit 67231833 authored by Sebastian Andrzej Siewior's avatar Sebastian Andrzej Siewior Committed by Paolo Abeni
Browse files

openvswitch: Use nested-BH locking for ovs_pcpu_storage



ovs_pcpu_storage is a per-CPU variable and relies on disabled BH for its
locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT
this data structure requires explicit locking.
The data structure can be referenced recursive and there is a recursion
counter to avoid too many recursions.

Add a local_lock_t to the data structure and use
local_lock_nested_bh() for locking. Add an owner of the struct which is
the current task and acquire the lock only if the structure is not owned
by the current task.

Cc: Aaron Conole <aconole@redhat.com>
Cc: Eelco Chaudron <echaudro@redhat.com>
Cc: Ilya Maximets <i.maximets@ovn.org>
Cc: dev@openvswitch.org
Signed-off-by: default avatarSebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: default avatarAaron Conole <aconole@redhat.com>
Link: https://patch.msgid.link/20250512092736.229935-9-bigeasy@linutronix.de


Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parent 035fcdc4
Loading
Loading
Loading
Loading
+2 −29
Original line number Diff line number Diff line
@@ -39,15 +39,6 @@
#include "flow_netlink.h"
#include "openvswitch_trace.h"

struct deferred_action {
	struct sk_buff *skb;
	const struct nlattr *actions;
	int actions_len;

	/* Store pkt_key clone when creating deferred action. */
	struct sw_flow_key pkt_key;
};

#define MAX_L2_LEN	(VLAN_ETH_HLEN + 3 * MPLS_HLEN)
struct ovs_frag_data {
	unsigned long dst;
@@ -64,28 +55,10 @@ struct ovs_frag_data {

static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);

#define DEFERRED_ACTION_FIFO_SIZE 10
#define OVS_RECURSION_LIMIT 5
#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
struct action_fifo {
	int head;
	int tail;
	/* Deferred action fifo queue storage. */
	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};

struct action_flow_keys {
	struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
};

struct ovs_pcpu_storage {
	struct action_fifo action_fifos;
	struct action_flow_keys flow_keys;
	int exec_level;
DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = {
	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};

static DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage);

/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
 * space. Return NULL if out of key spaces.
 */
+24 −0
Original line number Diff line number Diff line
@@ -244,11 +244,13 @@ void ovs_dp_detach_port(struct vport *p)
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct dp_stats_percpu *stats;
	bool ovs_pcpu_locked = false;
	u64 *stats_counter;
	u32 n_mask_hit;
	u32 n_cache_hit;
@@ -290,10 +292,26 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)

	ovs_flow_stats_update(flow, key->tp.flags, skb);
	sf_acts = rcu_dereference(flow->sf_acts);
	/* This path can be invoked recursively: Use the current task to
	 * identify recursive invocation - the lock must be acquired only once.
	 * Even with disabled bottom halves this can be preempted on PREEMPT_RT.
	 * Limit the locking to RT to avoid assigning `owner' if it can be
	 * avoided.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
		local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
		ovs_pcpu->owner = current;
		ovs_pcpu_locked = true;
	}

	error = ovs_execute_actions(dp, skb, sf_acts, key);
	if (unlikely(error))
		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
				    ovs_dp_name(dp), error);
	if (ovs_pcpu_locked) {
		ovs_pcpu->owner = NULL;
		local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
	}

	stats_counter = &stats->n_hit;

@@ -671,7 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
	sf_acts = rcu_dereference(flow->sf_acts);

	local_bh_disable();
	local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		this_cpu_write(ovs_pcpu_storage.owner, current);
	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		this_cpu_write(ovs_pcpu_storage.owner, NULL);
	local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
	local_bh_enable();
	rcu_read_unlock();

+33 −0
Original line number Diff line number Diff line
@@ -173,6 +173,39 @@ struct ovs_net {
	bool xt_label;
};

struct deferred_action {
	struct sk_buff *skb;
	const struct nlattr *actions;
	int actions_len;

	/* Store pkt_key clone when creating deferred action. */
	struct sw_flow_key pkt_key;
};

#define DEFERRED_ACTION_FIFO_SIZE 10
#define OVS_RECURSION_LIMIT 5
#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)

struct action_fifo {
	int head;
	int tail;
	/* Deferred action fifo queue storage. */
	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};

struct action_flow_keys {
	struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
};

struct ovs_pcpu_storage {
	struct action_fifo action_fifos;
	struct action_flow_keys flow_keys;
	int exec_level;
	struct task_struct *owner;
	local_lock_t bh_lock;
};
DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage);

/**
 * enum ovs_pkt_hash_types - hash info to include with a packet
 * to send to userspace.