Commit 3b4cf29b authored by David S. Miller
Browse files

Merge branch 'net-rps-misc'



Eric Dumazet says:

====================
net: rps: misc changes

Make RPS/RFS a bit more efficient with better cache locality
and heuristics.

Also shrink include/linux/netdevice.h a bit.

v2: fixed a build issue in patch 6/8 with CONFIG_RPS=n
    (Jakub and kernel build bots)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents d823265d d3ae5f46
Loading
Loading
Loading
Loading
+3 −35
Original line number Diff line number Diff line
@@ -3204,6 +3204,7 @@ struct softnet_data {
	struct softnet_data	*rps_ipi_list;
#endif

	unsigned int		received_rps;
	bool			in_net_rx_action;
	bool			in_napi_threaded_poll;

@@ -3236,11 +3237,11 @@ struct softnet_data {
	unsigned int		cpu;
	unsigned int		input_queue_tail;
#endif
	unsigned int		received_rps;
	unsigned int		dropped;
	struct sk_buff_head	input_pkt_queue;
	struct napi_struct	backlog;

	atomic_t		dropped ____cacheline_aligned_in_smp;

	/* Another possibly contended cache line */
	spinlock_t		defer_lock ____cacheline_aligned_in_smp;
	int			defer_count;
@@ -3249,21 +3250,6 @@ struct softnet_data {
	call_single_data_t	defer_csd;
};

/*
 * Advance the input_queue_head counter of @sd by one.
 *
 * The body is compiled out when CONFIG_RPS is not defined, leaving an
 * empty function.
 */
static inline void input_queue_head_incr(struct softnet_data *sd)
{
#if defined(CONFIG_RPS)
	sd->input_queue_head += 1;
#endif
}

/*
 * Advance the input_queue_tail counter of @sd by one and copy the new
 * value into *@qtail.
 *
 * When CONFIG_RPS is not defined nothing happens and *@qtail is left
 * untouched.
 */
static inline void input_queue_tail_incr_save(struct softnet_data *sd,
					      unsigned int *qtail)
{
#if defined(CONFIG_RPS)
	sd->input_queue_tail += 1;
	*qtail = sd->input_queue_tail;
#endif
}

DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);

static inline int dev_recursion_level(void)
@@ -3271,24 +3257,6 @@ static inline int dev_recursion_level(void)
	return this_cpu_read(softnet_data.xmit.recursion);
}

/* Highest value of softnet_data.xmit.recursion that is still accepted. */
#define XMIT_RECURSION_LIMIT	8

/*
 * Report whether this CPU's softnet_data.xmit.recursion counter is
 * strictly greater than XMIT_RECURSION_LIMIT. The result is hinted as
 * unlikely to be true.
 */
static inline bool dev_xmit_recursion(void)
{
	bool over_limit = __this_cpu_read(softnet_data.xmit.recursion) >
			  XMIT_RECURSION_LIMIT;

	return unlikely(over_limit);
}

/* Increment this CPU's softnet_data.xmit.recursion counter by one. */
static inline void dev_xmit_recursion_inc(void)
{
	__this_cpu_inc(softnet_data.xmit.recursion);
}

/* Decrement this CPU's softnet_data.xmit.recursion counter by one. */
static inline void dev_xmit_recursion_dec(void)
{
	__this_cpu_dec(softnet_data.xmit.recursion);
}

void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
void __netif_schedule(struct Qdisc *q);
void netif_schedule_queue(struct netdev_queue *txq);

+28 −0
Original line number Diff line number Diff line
@@ -122,4 +122,32 @@ static inline void sock_rps_record_flow(const struct sock *sk)
#endif
}

/*
 * Advance the input_queue_tail counter of @sd by one.
 *
 * Returns the counter value after the increment, or 0 when CONFIG_RPS
 * is not defined (in which case @sd is not touched).
 */
static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd)
{
	u32 new_tail = 0;

#if defined(CONFIG_RPS)
	sd->input_queue_tail += 1;
	new_tail = sd->input_queue_tail;
#endif
	return new_tail;
}

/*
 * Store @tail into *@dest.
 *
 * The store is performed with WRITE_ONCE(). When CONFIG_RPS is not
 * defined this is a no-op and @dest is never dereferenced.
 */
static inline void rps_input_queue_tail_save(u32 *dest, u32 tail)
{
#ifdef CONFIG_RPS
	WRITE_ONCE(*dest, tail);
#endif
}

/*
 * Add @val to the input_queue_head counter of @sd.
 *
 * The current value is read with a plain load and the sum is stored
 * back with WRITE_ONCE(). When CONFIG_RPS is not defined this is a
 * no-op.
 */
static inline void rps_input_queue_head_add(struct softnet_data *sd, int val)
{
#ifdef CONFIG_RPS
	WRITE_ONCE(sd->input_queue_head, sd->input_queue_head + val);
#endif
}

/* Advance the input_queue_head counter of @sd by exactly one. */
static inline void rps_input_queue_head_incr(struct softnet_data *sd)
{
	rps_input_queue_head_add(sd, 1);
}

#endif /* _NET_RPS_H */
+42 −31
Original line number Diff line number Diff line
@@ -4528,7 +4528,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
	out:
#endif
		rflow->last_qtail =
			per_cpu(softnet_data, next_cpu).input_queue_head;
			READ_ONCE(per_cpu(softnet_data, next_cpu).input_queue_head);
	}

	rflow->cpu = next_cpu;
@@ -4610,8 +4610,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
		 */
		if (unlikely(tcpu != next_cpu) &&
		    (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
		      rflow->last_qtail)) >= 0)) {
		     ((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) -
		      READ_ONCE(rflow->last_qtail))) >= 0)) {
			tcpu = next_cpu;
			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
		}
@@ -4665,8 +4665,8 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
		rflow = &flow_table->flows[flow_id];
		cpu = READ_ONCE(rflow->cpu);
		if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
			   rflow->last_qtail) <
		    ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) -
			   READ_ONCE(rflow->last_qtail)) <
		     (int)(10 * flow_table->mask)))
			expire = false;
	}
@@ -4800,37 +4800,45 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
	struct softnet_data *sd;
	unsigned long flags;
	unsigned int qlen;
	int max_backlog;
	u32 tail;

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	reason = SKB_DROP_REASON_DEV_READY;
	if (!netif_running(skb->dev))
		goto bad_dev;

	reason = SKB_DROP_REASON_CPU_BACKLOG;
	sd = &per_cpu(softnet_data, cpu);

	qlen = skb_queue_len_lockless(&sd->input_pkt_queue);
	max_backlog = READ_ONCE(net_hotdata.max_backlog);
	if (unlikely(qlen > max_backlog))
		goto cpu_backlog_drop;
	backlog_lock_irq_save(sd, &flags);
	if (!netif_running(skb->dev))
		goto drop;
	qlen = skb_queue_len(&sd->input_pkt_queue);
	if (qlen <= READ_ONCE(net_hotdata.max_backlog) &&
	    !skb_flow_limit(skb, qlen)) {
		if (qlen) {
enqueue:
	if (qlen <= max_backlog && !skb_flow_limit(skb, qlen)) {
		if (!qlen) {
			/* Schedule NAPI for backlog device. We can use
			 * non atomic operation as we own the queue lock.
			 */
			if (!__test_and_set_bit(NAPI_STATE_SCHED,
						&sd->backlog.state))
				napi_schedule_rps(sd);
		}
		__skb_queue_tail(&sd->input_pkt_queue, skb);
			input_queue_tail_incr_save(sd, qtail);
		tail = rps_input_queue_tail_incr(sd);
		backlog_unlock_irq_restore(sd, &flags);
			return NET_RX_SUCCESS;
		}

		/* Schedule NAPI for backlog device
		 * We can use non atomic operation since we own the queue lock
		 */
		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
			napi_schedule_rps(sd);
		goto enqueue;
		/* save the tail outside of the critical section */
		rps_input_queue_tail_save(qtail, tail);
		return NET_RX_SUCCESS;
	}
	reason = SKB_DROP_REASON_CPU_BACKLOG;

drop:
	sd->dropped++;
	backlog_unlock_irq_restore(sd, &flags);

cpu_backlog_drop:
	atomic_inc(&sd->dropped);
bad_dev:
	dev_core_stats_rx_dropped_inc(skb->dev);
	kfree_skb_reason(skb, reason);
	return NET_RX_DROP;
@@ -5900,7 +5908,7 @@ static void flush_backlog(struct work_struct *work)
		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
			__skb_unlink(skb, &sd->input_pkt_queue);
			dev_kfree_skb_irq(skb);
			input_queue_head_incr(sd);
			rps_input_queue_head_incr(sd);
		}
	}
	backlog_unlock_irq_enable(sd);
@@ -5909,7 +5917,7 @@ static void flush_backlog(struct work_struct *work)
		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
			__skb_unlink(skb, &sd->process_queue);
			kfree_skb(skb);
			input_queue_head_incr(sd);
			rps_input_queue_head_incr(sd);
		}
	}
	local_bh_enable();
@@ -6037,9 +6045,10 @@ static int process_backlog(struct napi_struct *napi, int quota)
			rcu_read_lock();
			__netif_receive_skb(skb);
			rcu_read_unlock();
			input_queue_head_incr(sd);
			if (++work >= quota)
			if (++work >= quota) {
				rps_input_queue_head_add(sd, work);
				return work;
			}

		}

@@ -6062,6 +6071,8 @@ static int process_backlog(struct napi_struct *napi, int quota)
		backlog_unlock_irq_enable(sd);
	}

	if (work)
		rps_input_queue_head_add(sd, work);
	return work;
}

@@ -11451,11 +11462,11 @@ static int dev_cpu_dead(unsigned int oldcpu)
	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
		netif_rx(skb);
		input_queue_head_incr(oldsd);
		rps_input_queue_head_incr(oldsd);
	}
	while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
		netif_rx(skb);
		input_queue_head_incr(oldsd);
		rps_input_queue_head_incr(oldsd);
	}

	return 0;
+20 −3
Original line number Diff line number Diff line
@@ -4,11 +4,9 @@

#include <linux/types.h>
#include <linux/rwsem.h>
#include <linux/netdevice.h>

struct net;
struct net_device;
struct netdev_bpf;
struct netdev_phys_item_id;
struct netlink_ext_ack;
struct cpumask;

@@ -150,4 +148,23 @@ static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
#endif

struct napi_struct *napi_by_id(unsigned int napi_id);
void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);

/* Highest value of softnet_data.xmit.recursion that is still accepted. */
#define XMIT_RECURSION_LIMIT	8

/*
 * Report whether this CPU's softnet_data.xmit.recursion counter is
 * strictly greater than XMIT_RECURSION_LIMIT. The result is hinted as
 * unlikely to be true.
 */
static inline bool dev_xmit_recursion(void)
{
	bool over_limit = __this_cpu_read(softnet_data.xmit.recursion) >
			  XMIT_RECURSION_LIMIT;

	return unlikely(over_limit);
}

/* Increment this CPU's softnet_data.xmit.recursion counter by one. */
static inline void dev_xmit_recursion_inc(void)
{
	__this_cpu_inc(softnet_data.xmit.recursion);
}

/* Decrement this CPU's softnet_data.xmit.recursion counter by one. */
static inline void dev_xmit_recursion_dec(void)
{
	__this_cpu_dec(softnet_data.xmit.recursion);
}

#endif
+2 −1
Original line number Diff line number Diff line
@@ -144,7 +144,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
	seq_printf(seq,
		   "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
		   "%08x %08x\n",
		   sd->processed, sd->dropped, sd->time_squeeze, 0,
		   sd->processed, atomic_read(&sd->dropped),
		   sd->time_squeeze, 0,
		   0, 0, 0, 0, /* was fastroute */
		   0,	/* was cpu_collision */
		   sd->received_rps, flow_limit_count,