Commit 12cc5240 authored by Linus Torvalds
Browse files

Merge tag 'uml-for-linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

Pull UML updates from Richard Weinberger:

 - Removal of dead code (TT mode leftovers, etc)

 - Fixes for the network vector driver

 - Fixes for time-travel mode

* tag 'uml-for-linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux:
  um: fix time-travel syscall scheduling hack
  um: Remove outdated asm/sysrq.h header
  um: Remove the declaration of user_thread function
  um: Remove the call to SUBARCH_EXECVE1 macro
  um: Remove unused mm_fd field from mm_id
  um: Remove unused fields from thread_struct
  um: Remove the redundant newpage check in update_pte_range
  um: Remove unused kpte_clear_flush macro
  um: Remove obsoleted declaration for execute_syscall_skas
  user_mode_linux_howto_v2: add VDE vector support in doc
  vector_user: add VDE support
  um: remove ARCH_NO_PREEMPT_DYNAMIC
  um: vector: Fix NAPI budget handling
  um: vector: Replace locks guarding queue depth with atomics
  um: remove variable stack array in os_rcv_fd_msg()
parents 0c33037c 381d2f95
Loading
Loading
Loading
Loading
+37 −0
Original line number Diff line number Diff line
@@ -217,6 +217,8 @@ remote UML and other VM instances.
+-----------+--------+------------------------------------+------------+
| fd        | vector | dependent on fd type               | varies     |
+-----------+--------+------------------------------------+------------+
| vde       | vector | dep. on VDE VNL: Virt.Net Locator  | varies     |
+-----------+--------+------------------------------------+------------+
| tuntap    | legacy | none                               | ~ 500Mbit  |
+-----------+--------+------------------------------------+------------+
| daemon    | legacy | none                               | ~ 450Mbit  |
@@ -573,6 +575,41 @@ https://github.com/NetSys/bess/wiki/Built-In-Modules-and-Ports

BESS transport does not require any special privileges.

VDE vector transport
--------------------

Virtual Distributed Ethernet (VDE) is a project whose main goal is to provide
highly flexible support for virtual networking.

http://wiki.virtualsquare.org/#/tutorials/vdebasics

Common usages of VDE include fast prototyping and teaching.

Examples:

   ``vecX:transport=vde,vnl=tap://tap0``

use tap0

   ``vecX:transport=vde,vnl=slirp://``

use slirp

   ``vec0:transport=vde,vnl=vde:///tmp/switch``

connect to a vde switch

   ``vecX:transport=\"vde,vnl=cmd://ssh remote.host //tmp/sshlirp\"``

connect to a remote slirp (instant VPN: convert ssh to VPN, it uses sshlirp)
https://github.com/virtualsquare/sshlirp

   ``vec0:transport=vde,vnl=vxvde://234.0.0.1``

connect to a local area cloud (all the UML nodes using the same
multicast address running on hosts in the same multicast domain (LAN)
will be automagically connected together to a virtual LAN).

Configuring Legacy transports
=============================

+0 −1
Original line number Diff line number Diff line
@@ -11,7 +11,6 @@ config UML
	select ARCH_HAS_KCOV
	select ARCH_HAS_STRNCPY_FROM_USER
	select ARCH_HAS_STRNLEN_USER
	select ARCH_NO_PREEMPT_DYNAMIC
	select HAVE_ARCH_AUDITSYSCALL
	select HAVE_ARCH_KASAN if X86_64
	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
+109 −103
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include <linux/interrupt.h>
#include <linux/firmware.h>
#include <linux/fs.h>
#include <asm/atomic.h>
#include <uapi/linux/filter.h>
#include <init.h>
#include <irq_kern.h>
@@ -102,18 +103,33 @@ static const struct {

static void vector_reset_stats(struct vector_private *vp)
{
	/* We reuse the existing queue locks for stats */

	/* RX stats are modified with RX head_lock held
	 * in vector_poll.
	 */

	spin_lock(&vp->rx_queue->head_lock);
	vp->estats.rx_queue_max = 0;
	vp->estats.rx_queue_running_average = 0;
	vp->estats.tx_queue_max = 0;
	vp->estats.tx_queue_running_average = 0;
	vp->estats.rx_encaps_errors = 0;
	vp->estats.sg_ok = 0;
	vp->estats.sg_linearized = 0;
	spin_unlock(&vp->rx_queue->head_lock);

	/* TX stats are modified with TX head_lock held
	 * in vector_send.
	 */

	spin_lock(&vp->tx_queue->head_lock);
	vp->estats.tx_timeout_count = 0;
	vp->estats.tx_restart_queue = 0;
	vp->estats.tx_kicks = 0;
	vp->estats.tx_flow_control_xon = 0;
	vp->estats.tx_flow_control_xoff = 0;
	vp->estats.sg_ok = 0;
	vp->estats.sg_linearized = 0;
	vp->estats.tx_queue_max = 0;
	vp->estats.tx_queue_running_average = 0;
	spin_unlock(&vp->tx_queue->head_lock);
}

static int get_mtu(struct arglist *def)
@@ -232,12 +248,6 @@ static int get_transport_options(struct arglist *def)

static char *drop_buffer;

/* Array backed queues optimized for bulk enqueue/dequeue and
 * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
 * For more details and full design rationale see
 * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
 */


/*
 * Advance the mmsg queue head by n = advance. Resets the queue to
@@ -247,27 +257,13 @@ static char *drop_buffer;

static int vector_advancehead(struct vector_queue *qi, int advance)
{
	int queue_depth;

	qi->head =
		(qi->head + advance)
			% qi->max_depth;


	spin_lock(&qi->tail_lock);
	qi->queue_depth -= advance;

	/* we are at 0, use this to
	 * reset head and tail so we can use max size vectors
	 */

	if (qi->queue_depth == 0) {
		qi->head = 0;
		qi->tail = 0;
	}
	queue_depth = qi->queue_depth;
	spin_unlock(&qi->tail_lock);
	return queue_depth;
	atomic_sub(advance, &qi->queue_depth);
	return atomic_read(&qi->queue_depth);
}

/*	Advance the queue tail by n = advance.
@@ -277,16 +273,11 @@ static int vector_advancehead(struct vector_queue *qi, int advance)

static int vector_advancetail(struct vector_queue *qi, int advance)
{
	int queue_depth;

	qi->tail =
		(qi->tail + advance)
			% qi->max_depth;
	spin_lock(&qi->head_lock);
	qi->queue_depth += advance;
	queue_depth = qi->queue_depth;
	spin_unlock(&qi->head_lock);
	return queue_depth;
	atomic_add(advance, &qi->queue_depth);
	return atomic_read(&qi->queue_depth);
}

static int prep_msg(struct vector_private *vp,
@@ -339,9 +330,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
	int iov_count;

	spin_lock(&qi->tail_lock);
	spin_lock(&qi->head_lock);
	queue_depth = qi->queue_depth;
	spin_unlock(&qi->head_lock);
	queue_depth = atomic_read(&qi->queue_depth);

	if (skb)
		packet_len = skb->len;
@@ -360,6 +349,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
		mmsg_vector->msg_hdr.msg_iovlen = iov_count;
		mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
		mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
		wmb(); /* Make the packet visible to the NAPI poll thread */
		queue_depth = vector_advancetail(qi, 1);
	} else
		goto drop;
@@ -398,7 +388,7 @@ static int consume_vector_skbs(struct vector_queue *qi, int count)
}

/*
 * Generic vector deque via sendmmsg with support for forming headers
 * Generic vector dequeue via sendmmsg with support for forming headers
 * using transport specific callback. Allows GRE, L2TPv3, RAW and
 * other transports to use a common dequeue procedure in vector mode
 */
@@ -408,16 +398,13 @@ static int vector_send(struct vector_queue *qi)
{
	struct vector_private *vp = netdev_priv(qi->dev);
	struct mmsghdr *send_from;
	int result = 0, send_len, queue_depth = qi->max_depth;
	int result = 0, send_len;

	if (spin_trylock(&qi->head_lock)) {
		if (spin_trylock(&qi->tail_lock)) {
		/* update queue_depth to current value */
			queue_depth = qi->queue_depth;
			spin_unlock(&qi->tail_lock);
			while (queue_depth > 0) {
		while (atomic_read(&qi->queue_depth) > 0) {
			/* Calculate the start of the vector */
				send_len = queue_depth;
			send_len = atomic_read(&qi->queue_depth);
			send_from = qi->mmsg_vector;
			send_from += qi->head;
			/* Adjust vector size if wraparound */
@@ -447,7 +434,6 @@ static int vector_send(struct vector_queue *qi)
				result = send_len;
			}
			if (result > 0) {
					queue_depth =
				consume_vector_skbs(qi, result);
				/* This is equivalent to an TX IRQ.
				 * Restart the upper layers to feed us
@@ -460,17 +446,16 @@ static int vector_send(struct vector_queue *qi)
			}
			netif_wake_queue(qi->dev);
			/* if TX is busy, break out of the send loop,
				 *  poll write IRQ will reschedule xmit for us
			 *  poll write IRQ will reschedule xmit for us.
			 */
			if (result != send_len) {
				vp->estats.tx_restart_queue++;
				break;
			}
		}
		}
		spin_unlock(&qi->head_lock);
	}
	return queue_depth;
	return atomic_read(&qi->queue_depth);
}

/* Queue destructor. Deliberately stateless so we can use
@@ -589,7 +574,7 @@ static struct vector_queue *create_queue(
	}
	spin_lock_init(&result->head_lock);
	spin_lock_init(&result->tail_lock);
	result->queue_depth = 0;
	atomic_set(&result->queue_depth, 0);
	result->head = 0;
	result->tail = 0;
	return result;
@@ -675,11 +660,20 @@ static void prep_queue_for_rx(struct vector_queue *qi)
	struct vector_private *vp = netdev_priv(qi->dev);
	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
	void **skbuff_vector = qi->skbuff_vector;
	int i;
	int i, queue_depth;

	if (qi->queue_depth == 0)
	queue_depth = atomic_read(&qi->queue_depth);

	if (queue_depth == 0)
		return;
	for (i = 0; i < qi->queue_depth; i++) {

	/* RX is always emptied 100% during each cycle, so we do not
	 * have to do the tail wraparound math for it.
	 */

	qi->head = qi->tail = 0;

	for (i = 0; i < queue_depth; i++) {
		/* it is OK if allocation fails - recvmmsg with NULL data in
		 * iov argument still performs an RX, just drops the packet
		 * This allows us stop faffing around with a "drop buffer"
@@ -689,7 +683,7 @@ static void prep_queue_for_rx(struct vector_queue *qi)
		skbuff_vector++;
		mmsg_vector++;
	}
	qi->queue_depth = 0;
	atomic_set(&qi->queue_depth, 0);
}

static struct vector_device *find_device(int n)
@@ -972,7 +966,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget)
		budget = qi->max_depth;

	packet_count = uml_vector_recvmmsg(
		vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
		vp->fds->rx_fd, qi->mmsg_vector, budget, 0);

	if (packet_count < 0)
		vp->in_error = true;
@@ -985,7 +979,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget)
	 * many do we need to prep the next time prep_queue_for_rx() is called.
	 */

	qi->queue_depth = packet_count;
	atomic_add(packet_count, &qi->queue_depth);

	for (i = 0; i < packet_count; i++) {
		skb = (*skbuff_vector);
@@ -1172,6 +1166,7 @@ static int vector_poll(struct napi_struct *napi, int budget)

	if ((vp->options & VECTOR_TX) != 0)
		tx_enqueued = (vector_send(vp->tx_queue) > 0);
	spin_lock(&vp->rx_queue->head_lock);
	if ((vp->options & VECTOR_RX) > 0)
		err = vector_mmsg_rx(vp, budget);
	else {
@@ -1179,12 +1174,13 @@ static int vector_poll(struct napi_struct *napi, int budget)
		if (err > 0)
			err = 1;
	}
	spin_unlock(&vp->rx_queue->head_lock);
	if (err > 0)
		work_done += err;

	if (tx_enqueued || err > 0)
		napi_schedule(napi);
	if (work_done < budget)
	if (work_done <= budget)
		napi_complete_done(napi, work_done);
	return work_done;
}
@@ -1225,7 +1221,7 @@ static int vector_net_open(struct net_device *dev)
			vp->rx_header_size,
			MAX_IOV_SIZE
		);
		vp->rx_queue->queue_depth = get_depth(vp->parsed);
		atomic_set(&vp->rx_queue->queue_depth, get_depth(vp->parsed));
	} else {
		vp->header_rxbuffer = kmalloc(
			vp->rx_header_size,
@@ -1467,7 +1463,17 @@ static void vector_get_ethtool_stats(struct net_device *dev,
{
	struct vector_private *vp = netdev_priv(dev);

	/* Stats are modified in the dequeue portions of
	 * rx/tx which are protected by the head locks
	 * grabbing these locks here ensures they are up
	 * to date.
	 */

	spin_lock(&vp->tx_queue->head_lock);
	spin_lock(&vp->rx_queue->head_lock);
	memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
	spin_unlock(&vp->rx_queue->head_lock);
	spin_unlock(&vp->tx_queue->head_lock);
}

static int vector_get_coalesce(struct net_device *netdev,
+3 −1
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <linux/ctype.h>
#include <linux/workqueue.h>
#include <linux/interrupt.h>
#include <asm/atomic.h>

#include "vector_user.h"

@@ -44,7 +45,8 @@ struct vector_queue {
	struct net_device *dev;
	spinlock_t head_lock;
	spinlock_t tail_lock;
	int queue_depth, head, tail, max_depth, max_iov_frags;
	atomic_t queue_depth;
	int head, tail, max_depth, max_iov_frags;
	short options;
};

+83 −0
Original line number Diff line number Diff line
@@ -46,6 +46,9 @@
#define TRANS_FD "fd"
#define TRANS_FD_LEN strlen(TRANS_FD)

#define TRANS_VDE "vde"
#define TRANS_VDE_LEN strlen(TRANS_VDE)

#define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
@@ -434,6 +437,84 @@ static struct vector_fds *user_init_fd_fds(struct arglist *ifspec)
	return NULL;
}

/* enough chars to hold the decimal representation of an integer type */
#define ENOUGH(type) ((CHAR_BIT * sizeof(type) - 1) / 3 + 2)
#define ENOUGH_OCTAL(type) ((CHAR_BIT * sizeof(type) + 2) / 3)
/* vde_plug --descr xx --port2 xx --mod2 xx --group2 xx seqpacket://NN vnl (NULL) */
#define VDE_MAX_ARGC 12
#define VDE_SEQPACKET_HEAD "seqpacket://"
#define VDE_SEQPACKET_HEAD_LEN (sizeof(VDE_SEQPACKET_HEAD) - 1)
#define VDE_DEFAULT_DESCRIPTION "UML"

/*
 * Open a VDE (Virtual Distributed Ethernet) transport.
 *
 * Creates an AF_UNIX SOCK_SEQPACKET socket pair and starts a "vde_plug"
 * helper which is handed one end of the pair as "seqpacket://<fd>",
 * followed by the user supplied "vnl" argument. The other end is
 * returned to the caller as both the rx and the tx descriptor.
 *
 * Optional ifspec arguments forwarded to vde_plug:
 *   descr -> --descr  (defaults to VDE_DEFAULT_DESCRIPTION)
 *   port  -> --port2
 *   mode  -> --mod2
 *   group -> --group2
 *
 * Returns a newly allocated vector_fds on success, NULL on failure.
 * Both ends of the socket pair are closed on every failure path.
 */
static struct vector_fds *user_init_vde_fds(struct arglist *ifspec)
{
	char seqpacketvnl[VDE_SEQPACKET_HEAD_LEN + ENOUGH(int) + 1];
	char *argv[VDE_MAX_ARGC] = {"vde_plug"};
	int argc = 1;
	int rv;
	int sv[2];
	struct vector_fds *result = NULL;

	char *vnl = uml_vector_fetch_arg(ifspec, "vnl");
	char *descr = uml_vector_fetch_arg(ifspec, "descr");
	char *port = uml_vector_fetch_arg(ifspec, "port");
	char *mode = uml_vector_fetch_arg(ifspec, "mode");
	char *group = uml_vector_fetch_arg(ifspec, "group");

	if (descr == NULL)
		descr = VDE_DEFAULT_DESCRIPTION;

	/* Worst case argv usage matches VDE_MAX_ARGC, terminator included */
	argv[argc++] = "--descr";
	argv[argc++] = descr;
	if (port != NULL) {
		argv[argc++] = "--port2";
		argv[argc++] = port;
	}
	if (mode != NULL) {
		argv[argc++] = "--mod2";
		argv[argc++] = mode;
	}
	if (group != NULL) {
		argv[argc++] = "--group2";
		argv[argc++] = group;
	}
	/* seqpacketvnl is filled in below, once the socket pair exists */
	argv[argc++] = seqpacketvnl;
	/* a missing vnl simply terminates the argument list one slot early */
	argv[argc++] = vnl;
	argv[argc++] = NULL;

	rv = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
	if (rv  < 0) {
		printk(UM_KERN_ERR "vde: seqpacket socketpair err %d", -errno);
		return NULL;
	}
	/* Our end must not leak into the helper; sv[1] is the helper's end */
	rv = os_set_exec_close(sv[0]);
	if (rv  < 0) {
		printk(UM_KERN_ERR "vde: seqpacket socketpair cloexec err %d", -errno);
		goto vde_cleanup_sv;
	}
	snprintf(seqpacketvnl, sizeof(seqpacketvnl), VDE_SEQPACKET_HEAD "%d", sv[1]);

	/* Without a running helper sv[0] has no peer, so treat a negative
	 * return from run_helper() as a failure to open the transport.
	 */
	rv = run_helper(NULL, NULL, argv);
	if (rv < 0) {
		printk(UM_KERN_ERR "vde: vde_plug helper failed err %d", rv);
		goto vde_cleanup_sv;
	}

	/* The helper owns its copy of sv[1] now; drop ours */
	close(sv[1]);

	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
	if (result == NULL) {
		printk(UM_KERN_ERR "vde: allocation failed");
		goto vde_cleanup;
	}

	result->rx_fd = sv[0];
	result->tx_fd = sv[0];
	result->remote_addr_size = 0;
	result->remote_addr = NULL;
	return result;

vde_cleanup_sv:
	close(sv[1]);
vde_cleanup:
	close(sv[0]);
	return NULL;
}

static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
{
	int rxfd = -1, txfd = -1;
@@ -673,6 +754,8 @@ struct vector_fds *uml_vector_user_open(
		return user_init_unix_fds(parsed, ID_BESS);
	if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0)
		return user_init_fd_fds(parsed);
	if (strncmp(transport, TRANS_VDE, TRANS_VDE_LEN) == 0)
		return user_init_vde_fds(parsed);
	return NULL;
}

Loading