Commit ad73b9a1 authored by Linus Torvalds
Browse files

Merge tag 'uml-for-linus-6.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

Pull UML fixes from Richard Weinberger:

 - Align signal stack correctly

 - Convert to raw spinlocks where needed (irq and virtio)

 - FPU related fixes

* tag 'uml-for-linus-6.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux:
  um: convert irq_lock to raw spinlock
  um: virtio_uml: use raw spinlock
  um: virt-pci: don't use kmalloc()
  um: fix execve stub execution on old host OSs
  um: properly align signal stack on x86_64
  um: avoid copying FP state from init_task
  um: add back support for FXSAVE registers
parents 5784d8c9 96178631
Loading
Loading
Loading
Loading
+102 −96
Original line number Diff line number Diff line
@@ -25,8 +25,10 @@
#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32))
#define NUM_IRQ_MSGS	10

#define HANDLE_NO_FREE(ptr) ((void *)((unsigned long)(ptr) | 1))
#define HANDLE_IS_NO_FREE(ptr) ((unsigned long)(ptr) & 1)
/*
 * One command message as handed to the command virtqueue: the protocol
 * header followed by a small inline data area.
 */
struct um_pci_message_buffer {
	struct virtio_pcidev_msg hdr;
	/* inline data area; um_pci_send_cmd() bounces small reads through it */
	u8 data[8];
};

struct um_pci_device {
	struct virtio_device *vdev;
@@ -36,6 +38,11 @@ struct um_pci_device {

	struct virtqueue *cmd_vq, *irq_vq;

#define UM_PCI_WRITE_BUFS	20
	struct um_pci_message_buffer bufs[UM_PCI_WRITE_BUFS + 1];
	void *extra_ptrs[UM_PCI_WRITE_BUFS + 1];
	DECLARE_BITMAP(used_bufs, UM_PCI_WRITE_BUFS);

#define UM_PCI_STAT_WAITING	0
	unsigned long status;

@@ -61,12 +68,40 @@ static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)];
static unsigned int um_pci_max_delay_us = 40000;
module_param_named(max_delay_us, um_pci_max_delay_us, uint, 0644);

struct um_pci_message_buffer {
	struct virtio_pcidev_msg hdr;
	u8 data[8];
};
static int um_pci_get_buf(struct um_pci_device *dev, bool *posted)
{
	int i;

static struct um_pci_message_buffer __percpu *um_pci_msg_bufs;
	for (i = 0; i < UM_PCI_WRITE_BUFS; i++) {
		if (!test_and_set_bit(i, dev->used_bufs))
			return i;
	}

	*posted = false;
	return UM_PCI_WRITE_BUFS;
}

/*
 * Give a message buffer back to @dev.
 *
 * @buf must be the address of one of the entries of dev->bufs. Any extra
 * allocation recorded for that entry in dev->extra_ptrs is freed and the
 * pointer reset. For the first UM_PCI_WRITE_BUFS entries the matching bit
 * in dev->used_bufs is cleared as well (warning if it was not set); the
 * final entry has no bit in that bitmap. A pointer that matches no entry
 * triggers a warning and is otherwise ignored.
 */
static void um_pci_free_buf(struct um_pci_device *dev, void *buf)
{
	int idx;

	/* scan every slot, including the trailing one beyond the bitmap */
	for (idx = 0; idx <= UM_PCI_WRITE_BUFS; idx++) {
		if (buf != &dev->bufs[idx])
			continue;

		kfree(dev->extra_ptrs[idx]);
		dev->extra_ptrs[idx] = NULL;

		/* only the first UM_PCI_WRITE_BUFS slots are tracked in used_bufs */
		if (idx < UM_PCI_WRITE_BUFS)
			WARN_ON(!test_and_clear_bit(idx, dev->used_bufs));
		return;
	}

	/* not one of this device's buffers */
	WARN_ON(1);
}

static int um_pci_send_cmd(struct um_pci_device *dev,
			   struct virtio_pcidev_msg *cmd,
@@ -82,7 +117,9 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
	};
	struct um_pci_message_buffer *buf;
	int delay_count = 0;
	bool bounce_out;
	int ret, len;
	int buf_idx;
	bool posted;

	if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf)))
@@ -101,26 +138,28 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
		break;
	}

	buf = get_cpu_var(um_pci_msg_bufs);
	if (buf)
	bounce_out = !posted && cmd_size <= sizeof(*cmd) &&
		     out && out_size <= sizeof(buf->data);

	buf_idx = um_pci_get_buf(dev, &posted);
	buf = &dev->bufs[buf_idx];
	memcpy(buf, cmd, cmd_size);

	if (posted) {
		u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC);
	if (posted && extra && extra_size > sizeof(buf) - cmd_size) {
		dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size,
						   GFP_ATOMIC);

		if (ncmd) {
			memcpy(ncmd, cmd, cmd_size);
			if (extra)
				memcpy(ncmd + cmd_size, extra, extra_size);
			cmd = (void *)ncmd;
		if (!dev->extra_ptrs[buf_idx]) {
			um_pci_free_buf(dev, buf);
			return -ENOMEM;
		}
		extra = dev->extra_ptrs[buf_idx];
	} else if (extra && extra_size <= sizeof(buf) - cmd_size) {
		memcpy((u8 *)buf + cmd_size, extra, extra_size);
		cmd_size += extra_size;
			extra = NULL;
		extra_size = 0;
		} else {
			/* try without allocating memory */
			posted = false;
		extra = NULL;
		cmd = (void *)buf;
		}
	} else {
		cmd = (void *)buf;
	}
@@ -128,39 +167,40 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
	sg_init_one(&out_sg, cmd, cmd_size);
	if (extra)
		sg_init_one(&extra_sg, extra, extra_size);
	if (out)
	/* allow stack for small buffers */
	if (bounce_out)
		sg_init_one(&in_sg, buf->data, out_size);
	else if (out)
		sg_init_one(&in_sg, out, out_size);

	/* add to internal virtio queue */
	ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list,
				extra ? 2 : 1,
				out ? 1 : 0,
				posted ? cmd : HANDLE_NO_FREE(cmd),
				GFP_ATOMIC);
				cmd, GFP_ATOMIC);
	if (ret) {
		if (posted)
			kfree(cmd);
		goto out;
		um_pci_free_buf(dev, buf);
		return ret;
	}

	if (posted) {
		virtqueue_kick(dev->cmd_vq);
		ret = 0;
		goto out;
		return 0;
	}

	/* kick and poll for getting a response on the queue */
	set_bit(UM_PCI_STAT_WAITING, &dev->status);
	virtqueue_kick(dev->cmd_vq);
	ret = 0;

	while (1) {
		void *completed = virtqueue_get_buf(dev->cmd_vq, &len);

		if (completed == HANDLE_NO_FREE(cmd))
		if (completed == buf)
			break;

		if (completed && !HANDLE_IS_NO_FREE(completed))
			kfree(completed);
		if (completed)
			um_pci_free_buf(dev, completed);

		if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) ||
			      ++delay_count > um_pci_max_delay_us,
@@ -172,8 +212,11 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
	}
	clear_bit(UM_PCI_STAT_WAITING, &dev->status);

out:
	put_cpu_var(um_pci_msg_bufs);
	if (bounce_out)
		memcpy(out, buf->data, out_size);

	um_pci_free_buf(dev, buf);

	return ret;
}

@@ -187,20 +230,13 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
		.size = size,
		.addr = offset,
	};
	/* buf->data is maximum size - we may only use parts of it */
	struct um_pci_message_buffer *buf;
	u8 *data;
	unsigned long ret = ULONG_MAX;
	size_t bytes = sizeof(buf->data);
	/* max 8, we might not use it all */
	u8 data[8];

	if (!dev)
		return ULONG_MAX;

	buf = get_cpu_var(um_pci_msg_bufs);
	data = buf->data;

	if (buf)
		memset(data, 0xff, bytes);
	memset(data, 0xff, sizeof(data));

	switch (size) {
	case 1:
@@ -212,34 +248,26 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
		break;
	default:
		WARN(1, "invalid config space read size %d\n", size);
		goto out;
		return ULONG_MAX;
	}

	if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes))
		goto out;
	if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size))
		return ULONG_MAX;

	switch (size) {
	case 1:
		ret = data[0];
		break;
		return data[0];
	case 2:
		ret = le16_to_cpup((void *)data);
		break;
		return le16_to_cpup((void *)data);
	case 4:
		ret = le32_to_cpup((void *)data);
		break;
		return le32_to_cpup((void *)data);
#ifdef CONFIG_64BIT
	case 8:
		ret = le64_to_cpup((void *)data);
		break;
		return le64_to_cpup((void *)data);
#endif
	default:
		break;
		return ULONG_MAX;
	}

out:
	put_cpu_var(um_pci_msg_bufs);
	return ret;
}

static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size,
@@ -312,13 +340,8 @@ static void um_pci_bar_copy_from(void *priv, void *buffer,
static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
				     int size)
{
	/* buf->data is maximum size - we may only use parts of it */
	struct um_pci_message_buffer *buf;
	u8 *data;
	unsigned long ret = ULONG_MAX;

	buf = get_cpu_var(um_pci_msg_bufs);
	data = buf->data;
	/* 8 is maximum size - we may only use parts of it */
	u8 data[8];

	switch (size) {
	case 1:
@@ -330,33 +353,25 @@ static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
		break;
	default:
		WARN(1, "invalid config space read size %d\n", size);
		goto out;
		return ULONG_MAX;
	}

	um_pci_bar_copy_from(priv, data, offset, size);

	switch (size) {
	case 1:
		ret = data[0];
		break;
		return data[0];
	case 2:
		ret = le16_to_cpup((void *)data);
		break;
		return le16_to_cpup((void *)data);
	case 4:
		ret = le32_to_cpup((void *)data);
		break;
		return le32_to_cpup((void *)data);
#ifdef CONFIG_64BIT
	case 8:
		ret = le64_to_cpup((void *)data);
		break;
		return le64_to_cpup((void *)data);
#endif
	default:
		break;
		return ULONG_MAX;
	}

out:
	put_cpu_var(um_pci_msg_bufs);
	return ret;
}

static void um_pci_bar_copy_to(void *priv, unsigned int offset,
@@ -523,11 +538,8 @@ static void um_pci_cmd_vq_cb(struct virtqueue *vq)
	if (test_bit(UM_PCI_STAT_WAITING, &dev->status))
		return;

	while ((cmd = virtqueue_get_buf(vq, &len))) {
		if (WARN_ON(HANDLE_IS_NO_FREE(cmd)))
			continue;
		kfree(cmd);
	}
	while ((cmd = virtqueue_get_buf(vq, &len)))
		um_pci_free_buf(dev, cmd);
}

static void um_pci_irq_vq_cb(struct virtqueue *vq)
@@ -1006,10 +1018,6 @@ static int __init um_pci_init(void)
		 "No virtio device ID configured for PCI - no PCI support\n"))
		return 0;

	um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer);
	if (!um_pci_msg_bufs)
		return -ENOMEM;

	bridge = pci_alloc_host_bridge(0);
	if (!bridge) {
		err = -ENOMEM;
@@ -1070,7 +1078,6 @@ static int __init um_pci_init(void)
		pci_free_resource_list(&bridge->windows);
		pci_free_host_bridge(bridge);
	}
	free_percpu(um_pci_msg_bufs);
	return err;
}
module_init(um_pci_init);
@@ -1082,6 +1089,5 @@ static void __exit um_pci_exit(void)
	irq_domain_remove(um_pci_inner_domain);
	pci_free_resource_list(&bridge->windows);
	pci_free_host_bridge(bridge);
	free_percpu(um_pci_msg_bufs);
}
module_exit(um_pci_exit);
+4 −4
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ struct virtio_uml_device {
	struct platform_device *pdev;
	struct virtio_uml_platform_data *pdata;

	spinlock_t sock_lock;
	raw_spinlock_t sock_lock;
	int sock, req_fd, irq;
	u64 features;
	u64 protocol_features;
@@ -246,7 +246,7 @@ static int vhost_user_send(struct virtio_uml_device *vu_dev,
	if (request_ack)
		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;

	spin_lock_irqsave(&vu_dev->sock_lock, flags);
	raw_spin_lock_irqsave(&vu_dev->sock_lock, flags);
	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
	if (rc < 0)
		goto out;
@@ -266,7 +266,7 @@ static int vhost_user_send(struct virtio_uml_device *vu_dev,
	}

out:
	spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
	raw_spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
	return rc;
}

@@ -1239,7 +1239,7 @@ static int virtio_uml_probe(struct platform_device *pdev)
		goto error_free;
	vu_dev->sock = rc;

	spin_lock_init(&vu_dev->sock_lock);
	raw_spin_lock_init(&vu_dev->sock_lock);

	rc = vhost_user_init(vu_dev);
	if (rc)
+47 −32
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ struct irq_entry {
	bool sigio_workaround;
};

static DEFINE_SPINLOCK(irq_lock);
static DEFINE_RAW_SPINLOCK(irq_lock);
static LIST_HEAD(active_fds);
static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
static bool irqs_suspended;
@@ -257,7 +257,7 @@ static struct irq_entry *get_irq_entry_by_fd(int fd)
	return NULL;
}

static void free_irq_entry(struct irq_entry *to_free, bool remove)
static void remove_irq_entry(struct irq_entry *to_free, bool remove)
{
	if (!to_free)
		return;
@@ -265,7 +265,6 @@ static void free_irq_entry(struct irq_entry *to_free, bool remove)
	if (remove)
		os_del_epoll_fd(to_free->fd);
	list_del(&to_free->list);
	kfree(to_free);
}

static bool update_irq_entry(struct irq_entry *entry)
@@ -286,17 +285,19 @@ static bool update_irq_entry(struct irq_entry *entry)
	return false;
}

static void update_or_free_irq_entry(struct irq_entry *entry)
static struct irq_entry *update_or_remove_irq_entry(struct irq_entry *entry)
{
	if (!update_irq_entry(entry))
		free_irq_entry(entry, false);
	if (update_irq_entry(entry))
		return NULL;
	remove_irq_entry(entry, false);
	return entry;
}

static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
		       void (*timetravel_handler)(int, int, void *,
						  struct time_travel_event *))
{
	struct irq_entry *irq_entry;
	struct irq_entry *irq_entry, *to_free = NULL;
	int err, events = os_event_mask(type);
	unsigned long flags;

@@ -304,9 +305,10 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
	if (err < 0)
		goto out;

	spin_lock_irqsave(&irq_lock, flags);
	raw_spin_lock_irqsave(&irq_lock, flags);
	irq_entry = get_irq_entry_by_fd(fd);
	if (irq_entry) {
already:
		/* cannot register the same FD twice with the same type */
		if (WARN_ON(irq_entry->reg[type].events)) {
			err = -EALREADY;
@@ -316,11 +318,22 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
		/* temporarily disable to avoid IRQ-side locking */
		os_del_epoll_fd(fd);
	} else {
		irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
		if (!irq_entry) {
			err = -ENOMEM;
			goto out_unlock;
		struct irq_entry *new;

		/* don't restore interrupts */
		raw_spin_unlock(&irq_lock);
		new = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
		if (!new) {
			local_irq_restore(flags);
			return -ENOMEM;
		}
		raw_spin_lock(&irq_lock);
		irq_entry = get_irq_entry_by_fd(fd);
		if (irq_entry) {
			to_free = new;
			goto already;
		}
		irq_entry = new;
		irq_entry->fd = fd;
		list_add_tail(&irq_entry->list, &active_fds);
		maybe_sigio_broken(fd);
@@ -339,12 +352,11 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
#endif

	WARN_ON(!update_irq_entry(irq_entry));
	spin_unlock_irqrestore(&irq_lock, flags);

	return 0;
	err = 0;
out_unlock:
	spin_unlock_irqrestore(&irq_lock, flags);
	raw_spin_unlock_irqrestore(&irq_lock, flags);
out:
	kfree(to_free);
	return err;
}

@@ -358,19 +370,20 @@ void free_irq_by_fd(int fd)
	struct irq_entry *to_free;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	raw_spin_lock_irqsave(&irq_lock, flags);
	to_free = get_irq_entry_by_fd(fd);
	free_irq_entry(to_free, true);
	spin_unlock_irqrestore(&irq_lock, flags);
	remove_irq_entry(to_free, true);
	raw_spin_unlock_irqrestore(&irq_lock, flags);
	kfree(to_free);
}
EXPORT_SYMBOL(free_irq_by_fd);

static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
{
	struct irq_entry *entry;
	struct irq_entry *entry, *to_free = NULL;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	raw_spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type i;

@@ -386,12 +399,13 @@ static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)

			os_del_epoll_fd(entry->fd);
			reg->events = 0;
			update_or_free_irq_entry(entry);
			to_free = update_or_remove_irq_entry(entry);
			goto out;
		}
	}
out:
	spin_unlock_irqrestore(&irq_lock, flags);
	raw_spin_unlock_irqrestore(&irq_lock, flags);
	kfree(to_free);
}

void deactivate_fd(int fd, int irqnum)
@@ -402,7 +416,7 @@ void deactivate_fd(int fd, int irqnum)

	os_del_epoll_fd(fd);

	spin_lock_irqsave(&irq_lock, flags);
	raw_spin_lock_irqsave(&irq_lock, flags);
	entry = get_irq_entry_by_fd(fd);
	if (!entry)
		goto out;
@@ -414,9 +428,10 @@ void deactivate_fd(int fd, int irqnum)
			entry->reg[i].events = 0;
	}

	update_or_free_irq_entry(entry);
	entry = update_or_remove_irq_entry(entry);
out:
	spin_unlock_irqrestore(&irq_lock, flags);
	raw_spin_unlock_irqrestore(&irq_lock, flags);
	kfree(entry);

	ignore_sigio_fd(fd);
}
@@ -546,7 +561,7 @@ void um_irqs_suspend(void)

	irqs_suspended = true;

	spin_lock_irqsave(&irq_lock, flags);
	raw_spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;
		bool clear = true;
@@ -579,7 +594,7 @@ void um_irqs_suspend(void)
				!__ignore_sigio_fd(entry->fd);
		}
	}
	spin_unlock_irqrestore(&irq_lock, flags);
	raw_spin_unlock_irqrestore(&irq_lock, flags);
}

void um_irqs_resume(void)
@@ -588,7 +603,7 @@ void um_irqs_resume(void)
	unsigned long flags;


	spin_lock_irqsave(&irq_lock, flags);
	raw_spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		if (entry->suspended) {
			int err = os_set_fd_async(entry->fd);
@@ -602,7 +617,7 @@ void um_irqs_resume(void)
			}
		}
	}
	spin_unlock_irqrestore(&irq_lock, flags);
	raw_spin_unlock_irqrestore(&irq_lock, flags);

	irqs_suspended = false;
	send_sigio_to_self();
@@ -613,7 +628,7 @@ static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
	struct irq_entry *entry;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	raw_spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;

@@ -628,7 +643,7 @@ static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
		}
	}
unlock:
	spin_unlock_irqrestore(&irq_lock, flags);
	raw_spin_unlock_irqrestore(&irq_lock, flags);
	return 0;
}
#else
+9 −1
Original line number Diff line number Diff line
@@ -191,7 +191,15 @@ void initial_thread_cb(void (*proc)(void *), void *arg)
/*
 * Copy the task structure @src into @dst.
 *
 * A regular source is copied in full (arch_task_struct_size bytes). When
 * the source is init_task, only sizeof(init_task) bytes are copied and the
 * rest of @dst, up to arch_task_struct_size, is zero-filled.
 *
 * Always returns 0.
 */
int arch_dup_task_struct(struct task_struct *dst,
			 struct task_struct *src)
{
	/* init_task is not dynamically sized (missing FPU state) */
	if (unlikely(src == &init_task)) {
		const size_t fixed_len = sizeof(init_task);

		memcpy(dst, src, fixed_len);
		/* clear the tail that init_task does not provide */
		memset((void *)dst + fixed_len, 0,
		       arch_task_struct_size - fixed_len);
		return 0;
	}

	memcpy(dst, src, arch_task_struct_size);
	return 0;
}

+13 −3
Original line number Diff line number Diff line
@@ -181,6 +181,10 @@ extern char __syscall_stub_start[];

static int stub_exe_fd;

#ifndef CLOSE_RANGE_CLOEXEC
#define CLOSE_RANGE_CLOEXEC	(1U << 2)
#endif

static int userspace_tramp(void *stack)
{
	char *const argv[] = { "uml-userspace", NULL };
@@ -202,8 +206,12 @@ static int userspace_tramp(void *stack)
	init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset);
	init_data.stub_data_offset = MMAP_OFFSET(offset);

	/* Set CLOEXEC on all FDs and then unset on all memory related FDs */
	close_range(0, ~0U, CLOSE_RANGE_CLOEXEC);
	/*
	 * Avoid leaking unneeded FDs to the stub by setting CLOEXEC on all FDs
	 * and then unsetting it on all memory related FDs.
	 * This is not strictly necessary from a safety perspective.
	 */
	syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC);

	fcntl(init_data.stub_data_fd, F_SETFD, 0);
	for (iomem = iomem_regions; iomem; iomem = iomem->next)
@@ -224,7 +232,9 @@ static int userspace_tramp(void *stack)
	if (ret != sizeof(init_data))
		exit(4);

	execveat(stub_exe_fd, "", argv, NULL, AT_EMPTY_PATH);
	/* Raw execveat for compatibility with older libc versions */
	syscall(__NR_execveat, stub_exe_fd, (unsigned long)"",
		(unsigned long)argv, NULL, AT_EMPTY_PATH);

	exit(5);
}
Loading