Commit 330b0cce authored by Vincent Donnefort's avatar Vincent Donnefort Committed by Steven Rostedt (Google)
Browse files

tracing: Add non-consuming read to trace remotes

Allow reading the trace file for trace remotes. This performs a
non-consuming read of the trace buffer.

Link: https://patch.msgid.link/20260309162516.2623589-8-vdonnefort@google.com


Reviewed-by: default avatarSteven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: default avatarVincent Donnefort <vdonnefort@google.com>
Signed-off-by: default avatarSteven Rostedt (Google) <rostedt@goodmis.org>
parent 9af4ab0e
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -3856,7 +3856,7 @@ static int s_show(struct seq_file *m, void *v)
 * Should be used after trace_array_get(), trace_types_lock
 * ensures that i_cdev was already initialized.
 */
static inline int tracing_get_cpu(struct inode *inode)
int tracing_get_cpu(struct inode *inode)
{
	if (inode->i_cdev) /* See trace_create_cpu_file() */
		return (long)inode->i_cdev - 1;
+1 −0
Original line number Diff line number Diff line
@@ -695,6 +695,7 @@ struct dentry *trace_create_cpu_file(const char *name,
				     void *data,
				     long cpu,
				     const struct file_operations *fops);
int tracing_get_cpu(struct inode *inode);


/**
+324 −15
Original line number Diff line number Diff line
@@ -18,14 +18,23 @@
#define TRACEFS_MODE_WRITE	0640
#define TRACEFS_MODE_READ	0440

enum tri_type {
	TRI_CONSUMING,
	TRI_NONCONSUMING,
};

struct trace_remote_iterator {
	struct trace_remote		*remote;
	struct trace_seq		seq;
	struct delayed_work		poll_work;
	unsigned long			lost_events;
	u64				ts;
	struct ring_buffer_iter		*rb_iter;
	struct ring_buffer_iter		**rb_iters;
	int				cpu;
	int				evt_cpu;
	loff_t				pos;
	enum tri_type			type;
};

struct trace_remote {
@@ -36,6 +45,8 @@ struct trace_remote {
	unsigned long			trace_buffer_size;
	struct ring_buffer_remote	rb_remote;
	struct mutex			lock;
	struct rw_semaphore		reader_lock;
	struct rw_semaphore		*pcpu_reader_locks;
	unsigned int			nr_readers;
	unsigned int			poll_ms;
	bool				tracing_on;
@@ -230,6 +241,20 @@ static int trace_remote_get(struct trace_remote *remote, int cpu)
	if (ret)
		return ret;

	if (cpu != RING_BUFFER_ALL_CPUS && !remote->pcpu_reader_locks) {
		int lock_cpu;

		remote->pcpu_reader_locks = kcalloc(nr_cpu_ids, sizeof(*remote->pcpu_reader_locks),
						    GFP_KERNEL);
		if (!remote->pcpu_reader_locks) {
			trace_remote_try_unload(remote);
			return -ENOMEM;
		}

		for_each_possible_cpu(lock_cpu)
			init_rwsem(&remote->pcpu_reader_locks[lock_cpu]);
	}

	remote->nr_readers++;

	return 0;
@@ -244,6 +269,9 @@ static void trace_remote_put(struct trace_remote *remote)
	if (remote->nr_readers)
		return;

	kfree(remote->pcpu_reader_locks);
	remote->pcpu_reader_locks = NULL;

	trace_remote_try_unload(remote);
}

@@ -258,13 +286,55 @@ static void __poll_remote(struct work_struct *work)
			      msecs_to_jiffies(iter->remote->poll_ms));
}

static struct trace_remote_iterator *trace_remote_iter(struct trace_remote *remote, int cpu)
static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
{
	if (cpu != RING_BUFFER_ALL_CPUS) {
		ring_buffer_read_finish(iter->rb_iter);
		return;
	}

	for_each_possible_cpu(cpu) {
		if (iter->rb_iters[cpu])
			ring_buffer_read_finish(iter->rb_iters[cpu]);
	}

	kfree(iter->rb_iters);
}

static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
{
	if (cpu != RING_BUFFER_ALL_CPUS) {
		iter->rb_iter = ring_buffer_read_start(iter->remote->trace_buffer, cpu, GFP_KERNEL);

		return iter->rb_iter ? 0 : -ENOMEM;
	}

	iter->rb_iters = kcalloc(nr_cpu_ids, sizeof(*iter->rb_iters), GFP_KERNEL);
	if (!iter->rb_iters)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		iter->rb_iters[cpu] = ring_buffer_read_start(iter->remote->trace_buffer, cpu,
							     GFP_KERNEL);
		if (!iter->rb_iters[cpu]) {
			__free_ring_buffer_iter(iter, RING_BUFFER_ALL_CPUS);
			return -ENOMEM;
		}
	}

	return 0;
}

static struct trace_remote_iterator
*trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type)
{
	struct trace_remote_iterator *iter = NULL;
	int ret;

	lockdep_assert_held(&remote->lock);

	if (type == TRI_NONCONSUMING && !trace_remote_loaded(remote))
		return NULL;

	ret = trace_remote_get(remote, cpu);
	if (ret)
@@ -279,9 +349,21 @@ static struct trace_remote_iterator *trace_remote_iter(struct trace_remote *remo
	if (iter) {
		iter->remote = remote;
		iter->cpu = cpu;
		iter->type = type;
		trace_seq_init(&iter->seq);

		switch (type) {
		case TRI_CONSUMING:
			INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
			schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));
			break;
		case TRI_NONCONSUMING:
			ret = __alloc_ring_buffer_iter(iter, cpu);
			break;
		}

		if (ret)
			goto err;

		return iter;
	}
@@ -305,10 +387,100 @@ static void trace_remote_iter_free(struct trace_remote_iterator *iter)

	lockdep_assert_held(&remote->lock);

	switch (iter->type) {
	case TRI_CONSUMING:
		cancel_delayed_work_sync(&iter->poll_work);
		break;
	case TRI_NONCONSUMING:
		__free_ring_buffer_iter(iter, iter->cpu);
		break;
	}

	kfree(iter);
	trace_remote_put(remote);
}

static void trace_remote_iter_read_start(struct trace_remote_iterator *iter)
{
	struct trace_remote *remote = iter->remote;
	int cpu = iter->cpu;

	/* Acquire global reader lock */
	if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
		down_write(&remote->reader_lock);
	else
		down_read(&remote->reader_lock);

	if (cpu == RING_BUFFER_ALL_CPUS)
		return;

	/*
	 * No need for the remote lock here, iter holds a reference on
	 * remote->nr_readers
	 */

	/* Get the per-CPU one */
	if (WARN_ON_ONCE(!remote->pcpu_reader_locks))
		return;

	if (iter->type == TRI_CONSUMING)
		down_write(&remote->pcpu_reader_locks[cpu]);
	else
		down_read(&remote->pcpu_reader_locks[cpu]);
}

static void trace_remote_iter_read_finished(struct trace_remote_iterator *iter)
{
	struct trace_remote *remote = iter->remote;
	int cpu = iter->cpu;

	/* Release per-CPU reader lock */
	if (cpu != RING_BUFFER_ALL_CPUS) {
		/*
		 * No need for the remote lock here, iter holds a reference on
		 * remote->nr_readers
		 */
		if (iter->type == TRI_CONSUMING)
			up_write(&remote->pcpu_reader_locks[cpu]);
		else
			up_read(&remote->pcpu_reader_locks[cpu]);
	}

	/* Release global reader lock */
	if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
		up_write(&remote->reader_lock);
	else
		up_read(&remote->reader_lock);
}

static struct ring_buffer_iter *__get_rb_iter(struct trace_remote_iterator *iter, int cpu)
{
	return iter->cpu != RING_BUFFER_ALL_CPUS ? iter->rb_iter : iter->rb_iters[cpu];
}

static struct ring_buffer_event *
__peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events)
{
	struct ring_buffer_event *rb_evt;
	struct ring_buffer_iter *rb_iter;

	switch (iter->type) {
	case TRI_CONSUMING:
		return ring_buffer_peek(iter->remote->trace_buffer, cpu, ts, lost_events);
	case TRI_NONCONSUMING:
		rb_iter = __get_rb_iter(iter, cpu);
		rb_evt = ring_buffer_iter_peek(rb_iter, ts);
		if (!rb_evt)
			return NULL;

		*lost_events = ring_buffer_iter_dropped(rb_iter);

		return rb_evt;
	}

	return NULL;
}

static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
{
	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
@@ -318,7 +490,7 @@ static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
		if (ring_buffer_empty_cpu(trace_buffer, cpu))
			return false;

		if (!ring_buffer_peek(trace_buffer, cpu, &iter->ts, &iter->lost_events))
		if (!__peek_event(iter, cpu, &iter->ts, &iter->lost_events))
			return false;

		iter->evt_cpu = cpu;
@@ -333,7 +505,7 @@ static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
		if (ring_buffer_empty_cpu(trace_buffer, cpu))
			continue;

		if (!ring_buffer_peek(trace_buffer, cpu, &ts, &lost_events))
		if (!__peek_event(iter, cpu, &ts, &lost_events))
			continue;

		if (ts >= iter->ts)
@@ -347,6 +519,20 @@ static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
	return iter->ts != U64_MAX;
}

static void trace_remote_iter_move(struct trace_remote_iterator *iter)
{
	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;

	switch (iter->type) {
	case TRI_CONSUMING:
		ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
		break;
	case TRI_NONCONSUMING:
		ring_buffer_iter_advance(__get_rb_iter(iter, iter->evt_cpu));
		break;
	}
}

static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
{
	unsigned long usecs_rem;
@@ -369,13 +555,14 @@ static int trace_pipe_open(struct inode *inode, struct file *filp)
{
	struct trace_remote *remote = inode->i_private;
	struct trace_remote_iterator *iter;
	int cpu = RING_BUFFER_ALL_CPUS;

	if (inode->i_cdev)
		cpu = (long)inode->i_cdev - 1;
	int cpu = tracing_get_cpu(inode);

	guard(mutex)(&remote->lock);
	iter = trace_remote_iter(remote, cpu);

	iter = trace_remote_iter(remote, cpu, TRI_CONSUMING);
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	filp->private_data = iter;

	return IS_ERR(iter) ? PTR_ERR(iter) : 0;
@@ -410,6 +597,8 @@ static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt,
	if (ret < 0)
		return ret;

	trace_remote_iter_read_start(iter);

	while (trace_remote_iter_read_event(iter)) {
		int prev_len = iter->seq.seq.len;

@@ -418,9 +607,11 @@ static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt,
			break;
		}

		ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
		trace_remote_iter_move(iter);
	}

	trace_remote_iter_read_finished(iter);

	goto copy_to_user;
}

@@ -430,14 +621,127 @@ static const struct file_operations trace_pipe_fops = {
	.release	= trace_pipe_release,
};

static void *trace_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_remote_iterator *iter = m->private;

	++*pos;

	if (!iter || !trace_remote_iter_read_event(iter))
		return NULL;

	trace_remote_iter_move(iter);
	iter->pos++;

	return iter;
}

static void *trace_start(struct seq_file *m, loff_t *pos)
{
	struct trace_remote_iterator *iter = m->private;
	loff_t i;

	if (!iter)
		return NULL;

	trace_remote_iter_read_start(iter);

	if (!*pos) {
		iter->pos = -1;
		return trace_next(m, NULL, &i);
	}

	i = iter->pos;
	while (i < *pos) {
		iter = trace_next(m, NULL, &i);
		if (!iter)
			return NULL;
	}

	return iter;
}

static int trace_show(struct seq_file *m, void *v)
{
	struct trace_remote_iterator *iter = v;

	trace_seq_init(&iter->seq);

	if (trace_remote_iter_print_event(iter)) {
		seq_printf(m, "[EVENT %d PRINT TOO BIG]\n", iter->evt->id);
		return 0;
	}

	return trace_print_seq(m, &iter->seq);
}

static void trace_stop(struct seq_file *m, void *v)
{
	struct trace_remote_iterator *iter = m->private;

	if (iter)
		trace_remote_iter_read_finished(iter);
}

static const struct seq_operations trace_sops = {
	.start		= trace_start,
	.next		= trace_next,
	.show		= trace_show,
	.stop		= trace_stop,
};

static int trace_open(struct inode *inode, struct file *filp)
{
	struct trace_remote *remote = inode->i_private;
	struct trace_remote_iterator *iter = NULL;
	int cpu = tracing_get_cpu(inode);
	int ret;

	if (!(filp->f_mode & FMODE_READ))
		return 0;

	guard(mutex)(&remote->lock);

	iter = trace_remote_iter(remote, cpu, TRI_NONCONSUMING);
	if (IS_ERR(iter))
		return PTR_ERR(iter);

	ret = seq_open(filp, &trace_sops);
	if (ret) {
		trace_remote_iter_free(iter);
		return ret;
	}

	((struct seq_file *)filp->private_data)->private = (void *)iter;

	return 0;
}

static int trace_release(struct inode *inode, struct file *filp)
{
	struct trace_remote_iterator *iter;

	if (!(filp->f_mode & FMODE_READ))
		return 0;

	iter = ((struct seq_file *)filp->private_data)->private;
	seq_release(inode, filp);

	if (!iter)
		return 0;

	guard(mutex)(&iter->remote->lock);

	trace_remote_iter_free(iter);

	return 0;
}

static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_remote *remote = inode->i_private;
	int cpu = RING_BUFFER_ALL_CPUS;

	if (inode->i_cdev)
		cpu = (long)inode->i_cdev - 1;
	int cpu = tracing_get_cpu(inode);

	guard(mutex)(&remote->lock);

@@ -447,7 +751,11 @@ static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cn
}

static const struct file_operations trace_fops = {
	.open		= trace_open,
	.write		= trace_write,
	.read		= seq_read,
	.read_iter	= seq_read_iter,
	.release	= trace_release,
};

static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote)
@@ -565,6 +873,7 @@ int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs,
	remote->trace_buffer_size = 7 << 10;
	remote->poll_ms = 100;
	mutex_init(&remote->lock);
	init_rwsem(&remote->reader_lock);

	if (trace_remote_init_tracefs(name, remote)) {
		kfree(remote);