Commit d6f5841a authored by Alexei Starovoitov
Browse files

Merge branch 'bpf-allow-utf-8-literals-in-bpf_bprintf_prepare'

Yihan Ding says:

====================
bpf: allow UTF-8 literals in bpf_bprintf_prepare()

bpf_bprintf_prepare() currently rejects any non-ASCII byte in format
strings, so helpers such as bpf_trace_printk() fail to emit UTF-8
literal text even when those bytes are not part of a format specifier.

Keep plain text permissive while continuing to parse '%' sequences as
ASCII-only. Patch 1 updates test_snprintf_negative() at the same time so the
selftests stay consistent during bisection. Patch 2 then extends
trace_printk coverage for both the valid UTF-8 literal case and the
invalid non-ASCII-after-'%' case.

Changes in v3:
- drop Suggested-by trailers and move review credit into this changelog
- update test_snprintf_negative() in patch 1/2 so plain non-ASCII text is
  accepted while non-ASCII after '%' is still rejected, keeping
  ./test_progs -t snprintf aligned with the new behavior.
- clarify the trace_printk negative case with an explicit invalid format
  string and comment
- address Paul Chaignon's review feedback and keep the negative coverage
  requested earlier by Alan Maguire

Changes in v2:
- split the core change and selftest updates into two patches
- drop unnecessary isspace()/ispunct() casts
- add comments to clarify plain-text vs format-specifier handling
- add a negative selftest for non-ASCII bytes inside '%' sequences

Testing:
- Reproduced on x86_64 without the core fix: ASCII trace output works,
  while UTF-8 literal text in bpf_trace_printk() is rejected and
  produces no trace output
- Verified with tools/testing/selftests/bpf: ./test_progs -t trace_printk
- Verified with tools/testing/selftests/bpf: ./test_progs -t snprintf
====================

Link: https://patch.msgid.link/20260416120142.1420646-1-dingyihan@uniontech.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 766bf026 4198ff31
Loading
Loading
Loading
Loading
+16 −1
Original line number Diff line number Diff line
@@ -845,7 +845,13 @@ int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args,
		data->buf = buffers->buf;

	for (i = 0; i < fmt_size; i++) {
		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
		unsigned char c = fmt[i];

		/*
		 * Permit bytes >= 0x80 in plain text so UTF-8 literals can pass
		 * through unchanged, while still rejecting ASCII control bytes.
		 */
		if (isascii(c) && !isprint(c) && !isspace(c)) {
			err = -EINVAL;
			goto out;
		}
@@ -867,6 +873,15 @@ int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args,
		 * always access fmt[i + 1], in the worst case it will be a 0
		 */
		i++;
		c = fmt[i];
		/*
		 * The format parser below only understands ASCII conversion
		 * specifiers and modifiers, so reject non-ASCII after '%'.
		 */
		if (!isascii(c)) {
			err = -EINVAL;
			goto out;
		}

		/* skip optional "[0 +-][num]" width formatting field */
		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
+2 −1
Original line number Diff line number Diff line
@@ -114,7 +114,8 @@ static void test_snprintf_negative(void)
	ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
	ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6");
	ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
	ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
	ASSERT_OK(load_single_snprintf("\x80"), "non ascii plain text");
	ASSERT_ERR(load_single_snprintf("%\x80"), "non ascii in specifier");
	ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
	ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8");
	ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9");
+22 −6
Original line number Diff line number Diff line
@@ -6,18 +6,21 @@
#include "trace_printk.lskel.h"

#define SEARCHMSG	"testing,testing"
#define SEARCHMSG_UTF8	"中文,测试"

static void trace_pipe_cb(const char *str, void *data)
{
	if (strstr(str, SEARCHMSG) != NULL)
		(*(int *)data)++;
		((int *)data)[0]++;
	if (strstr(str, SEARCHMSG_UTF8))
		((int *)data)[1]++;
}

void serial_test_trace_printk(void)
{
	struct trace_printk_lskel__bss *bss;
	struct trace_printk_lskel *skel;
	int err = 0, found = 0;
	int err = 0, found[2] = {};

	skel = trace_printk_lskel__open();
	if (!ASSERT_OK_PTR(skel, "trace_printk__open"))
@@ -46,11 +49,24 @@ void serial_test_trace_printk(void)
	if (!ASSERT_GT(bss->trace_printk_ret, 0, "bss->trace_printk_ret"))
		goto cleanup;

	/* verify our search string is in the trace buffer */
	ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000),
	if (!ASSERT_GT(bss->trace_printk_utf8_ran, 0, "bss->trace_printk_utf8_ran"))
		goto cleanup;

	if (!ASSERT_GT(bss->trace_printk_utf8_ret, 0, "bss->trace_printk_utf8_ret"))
		goto cleanup;

	if (!ASSERT_LT(bss->trace_printk_invalid_spec_ret, 0,
		       "bss->trace_printk_invalid_spec_ret"))
		goto cleanup;

	/* verify our search strings are in the trace buffer */
	ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, found, 1000),
		  "read_trace_pipe_iter");

	if (!ASSERT_EQ(found, bss->trace_printk_ran, "found"))
	if (!ASSERT_EQ(found[0], bss->trace_printk_ran, "found"))
		goto cleanup;

	if (!ASSERT_EQ(found[1], bss->trace_printk_utf8_ran, "found_utf8"))
		goto cleanup;

cleanup:
+10 −0
Original line number Diff line number Diff line
@@ -10,13 +10,23 @@ char _license[] SEC("license") = "GPL";

int trace_printk_ret = 0;
int trace_printk_ran = 0;
int trace_printk_invalid_spec_ret = 0;
int trace_printk_utf8_ret = 0;
int trace_printk_utf8_ran = 0;

const char fmt[] = "Testing,testing %d\n";
static const char utf8_fmt[] = "中文,测试 %d\n";
/* Non-ASCII bytes after '%' must still be rejected. */
static const char invalid_spec_fmt[] = "%\x80\n";

/*
 * BPF program placed in the "fentry/" SYS_PREFIX "sys_nanosleep" section.
 *
 * Each invocation issues three bpf_trace_printk() calls and stores every
 * return value in its own global: one with the ASCII format, one with a
 * format whose literal text contains UTF-8 bytes, and one with a format
 * that has a non-ASCII byte directly after '%'. The first two calls also
 * pre-increment a run counter and pass it as the %d argument.
 *
 * Always returns 0.
 */
SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int sys_enter(void *ctx)
{
	/* ASCII format; the freshly incremented run counter fills the %d. */
	trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
					    ++trace_printk_ran);
	/* Same shape, but the plain-text part of the format is UTF-8. */
	trace_printk_utf8_ret = bpf_trace_printk(utf8_fmt, sizeof(utf8_fmt),
						 ++trace_printk_utf8_ran);
	/*
	 * "%\x80\n" puts a non-ASCII byte right after '%'. No counter is kept
	 * for this call; only the return value is recorded.
	 */
	trace_printk_invalid_spec_ret = bpf_trace_printk(invalid_spec_fmt,
							 sizeof(invalid_spec_fmt));
	return 0;
}