Commit 5ba15d41, authored by Feng Jiang, committed by Paul Walmsley
Browse files

riscv: lib: add strnlen() implementation



Add an optimized strnlen() implementation for RISC-V. This version
includes a generic optimization and a Zbb-powered optimization using
the 'orc.b' instruction, derived from the strlen() implementation.

Benchmark results (QEMU TCG, rv64):
  Length | Original (MB/s) | Optimized (MB/s) | Improvement
  -------|-----------------|------------------|------------
  16 B   | 179             | 309              | +72.6%
  512 B  | 347             | 1562             | +350.1%
  4096 B | 356             | 1878             | +427.5%

Suggested-by: Qingfang Deng <dqfext@gmail.com>
Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
Link: https://patch.msgid.link/20260130025018.172925-7-jiangfeng@kylinos.cn


Signed-off-by: Paul Walmsley <pjw@kernel.org>
parent e73bcb37
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -28,6 +28,9 @@ extern asmlinkage __kernel_size_t strlen(const char *);

#define __HAVE_ARCH_STRNCMP
extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count);

#define __HAVE_ARCH_STRNLEN
extern asmlinkage __kernel_size_t strnlen(const char *, size_t);
#endif

/* For those files which don't want to check by kasan. */
+1 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
lib-y			+= strcmp.o
lib-y			+= strlen.o
lib-y			+= strncmp.o
lib-y			+= strnlen.o
endif
lib-y			+= csum.o
ifeq ($(CONFIG_MMU), y)
+164 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */

/*
 * Based on arch/riscv/lib/strlen.S
 *
 * Copyright (C) Feng Jiang <jiangfeng@kylinos.cn>
 */

#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/alternative-macros.h>
#include <asm/hwcap.h>

/* size_t strnlen(const char *s, size_t count) */
SYM_FUNC_START(strnlen)

	/*
	 * Patched at boot: falls through to the generic byte loop by default,
	 * or jumps to the Zbb variant when the CPU advertises Zbb and the
	 * kernel/toolchain were built with Zbb support.
	 */
	__ALTERNATIVE_CFG("nop", "j strnlen_zbb", 0, RISCV_ISA_EXT_ZBB,
		IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))


	/*
	 * Generic variant: scan one byte at a time.
	 *
	 * Returns
	 *   a0 - String length (at most a1)
	 *
	 * Parameters
	 *   a0 - String to measure
	 *   a1 - Max length of string
	 *
	 * Clobbers
	 *   t0, t1, t2
	 */
	addi	t1, a0, -1		/* t1 = cursor, pre-decremented */
	add	t2, a0, a1		/* t2 = one past the last allowed byte */
1:
	addi	t1, t1, 1
	beq	t1, t2, 2f		/* limit reached: do not load this byte */
	lbu	t0, 0(t1)
	bnez	t0, 1b
2:
	sub	a0, t1, a0		/* length = cursor - start */
	ret


/*
 * Variant of strnlen using the ZBB extension if available
 */
#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
strnlen_zbb:

#ifdef CONFIG_CPU_BIG_ENDIAN
# define CZ	clz
# define SHIFT	sll
#else
# define CZ	ctz
# define SHIFT	srl
#endif

.option push
.option arch,+zbb

	/*
	 * Returns
	 *   a0 - String length (at most a1)
	 *
	 * Parameters
	 *   a0 - String to measure
	 *   a1 - Max length of string
	 *
	 * Clobbers
	 *   t0, t1, t2, t3, t4
	 *
	 * Only naturally aligned words that contain at least one byte of
	 * [a0, a0 + a1) are loaded, so no load touches a word that lies
	 * entirely outside the caller-supplied range.
	 */

	/* If maxlen is 0, return 0. */
	beqz	a1, 3f

	/* Number of irrelevant bytes in the first word. */
	andi	t2, a0, SZREG-1

	/* Align pointer. */
	andi	t0, a0, -SZREG

	/* t3 = number of valid (string) bytes in the first word. */
	li	t3, SZREG
	sub	t3, t3, t2
	/* t2 = bit count of the irrelevant bytes, used as shift amount. */
	slli	t2, t2, 3

	/*
	 * t4 = aligned address of the word holding the last byte we are
	 * allowed to inspect (a0 + a1 - 1).  Using the last valid byte
	 * rather than the end address keeps the loop from loading the word
	 * that starts exactly at a0 + a1.  If the sum wraps around (huge
	 * maxlen), saturate to the top of the address space so the scan is
	 * effectively bounded only by the terminating NUL.
	 */
	add	t4, a0, a1
	addi	t4, t4, -1
	bgeu	t4, a0, 5f
	li	t4, -1
5:
	andi	t4, t4, -SZREG

	/* Get the first word.  */
	REG_L	t1, 0(t0)

	/*
	 * Shift away the partial data we loaded to remove the irrelevant bytes
	 * preceding the string with the effect of adding NUL bytes at the
	 * end of the string's first word.
	 */
	SHIFT	t1, t1, t2

	/* Convert non-NUL into 0xff and NUL into 0x00. */
	orc.b	t1, t1

	/* Convert non-NUL into 0x00 and NUL into 0xff. */
	not	t1, t1

	/*
	 * Search for the first set bit (corresponding to a NUL byte in the
	 * original chunk).
	 */
	CZ	t1, t1

	/* Convert the bit index into a byte count. */
	srli	a0, t1, 3

	/* Limit the result by maxlen. */
	minu	a0, a0, a1

	/*
	 * The first chunk is special: compare against the number
	 * of valid bytes in this chunk.  A smaller count means a NUL or the
	 * maxlen limit was hit inside the first word, so we are done.
	 */
	bgtu	t3, a0, 2f

	/*
	 * Here a0 == t3 (every valid byte of the first word is non-NUL).
	 * If the first word is also the last word in range, maxlen ends
	 * exactly at its end: return without loading any further word.
	 */
	bgeu	t0, t4, 2f

	/* Prepare for the word comparison loop. */
	addi	t2, t0, SZREG
	li	t3, -1

	/*
	 * Our critical loop is 5 instructions and processes data in
	 * 4 byte or 8 byte chunks.
	 *
	 * Invariant at loop entry: t0 < t4, so the word at t0 + SZREG
	 * still contains at least one byte inside the allowed range.
	 */
	.p2align 3
1:
	REG_L	t1, SZREG(t0)
	addi	t0, t0, SZREG
	orc.b	t1, t1
	bgeu	t0, t4, 4f		/* that was the last word in range */
	beq	t1, t3, 1b		/* no NUL in this word: keep going */
4:
	/* Count the non-NUL bytes preceding the first NUL in the last word. */
	not	t1, t1
	CZ	t1, t1
	srli	t1, t1, 3

	/* Get number of processed bytes. */
	sub	t2, t0, t2

	/* Add number of characters in the first word.  */
	add	a0, a0, t2

	/* Add number of characters in the last word.  */
	add	a0, a0, t1

	/* Ensure the final result does not exceed maxlen. */
	minu	a0, a0, a1
2:
	ret
3:
	/* maxlen == 0: return it (i.e. 0) without touching memory. */
	mv	a0, a1
	ret

.option pop
#endif
SYM_FUNC_END(strnlen)
SYM_FUNC_ALIAS(__pi_strnlen, strnlen)
EXPORT_SYMBOL(strnlen)
+4 −1
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@

purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
purgatory-y += strcmp.o strlen.o strncmp.o
purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o
endif

targets += $(purgatory-y)
@@ -32,6 +32,9 @@ $(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
	$(call if_changed_rule,cc_o_c)

$(obj)/strnlen.o: $(srctree)/arch/riscv/lib/strnlen.S FORCE
	$(call if_changed_rule,as_o_S)

CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
CFLAGS_string.o := -D__DISABLE_EXPORTS
CFLAGS_ctype.o := -D__DISABLE_EXPORTS