Commit 0bfbc914 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'riscv-for-linus-6.10-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux

Pull RISC-V updates from Palmer Dabbelt:

 - Add byte/half-word compare-and-exchange, emulated via LR/SC loops

 - Support for Rust

 - Support for Zihintpause in hwprobe

 - Add PR_RISCV_SET_ICACHE_FLUSH_CTX prctl()

 - Support lockless lockrefs

* tag 'riscv-for-linus-6.10-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (42 commits)
  riscv: defconfig: Enable CONFIG_CLK_SOPHGO_CV1800
  riscv: select ARCH_HAS_FAST_MULTIPLIER
  riscv: mm: still create swiotlb buffer for kmalloc() bouncing if required
  riscv: Annotate pgtable_l{4,5}_enabled with __ro_after_init
  riscv: Remove redundant CONFIG_64BIT from pgtable_l{4,5}_enabled
  riscv: mm: Always use an ASID to flush mm contexts
  riscv: mm: Preserve global TLB entries when switching contexts
  riscv: mm: Make asid_bits a local variable
  riscv: mm: Use a fixed layout for the MM context ID
  riscv: mm: Introduce cntx2asid/cntx2version helper macros
  riscv: Avoid TLB flush loops when affected by SiFive CIP-1200
  riscv: Apply SiFive CIP-1200 workaround to single-ASID sfence.vma
  riscv: mm: Combine the SMP and UP TLB flush code
  riscv: Only send remote fences when some other CPU is online
  riscv: mm: Broadcast kernel TLB flushes only when needed
  riscv: Use IPIs for remote cache/TLB flushes by default
  riscv: Factor out page table TLB synchronization
  riscv: Flush the instruction cache during SMP bringup
  riscv: hwprobe: export Zihintpause ISA extension
  riscv: misaligned: remove CONFIG_RISCV_M_MODE specific code
  ...
parents 4f05e820 92cce919
Loading
Loading
Loading
Loading
+98 −0
Original line number Diff line number Diff line
.. SPDX-License-Identifier: GPL-2.0

==============================================================================
Concurrent Modification and Execution of Instructions (CMODX) for RISC-V Linux
==============================================================================

CMODX is a programming technique where a program executes instructions that were
modified by the program itself. Instruction storage and the instruction cache
(icache) are not guaranteed to be synchronized on RISC-V hardware. Therefore, the
program must enforce its own synchronization with the unprivileged fence.i
instruction.

However, the default Linux ABI prohibits the use of fence.i in userspace
applications. At any point the scheduler may migrate a task onto a new hart. If
migration occurs after the userspace synchronized the icache and instruction
storage with fence.i, the icache on the new hart will no longer be clean. This
is due to the behavior of fence.i only affecting the hart that it is called on.
Thus, the hart that the task has been migrated to may not have synchronized
instruction storage and icache.

There are two ways to solve this problem: use the riscv_flush_icache() syscall,
or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
userspace. The syscall performs a one-off icache flushing operation. The prctl
changes the Linux ABI to allow userspace to emit icache flushing operations.

As an aside, "deferred" icache flushes can sometimes be triggered in the kernel.
At the time of writing, this only occurs during the riscv_flush_icache() syscall
and when the kernel uses copy_to_user_page(). These deferred flushes happen only
when the memory map being used by a hart changes. If the prctl() context caused
an icache flush, this deferred icache flush will be skipped as it is redundant.
Therefore, there will be no additional flush when using the riscv_flush_icache()
syscall inside of the prctl() context.

prctl() Interface
---------------------

Call prctl() with ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` as the first argument. The
remaining arguments will be delegated to the riscv_set_icache_flush_ctx
function detailed below.

.. kernel-doc:: arch/riscv/mm/cacheflush.c
	:identifiers: riscv_set_icache_flush_ctx

Example usage:

The following files are meant to be compiled and linked with each other. The
modify_instruction() function replaces an add with 0 with an add with one,
causing the instruction sequence in get_value() to change from returning a zero
to returning a one.

cmodx.c::

	#include <stdio.h>
	#include <sys/prctl.h>

	extern int get_value();
	extern void modify_instruction();

	int main()
	{
		int value = get_value();
		printf("Value before cmodx: %d\n", value);

		// Call prctl before first fence.i is called inside modify_instruction
		prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_ON, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS);
		modify_instruction();
		// Call prctl after final fence.i is called in process
		prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX_OFF, PR_RISCV_CTX_SW_FENCEI, PR_RISCV_SCOPE_PER_PROCESS);

		value = get_value();
		printf("Value after cmodx: %d\n", value);
		return 0;
	}

cmodx.S::

	.option norvc

	.text
	.global modify_instruction
	modify_instruction:
	lw a0, new_insn
	lui a5,%hi(old_insn)
	sw  a0,%lo(old_insn)(a5)
	fence.i
	ret

	.section modifiable, "awx"
	.global get_value
	get_value:
	li a0, 0
	old_insn:
	addi a0, a0, 0
	ret

	.data
	new_insn:
	addi a0, a0, 1
+4 −0
Original line number Diff line number Diff line
@@ -188,6 +188,10 @@ The following keys are defined:
       manual starting from commit 95cf1f9 ("Add changes requested by Ved
       during signoff")

  * :c:macro:`RISCV_HWPROBE_EXT_ZIHINTPAUSE`: The Zihintpause extension is
       supported as defined in the RISC-V ISA manual starting from commit
       d8ab5c78c207 ("Zihintpause is ratified").

* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
  information about the selected set of processors.

+1 −0
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@ RISC-V architecture
    patch-acceptance
    uabi
    vector
    cmodx

    features

+1 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ Architecture Level of support Constraints
=============  ================  ==============================================
``arm64``      Maintained        Little Endian only.
``loongarch``  Maintained        \-
``riscv``      Maintained        ``riscv64`` only.
``um``         Maintained        ``x86_64`` only.
``x86``        Maintained        ``x86_64`` only.
=============  ================  ==============================================
+14 −8
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@ config RISCV
	select ARCH_HAS_DEBUG_VIRTUAL if MMU
	select ARCH_HAS_DEBUG_VM_PGTABLE
	select ARCH_HAS_DEBUG_WX
	select ARCH_HAS_FAST_MULTIPLIER
	select ARCH_HAS_FORTIFY_SOURCE
	select ARCH_HAS_GCOV_PROFILE_ALL
	select ARCH_HAS_GIGANTIC_PAGE
@@ -57,10 +58,11 @@ config RISCV
	select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
	select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
	select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
	select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
	select ARCH_USE_MEMTEST
	select ARCH_USE_QUEUED_RWLOCKS
	select ARCH_USES_CFI_TRAPS if CFI_CLANG
	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if MMU
	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
	select ARCH_WANT_FRAME_POINTERS
	select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
@@ -71,7 +73,7 @@ config RISCV
	select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
	select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
	select BUILDTIME_TABLE_SORT if MMU
	select CLINT_TIMER if !MMU
	select CLINT_TIMER if RISCV_M_MODE
	select CLONE_BACKWARDS
	select COMMON_CLK
	select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND
@@ -155,6 +157,7 @@ config RISCV
	select HAVE_REGS_AND_STACK_ACCESS_API
	select HAVE_RETHOOK if !XIP_KERNEL
	select HAVE_RSEQ
	select HAVE_RUST if 64BIT
	select HAVE_SAMPLE_FTRACE_DIRECT
	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
	select HAVE_STACKPROTECTOR
@@ -231,8 +234,12 @@ config ARCH_MMAP_RND_COMPAT_BITS_MAX

# set if we run in machine mode, cleared if we run in supervisor mode
config RISCV_M_MODE
	bool
	default !MMU
	bool "Build a kernel that runs in machine mode"
	depends on !MMU
	default y
	help
	  Select this option if you want to run the kernel in M-mode,
	  without the assistance of any other firmware.

# set if we are running in S-mode and can use SBI calls
config RISCV_SBI
@@ -249,8 +256,9 @@ config MMU

config PAGE_OFFSET
	hex
	default 0xC0000000 if 32BIT && MMU
	default 0x80000000 if !MMU
	default 0x80000000 if !MMU && RISCV_M_MODE
	default 0x80200000 if !MMU
	default 0xc0000000 if 32BIT
	default 0xff60000000000000 if 64BIT

config KASAN_SHADOW_OFFSET
@@ -598,7 +606,6 @@ config TOOLCHAIN_HAS_VECTOR_CRYPTO
config RISCV_ISA_ZBB
	bool "Zbb extension support for bit manipulation instructions"
	depends on TOOLCHAIN_HAS_ZBB
	depends on MMU
	depends on RISCV_ALTERNATIVE
	default y
	help
@@ -630,7 +637,6 @@ config RISCV_ISA_ZICBOM

config RISCV_ISA_ZICBOZ
	bool "Zicboz extension support for faster zeroing of memory"
	depends on MMU
	depends on RISCV_ALTERNATIVE
	default y
	help
Loading