Commit 20516d6e authored by Jason Gunthorpe
Browse files

x86: Stop using weak symbols for __iowrite32_copy()

Start switching iomap_copy routines over to use #define and arch provided
inline/macro functions instead of weak symbols.

Inline functions allow more compiler optimization and this is often a
driver hot path.

x86 has the only arch-specific override of the weak __iowrite32_copy(), so
replace it with a static inline containing the same single-instruction inline
assembly. The compiler will generate the "mov edx,ecx" in a more optimal
way.

Remove iomap_copy_64.S

Link: https://lore.kernel.org/r/1-v3-1893cd8b9369+1925-mlx5_arm_wc_jgg@nvidia.com


Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 1a633bdc
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -209,6 +209,23 @@ void memset_io(volatile void __iomem *, int, size_t);
#define memcpy_toio memcpy_toio
#define memset_io memset_io

#ifdef CONFIG_X86_64
/*
 * __iowrite32_copy - copy data to MMIO space, in 32-bit units (x86-64 version)
 * @to: destination, in MMIO space
 * @from: source buffer
 * @count: number of 32-bit quantities to copy
 *
 * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for
 * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy
 * loop.
 */
static inline void __iowrite32_copy(void __iomem *to, const void *from,
				    size_t count)
{
	/*
	 * "rep movsl" takes its repeat count from the C register and its
	 * destination/source pointers from the D/S registers, and modifies
	 * all three as it runs.  Each is therefore declared as an output and
	 * tied to the matching input ("0"/"1"/"2") so the compiler knows the
	 * registers no longer hold their original values afterwards.  The
	 * "memory" clobber tells the compiler the asm accesses memory that is
	 * not named in the operand list.
	 */
	asm volatile("rep ; movsl"
		     : "=&c"(count), "=&D"(to), "=&S"(from)
		     : "0"(count), "1"(to), "2"(from)
		     : "memory");
}
#define __iowrite32_copy __iowrite32_copy
#endif

/*
 * ISA space is 'always mapped' on a typical x86 system, no need to
 * explicitly ioremap() it. The fact that the ISA IO space is mapped
+0 −1
Original line number Diff line number Diff line
@@ -53,7 +53,6 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y)
        lib-y += atomic64_386_32.o
endif
else
        obj-y += iomap_copy_64.o
ifneq ($(CONFIG_GENERIC_CSUM),y)
        lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
endif

arch/x86/lib/iomap_copy_64.S

deleted100644 → 0
+0 −15
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2006 PathScale, Inc.  All Rights Reserved.
 */

#include <linux/linkage.h>

/*
 * override generic version in lib/iomap_copy.c
 */
SYM_FUNC_START(__iowrite32_copy)
	/*
	 * NOTE(review): presumably entered with to/from already in %rdi/%rsi
	 * and count in %edx per the x86-64 C calling convention, which is why
	 * only the count needs to be moved -- confirm against the ABI.
	 */
	movl %edx,%ecx		/* rep takes its repeat count from %ecx */
	rep movsl		/* copy that many 32-bit units */
	RET
SYM_FUNC_END(__iowrite32_copy)
+4 −1
Original line number Diff line number Diff line
@@ -16,7 +16,10 @@
struct device;
struct resource;

__visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
#ifndef __iowrite32_copy
void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
#endif

void __ioread32_copy(void *to, const void __iomem *from, size_t count);
void __iowrite64_copy(void __iomem *to, const void *from, size_t count);

+3 −3
Original line number Diff line number Diff line
@@ -16,9 +16,8 @@
 * time.  Order of access is not guaranteed, nor is a memory barrier
 * performed afterwards.
 */
void __attribute__((weak)) __iowrite32_copy(void __iomem *to,
					    const void *from,
					    size_t count)
#ifndef __iowrite32_copy
void __iowrite32_copy(void __iomem *to, const void *from, size_t count)
{
	u32 __iomem *dst = to;
	const u32 *src = from;
@@ -28,6 +27,7 @@ void __attribute__((weak)) __iowrite32_copy(void __iomem *to,
		__raw_writel(*src++, dst++);
}
EXPORT_SYMBOL_GPL(__iowrite32_copy);
#endif

/**
 * __ioread32_copy - copy data from MMIO space, in 32-bit units