Commit 1fa8c4b1 authored by Ard Biesheuvel's avatar Ard Biesheuvel
Browse files

ARM: module: implement support for PC-relative group relocations

Add support for the R_ARM_ALU_PC_Gn_NC and R_ARM_LDR_PC_G2 group
relocations [0] so we can use them in modules. These will be used to
load the current task pointer from a global variable without having to
rely on a literal pool entry to carry the address of this variable,
which may have a significant negative impact on cache utilization for
variables that are used often and in many different places, as each
occurrence will result in a literal pool entry and therefore a line in
the D-cache.

[0] 'ELF for the ARM architecture'
    https://github.com/ARM-software/abi-aa/releases



Acked-by: default avatarLinus Walleij <linus.walleij@linaro.org>
Acked-by: default avatarNicolas Pitre <nico@fluxnic.net>
Signed-off-by: default avatarArd Biesheuvel <ardb@kernel.org>
Tested-by: default avatarMarc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
parent 831a469b
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -61,6 +61,9 @@ typedef struct user_fp elf_fpregset_t;
#define R_ARM_MOVT_ABS		44
#define R_ARM_MOVW_PREL_NC	45
#define R_ARM_MOVT_PREL		46
#define R_ARM_ALU_PC_G0_NC	57
#define R_ARM_ALU_PC_G1_NC	59
#define R_ARM_LDR_PC_G2		63

#define R_ARM_THM_CALL		10
#define R_ARM_THM_JUMP24	30
+85 −0
Original line number Diff line number Diff line
@@ -68,6 +68,42 @@ bool module_exit_section(const char *name)
		strstarts(name, ".ARM.exidx.exit");
}

/*
 * This implements the partitioning algorithm for group relocations as
 * documented in the ARM AArch32 ELF psABI (IHI 0044).
 *
 * A single PC-relative symbol reference is divided in up to 3 add or subtract
 * operations, where the final one could be incorporated into a load/store
 * instruction with immediate offset. E.g.,
 *
 *   ADD	Rd, PC, #...		or	ADD	Rd, PC, #...
 *   ADD	Rd, Rd, #...			ADD	Rd, Rd, #...
 *   LDR	Rd, [Rd, #...]			ADD	Rd, Rd, #...
 *
 * The latter has a guaranteed range of only 16 MiB (3x8 == 24 bits), so it is
 * of limited use in the kernel. However, the ADD/ADD/LDR combo has a range of
 * -/+ 256 MiB, (2x8 + 12 == 28 bits), which means it has sufficient range for
 * any in-kernel symbol reference (unless module PLTs are being used).
 *
 * The main advantage of this approach over the typical pattern using a literal
 * load is that literal loads may miss in the D-cache, and generally lead to
 * lower cache efficiency for variables that are referenced often from many
 * different places in the code.
 */
static u32 get_group_rem(u32 group, u32 *offset)
{
	u32 val = *offset;
	u32 shift;
	do {
		shift = val ? (31 - __fls(val)) & ~1 : 32;
		*offset = val;
		if (!val)
			break;
		val &= 0xffffff >> shift;
	} while (group--);
	return shift;
}

int
apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
	       unsigned int relindex, struct module *module)
@@ -82,6 +118,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
		unsigned long loc;
		Elf32_Sym *sym;
		const char *symname;
		u32 shift, group = 1;
		s32 offset;
		u32 tmp;
#ifdef CONFIG_THUMB2_KERNEL
@@ -212,6 +249,54 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
			*(u32 *)loc = __opcode_to_mem_arm(tmp);
			break;

		case R_ARM_ALU_PC_G0_NC:
			group = 0;
			fallthrough;
		case R_ARM_ALU_PC_G1_NC:
			tmp = __mem_to_opcode_arm(*(u32 *)loc);
			offset = ror32(tmp & 0xff, (tmp & 0xf00) >> 7);
			if (tmp & BIT(22))
				offset = -offset;
			offset += sym->st_value - loc;
			if (offset < 0) {
				offset = -offset;
				tmp = (tmp & ~BIT(23)) | BIT(22); // SUB opcode
			} else {
				tmp = (tmp & ~BIT(22)) | BIT(23); // ADD opcode
			}

			shift = get_group_rem(group, &offset);
			if (shift < 24) {
				offset >>= 24 - shift;
				offset |= (shift + 8) << 7;
			}
			*(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset);
			break;

		case R_ARM_LDR_PC_G2:
			tmp = __mem_to_opcode_arm(*(u32 *)loc);
			offset = tmp & 0xfff;
			if (~tmp & BIT(23))		// U bit cleared?
				offset = -offset;
			offset += sym->st_value - loc;
			if (offset < 0) {
				offset = -offset;
				tmp &= ~BIT(23);	// clear U bit
			} else {
				tmp |= BIT(23);		// set U bit
			}
			get_group_rem(2, &offset);

			if (offset > 0xfff) {
				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
				       module->name, relindex, i, symname,
				       ELF32_R_TYPE(rel->r_info), loc,
				       sym->st_value);
				return -ENOEXEC;
			}
			*(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset);
			break;

#ifdef CONFIG_THUMB2_KERNEL
		case R_ARM_THM_CALL:
		case R_ARM_THM_JUMP24: