Commit dd161f74 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'trace-sorttable-v6.15' of...

Merge tag 'trace-sorttable-v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing / sorttable updates from Steven Rostedt:

 - Implement arm64 build time sorting of the mcount location table

   When gcc is used to build arm64, the mcount_loc section is all zeros
   in the vmlinux elf file. The addresses are stored in the Elf_Rela
   location.

   To sort at build time, an array is allocated and the addresses are
   added to it via the content of the mcount_loc section as well as he
   Elf_Rela data. After sorting, the information is put back into the
   Elf_Rela which now has the section sorted.

 - Make sorting of mcount location table for arm64 work with clang as
   well

   When clang is used, the mcount_loc section contains the addresses,
   unlike the gcc build. An array is still created and the sorting works
   for both methods.

 - Remove weak functions from the mcount_loc section

   Have the sorttable code pass in the data of functions defined via
   'nm -S' which shows the functions as well as their sizes. Using this
   information the sorttable code can determine if a function in the
   mcount_loc section was weak and overridden. If the function is not
   found, it is set to be zero. On boot, when the mcount_loc section is
   read and the ftrace table is created, if the address in the
   mcount_loc is not in the kernel core text then it is removed and not
   added to the ftrace_filter_functions (the functions that can be
   attached by ftrace callbacks).

 - Update and fix the reporting of how much data is used for ftrace
   functions

   On boot, a report of how many pages were used by the ftrace table as
   well as how they were grouped (the table holds a list of sections
   that are groups of pages that were able to be allocated). The
   removing of the weak functions required the accounting to be updated.

* tag 'trace-sorttable-v6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  scripts/sorttable: Allow matches to functions before function entry
  scripts/sorttable: Use normal sort if theres no relocs in the mcount section
  ftrace: Check against is_kernel_text() instead of kaslr_offset()
  ftrace: Test mcount_loc addr before calling ftrace_call_addr()
  ftrace: Have ftrace pages output reflect freed pages
  ftrace: Update the mcount_loc check of skipped entries
  scripts/sorttable: Zero out weak functions in mcount_loc table
  scripts/sorttable: Always use an array for the mcount_loc sorting
  scripts/sorttable: Have mcount rela sort use direct values
  arm64: scripts/sorttable: Implement sorting mcount_loc at boot for arm64
parents 5c2a430e dc208c69
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -218,6 +218,7 @@ config ARM64
		if DYNAMIC_FTRACE_WITH_ARGS
	select HAVE_SAMPLE_FTRACE_DIRECT
	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
	select HAVE_BUILDTIME_MCOUNT_SORT
	select HAVE_EFFICIENT_UNALIGNED_ACCESS
	select HAVE_GUP_FAST
	select HAVE_FTRACE_GRAPH_FUNC
+50 −5
Original line number Diff line number Diff line
@@ -7016,6 +7016,7 @@ static int ftrace_process_locs(struct module *mod,
	unsigned long *p;
	unsigned long addr;
	unsigned long flags = 0; /* Shut up gcc */
	unsigned long pages;
	int ret = -ENOMEM;

	count = end - start;
@@ -7023,6 +7024,8 @@ static int ftrace_process_locs(struct module *mod,
	if (!count)
		return 0;

	pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE);

	/*
	 * Sorting mcount in vmlinux at build time depend on
	 * CONFIG_BUILDTIME_MCOUNT_SORT, while mcount loc in
@@ -7067,7 +7070,9 @@ static int ftrace_process_locs(struct module *mod,
	pg = start_pg;
	while (p < end) {
		unsigned long end_offset;
		addr = ftrace_call_adjust(*p++);

		addr = *p++;

		/*
		 * Some architecture linkers will pad between
		 * the different mcount_loc sections of different
@@ -7079,6 +7084,19 @@ static int ftrace_process_locs(struct module *mod,
			continue;
		}

		/*
		 * If this is core kernel, make sure the address is in core
		 * or inittext, as weak functions get zeroed and KASLR can
		 * move them to something other than zero. It just will not
		 * move it to an area where kernel text is.
		 */
		if (!mod && !(is_kernel_text(addr) || is_kernel_inittext(addr))) {
			skipped++;
			continue;
		}

		addr = ftrace_call_adjust(addr);

		end_offset = (pg->index+1) * sizeof(pg->records[0]);
		if (end_offset > PAGE_SIZE << pg->order) {
			/* We should have allocated enough */
@@ -7118,11 +7136,41 @@ static int ftrace_process_locs(struct module *mod,

	/* We should have used all pages unless we skipped some */
	if (pg_unuse) {
		WARN_ON(!skipped);
		unsigned long pg_remaining, remaining = 0;
		unsigned long skip;

		/* Count the number of entries unused and compare it to skipped. */
		pg_remaining = (ENTRIES_PER_PAGE << pg->order) - pg->index;

		if (!WARN(skipped < pg_remaining, "Extra allocated pages for ftrace")) {

			skip = skipped - pg_remaining;

			for (pg = pg_unuse; pg; pg = pg->next)
				remaining += 1 << pg->order;

			pages -= remaining;

			skip = DIV_ROUND_UP(skip, ENTRIES_PER_PAGE);

			/*
			 * Check to see if the number of pages remaining would
			 * just fit the number of entries skipped.
			 */
			WARN(skip != remaining, "Extra allocated pages for ftrace: %lu with %lu skipped",
			     remaining, skipped);
		}
		/* Need to synchronize with ftrace_location_range() */
		synchronize_rcu();
		ftrace_free_pages(pg_unuse);
	}

	if (!mod) {
		count -= skipped;
		pr_info("ftrace: allocating %ld entries in %ld pages\n",
			count, pages);
	}

	return ret;
}

@@ -7768,9 +7816,6 @@ void __init ftrace_init(void)
		goto failed;
	}

	pr_info("ftrace: allocating %ld entries in %ld pages\n",
		count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE));

	ret = ftrace_process_locs(NULL,
				  __start_mcount_loc,
				  __stop_mcount_loc);
+3 −1
Original line number Diff line number Diff line
@@ -173,12 +173,14 @@ mksysmap()

sorttable()
{
	${objtree}/scripts/sorttable ${1}
	${NM} -S ${1} > .tmp_vmlinux.nm-sort
	${objtree}/scripts/sorttable -s .tmp_vmlinux.nm-sort ${1}
}

cleanup()
{
	rm -f .btf.*
	rm -f .tmp_vmlinux.nm-sort
	rm -f System.map
	rm -f vmlinux
	rm -f vmlinux.map
+403 −8
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
@@ -79,10 +80,16 @@ typedef union {
	Elf64_Sym	e64;
} Elf_Sym;

typedef union {
	Elf32_Rela	e32;
	Elf64_Rela	e64;
} Elf_Rela;

static uint32_t (*r)(const uint32_t *);
static uint16_t (*r2)(const uint16_t *);
static uint64_t (*r8)(const uint64_t *);
static void (*w)(uint32_t, uint32_t *);
static void (*w8)(uint64_t, uint64_t *);
typedef void (*table_sort_t)(char *, int);

static struct elf_funcs {
@@ -102,6 +109,10 @@ static struct elf_funcs {
	uint32_t (*sym_name)(Elf_Sym *sym);
	uint64_t (*sym_value)(Elf_Sym *sym);
	uint16_t (*sym_shndx)(Elf_Sym *sym);
	uint64_t (*rela_offset)(Elf_Rela *rela);
	uint64_t (*rela_info)(Elf_Rela *rela);
	uint64_t (*rela_addend)(Elf_Rela *rela);
	void (*rela_write_addend)(Elf_Rela *rela, uint64_t val);
} e;

static uint64_t ehdr64_shoff(Elf_Ehdr *ehdr)
@@ -262,6 +273,38 @@ SYM_ADDR(value)
SYM_WORD(name)
SYM_HALF(shndx)

#define __maybe_unused			__attribute__((__unused__))

#define RELA_ADDR(fn_name)					\
static uint64_t rela64_##fn_name(Elf_Rela *rela)		\
{								\
	return r8((uint64_t *)&rela->e64.r_##fn_name);		\
}								\
								\
static uint64_t rela32_##fn_name(Elf_Rela *rela)		\
{								\
	return r((uint32_t *)&rela->e32.r_##fn_name);		\
}								\
								\
static uint64_t __maybe_unused rela_##fn_name(Elf_Rela *rela)	\
{								\
	return e.rela_##fn_name(rela);				\
}

RELA_ADDR(offset)
RELA_ADDR(info)
RELA_ADDR(addend)

static void rela64_write_addend(Elf_Rela *rela, uint64_t val)
{
	w8(val, (uint64_t *)&rela->e64.r_addend);
}

static void rela32_write_addend(Elf_Rela *rela, uint64_t val)
{
	w(val, (uint32_t *)&rela->e32.r_addend);
}

/*
 * Get the whole file as a programming convenience in order to avoid
 * malloc+lseek+read+free of many pieces.  If successful, then mmap
@@ -341,6 +384,16 @@ static void wle(uint32_t val, uint32_t *x)
	put_unaligned_le32(val, x);
}

static void w8be(uint64_t val, uint64_t *x)
{
	put_unaligned_be64(val, x);
}

static void w8le(uint64_t val, uint64_t *x)
{
	put_unaligned_le64(val, x);
}

/*
 * Move reserved section indices SHN_LORESERVE..SHN_HIRESERVE out of
 * the way to -256..-1, to avoid conflicting with real section
@@ -398,13 +451,12 @@ static inline void *get_index(void *start, int entsize, int index)
static int extable_ent_size;
static int long_size;

#define ERRSTR_MAXSZ	256

#ifdef UNWINDER_ORC_ENABLED
/* ORC unwinder only support X86_64 */
#include <asm/orc_types.h>

#define ERRSTR_MAXSZ	256

static char g_err[ERRSTR_MAXSZ];
static int *g_orc_ip_table;
static struct orc_entry *g_orc_table;
@@ -499,7 +551,136 @@ static void *sort_orctable(void *arg)
#endif

#ifdef MCOUNT_SORT_ENABLED

static int compare_values_64(const void *a, const void *b)
{
	uint64_t av = *(uint64_t *)a;
	uint64_t bv = *(uint64_t *)b;

	if (av < bv)
		return -1;
	return av > bv;
}

static int compare_values_32(const void *a, const void *b)
{
	uint32_t av = *(uint32_t *)a;
	uint32_t bv = *(uint32_t *)b;

	if (av < bv)
		return -1;
	return av > bv;
}

static int (*compare_values)(const void *a, const void *b);

/* Only used for sorting mcount table */
static void rela_write_addend(Elf_Rela *rela, uint64_t val)
{
	e.rela_write_addend(rela, val);
}

struct func_info {
	uint64_t	addr;
	uint64_t	size;
};

/* List of functions created by: nm -S vmlinux */
static struct func_info *function_list;
static int function_list_size;

/* Allocate functions in 1k blocks */
#define FUNC_BLK_SIZE	1024
#define FUNC_BLK_MASK	(FUNC_BLK_SIZE - 1)

static int add_field(uint64_t addr, uint64_t size)
{
	struct func_info *fi;
	int fsize = function_list_size;

	if (!(fsize & FUNC_BLK_MASK)) {
		fsize += FUNC_BLK_SIZE;
		fi = realloc(function_list, fsize * sizeof(struct func_info));
		if (!fi)
			return -1;
		function_list = fi;
	}
	fi = &function_list[function_list_size++];
	fi->addr = addr;
	fi->size = size;
	return 0;
}

/* Used for when mcount/fentry is before the function entry */
static int before_func;

/* Only return match if the address lies inside the function size */
static int cmp_func_addr(const void *K, const void *A)
{
	uint64_t key = *(const uint64_t *)K;
	const struct func_info *a = A;

	if (key + before_func < a->addr)
		return -1;
	return key >= a->addr + a->size;
}

/* Find the function in function list that is bounded by the function size */
static int find_func(uint64_t key)
{
	return bsearch(&key, function_list, function_list_size,
		       sizeof(struct func_info), cmp_func_addr) != NULL;
}

static int cmp_funcs(const void *A, const void *B)
{
	const struct func_info *a = A;
	const struct func_info *b = B;

	if (a->addr < b->addr)
		return -1;
	return a->addr > b->addr;
}

static int parse_symbols(const char *fname)
{
	FILE *fp;
	char addr_str[20]; /* Only need 17, but round up to next int size */
	char size_str[20];
	char type;

	fp = fopen(fname, "r");
	if (!fp) {
		perror(fname);
		return -1;
	}

	while (fscanf(fp, "%16s %16s %c %*s\n", addr_str, size_str, &type) == 3) {
		uint64_t addr;
		uint64_t size;

		/* Only care about functions */
		if (type != 't' && type != 'T' && type != 'W')
			continue;

		addr = strtoull(addr_str, NULL, 16);
		size = strtoull(size_str, NULL, 16);
		if (add_field(addr, size) < 0)
			return -1;
	}
	fclose(fp);

	qsort(function_list, function_list_size, sizeof(struct func_info), cmp_funcs);

	return 0;
}

static pthread_t mcount_sort_thread;
static bool sort_reloc;

static long rela_type;

static char m_err[ERRSTR_MAXSZ];

struct elf_mcount_loc {
	Elf_Ehdr *ehdr;
@@ -508,17 +689,197 @@ struct elf_mcount_loc {
	uint64_t stop_mcount_loc;
};

/* Fill the array with the content of the relocs */
static int fill_relocs(void *ptr, uint64_t size, Elf_Ehdr *ehdr, uint64_t start_loc)
{
	Elf_Shdr *shdr_start;
	Elf_Rela *rel;
	unsigned int shnum;
	unsigned int count = 0;
	int shentsize;
	void *array_end = ptr + size;

	shdr_start = (Elf_Shdr *)((char *)ehdr + ehdr_shoff(ehdr));
	shentsize = ehdr_shentsize(ehdr);

	shnum = ehdr_shnum(ehdr);
	if (shnum == SHN_UNDEF)
		shnum = shdr_size(shdr_start);

	for (int i = 0; i < shnum; i++) {
		Elf_Shdr *shdr = get_index(shdr_start, shentsize, i);
		void *end;

		if (shdr_type(shdr) != SHT_RELA)
			continue;

		rel = (void *)ehdr + shdr_offset(shdr);
		end = (void *)rel + shdr_size(shdr);

		for (; (void *)rel < end; rel = (void *)rel + shdr_entsize(shdr)) {
			uint64_t offset = rela_offset(rel);

			if (offset >= start_loc && offset < start_loc + size) {
				if (ptr + long_size > array_end) {
					snprintf(m_err, ERRSTR_MAXSZ,
						 "Too many relocations");
					return -1;
				}

				/* Make sure this has the correct type */
				if (rela_info(rel) != rela_type) {
					snprintf(m_err, ERRSTR_MAXSZ,
						"rela has type %lx but expected %lx\n",
						(long)rela_info(rel), rela_type);
					return -1;
				}

				if (long_size == 4)
					*(uint32_t *)ptr = rela_addend(rel);
				else
					*(uint64_t *)ptr = rela_addend(rel);
				ptr += long_size;
				count++;
			}
		}
	}
	return count;
}

/* Put the sorted vals back into the relocation elements */
static void replace_relocs(void *ptr, uint64_t size, Elf_Ehdr *ehdr, uint64_t start_loc)
{
	Elf_Shdr *shdr_start;
	Elf_Rela *rel;
	unsigned int shnum;
	int shentsize;

	shdr_start = (Elf_Shdr *)((char *)ehdr + ehdr_shoff(ehdr));
	shentsize = ehdr_shentsize(ehdr);

	shnum = ehdr_shnum(ehdr);
	if (shnum == SHN_UNDEF)
		shnum = shdr_size(shdr_start);

	for (int i = 0; i < shnum; i++) {
		Elf_Shdr *shdr = get_index(shdr_start, shentsize, i);
		void *end;

		if (shdr_type(shdr) != SHT_RELA)
			continue;

		rel = (void *)ehdr + shdr_offset(shdr);
		end = (void *)rel + shdr_size(shdr);

		for (; (void *)rel < end; rel = (void *)rel + shdr_entsize(shdr)) {
			uint64_t offset = rela_offset(rel);

			if (offset >= start_loc && offset < start_loc + size) {
				if (long_size == 4)
					rela_write_addend(rel, *(uint32_t *)ptr);
				else
					rela_write_addend(rel, *(uint64_t *)ptr);
				ptr += long_size;
			}
		}
	}
}

static int fill_addrs(void *ptr, uint64_t size, void *addrs)
{
	void *end = ptr + size;
	int count = 0;

	for (; ptr < end; ptr += long_size, addrs += long_size, count++) {
		if (long_size == 4)
			*(uint32_t *)ptr = r(addrs);
		else
			*(uint64_t *)ptr = r8(addrs);
	}
	return count;
}

static void replace_addrs(void *ptr, uint64_t size, void *addrs)
{
	void *end = ptr + size;

	for (; ptr < end; ptr += long_size, addrs += long_size) {
		if (long_size == 4)
			w(*(uint32_t *)ptr, addrs);
		else
			w8(*(uint64_t *)ptr, addrs);
	}
}

/* Sort the addresses stored between __start_mcount_loc to __stop_mcount_loc in vmlinux */
static void *sort_mcount_loc(void *arg)
{
	struct elf_mcount_loc *emloc = (struct elf_mcount_loc *)arg;
	uint64_t offset = emloc->start_mcount_loc - shdr_addr(emloc->init_data_sec)
					+ shdr_offset(emloc->init_data_sec);
	uint64_t count = emloc->stop_mcount_loc - emloc->start_mcount_loc;
	uint64_t size = emloc->stop_mcount_loc - emloc->start_mcount_loc;
	unsigned char *start_loc = (void *)emloc->ehdr + offset;
	Elf_Ehdr *ehdr = emloc->ehdr;
	void *e_msg = NULL;
	void *vals;
	int count;

	qsort(start_loc, count/long_size, long_size, compare_extable);
	return NULL;
	vals = malloc(long_size * size);
	if (!vals) {
		snprintf(m_err, ERRSTR_MAXSZ, "Failed to allocate sort array");
		pthread_exit(m_err);
	}

	if (sort_reloc) {
		count = fill_relocs(vals, size, ehdr, emloc->start_mcount_loc);
		/* gcc may use relocs to save the addresses, but clang does not. */
		if (!count) {
			count = fill_addrs(vals, size, start_loc);
			sort_reloc = 0;
		}
	} else
		count = fill_addrs(vals, size, start_loc);

	if (count < 0) {
		e_msg = m_err;
		goto out;
	}

	if (count != size / long_size) {
		snprintf(m_err, ERRSTR_MAXSZ, "Expected %u mcount elements but found %u\n",
			(int)(size / long_size), count);
		e_msg = m_err;
		goto out;
	}

	/* zero out any locations not found by function list */
	if (function_list_size) {
		for (void *ptr = vals; ptr < vals + size; ptr += long_size) {
			uint64_t key;

			key = long_size == 4 ? r((uint32_t *)ptr) : r8((uint64_t *)ptr);
			if (!find_func(key)) {
				if (long_size == 4)
					*(uint32_t *)ptr = 0;
				else
					*(uint64_t *)ptr = 0;
			}
		}
	}

	compare_values = long_size == 4 ? compare_values_32 : compare_values_64;

	qsort(vals, count, long_size, compare_values);

	if (sort_reloc)
		replace_relocs(vals, size, ehdr, emloc->start_mcount_loc);
	else
		replace_addrs(vals, size, start_loc);

out:
	free(vals);

	pthread_exit(e_msg);
}

/* Get the address of __start_mcount_loc and __stop_mcount_loc in System.map */
@@ -555,6 +916,8 @@ static void get_mcount_loc(struct elf_mcount_loc *emloc, Elf_Shdr *symtab_sec,
		return;
	}
}
#else /* MCOUNT_SORT_ENABLED */
static inline int parse_symbols(const char *fname) { return 0; }
#endif

static int do_sort(Elf_Ehdr *ehdr,
@@ -866,12 +1229,14 @@ static int do_file(char const *const fname, void *addr)
		r2	= r2le;
		r8	= r8le;
		w	= wle;
		w8	= w8le;
		break;
	case ELFDATA2MSB:
		r	= rbe;
		r2	= r2be;
		r8	= r8be;
		w	= wbe;
		w8	= w8be;
		break;
	default:
		fprintf(stderr, "unrecognized ELF data encoding %d: %s\n",
@@ -887,8 +1252,15 @@ static int do_file(char const *const fname, void *addr)
	}

	switch (r2(&ehdr->e32.e_machine)) {
	case EM_386:
	case EM_AARCH64:
#ifdef MCOUNT_SORT_ENABLED
		sort_reloc = true;
		rela_type = 0x403;
		/* arm64 uses patchable function entry placing before function */
		before_func = 8;
#endif
		/* fallthrough */
	case EM_386:
	case EM_LOONGARCH:
	case EM_RISCV:
	case EM_S390:
@@ -932,6 +1304,10 @@ static int do_file(char const *const fname, void *addr)
			.sym_name		= sym32_name,
			.sym_value		= sym32_value,
			.sym_shndx		= sym32_shndx,
			.rela_offset		= rela32_offset,
			.rela_info		= rela32_info,
			.rela_addend		= rela32_addend,
			.rela_write_addend	= rela32_write_addend,
		};

		e = efuncs;
@@ -965,6 +1341,10 @@ static int do_file(char const *const fname, void *addr)
			.sym_name		= sym64_name,
			.sym_value		= sym64_value,
			.sym_shndx		= sym64_shndx,
			.rela_offset		= rela64_offset,
			.rela_info		= rela64_info,
			.rela_addend		= rela64_addend,
			.rela_write_addend	= rela64_write_addend,
		};

		e = efuncs;
@@ -995,14 +1375,29 @@ int main(int argc, char *argv[])
	int i, n_error = 0;  /* gcc-4.3.0 false positive complaint */
	size_t size = 0;
	void *addr = NULL;
	int c;

	while ((c = getopt(argc, argv, "s:")) >= 0) {
		switch (c) {
		case 's':
			if (parse_symbols(optarg) < 0) {
				fprintf(stderr, "Could not parse %s\n", optarg);
				return -1;
			}
			break;
		default:
			fprintf(stderr, "usage: sorttable [-s nm-file] vmlinux...\n");
			return 0;
		}
	}

	if (argc < 2) {
	if ((argc - optind) < 1) {
		fprintf(stderr, "usage: sorttable vmlinux...\n");
		return 0;
	}

	/* Process each file in turn, allowing deep failure. */
	for (i = 1; i < argc; i++) {
	for (i = optind; i < argc; i++) {
		addr = mmap_file(argv[i], &size);
		if (!addr) {
			++n_error;