Commit 10e9510a, authored by Sami Tolvanen, committed by Masahiro Yamada
Browse files

gendwarfksyms: Add a separate pass to resolve FQNs



Using dwarf_getscopes_die to resolve fully-qualified names turns out to
be rather slow, and also results in duplicate scopes being processed,
which doesn't help. Simply adding an extra pass to resolve names for all
DIEs before processing exports is noticeably faster.

For the object files with the most exports in a defconfig+Rust build,
the performance improvement is consistently >50%:

rust/bindings.o: 1038 exports
    before: 9.5980 +- 0.0183 seconds time elapsed  ( +-  0.19% )
     after: 4.3116 +- 0.0287 seconds time elapsed  ( +-  0.67% )

rust/core.o: 424 exports
    before: 5.3584 +- 0.0204 seconds time elapsed  ( +-  0.38% )
     after: 0.05348 +- 0.00129 seconds time elapsed  ( +-  2.42% )
            ^ Not a mistake.

net/core/dev.o: 190 exports
    before: 9.0507 +- 0.0297 seconds time elapsed  ( +-  0.33% )
     after: 3.2882 +- 0.0165 seconds time elapsed  ( +-  0.50% )

rust/kernel.o: 129 exports
    before: 6.8571 +- 0.0317 seconds time elapsed  ( +-  0.46% )
     after: 2.9096 +- 0.0316 seconds time elapsed  ( +-  1.09% )

net/core/skbuff.o: 120 exports
    before: 5.4805 +- 0.0291 seconds time elapsed  ( +-  0.53% )
     after: 2.0339 +- 0.0231 seconds time elapsed  ( +-  1.14% )

drivers/gpu/drm/display/drm_dp_helper.o: 101 exports
    before: 1.7877 +- 0.0187 seconds time elapsed  ( +-  1.05% )
     after: 0.69245 +- 0.00994 seconds time elapsed  ( +-  1.44% )

net/core/sock.o: 97 exports
    before: 5.8327 +- 0.0653 seconds time elapsed  ( +-  1.12% )
     after: 2.0784 +- 0.0291 seconds time elapsed  ( +-  1.40% )

drivers/net/phy/phy_device.o: 95 exports
    before: 3.0671 +- 0.0371 seconds time elapsed  ( +-  1.21% )
     after: 1.2127 +- 0.0207 seconds time elapsed  ( +-  1.70% )

drivers/pci/pci.o: 93 exports
    before: 1.1130 +- 0.0113 seconds time elapsed  ( +-  1.01% )
     after: 0.4848 +- 0.0127 seconds time elapsed  ( +-  2.63% )

kernel/sched/core.o: 83 exports
    before: 3.5092 +- 0.0223 seconds time elapsed  ( +-  0.64% )
     after: 1.1231 +- 0.0145 seconds time elapsed  ( +-  1.29% )

Overall, a defconfig+DWARF5 build with gendwarfksyms and Rust is 14.8%
faster with this patch applied on my test system. Without Rust, there's
still a 10.4% improvement in build time when gendwarfksyms is used.

Note that symbol versions are unchanged with this patch.

Suggested-by: Giuliano Procida <gprocida@google.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
parent 80e54e84
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@
#include <string.h>
#include "gendwarfksyms.h"

#define DIE_HASH_BITS 15
#define DIE_HASH_BITS 16

/* {die->addr, state} -> struct die * */
static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS);
+82 −70
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
 * Copyright (C) 2024 Google LLC
 */

#define _GNU_SOURCE
#include <assert.h>
#include <inttypes.h>
#include <stdarg.h>
@@ -193,79 +194,17 @@ static void process_fmt(struct die *cache, const char *fmt, ...)
	va_end(args);
}

#define MAX_FQN_SIZE 64

/* Get a fully qualified name from DWARF scopes */
static char *get_fqn(Dwarf_Die *die)
static void update_fqn(struct die *cache, Dwarf_Die *die)
{
	const char *list[MAX_FQN_SIZE];
	Dwarf_Die *scopes = NULL;
	bool has_name = false;
	char *fqn = NULL;
	char *p;
	int count = 0;
	int len = 0;
	int res;
	int i;

	res = checkp(dwarf_getscopes_die(die, &scopes));
	if (!res) {
		list[count] = get_name_attr(die);

		if (!list[count])
			return NULL;

		len += strlen(list[count]);
		count++;

		goto done;
	}

	for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) {
		if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit)
			continue;

		list[count] = get_name_attr(&scopes[i]);

		if (list[count]) {
			has_name = true;
		} else {
			list[count] = "<anonymous>";
			has_name = false;
		}

		len += strlen(list[count]);
		count++;

		if (i > 0) {
			list[count++] = "::";
			len += 2;
		}
	}

	free(scopes);

	if (count == MAX_FQN_SIZE)
		warn("increase MAX_FQN_SIZE: reached the maximum");
	struct die *fqn;

	/* Consider the DIE unnamed if the last scope doesn't have a name */
	if (!has_name)
		return NULL;
done:
	fqn = xmalloc(len + 1);
	*fqn = '\0';

	p = fqn;
	for (i = 0; i < count; i++)
		p = stpcpy(p, list[i]);

	return fqn;
	if (!cache->fqn) {
		if (!__die_map_get((uintptr_t)die->addr, DIE_FQN, &fqn) &&
		    *fqn->fqn)
			cache->fqn = xstrdup(fqn->fqn);
		else
			cache->fqn = "";
	}

static void update_fqn(struct die *cache, Dwarf_Die *die)
{
	if (!cache->fqn)
		cache->fqn = get_fqn(die) ?: "";
}

static void process_fqn(struct die *cache, Dwarf_Die *die)
@@ -1148,8 +1087,81 @@ static void process_symbol_ptr(struct symbol *sym, void *arg)
	cache_free(&state.expansion_cache);
}

/*
 * Resolve and cache the fully-qualified name (FQN) of @die, then walk its
 * children so they can build their own names on top of it.
 *
 * @parent: state of the enclosing scope, or NULL; only
 *          parent->expand.current_fqn is read here.
 * @unused: not referenced by this function.
 * @die:    the DIE being named.
 *
 * DIEs that already have a DIE_FQN entry in die_map are skipped. A DIE with
 * a name gets "<parent fqn>::<name>" (or just "<name>" when there is no
 * parent prefix) stored under DIE_FQN. Namespace, class and structure DIEs
 * additionally hand their name to their children as the next prefix; if
 * such a DIE has no name, "<anonymous>" is used in the prefix only and
 * nothing is cached for the DIE itself.
 *
 * Returns 0 on the paths visible here.
 * NOTE(review): check() is defined elsewhere and may return early on
 * failure -- confirm before relying on the trailing free().
 */
static int resolve_fqns(struct state *parent, struct die *unused,
			Dwarf_Die *die)
{
	const char *parent_fqn = NULL;
	struct state child_state;
	struct die *entry;
	const char *die_name;
	char *scoped = NULL;
	char *resolved = "";
	bool is_scope;

	/* Nothing to do if this DIE's name was resolved earlier. */
	if (__die_map_get((uintptr_t)die->addr, DIE_FQN, &entry) == 0)
		return 0;

	/* Only these tags open a scope whose name prefixes its children. */
	switch (dwarf_tag(die)) {
	case DW_TAG_namespace:
	case DW_TAG_class_type:
	case DW_TAG_structure_type:
		is_scope = true;
		break;
	default:
		is_scope = false;
		break;
	}

	child_state.expand.current_fqn = NULL;
	die_name = get_name_attr(die);

	if (parent)
		parent_fqn = parent->expand.current_fqn;

	if (parent_fqn && (is_scope || die_name)) {
		/*
		 * Build "<parent>::<name>". The string serves as this DIE's
		 * fqn when it is named, and as the children's prefix when
		 * the DIE opens a scope.
		 */
		if (asprintf(&scoped, "%s::%s", parent_fqn,
			     die_name ? die_name : "<anonymous>") < 0)
			error("asprintf failed");

		if (is_scope)
			child_state.expand.current_fqn = scoped;

		if (die_name) {
			/*
			 * A named DIE keeps the string as its fqn; clearing
			 * scoped keeps the free() below from releasing it.
			 */
			resolved = scoped;
			scoped = NULL;
		}
	} else if (die_name) {
		/* No enclosing prefix: the bare name is the whole fqn. */
		resolved = xstrdup(die_name);

		if (is_scope)
			child_state.expand.current_fqn = resolved;
	}

	/* Cache only non-empty names; ownership moves to die_map. */
	if (resolved[0] != '\0') {
		entry = die_map_get(die, DIE_FQN);
		entry->fqn = resolved;
		entry->state = DIE_FQN;
	}

	/* Recurse into every child DIE with this DIE's scope state. */
	check(process_die_container(&child_state, NULL, die, resolve_fqns,
				    match_all));

	/* Non-NULL only for an unnamed scope's temporary prefix. */
	free(scoped);
	return 0;
}

void process_cu(Dwarf_Die *cudie)
{
	check(process_die_container(NULL, NULL, cudie, resolve_fqns,
				    match_all));

	check(process_die_container(NULL, NULL, cudie, process_exported_symbols,
				    match_all));

+2 −0
Original line number Diff line number Diff line
@@ -139,6 +139,7 @@ void symbol_free(void);

enum die_state {
	DIE_INCOMPLETE,
	DIE_FQN,
	DIE_UNEXPANDED,
	DIE_COMPLETE,
	DIE_SYMBOL,
@@ -170,6 +171,7 @@ static inline const char *die_state_name(enum die_state state)
{
	switch (state) {
	CASE_CONST_TO_STR(DIE_INCOMPLETE)
	CASE_CONST_TO_STR(DIE_FQN)
	CASE_CONST_TO_STR(DIE_UNEXPANDED)
	CASE_CONST_TO_STR(DIE_COMPLETE)
	CASE_CONST_TO_STR(DIE_SYMBOL)
+1 −1
Original line number Diff line number Diff line
@@ -248,7 +248,7 @@ static char *get_type_name(struct die *cache)
		warn("found incomplete cache entry: %p", cache);
		return NULL;
	}
	if (cache->state == DIE_SYMBOL)
	if (cache->state == DIE_SYMBOL || cache->state == DIE_FQN)
		return NULL;
	if (!cache->fqn || !*cache->fqn)
		return NULL;