tools lib: Adopt list_sort() from the kernel sources (92ec3cc9) · Commits · git / linux-net

tools/include/linux/list_sort.h

0 → 100644

+14 −0

Original line number	Diff line number	Diff line
		/* SPDX-License-Identifier: GPL-2.0 */
		#ifndef _LINUX_LIST_SORT_H
		#define _LINUX_LIST_SORT_H

		#include <linux/types.h>

		struct list_head;

		/*
		 * Comparison callback used by list_sort().
		 *
		 * The first argument is the caller's opaque @priv pointer, handed
		 * through unchanged; the second and third are the two list nodes
		 * being compared and must not be NULL. Return > 0 if the first node
		 * should sort after the second, and <= 0 otherwise.
		 */
		typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *,
				const struct list_head *, const struct list_head *);

		/*
		 * Sort the list anchored at @head in place using @cmp.
		 * @head and @cmp must not be NULL; @priv is only passed on to @cmp.
		 */
		__attribute__((nonnull(2,3)))
		void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp);
		#endif

tools/lib/list_sort.c

0 → 100644

+252 −0

Original line number	Diff line number	Diff line
		// SPDX-License-Identifier: GPL-2.0
		#include <linux/kernel.h>
		#include <linux/compiler.h>
		#include <linux/export.h>
		#include <linux/string.h>
		#include <linux/list_sort.h>
		#include <linux/list.h>

		/*
		 * Merge two sorted, null-terminated, singly-linked sublists @a and @b
		 * into one, using @cmp (called with @priv) to order the elements.
		 *
		 * Returns a list organized in an intermediate format suited
		 * to chaining of merge() calls: null-terminated, no reserved or
		 * sentinel head node, "prev" links not maintained.
		 *
		 * @cmp, @a and @b must not be NULL.
		 */
		__attribute__((nonnull(2,3,4)))
		static struct list_head *merge(void *priv, list_cmp_func_t cmp,
						struct list_head *a, struct list_head *b)
		{
			/* tail always points at the link that receives the next node */
			struct list_head *head, **tail = &head;

			for (;;) {
				/* if equal, take 'a' -- important for sort stability */
				if (cmp(priv, a, b) <= 0) {
					*tail = a;
					tail = &a->next;
					a = a->next;
					if (!a) {
						/* 'a' exhausted: append the rest of 'b' */
						*tail = b;
						break;
					}
				} else {
					*tail = b;
					tail = &b->next;
					b = b->next;
					if (!b) {
						/* 'b' exhausted: append the rest of 'a' */
						*tail = a;
						break;
					}
				}
			}
			return head;
		}

		/*
		 * Combine final list merge with restoration of standard doubly-linked
		 * list structure. This approach duplicates code from merge(), but
		 * runs faster than the tidier alternatives of either a separate final
		 * prev-link restoration pass, or maintaining the prev links
		 * throughout.
		 *
		 * @head is the list anchor the merged result is linked back onto;
		 * @a and @b are the two null-terminated sublists to merge.
		 * @cmp, @head, @a and @b must not be NULL.
		 */
		__attribute__((nonnull(2,3,4,5)))
		static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head,
					struct list_head *a, struct list_head *b)
		{
			struct list_head *tail = head;
			u8 count = 0;

			for (;;) {
				/* if equal, take 'a' -- important for sort stability */
				if (cmp(priv, a, b) <= 0) {
					tail->next = a;
					a->prev = tail;
					tail = a;
					a = a->next;
					if (!a)
						break;
				} else {
					tail->next = b;
					b->prev = tail;
					tail = b;
					b = b->next;
					if (!b) {
						/* let 'b' name whichever remainder is left */
						b = a;
						break;
					}
				}
			}

			/* Finish linking remainder of list b on to tail */
			tail->next = b;
			do {
				/*
				 * If the merge is highly unbalanced (e.g. the input is
				 * already sorted), this loop may run many iterations.
				 * Continue callbacks to the client even though no
				 * element comparison is needed, so the client's cmp()
				 * routine can invoke cond_resched() periodically.
				 *
				 * count is a u8, so the increment wraps to zero and the
				 * callback fires once every 256 iterations.
				 */
				if (unlikely(!++count))
					cmp(priv, b, b);
				b->prev = tail;
				tail = b;
				b = b->next;
			} while (b);

			/* And the final links to make a circular doubly-linked list */
			tail->next = head;
			head->prev = tail;
		}

		/**
		* list_sort - sort a list
		* @priv: private data, opaque to list_sort(), passed to @cmp
		* @head: the list to sort
		* @cmp: the elements comparison function
		*
		* The comparison function @cmp must return > 0 if @a should sort after
		* @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should
		* sort before @b or their original order should be preserved. It is
		* always called with the element that came first in the input in @a,
		* and list_sort is a stable sort, so it is not necessary to distinguish
		* the @a < @b and @a == @b cases.
		*
		* This is compatible with two styles of @cmp function:
		* - The traditional style which returns <0 / =0 / >0, or
		* - Returning a boolean 0/1.
		* The latter offers a chance to save a few cycles in the comparison
		* (which is used by e.g. plug_ctx_cmp() in block/blk-mq.c).
		*
		* A good way to write a multi-word comparison is::
		*
		* if (a->high != b->high)
		* return a->high > b->high;
		* if (a->middle != b->middle)
		* return a->middle > b->middle;
		* return a->low > b->low;
		*
		*
		* This mergesort is as eager as possible while always performing at least
		* 2:1 balanced merges. Given two pending sublists of size 2^k, they are
		* merged to a size-2^(k+1) list as soon as we have 2^k following elements.
		*
		* Thus, it will avoid cache thrashing as long as 3*2^k elements can
		* fit into the cache. Not quite as good as a fully-eager bottom-up
		* mergesort, but it does use 0.2*n fewer comparisons, so is faster in
		* the common case that everything fits into L1.
		*
		*
		* The merging is controlled by "count", the number of elements in the
		* pending lists. This is beautifully simple code, but rather subtle.
		*
		* Each time we increment "count", we set one bit (bit k) and clear
		* bits k-1 .. 0. Each time this happens (except the very first time
		* for each bit, when count increments to 2^k), we merge two lists of
		* size 2^k into one list of size 2^(k+1).
		*
		* This merge happens exactly when the count reaches an odd multiple of
		* 2^k, which is when we have 2^k elements pending in smaller lists,
		* so it's safe to merge away two lists of size 2^k.
		*
		* After this happens twice, we have created two lists of size 2^(k+1),
		* which will be merged into a list of size 2^(k+2) before we create
		* a third list of size 2^(k+1), so there are never more than two pending.
		*
		* The number of pending lists of size 2^k is determined by the
		* state of bit k of "count" plus two extra pieces of information:
		*
		* - The state of bit k-1 (when k == 0, consider bit -1 always set), and
		* - Whether the higher-order bits are zero or non-zero (i.e.
		* is count >= 2^(k+1)).
		*
		* There are six states we distinguish. "x" represents some arbitrary
		* bits, and "y" represents some arbitrary non-zero bits:
		* 0: 00x: 0 pending of size 2^k; x pending of sizes < 2^k
		* 1: 01x: 0 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
		* 2: x10x: 0 pending of size 2^k; 2^k + x pending of sizes < 2^k
		* 3: x11x: 1 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
		* 4: y00x: 1 pending of size 2^k; 2^k + x pending of sizes < 2^k
		* 5: y01x: 2 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
		* (merge and loop back to state 2)
		*
		* We gain lists of size 2^k in the 2->3 and 4->5 transitions (because
		* bit k-1 is set while the more significant bits are non-zero) and
		* merge them away in the 5->2 transition. Note in particular that just
		* before the 5->2 transition, all lower-order bits are 11 (state 3),
		* so there is one list of each smaller size.
		*
		* When we reach the end of the input, we merge all the pending
		* lists, from smallest to largest. If you work through cases 2 to
		* 5 above, you can see that the number of elements we merge with a list
		* of size 2^k varies from 2^(k-1) (cases 3 and 5 when x == 0) to
		* 2^(k+1) - 1 (second merge of case 5 when x == 2^(k-1) - 1).
		*/
		__attribute__((nonnull(2,3)))
		void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp)
		{
			struct list_head *list = head->next, *pending = NULL;
			size_t count = 0;	/* Count of pending */

			if (list == head->prev)	/* Zero or one elements */
				return;

			/* Convert to a null-terminated singly-linked list. */
			head->prev->next = NULL;

			/*
			 * Data structure invariants:
			 * - All lists are singly linked and null-terminated; prev
			 *   pointers are not maintained.
			 * - pending is a prev-linked "list of lists" of sorted
			 *   sublists awaiting further merging.
			 * - Each of the sorted sublists is power-of-two in size.
			 * - Sublists are sorted by size and age, smallest & newest at front.
			 * - There are zero to two sublists of each size.
			 * - A pair of pending sublists are merged as soon as the number
			 *   of following pending elements equals their size (i.e.
			 *   each time count reaches an odd multiple of that size).
			 *   That ensures each later final merge will be at worst 2:1.
			 * - Each round consists of:
			 *   - Merging the two sublists selected by the highest bit
			 *     which flips when count is incremented, and
			 *   - Adding an element from the input as a size-1 sublist.
			 */
			do {
				size_t bits;
				struct list_head **tail = &pending;

				/* Find the least-significant clear bit in count */
				for (bits = count; bits & 1; bits >>= 1)
					tail = &(*tail)->prev;
				/* Do the indicated merge */
				if (likely(bits)) {
					struct list_head *a = *tail, *b = a->prev;

					a = merge(priv, cmp, b, a);
					/* Install the merged result in place of the inputs */
					a->prev = b->prev;
					*tail = a;
				}

				/* Move one element from input list to pending */
				list->prev = pending;
				pending = list;
				list = list->next;
				pending->next = NULL;
				count++;
			} while (list);

			/* End of input; merge together all the pending lists. */
			list = pending;
			pending = pending->prev;
			for (;;) {
				struct list_head *next = pending->prev;

				/* Stop with one sublist left, for merge_final() below */
				if (!next)
					break;
				list = merge(priv, cmp, pending, list);
				pending = next;
			}
			/* The final merge, rebuilding prev links */
			merge_final(priv, cmp, head, pending, list);
		}
		EXPORT_SYMBOL(list_sort);

tools/perf/MANIFEST

+1 −0

Original line number	Diff line number	Diff line
		@@ -17,6 +17,7 @@ tools/lib/symbol/kallsyms.c
		tools/lib/symbol/kallsyms.h
		tools/lib/find_bit.c
		tools/lib/bitmap.c
		tools/lib/list_sort.c
		tools/lib/str_error_r.c
		tools/lib/vsprintf.c
		tools/lib/zalloc.c

tools/perf/check-headers.sh

+2 −0

Original line number	Diff line number	Diff line
		@@ -26,6 +26,7 @@ include/vdso/bits.h
		include/linux/const.h
		include/vdso/const.h
		include/linux/hash.h
		include/linux/list_sort.h
		include/uapi/linux/hw_breakpoint.h
		arch/x86/include/asm/disabled-features.h
		arch/x86/include/asm/required-features.h
		@@ -150,6 +151,7 @@ check include/uapi/linux/mman.h '-I "^#include <$uapi/$*asm/mman.h>"'
		check include/linux/build_bug.h '-I "^#$ifndef\\|endif$$ \/\/$* static_assert$"'
		check include/linux/ctype.h '-I "isdigit("'
		check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B'
		check lib/list_sort.c '-I "^#include <linux/bug.h>"'

		# diff non-symmetric files
		check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl

tools/perf/util/Build

+5 −0

Original line number	Diff line number	Diff line
		@@ -138,6 +138,7 @@ perf-y += expr.o
		perf-y += branch.o
		perf-y += mem2node.o
		perf-y += clockid.o
		perf-y += list_sort.o

		perf-$(CONFIG_LIBBPF) += bpf-loader.o
		perf-$(CONFIG_LIBBPF) += bpf_map.o
		@@ -315,3 +316,7 @@ $(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
		$(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE
		$(call rule_mkdir)
		$(call if_changed_dep,cc_o_c)

		# util/list_sort.o is built from the shared ../lib/list_sort.c source
		# rather than from a file under util/ (presumably compiled by the
		# cc_o_c rule -- confirm against the build macros).
		$(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE
		$(call rule_mkdir)
		$(call if_changed_dep,cc_o_c)