Commit 212fcf36 authored by Jason Gunthorpe, committed by Joerg Roedel
Browse files

iommu/pages: Move from struct page to struct ioptdesc and folio



This brings the iommu page table allocator into the modern world of having
its own private page descriptor and not re-using fields from struct page
for its own purpose. It follows the basic pattern of struct ptdesc which
did this transformation for the CPU page table allocator.

Currently iommu-pages is pretty basic so this isn't a huge benefit,
however I see a coming need for features that the CPU allocator has, like
sub-PAGE_SIZE allocations and RCU freeing. This provides the base
infrastructure to implement those cleanly.

Remove numa_node_id() calls from the inlines and instead use NUMA_NO_NODE
which will get switched to numa_mem_id(), which seems to be the right ID
to use for memory allocations.

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/13-v4-c8663abbb606+3f7-iommu_pages_jgg@nvidia.com


Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent 27bc9f71
Loading
Loading
Loading
Loading
+38 −16
Original line number Diff line number Diff line
@@ -7,6 +7,21 @@
#include <linux/gfp.h>
#include <linux/mm.h>

/*
 * struct ioptdesc overlays struct page, so every member that aliases a
 * struct page member has to sit at exactly the same offset.
 * IOPTDESC_MATCH(pg_elm, elm) turns that requirement into a build-time
 * check for one pair: @pg_elm is the struct page member, @elm the
 * struct ioptdesc member. The helper is #undef'd once all pairs are checked.
 */
#define IOPTDESC_MATCH(pg_elm, elm)                    \
	static_assert(offsetof(struct page, pg_elm) == \
		      offsetof(struct ioptdesc, elm))
IOPTDESC_MATCH(flags, __page_flags);
IOPTDESC_MATCH(lru, iopt_freelist_elm); /* Ensure bit 0 is clear */
IOPTDESC_MATCH(mapping, __page_mapping);
IOPTDESC_MATCH(private, _private);
IOPTDESC_MATCH(page_type, __page_type);
IOPTDESC_MATCH(_refcount, __page_refcount);
/* memcg_data only exists in the descriptor when CONFIG_MEMCG is set */
#ifdef CONFIG_MEMCG
IOPTDESC_MATCH(memcg_data, memcg_data);
#endif
#undef IOPTDESC_MATCH
/* The descriptor must not be larger than the struct page it overlays */
static_assert(sizeof(struct ioptdesc) <= sizeof(struct page));

/**
 * iommu_alloc_pages_node - Allocate a zeroed page of a given order from
 *                          specific NUMA node
@@ -20,10 +35,17 @@
void *iommu_alloc_pages_node(int nid, gfp_t gfp, unsigned int order)
{
	const unsigned long pgcnt = 1UL << order;
	struct page *page;
	struct folio *folio;

	page = alloc_pages_node(nid, gfp | __GFP_ZERO | __GFP_COMP, order);
	if (unlikely(!page))
	/*
	 * __folio_alloc_node() does not handle NUMA_NO_NODE like
	 * alloc_pages_node() did.
	 */
	if (nid == NUMA_NO_NODE)
		nid = numa_mem_id();

	folio = __folio_alloc_node(gfp | __GFP_ZERO, order, nid);
	if (unlikely(!folio))
		return NULL;

	/*
@@ -35,21 +57,21 @@ void *iommu_alloc_pages_node(int nid, gfp_t gfp, unsigned int order)
	 * This is necessary for the proper accounting as IOMMU state can be
	 * rather large, i.e. multiple gigabytes in size.
	 */
	mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, pgcnt);
	mod_lruvec_page_state(page, NR_SECONDARY_PAGETABLE, pgcnt);
	mod_node_page_state(folio_pgdat(folio), NR_IOMMU_PAGES, pgcnt);
	lruvec_stat_mod_folio(folio, NR_SECONDARY_PAGETABLE, pgcnt);

	return page_address(page);
	return folio_address(folio);
}
EXPORT_SYMBOL_GPL(iommu_alloc_pages_node);

static void __iommu_free_page(struct page *page)
static void __iommu_free_desc(struct ioptdesc *iopt)
{
	unsigned int order = folio_order(page_folio(page));
	const unsigned long pgcnt = 1UL << order;
	struct folio *folio = ioptdesc_folio(iopt);
	const unsigned long pgcnt = 1UL << folio_order(folio);

	mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, -pgcnt);
	mod_lruvec_page_state(page, NR_SECONDARY_PAGETABLE, -pgcnt);
	put_page(page);
	mod_node_page_state(folio_pgdat(folio), NR_IOMMU_PAGES, -pgcnt);
	lruvec_stat_mod_folio(folio, NR_SECONDARY_PAGETABLE, -pgcnt);
	folio_put(folio);
}

/**
@@ -62,7 +84,7 @@ void iommu_free_pages(void *virt)
{
	if (!virt)
		return;
	__iommu_free_page(virt_to_page(virt));
	__iommu_free_desc(virt_to_ioptdesc(virt));
}
EXPORT_SYMBOL_GPL(iommu_free_pages);

@@ -74,9 +96,9 @@ EXPORT_SYMBOL_GPL(iommu_free_pages);
 */
void iommu_put_pages_list(struct iommu_pages_list *list)
{
	struct page *p, *tmp;
	struct ioptdesc *iopt, *tmp;

	list_for_each_entry_safe(p, tmp, &list->pages, lru)
		__iommu_free_page(p);
	list_for_each_entry_safe(iopt, tmp, &list->pages, iopt_freelist_elm)
		__iommu_free_desc(iopt);
}
EXPORT_SYMBOL_GPL(iommu_put_pages_list);
+40 −3
Original line number Diff line number Diff line
@@ -9,6 +9,43 @@

#include <linux/iommu.h>

/**
 * struct ioptdesc - Memory descriptor for IOMMU page tables
 * @__page_flags: Occupies the slot of struct page's flags member
 * @iopt_freelist_elm: List element for a struct iommu_pages_list. Occupies
 *                     the slot of struct page's lru member.
 * @__page_mapping: Occupies the slot of struct page's mapping member
 * @__index: Presumably pads the slot of struct page's index member; unlike
 *           the other members no offset assert covers it - TODO confirm
 * @_private: Occupies the slot of struct page's private member
 * @__page_type: Occupies the slot of struct page's page_type member
 * @__page_refcount: Occupies the slot of struct page's _refcount member
 * @memcg_data: Occupies the slot of struct page's memcg_data member. Only
 *              present with CONFIG_MEMCG.
 *
 * This struct overlays struct page for now. Do not modify without a good
 * understanding of the issues.
 *
 * The member offsets are compared against struct page at build time with
 * static_assert(), and the struct must not grow beyond sizeof(struct page).
 * Adding, removing or reordering members has to keep those checks passing.
 */
struct ioptdesc {
	unsigned long __page_flags;

	struct list_head iopt_freelist_elm;
	unsigned long __page_mapping;
	pgoff_t __index;
	void *_private;

	unsigned int __page_type;
	atomic_t __page_refcount;
#ifdef CONFIG_MEMCG
	unsigned long memcg_data;
#endif
};

/**
 * folio_ioptdesc - View a folio as an IOMMU page table descriptor
 * @folio: folio to reinterpret
 *
 * This is a plain pointer cast; nothing is read from or written to @folio.
 *
 * Return: the same address as @folio, typed as a struct ioptdesc pointer
 */
static inline struct ioptdesc *folio_ioptdesc(struct folio *folio)
{
	struct ioptdesc *iopt = (struct ioptdesc *)folio;

	return iopt;
}

/**
 * ioptdesc_folio - View an IOMMU page table descriptor as a folio
 * @iopt: descriptor to reinterpret
 *
 * This is a plain pointer cast; nothing is read from or written to @iopt.
 *
 * Return: the same address as @iopt, typed as a struct folio pointer
 */
static inline struct folio *ioptdesc_folio(struct ioptdesc *iopt)
{
	struct folio *folio = (struct folio *)iopt;

	return folio;
}

/**
 * virt_to_ioptdesc - Find the descriptor for a kernel virtual address
 * @virt: virtual address to look up
 *
 * Resolves @virt to its folio with virt_to_folio() and then reinterprets
 * that folio as a struct ioptdesc.
 *
 * Return: the struct ioptdesc overlaying the folio that contains @virt
 */
static inline struct ioptdesc *virt_to_ioptdesc(void *virt)
{
	struct folio *folio = virt_to_folio(virt);

	return folio_ioptdesc(folio);
}

/*
 * Out-of-line allocator entry points; their definitions are exported with
 * EXPORT_SYMBOL_GPL().
 */
/* Allocate zeroed pages of @order from NUMA node @nid; NULL on failure */
void *iommu_alloc_pages_node(int nid, gfp_t gfp, unsigned int order);
/* Release memory obtained from the allocator; a NULL @virt is ignored */
void iommu_free_pages(void *virt);
/* Free every entry queued on @list */
void iommu_put_pages_list(struct iommu_pages_list *list);
@@ -21,7 +58,7 @@ void iommu_put_pages_list(struct iommu_pages_list *list);
static inline void iommu_pages_list_add(struct iommu_pages_list *list,
					void *virt)
{
	list_add_tail(&virt_to_page(virt)->lru, &list->pages);
	list_add_tail(&virt_to_ioptdesc(virt)->iopt_freelist_elm, &list->pages);
}

/**
@@ -56,7 +93,7 @@ static inline bool iommu_pages_list_empty(struct iommu_pages_list *list)
 */
static inline void *iommu_alloc_pages(gfp_t gfp, int order)
{
	return iommu_alloc_pages_node(numa_node_id(), gfp, order);
	return iommu_alloc_pages_node(NUMA_NO_NODE, gfp, order);
}

/**
@@ -79,7 +116,7 @@ static inline void *iommu_alloc_page_node(int nid, gfp_t gfp)
 */
static inline void *iommu_alloc_page(gfp_t gfp)
{
	return iommu_alloc_pages_node(numa_node_id(), gfp, 0);
	return iommu_alloc_pages_node(NUMA_NO_NODE, gfp, 0);
}

#endif	/* __IOMMU_PAGES_H */