Commit b3efacc4 authored by Jason Gunthorpe, committed by Joerg Roedel
Browse files

iommu/pages: Allow sub page sizes to be passed into the allocator



Generally drivers have a specific idea what their HW structure size should
be. In a lot of cases this is related to PAGE_SIZE, but not always. ARM64,
for example, allows a 4K IO page table size on a 64K CPU page table
system.

Currently we don't have any good support for sub page allocations, but
make the API accommodate this by accepting a sub page size from the caller
and rounding up internally.

This is done by moving away from order as the size input and using size:
  size == 1 << (order + PAGE_SHIFT)

Following patches convert drivers away from using order and try to specify
allocation sizes independent of PAGE_SIZE.

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/15-v4-c8663abbb606+3f7-iommu_pages_jgg@nvidia.com


Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent 580ccca4
Loading
Loading
Loading
Loading
+19 −10
Original line number Diff line number Diff line
@@ -23,24 +23,32 @@ IOPTDESC_MATCH(memcg_data, memcg_data);
static_assert(sizeof(struct ioptdesc) <= sizeof(struct page));

/**
 * iommu_alloc_pages_node - Allocate a zeroed page of a given order from
 * iommu_alloc_pages_node_sz - Allocate a zeroed page of a given size from
 *                             specific NUMA node
 * @nid: memory NUMA node id
 * @gfp: buddy allocator flags
 * @order: page order
 * @size: Memory size to allocate, rounded up to a power of 2
 *
 * Returns the virtual address of the allocated page. The page must be
 * freed either by calling iommu_free_pages() or via iommu_put_pages_list().
 * Returns the virtual address of the allocated page. The page must be freed
 * either by calling iommu_free_pages() or via iommu_put_pages_list(). The
 * returned allocation is roundup_pow_of_two(size) big, and is physically
 * aligned to its size.
 */
void *iommu_alloc_pages_node(int nid, gfp_t gfp, unsigned int order)
void *iommu_alloc_pages_node_sz(int nid, gfp_t gfp, size_t size)
{
	const unsigned long pgcnt = 1UL << order;
	unsigned long pgcnt;
	struct folio *folio;
	unsigned int order;

	/* This uses page_address() on the memory. */
	if (WARN_ON(gfp & __GFP_HIGHMEM))
		return NULL;

	/*
	 * Currently sub page allocations result in a full page being returned.
	 */
	order = get_order(size);

	/*
	 * __folio_alloc_node() does not handle NUMA_NO_NODE like
	 * alloc_pages_node() did.
@@ -61,12 +69,13 @@ void *iommu_alloc_pages_node(int nid, gfp_t gfp, unsigned int order)
	 * This is necessary for the proper accounting as IOMMU state can be
	 * rather large, i.e. multiple gigabytes in size.
	 */
	pgcnt = 1UL << order;
	mod_node_page_state(folio_pgdat(folio), NR_IOMMU_PAGES, pgcnt);
	lruvec_stat_mod_folio(folio, NR_SECONDARY_PAGETABLE, pgcnt);

	return folio_address(folio);
}
EXPORT_SYMBOL_GPL(iommu_alloc_pages_node);
EXPORT_SYMBOL_GPL(iommu_alloc_pages_node_sz);

static void __iommu_free_desc(struct ioptdesc *iopt)
{
@@ -82,7 +91,7 @@ static void __iommu_free_desc(struct ioptdesc *iopt)
 * iommu_free_pages - free pages
 * @virt: virtual address of the page to be freed.
 *
 * The page must have have been allocated by iommu_alloc_pages_node()
 * The page must have been allocated by iommu_alloc_pages_node_sz()
 */
void iommu_free_pages(void *virt)
{
@@ -96,7 +105,7 @@ EXPORT_SYMBOL_GPL(iommu_free_pages);
 * iommu_put_pages_list - free a list of pages.
 * @list: The list of pages to be freed
 *
 * Frees a list of pages allocated by iommu_alloc_pages_node().
 * Frees a list of pages allocated by iommu_alloc_pages_node_sz().
 */
void iommu_put_pages_list(struct iommu_pages_list *list)
{
+39 −5
Original line number Diff line number Diff line
@@ -46,14 +46,14 @@ static inline struct ioptdesc *virt_to_ioptdesc(void *virt)
	return folio_ioptdesc(virt_to_folio(virt));
}

void *iommu_alloc_pages_node(int nid, gfp_t gfp, unsigned int order);
void *iommu_alloc_pages_node_sz(int nid, gfp_t gfp, size_t size);
void iommu_free_pages(void *virt);
void iommu_put_pages_list(struct iommu_pages_list *list);

/**
 * iommu_pages_list_add - add the page to a iommu_pages_list
 * @list: List to add the page to
 * @virt: Address returned from iommu_alloc_pages_node()
 * @virt: Address returned from iommu_alloc_pages_node_sz()
 */
static inline void iommu_pages_list_add(struct iommu_pages_list *list,
					void *virt)
@@ -84,16 +84,48 @@ static inline bool iommu_pages_list_empty(struct iommu_pages_list *list)
	return list_empty(&list->pages);
}

/**
 * iommu_alloc_pages_node - Allocate a zeroed page of a given order from
 *                          specific NUMA node
 * @nid: memory NUMA node id
 * @gfp: buddy allocator flags
 * @order: page order
 *
 * Converts @order to a byte size of 1 << (@order + PAGE_SHIFT) and calls
 * iommu_alloc_pages_node_sz().
 *
 * Returns the virtual address of the allocated page.
 * Prefer to use iommu_alloc_pages_node_sz()
 */
static inline void *iommu_alloc_pages_node(int nid, gfp_t gfp,
					   unsigned int order)
{
	/*
	 * Do the shift in size_t: a plain int "1 <<" overflows once
	 * order + PAGE_SHIFT reaches the width of int.
	 */
	return iommu_alloc_pages_node_sz(nid, gfp,
					 (size_t)1 << (order + PAGE_SHIFT));
}

/**
 * iommu_alloc_pages - allocate a zeroed page of a given order
 * @gfp: buddy allocator flags
 * @order: page order
 *
 * The allocation is made with NUMA_NO_NODE as the node. The byte size
 * handed to iommu_alloc_pages_node_sz() is computed in size_t so the shift
 * is not limited to the width of int.
 *
 * returns the virtual address of the allocated page
 * Prefer to use iommu_alloc_pages_sz()
 */
static inline void *iommu_alloc_pages(gfp_t gfp, int order)
{
	return iommu_alloc_pages_node(NUMA_NO_NODE, gfp, order);
	return iommu_alloc_pages_node_sz(NUMA_NO_NODE, gfp,
					 (size_t)1 << (order + PAGE_SHIFT));
}

/**
 * iommu_alloc_pages_sz - Allocate a zeroed page of a given size
 * @gfp: buddy allocator flags
 * @size: Memory size to allocate, this is rounded up to a power of 2
 *
 * Same as iommu_alloc_pages_node_sz() with NUMA_NO_NODE as the node.
 *
 * Returns the virtual address of the allocated page.
 */
static inline void *iommu_alloc_pages_sz(gfp_t gfp, size_t size)
{
	return iommu_alloc_pages_node_sz(NUMA_NO_NODE, gfp, size);
}

/**
@@ -102,10 +134,11 @@ static inline void *iommu_alloc_pages(gfp_t gfp, int order)
 * @gfp: buddy allocator flags
 *
 * returns the virtual address of the allocated page
 * Prefer to use iommu_alloc_pages_node_sz()
 */
static inline void *iommu_alloc_page_node(int nid, gfp_t gfp)
{
	return iommu_alloc_pages_node(nid, gfp, 0);
	return iommu_alloc_pages_node_sz(nid, gfp, PAGE_SIZE);
}

/**
@@ -113,10 +146,11 @@ static inline void *iommu_alloc_page_node(int nid, gfp_t gfp)
 * @gfp: buddy allocator flags
 *
 * returns the virtual address of the allocated page
 * Prefer to use iommu_alloc_pages_sz()
 */
static inline void *iommu_alloc_page(gfp_t gfp)
{
	return iommu_alloc_pages_node(NUMA_NO_NODE, gfp, 0);
	return iommu_alloc_pages_node_sz(NUMA_NO_NODE, gfp, PAGE_SIZE);
}

#endif	/* __IOMMU_PAGES_H */
+3 −3
Original line number Diff line number Diff line
@@ -342,9 +342,9 @@ typedef unsigned int ioasid_t;
#define IOMMU_DIRTY_NO_CLEAR (1 << 0)

/*
 * Pages allocated through iommu_alloc_pages_node() can be placed on this list
 * using iommu_pages_list_add(). Note: ONLY pages from iommu_alloc_pages_node()
 * can be used this way!
 * Pages allocated through iommu_alloc_pages_node_sz() can be placed on this
 * list using iommu_pages_list_add(). Note: ONLY pages from
 * iommu_alloc_pages_node_sz() can be used this way!
 */
struct iommu_pages_list {
	struct list_head pages;