Commit 097e8db5 authored by Lorenzo Stoakes, committed by Andrew Morton
Browse files

mm: update hugetlbfs to use VMA flags on mmap_prepare

In order to update all mmap_prepare users to utilise the new VMA flags
type vma_flags_t and associated helper functions, we start by updating
hugetlbfs, which has a lot of additional logic that requires updating to
make this change.

This is laying the groundwork for eliminating the vm_flags_t from struct
vm_area_desc and using vma_flags_t only, which further lays the ground for
removing the deprecated vm_flags_t type altogether.

No functional changes intended.

Link: https://lkml.kernel.org/r/9226bec80c9aa3447cc2b83354f733841dba8a50.1769097829.git.lorenzo.stoakes@oracle.com


Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Damien Le Moal <dlemoal@kernel.org>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Yury Norov <ynorov@nvidia.com>
Cc: Chris Mason <clm@fb.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent bae0ba7c
Loading
Loading
Loading
Loading
+7 −7
Original line number Diff line number Diff line
@@ -109,7 +109,7 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
	loff_t len, vma_len;
	int ret;
	struct hstate *h = hstate_file(file);
	vm_flags_t vm_flags;
	vma_flags_t vma_flags;

	/*
	 * vma address alignment (but not the pgoff alignment) has
@@ -119,7 +119,7 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
	 * way when do_mmap unwinds (may be important on powerpc
	 * and ia64).
	 */
	desc->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
	vma_desc_set_flags(desc, VMA_HUGETLB_BIT, VMA_DONTEXPAND_BIT);
	desc->vm_ops = &hugetlb_vm_ops;

	/*
@@ -148,23 +148,23 @@ static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)

	ret = -ENOMEM;

	vm_flags = desc->vm_flags;
	vma_flags = desc->vma_flags;
	/*
	 * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
	 * reserving here. Note: only for SHM hugetlbfs file, the inode
	 * flag S_PRIVATE is set.
	 */
	if (inode->i_flags & S_PRIVATE)
		vm_flags |= VM_NORESERVE;
		vma_flags_set(&vma_flags, VMA_NORESERVE_BIT);

	if (hugetlb_reserve_pages(inode,
			desc->pgoff >> huge_page_order(h),
			len >> huge_page_shift(h), desc,
			vm_flags) < 0)
			vma_flags) < 0)
		goto out;

	ret = 0;
	if ((desc->vm_flags & VM_WRITE) && inode->i_size < len)
	if (vma_desc_test_flags(desc, VMA_WRITE_BIT) && inode->i_size < len)
		i_size_write(inode, len);
out:
	inode_unlock(inode);
@@ -1527,7 +1527,7 @@ static int get_hstate_idx(int page_size_log)
 * otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
 */
struct file *hugetlb_file_setup(const char *name, size_t size,
				vm_flags_t acctflag, int creat_flags,
				vma_flags_t acctflag, int creat_flags,
				int page_size_log)
{
	struct inode *inode;
+3 −3
Original line number Diff line number Diff line
@@ -150,7 +150,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
			     struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
long hugetlb_reserve_pages(struct inode *inode, long from, long to,
			   struct vm_area_desc *desc, vm_flags_t vm_flags);
			   struct vm_area_desc *desc, vma_flags_t vma_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
						long freed);
bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
@@ -529,7 +529,7 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
}

extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
struct file *hugetlb_file_setup(const char *name, size_t size, vma_flags_t acct,
				int creat_flags, int page_size_log);

static inline bool is_file_hugepages(const struct file *file)
@@ -545,7 +545,7 @@ static inline struct hstate *hstate_inode(struct inode *i)

#define is_file_hugepages(file)			false
/*
 * Stub variant of hugetlb_file_setup(): every argument is ignored and the
 * call always fails with ERR_PTR(-ENOSYS).
 *
 * The signature line is listed twice below: once taking a vm_flags_t
 * acctflag and once taking a vma_flags_t acctflag.
 */
static inline struct file *
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
hugetlb_file_setup(const char *name, size_t size, vma_flags_t acctflag,
		int creat_flags, int page_size_log)
{
	return ERR_PTR(-ENOSYS);
+10 −0
Original line number Diff line number Diff line
@@ -11,6 +11,11 @@ static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
	return !!(vm_flags & VM_HUGETLB);
}

/*
 * Report whether VMA_HUGETLB_BIT is set in the flag set pointed to by
 * @flags, as determined by vma_flags_test().
 */
static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
{
	return vma_flags_test(flags, VMA_HUGETLB_BIT);
}

#else

static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
@@ -18,6 +23,11 @@ static inline bool is_vm_hugetlb_flags(vm_flags_t vm_flags)
	return false;
}

/*
 * Variant used in the #else branch of the surrounding conditional: @flags
 * is not inspected and the result is always false.
 */
static inline bool is_vma_hugetlb_flags(const vma_flags_t *flags)
{
	return false;
}

#endif

static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
+7 −5
Original line number Diff line number Diff line
@@ -707,9 +707,9 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
	int error;
	struct shmid_kernel *shp;
	size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	const bool has_no_reserve = shmflg & SHM_NORESERVE;
	struct file *file;
	char name[13];
	vm_flags_t acctflag = 0;

	if (size < SHMMIN || size > ns->shm_ctlmax)
		return -EINVAL;
@@ -738,6 +738,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)

	sprintf(name, "SYSV%08x", key);
	if (shmflg & SHM_HUGETLB) {
		vma_flags_t acctflag = EMPTY_VMA_FLAGS;
		struct hstate *hs;
		size_t hugesize;

@@ -749,17 +750,18 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
		hugesize = ALIGN(size, huge_page_size(hs));

		/* hugetlb_file_setup applies strict accounting */
		if (shmflg & SHM_NORESERVE)
			acctflag = VM_NORESERVE;
		if (has_no_reserve)
			vma_flags_set(&acctflag, VMA_NORESERVE_BIT);
		file = hugetlb_file_setup(name, hugesize, acctflag,
				HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
	} else {
		vm_flags_t acctflag = 0;

		/*
		 * Do not allow no accounting for OVERCOMMIT_NEVER, even
		 * if it's asked for.
		 */
		if  ((shmflg & SHM_NORESERVE) &&
				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
		if  (has_no_reserve && sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			acctflag = VM_NORESERVE;
		file = shmem_kernel_file_setup(name, size, acctflag);
	}
+11 −11
Original line number Diff line number Diff line
@@ -1193,16 +1193,16 @@ static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)

/*
 * Store @map in desc->private_data.
 *
 * The VM_WARN_ON_ONCE() checks flag a descriptor that is not marked as
 * hugetlb, or one that has the MAYSHARE flag set. Each check is listed
 * twice below: first in its vm_flags_t form, then in its vma_flags_t form.
 */
static void set_vma_desc_resv_map(struct vm_area_desc *desc, struct resv_map *map)
{
	VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
	VM_WARN_ON_ONCE(desc->vm_flags & VM_MAYSHARE);
	VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
	VM_WARN_ON_ONCE(vma_desc_test_flags(desc, VMA_MAYSHARE_BIT));

	desc->private_data = map;
	desc->private_data = map;
}

/*
 * OR @flags into the value currently held in desc->private_data.
 *
 * The VM_WARN_ON_ONCE() checks flag a descriptor that is not marked as
 * hugetlb, or one that has the MAYSHARE flag set. Each check is listed
 * twice below: first in its vm_flags_t form, then in its vma_flags_t form.
 */
static void set_vma_desc_resv_flags(struct vm_area_desc *desc, unsigned long flags)
{
	VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
	VM_WARN_ON_ONCE(desc->vm_flags & VM_MAYSHARE);
	VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));
	VM_WARN_ON_ONCE(vma_desc_test_flags(desc, VMA_MAYSHARE_BIT));

	/* private_data is treated as an unsigned long bit field here. */
	desc->private_data = (void *)((unsigned long)desc->private_data | flags);
}
@@ -1216,7 +1216,7 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)

/*
 * Return true if any bit of @flag is set in desc->private_data, with the
 * pointer value interpreted as an unsigned long.
 *
 * The VM_WARN_ON_ONCE() check flags a descriptor that is not marked as
 * hugetlb; it is listed twice below, first in its vm_flags_t form and then
 * in its vma_flags_t form.
 */
static bool is_vma_desc_resv_set(struct vm_area_desc *desc, unsigned long flag)
{
	VM_WARN_ON_ONCE(!is_vm_hugetlb_flags(desc->vm_flags));
	VM_WARN_ON_ONCE(!is_vma_hugetlb_flags(&desc->vma_flags));

	return ((unsigned long)desc->private_data) & flag;
}
@@ -6571,7 +6571,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
long hugetlb_reserve_pages(struct inode *inode,
		long from, long to,
		struct vm_area_desc *desc,
		vm_flags_t vm_flags)
		vma_flags_t vma_flags)
{
	long chg = -1, add = -1, spool_resv, gbl_resv;
	struct hstate *h = hstate_inode(inode);
@@ -6592,7 +6592,7 @@ long hugetlb_reserve_pages(struct inode *inode,
	 * attempt will be made for VM_NORESERVE to allocate a page
	 * without using reserves
	 */
	if (vm_flags & VM_NORESERVE)
	if (vma_flags_test(&vma_flags, VMA_NORESERVE_BIT))
		return 0;

	/*
@@ -6601,7 +6601,7 @@ long hugetlb_reserve_pages(struct inode *inode,
	 * to reserve the full area even if read-only as mprotect() may be
	 * called to make the mapping read-write. Assume !desc is a shm mapping
	 */
	if (!desc || desc->vm_flags & VM_MAYSHARE) {
	if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) {
		/*
		 * resv_map can not be NULL as hugetlb_reserve_pages is only
		 * called for inodes for which resv_maps were created (see
@@ -6635,7 +6635,7 @@ long hugetlb_reserve_pages(struct inode *inode,
	if (err < 0)
		goto out_err;

	if (desc && !(desc->vm_flags & VM_MAYSHARE) && h_cg) {
	if (desc && !vma_desc_test_flags(desc, VMA_MAYSHARE_BIT) && h_cg) {
		/* For private mappings, the hugetlb_cgroup uncharge info hangs
		 * of the resv_map.
		 */
@@ -6672,7 +6672,7 @@ long hugetlb_reserve_pages(struct inode *inode,
	 * consumed reservations are stored in the map. Hence, nothing
	 * else has to be done for private mappings here
	 */
	if (!desc || desc->vm_flags & VM_MAYSHARE) {
	if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT)) {
		add = region_add(resv_map, from, to, regions_needed, h, h_cg);

		if (unlikely(add < 0)) {
@@ -6727,7 +6727,7 @@ long hugetlb_reserve_pages(struct inode *inode,
	hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
					    chg * pages_per_huge_page(h), h_cg);
out_err:
	if (!desc || desc->vm_flags & VM_MAYSHARE)
	if (!desc || vma_desc_test_flags(desc, VMA_MAYSHARE_BIT))
		/* Only call region_abort if the region_chg succeeded but the
		 * region_add failed or didn't run.
		 */
Loading