Commit 4f3d4dd1 authored by Darrick J. Wong
Browse files

xfs: define the on-disk format for the metadir feature



Define the on-disk layout and feature flags for the metadata inode
directory feature.  Add xfs_sb_version_hasmetadir for the benefit of
xfs_repair, which needs to know where the new end of the superblock
lies.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent ecc8065d
Loading
Loading
Loading
Loading
+83 −12
Original line number Diff line number Diff line
@@ -174,6 +174,8 @@ typedef struct xfs_sb {
	xfs_lsn_t	sb_lsn;		/* last write sequence */
	uuid_t		sb_meta_uuid;	/* metadata file system unique id */

	xfs_ino_t	sb_metadirino;	/* metadata directory tree root */

	/* must be padded to 64 bit alignment */
} xfs_sb_t;

@@ -259,6 +261,8 @@ struct xfs_dsb {
	__be64		sb_lsn;		/* last write sequence */
	uuid_t		sb_meta_uuid;	/* metadata file system unique id */

	__be64		sb_metadirino;	/* metadata directory tree root */

	/* must be padded to 64 bit alignment */
};

@@ -374,6 +378,7 @@ xfs_sb_has_ro_compat_feature(
#define XFS_SB_FEAT_INCOMPAT_NREXT64	(1 << 5)  /* large extent counters */
#define XFS_SB_FEAT_INCOMPAT_EXCHRANGE	(1 << 6)  /* exchangerange supported */
#define XFS_SB_FEAT_INCOMPAT_PARENT	(1 << 7)  /* parent pointers */
#define XFS_SB_FEAT_INCOMPAT_METADIR	(1 << 8)  /* metadata dir tree */
#define XFS_SB_FEAT_INCOMPAT_ALL \
		(XFS_SB_FEAT_INCOMPAT_FTYPE | \
		 XFS_SB_FEAT_INCOMPAT_SPINODES | \
@@ -790,6 +795,27 @@ static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds)
	return (time64_t)ondisk_seconds - XFS_BIGTIME_EPOCH_OFFSET;
}

/*
 * Kinds of metadata file.  The XFS_METAFILE_* value is what the di_metatype
 * inode field holds, so the numbers are written out explicitly: existing
 * values must never be renumbered, and new types go just before
 * XFS_METAFILE_MAX.
 */
enum xfs_metafile_type {
	XFS_METAFILE_UNKNOWN	= 0,	/* unknown */
	XFS_METAFILE_DIR	= 1,	/* metadir directory */
	XFS_METAFILE_USRQUOTA	= 2,	/* user quota */
	XFS_METAFILE_GRPQUOTA	= 3,	/* group quota */
	XFS_METAFILE_PRJQUOTA	= 4,	/* project quota */
	XFS_METAFILE_RTBITMAP	= 5,	/* rt bitmap */
	XFS_METAFILE_RTSUMMARY	= 6,	/* rt summary */

	/* one past the last valid type; must stay last */
	XFS_METAFILE_MAX
} __packed;

/*
 * Comma-separated list of { value, "name" } initializers, one per
 * XFS_METAFILE_* type, for building a value-to-string lookup table.
 * XFS_METAFILE_MAX is a bound rather than a type, so it has no entry.
 * Keep this list in sync with enum xfs_metafile_type when adding types.
 *
 * NOTE(review): presumably consumed by a symbolic-print style table
 * (e.g. tracepoints) -- confirm against the users of this macro.
 */
#define XFS_METAFILE_TYPE_STR \
	{ XFS_METAFILE_UNKNOWN,		"unknown" }, \
	{ XFS_METAFILE_DIR,		"dir" }, \
	{ XFS_METAFILE_USRQUOTA,	"usrquota" }, \
	{ XFS_METAFILE_GRPQUOTA,	"grpquota" }, \
	{ XFS_METAFILE_PRJQUOTA,	"prjquota" }, \
	{ XFS_METAFILE_RTBITMAP,	"rtbitmap" }, \
	{ XFS_METAFILE_RTSUMMARY,	"rtsummary" }

/*
 * On-disk inode structure.
 *
@@ -812,7 +838,7 @@ struct xfs_dinode {
	__be16		di_mode;	/* mode and type of file */
	__u8		di_version;	/* inode version */
	__u8		di_format;	/* format of di_c data */
	__be16		di_onlink;	/* old number of links to file */
	__be16		di_metatype;	/* XFS_METAFILE_*; was di_onlink */
	__be32		di_uid;		/* owner's user id */
	__be32		di_gid;		/* owner's group id */
	__be32		di_nlink;	/* number of links to file */
@@ -1088,21 +1114,60 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
 * Values for di_flags2.  These start by being exposed to userspace in the upper
 * 16 bits of the XFS_XFLAG_s range.
 */
#define XFS_DIFLAG2_DAX_BIT	0	/* use DAX for this inode */
#define XFS_DIFLAG2_REFLINK_BIT	1	/* file's blocks may be shared */
#define XFS_DIFLAG2_COWEXTSIZE_BIT   2  /* copy on write extent size hint */
#define XFS_DIFLAG2_BIGTIME_BIT	3	/* big timestamps */
#define XFS_DIFLAG2_NREXT64_BIT 4	/* large extent counters */
/* use DAX for this inode */
#define XFS_DIFLAG2_DAX_BIT		0

/* file's blocks may be shared */
#define XFS_DIFLAG2_REFLINK_BIT		1

/* copy on write extent size hint */
#define XFS_DIFLAG2_COWEXTSIZE_BIT	2

/* big timestamps */
#define XFS_DIFLAG2_BIGTIME_BIT		3

/* large extent counters */
#define XFS_DIFLAG2_NREXT64_BIT		4

/*
 * The inode contains filesystem metadata and can be found through the metadata
 * directory tree.  Metadata inodes must satisfy the following constraints:
 *
 * - V5 filesystem (and ftype) are enabled;
 * - The only valid modes are regular files and directories;
 * - The access bits must be zero;
 * - DMAPI event and state masks are zero;
 * - The user and group IDs must be zero;
 * - The project ID can be used as a u32 annotation;
 * - The immutable, sync, noatime, nodump, and nodefrag flags must be set;
 * - The dax flag must not be set;
 * - Directories must have nosymlinks set.
 *
 * These requirements are chosen defensively to minimize the ability of
 * userspace to read or modify the contents, should a metadata file ever
 * escape to userspace.
 *
 * There are further constraints on the directory tree itself:
 *
 * - Metadata inodes must never be resolvable through the root directory;
 * - They must never be accessed by userspace;
 * - Metadata directory entries must have correct ftype.
 *
 * Superblock-rooted metadata files must have the METADATA iflag set even
 * though they do not have a parent directory.
 */
#define XFS_DIFLAG2_METADATA_BIT	5

#define XFS_DIFLAG2_DAX		(1 << XFS_DIFLAG2_DAX_BIT)
#define XFS_DIFLAG2_REFLINK     (1 << XFS_DIFLAG2_REFLINK_BIT)
#define XFS_DIFLAG2_COWEXTSIZE  (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
#define XFS_DIFLAG2_BIGTIME	(1 << XFS_DIFLAG2_BIGTIME_BIT)
#define XFS_DIFLAG2_NREXT64	(1 << XFS_DIFLAG2_NREXT64_BIT)
#define XFS_DIFLAG2_DAX		(1ULL << XFS_DIFLAG2_DAX_BIT)
#define XFS_DIFLAG2_REFLINK	(1ULL << XFS_DIFLAG2_REFLINK_BIT)
#define XFS_DIFLAG2_COWEXTSIZE	(1ULL << XFS_DIFLAG2_COWEXTSIZE_BIT)
#define XFS_DIFLAG2_BIGTIME	(1ULL << XFS_DIFLAG2_BIGTIME_BIT)
#define XFS_DIFLAG2_NREXT64	(1ULL << XFS_DIFLAG2_NREXT64_BIT)
#define XFS_DIFLAG2_METADATA	(1ULL << XFS_DIFLAG2_METADATA_BIT)

#define XFS_DIFLAG2_ANY \
	(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
	 XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64)
	 XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADATA)

static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
{
@@ -1117,6 +1182,12 @@ static inline bool xfs_dinode_has_large_extent_counts(
	       (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_NREXT64));
}

/*
 * Does this on-disk inode have the METADATA flag set in di_flags2?
 *
 * Inodes with di_version below 3 are never reported as metadata inodes,
 * whatever bits happen to be in di_flags2.  The flag is converted to
 * big-endian once so that the on-disk field can be tested without
 * byte-swapping it.
 */
static inline bool xfs_dinode_is_metadir(const struct xfs_dinode *dip)
{
	if (dip->di_version < 3)
		return false;

	return (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA)) != 0;
}

/*
 * Inode number format:
 * low inopblog bits - offset in block
+15 −5
Original line number Diff line number Diff line
@@ -209,12 +209,15 @@ xfs_inode_from_disk(
	 * They will also be unconditionally written back to disk as v2 inodes.
	 */
	if (unlikely(from->di_version == 1)) {
		set_nlink(inode, be16_to_cpu(from->di_onlink));
		/* di_metatype used to be di_onlink */
		set_nlink(inode, be16_to_cpu(from->di_metatype));
		ip->i_projid = 0;
	} else {
		set_nlink(inode, be32_to_cpu(from->di_nlink));
		ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
					be16_to_cpu(from->di_projid_lo);
		if (xfs_dinode_is_metadir(from))
			ip->i_metatype = be16_to_cpu(from->di_metatype);
	}

	i_uid_write(inode, be32_to_cpu(from->di_uid));
@@ -315,7 +318,10 @@ xfs_inode_to_disk(
	struct inode		*inode = VFS_I(ip);

	to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
	to->di_onlink = 0;
	if (xfs_is_metadir_inode(ip))
		to->di_metatype = cpu_to_be16(ip->i_metatype);
	else
		to->di_metatype = 0;

	to->di_format = xfs_ifork_format(&ip->i_df);
	to->di_uid = cpu_to_be32(i_uid_read(inode));
@@ -523,8 +529,11 @@ xfs_dinode_verify(
	 * di_nlink==0 on a V1 inode.  V2/3 inodes would get written out with
	 * di_onlink==0, so we can check that.
	 */
	if (dip->di_version >= 2) {
		if (dip->di_onlink)
	if (dip->di_version == 2) {
		if (dip->di_metatype)
			return __this_address;
	} else if (dip->di_version >= 3) {
		if (!xfs_dinode_is_metadir(dip) && dip->di_metatype)
			return __this_address;
	}

@@ -546,7 +555,8 @@ xfs_dinode_verify(
			if (dip->di_nlink)
				return __this_address;
		} else {
			if (dip->di_onlink)
			/* di_metatype used to be di_onlink */
			if (dip->di_metatype)
				return __this_address;
		}
	}
+2 −0
Original line number Diff line number Diff line
@@ -224,6 +224,8 @@ xfs_inode_inherit_flags2(
	}
	if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
		ip->i_diflags2 |= XFS_DIFLAG2_DAX;
	if (xfs_is_metadir_inode(pip))
		ip->i_diflags2 |= XFS_DIFLAG2_METADATA;

	/* Don't let invalid cowextsize hints propagate. */
	failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
+1 −1
Original line number Diff line number Diff line
@@ -404,7 +404,7 @@ struct xfs_log_dinode {
	uint16_t	di_mode;	/* mode and type of file */
	int8_t		di_version;	/* inode version */
	int8_t		di_format;	/* format of di_c data */
	uint8_t		di_pad3[2];	/* unused in v2/3 inodes */
	uint16_t	di_metatype;	/* metadata type, if DIFLAG2_METADATA */
	uint32_t	di_uid;		/* owner's user id */
	uint32_t	di_gid;		/* owner's group id */
	uint32_t	di_nlink;	/* number of links to file */
+1 −1
Original line number Diff line number Diff line
@@ -37,7 +37,7 @@ xfs_check_ondisk_structs(void)
	XFS_CHECK_STRUCT_SIZE(struct xfs_dinode,		176);
	XFS_CHECK_STRUCT_SIZE(struct xfs_disk_dquot,		104);
	XFS_CHECK_STRUCT_SIZE(struct xfs_dqblk,			136);
	XFS_CHECK_STRUCT_SIZE(struct xfs_dsb,			264);
	XFS_CHECK_STRUCT_SIZE(struct xfs_dsb,			272);
	XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr,		56);
	XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key,		4);
	XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec,		16);
Loading