Unverified Commit f3a60882 authored by Christian Brauner's avatar Christian Brauner
Browse files

bdev: open block device as files

Add two new helpers to allow opening block devices as files.
This is not the final infrastructure. This still opens the block device
before opening a struct file. Until we have removed all references to
struct bdev_handle we can't switch the order:

* Introduce blk_to_file_flags() to translate from block specific flags
  to flags usable to open a new file.
* Introduce bdev_file_open_by_{dev,path}().
* Introduce temporary sb_bdev_handle() helper to retrieve a struct
  bdev_handle from a block device file and update places that directly
  reference struct bdev_handle to rely on it.
* Don't count block device opens against the number of open files. A
  bdev_file_open_by_{dev,path}() file is never installed into any
  file descriptor table.

One idea that came to mind was to use kernel_tmpfile_open() which
would require us to pass a path and it would then call do_dentry_open()
going through the regular fops->open::blkdev_open() path. But then we're
back to the problem of routing block specific flags such as
BLK_OPEN_RESTRICT_WRITES through the open path and would have to waste
FMODE_* flags every time we add a new one. With this we can avoid using
a flag bit and we have more leeway in how we open block devices from
bdev_open_by_{dev,path}().

Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-1-adbd023e19cc@kernel.org


Signed-off-by: Christian Brauner <brauner@kernel.org>
parent bac0a9e5
Loading
Loading
Loading
Loading
+97 −4
Original line number Diff line number Diff line
@@ -49,6 +49,13 @@ struct block_device *I_BDEV(struct inode *inode)
}
EXPORT_SYMBOL(I_BDEV);

/*
 * file_bdev - return the block device associated with a block device file
 * @bdev_file: file whose ->private_data is read as a struct bdev_handle
 *
 * Returns the ->bdev member of that handle. No NULL checking is done on
 * either the file or its private data.
 */
struct block_device *file_bdev(struct file *bdev_file)
{
	struct bdev_handle *bh = bdev_file->private_data;

	return bh->bdev;
}
EXPORT_SYMBOL(file_bdev);

static void bdev_write_inode(struct block_device *bdev)
{
	struct inode *inode = bdev->bd_inode;
@@ -368,12 +375,12 @@ static struct file_system_type bd_type = {
};

struct super_block *blockdev_superblock __ro_after_init;
struct vfsmount *blockdev_mnt __ro_after_init;
EXPORT_SYMBOL_GPL(blockdev_superblock);

void __init bdev_cache_init(void)
{
	int err;
	static struct vfsmount *bd_mnt __ro_after_init;

	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
@@ -382,10 +389,10 @@ void __init bdev_cache_init(void)
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
	blockdev_mnt = kern_mount(&bd_type);
	if (IS_ERR(blockdev_mnt))
		panic("Cannot create bdev pseudo-fs");
	blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
	blockdev_superblock = blockdev_mnt->mnt_sb;   /* For writeback */
}

struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
@@ -911,6 +918,92 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
}
EXPORT_SYMBOL(bdev_open_by_dev);

/*
 * Translate a block layer open mode into open(2)-style file flags.
 *
 * BLK_OPEN_WRITE_IOCTL is a historical quirk of the floppy driver: it
 * has allowed ioctls if the file was opened for writing, but does not
 * allow reads or writes. That quirk is mirrored in the returned flags
 * as O_RDWR | O_WRONLY, which is also what a block device opened as
 * O_RDWR | O_WRONLY looks like.
 *
 * A mode with none of the read/write/write-ioctl bits set triggers a
 * one-time warning and is treated like a read-only open.
 */
static unsigned blk_to_file_flags(blk_mode_t mode)
{
	const blk_mode_t rw_mask = BLK_OPEN_READ | BLK_OPEN_WRITE;
	unsigned int f_flags;

	if ((mode & rw_mask) == rw_mask) {
		f_flags = O_RDWR;
	} else if (mode & BLK_OPEN_WRITE_IOCTL) {
		f_flags = O_RDWR | O_WRONLY;
	} else if (mode & BLK_OPEN_WRITE) {
		f_flags = O_WRONLY;
	} else {
		/* Neither write bit is set, so anything but a read is bogus. */
		WARN_ON_ONCE(!(mode & BLK_OPEN_READ));
		f_flags = O_RDONLY; /* O_RDONLY is 0, spelled out for clarity */
	}

	if (mode & BLK_OPEN_NDELAY)
		f_flags |= O_NDELAY;

	return f_flags;
}

/*
 * bdev_file_open_by_dev - open a block device by device number as a file
 * @dev: device number of the block device to open
 * @mode: block layer open mode, also translated into the file's flags
 * @holder: passed through unchanged to bdev_open_by_dev()
 * @hops: passed through unchanged to bdev_open_by_dev()
 *
 * The block device is opened first via bdev_open_by_dev(); only then is
 * a pseudo file allocated for it on blockdev_mnt. The resulting handle
 * is stored in the file's ->private_data.
 *
 * Returns the new file, or an ERR_PTR() if either the device open or the
 * file allocation failed. If the file allocation fails the handle is
 * released again before returning.
 */
struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
				   const struct blk_holder_ops *hops)
{
	struct bdev_handle *handle;
	struct block_device *bdev;
	struct inode *inode;
	struct file *file;
	unsigned int open_flags;

	handle = bdev_open_by_dev(dev, mode, holder, hops);
	if (IS_ERR(handle))
		return ERR_CAST(handle);

	bdev = handle->bdev;
	inode = bdev->bd_inode;

	open_flags = blk_to_file_flags(mode) | O_LARGEFILE;
	file = alloc_file_pseudo_noaccount(inode, blockdev_mnt, "",
					   open_flags, &def_blk_fops);
	if (IS_ERR(file)) {
		bdev_release(handle);
		return file;
	}
	/*
	 * Only taken once allocation succeeded.
	 * NOTE(review): presumably this is the inode reference the new file
	 * drops on final fput() - confirm against alloc_file_pseudo().
	 */
	ihold(inode);

	file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
	if (bdev_nowait(bdev))
		file->f_mode |= FMODE_NOWAIT;

	file->f_mapping = inode->i_mapping;
	file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
	file->private_data = handle;
	return file;
}
EXPORT_SYMBOL(bdev_file_open_by_dev);

/*
 * bdev_file_open_by_path - open a block device by path as a file
 * @path: path resolved to a device number with lookup_bdev()
 * @mode: block layer open mode
 * @holder: passed through unchanged to bdev_file_open_by_dev()
 * @hops: passed through unchanged to bdev_file_open_by_dev()
 *
 * Returns the opened file or an ERR_PTR(). A request that includes
 * BLK_OPEN_WRITE on a device that reports itself read-only is rejected
 * with -EACCES after dropping the freshly opened file again.
 */
struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
				    void *holder,
				    const struct blk_holder_ops *hops)
{
	struct bdev_handle *handle;
	struct file *file;
	dev_t dev;
	int ret;

	ret = lookup_bdev(path, &dev);
	if (ret)
		return ERR_PTR(ret);

	file = bdev_file_open_by_dev(dev, mode, holder, hops);
	/* Open failures and non-write opens need no read-only check. */
	if (IS_ERR(file) || !(mode & BLK_OPEN_WRITE))
		return file;

	handle = file->private_data;
	if (!bdev_read_only(handle->bdev))
		return file;

	/* Write access was requested on a read-only device. */
	fput(file);
	return ERR_PTR(-EACCES);
}
EXPORT_SYMBOL(bdev_file_open_by_path);

/**
 * bdev_open_by_path - open a block device by name
 * @path: path to the block device to open
+1 −1
Original line number Diff line number Diff line
@@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb)
		sb->s_mtd = NULL;
	} else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
		sync_blockdev(sb->s_bdev);
		bdev_release(sb->s_bdev_handle);
		fput(sb->s_bdev_file);
	}
	kfree(sbi);
}
+1 −1
Original line number Diff line number Diff line
@@ -4247,7 +4247,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)

	for (i = 0; i < max_devices; i++) {
		if (i == 0)
			FDEV(0).bdev_handle = sbi->sb->s_bdev_handle;
			FDEV(0).bdev_handle = sb_bdev_handle(sbi->sb);
		else if (!RDEV(i).path[0])
			break;

+1 −1
Original line number Diff line number Diff line
@@ -1162,7 +1162,7 @@ static int open_inline_log(struct super_block *sb)
	init_waitqueue_head(&log->syncwait);

	set_bit(log_INLINELOG, &log->flag);
	log->bdev_handle = sb->s_bdev_handle;
	log->bdev_handle = sb_bdev_handle(sb);
	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
	    (L2LOGPSIZE - sb->s_blocksize_bits);
+1 −1
Original line number Diff line number Diff line
@@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb)
#ifdef CONFIG_ROMFS_ON_BLOCK
	if (sb->s_bdev) {
		sync_blockdev(sb->s_bdev);
		bdev_release(sb->s_bdev_handle);
		fput(sb->s_bdev_file);
	}
#endif
}
Loading