Merge tag 'zonefs-6.8-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs (efb0b63a) · Commits · git / linux-nf

fs/zonefs/file.c

+27 −15

Original line number	Diff line number	Diff line
		@@ -348,6 +348,11 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
		struct zonefs_inode_info *zi = ZONEFS_I(inode);

		if (error) {
		/*
		* For Sync IOs, error recovery is called from
		* zonefs_file_dio_write().
		*/
		if (!is_sync_kiocb(iocb))
		zonefs_io_error(inode, true);
		return error;
		}
		@@ -491,6 +496,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
		ret = -EINVAL;
		goto inode_unlock;
		}
		/*
		* Advance the zone write pointer offset. This assumes that the
		* IO will succeed, which is OK to do because we do not allow
		* partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO
		* fails, the error path will correct the write pointer offset.
		*/
		z->z_wpoffset += count;
		zonefs_inode_account_active(inode);
		mutex_unlock(&zi->i_truncate_mutex);
		}

		@@ -504,20 +517,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
		if (ret == -ENOTBLK)
		ret = -EBUSY;

		if (zonefs_zone_is_seq(z) &&
		(ret > 0 || ret == -EIOCBQUEUED)) {
		if (ret > 0)
		count = ret;

		/*
		* Update the zone write pointer offset assuming the write
		* operation succeeded. If it did not, the error recovery path
		* will correct it. Also do active seq file accounting.
		* For a failed IO or partial completion, trigger error recovery
		* to update the zone write pointer offset to a correct value.
		* For asynchronous IOs, zonefs_file_write_dio_end_io() may already
		* have executed error recovery if the IO already completed when we
		* reach here. However, we cannot know that and execute error recovery
		* again (that will not change anything).
		*/
		mutex_lock(&zi->i_truncate_mutex);
		z->z_wpoffset += count;
		zonefs_inode_account_active(inode);
		mutex_unlock(&zi->i_truncate_mutex);
		if (zonefs_zone_is_seq(z)) {
		if (ret > 0 && ret != count)
		ret = -EIO;
		if (ret < 0 && ret != -EIOCBQUEUED)
		zonefs_io_error(inode, true);
		}

		inode_unlock:

fs/zonefs/super.c

+38 −28

Original line number	Diff line number	Diff line
		@@ -246,16 +246,18 @@ static void zonefs_inode_update_mode(struct inode *inode)
		z->z_mode = inode->i_mode;
		}

		struct zonefs_ioerr_data {
		struct inode *inode;
		bool write;
		};

		static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
		void *data)
		{
		struct zonefs_ioerr_data *err = data;
		struct inode *inode = err->inode;
		struct blk_zone *z = data;

		*z = *zone;
		return 0;
		}

		static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
		bool write)
		{
		struct zonefs_zone *z = zonefs_inode_zone(inode);
		struct super_block *sb = inode->i_sb;
		struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
		@@ -270,8 +272,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
		data_size = zonefs_check_zone_condition(sb, z, zone);
		isize = i_size_read(inode);
		if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
		!err->write && isize == data_size)
		return 0;
		!write && isize == data_size)
		return;

		/*
		* At this point, we detected either a bad zone or an inconsistency
		@@ -292,7 +294,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
		* In all cases, warn about inode size inconsistency and handle the
		* IO error according to the zone condition and to the mount options.
		*/
		if (zonefs_zone_is_seq(z) && isize != data_size)
		if (isize != data_size)
		zonefs_warn(sb,
		"inode %lu: invalid size %lld (should be %lld)\n",
		inode->i_ino, isize, data_size);
		@@ -352,8 +354,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
		zonefs_i_size_write(inode, data_size);
		z->z_wpoffset = data_size;
		zonefs_inode_account_active(inode);

		return 0;
		}

		/*
		@@ -367,23 +367,25 @@ void __zonefs_io_error(struct inode *inode, bool write)
		{
		struct zonefs_zone *z = zonefs_inode_zone(inode);
		struct super_block *sb = inode->i_sb;
		struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
		unsigned int noio_flag;
		unsigned int nr_zones = 1;
		struct zonefs_ioerr_data err = {
		.inode = inode,
		.write = write,
		};
		struct blk_zone zone;
		int ret;

		/*
		* The only files that have more than one zone are conventional zone
		* files with aggregated conventional zones, for which the inode zone
		* size is always larger than the device zone size.
		* Conventional zone have no write pointer and cannot become read-only
		* or offline. So simply fake a report for a single or aggregated zone
		* and let zonefs_handle_io_error() correct the zone inode information
		* according to the mount options.
		*/
		if (z->z_size > bdev_zone_sectors(sb->s_bdev))
		nr_zones = z->z_size >>
		(sbi->s_zone_sectors_shift + SECTOR_SHIFT);
		if (!zonefs_zone_is_seq(z)) {
		zone.start = z->z_sector;
		zone.len = z->z_size >> SECTOR_SHIFT;
		zone.wp = zone.start + zone.len;
		zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
		zone.cond = BLK_ZONE_COND_NOT_WP;
		zone.capacity = zone.len;
		goto handle_io_error;
		}

		/*
		* Memory allocations in blkdev_report_zones() can trigger a memory
		@@ -394,12 +396,20 @@ void __zonefs_io_error(struct inode *inode, bool write)
		* the GFP_NOIO context avoids both problems.
		*/
		noio_flag = memalloc_noio_save();
		ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones,
		zonefs_io_error_cb, &err);
		if (ret != nr_zones)
		ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1,
		zonefs_io_error_cb, &zone);
		memalloc_noio_restore(noio_flag);

		if (ret != 1) {
		zonefs_err(sb, "Get inode %lu zone information failed %d\n",
		inode->i_ino, ret);
		memalloc_noio_restore(noio_flag);
		zonefs_warn(sb, "remounting filesystem read-only\n");
		sb->s_flags |= SB_RDONLY;
		return;
		}

		handle_io_error:
		zonefs_handle_io_error(inode, &zone, write);
		}

		static struct kmem_cache *zonefs_inode_cachep;