Commit aa60fe12 authored by Qu Wenruo's avatar Qu Wenruo Committed by David Sterba
Browse files

btrfs: zlib: refactor S390x HW acceleration buffer preparation



Currently for s390x HW zlib compression, to get the best performance we
need a buffer size which is larger than a page.

This means we need to copy multiple pages into workspace->buf, then use
that buffer as zlib compression input.

Currently it's hardcoded using page sized folio, and all the handling
are deep inside a loop.

Refactor the code by:

- Introduce a dedicated helper to do the buffer copy
  The new helper will be called copy_data_into_buffer().

- Add extra ASSERT()s
  * Make sure we only go into the function for hardware acceleration
  * Make sure we still get page sized folio

- Prepare for future large folios
  This means we will rely on the folio size, other than PAGE_SIZE to do
  the copy.

- Handle the folio mapping and unmapping inside the helper function
  For S390x hardware acceleration case, it never utilize the @data_in
  pointer, thus we can do folio mapping/unmapping all inside the function.

Acked-by: default avatarMikhail Zaslonko <zaslonko@linux.ibm.com>
Tested-by: default avatarMikhail Zaslonko <zaslonko@linux.ibm.com>
Signed-off-by: default avatarQu Wenruo <wqu@suse.com>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 87d6aaf7
Loading
Loading
Loading
Loading
+55 −29
Original line number Diff line number Diff line
@@ -94,6 +94,47 @@ struct list_head *zlib_alloc_workspace(unsigned int level)
	return ERR_PTR(-ENOMEM);
}

/*
 * Helper for S390x with hardware zlib compression support.
 *
 * That hardware acceleration requires a buffer size larger than a single page
 * to get ideal performance, thus we need to do the memory copy rather than
 * use the page cache directly as input buffer.
 */
static int copy_data_into_buffer(struct address_space *mapping,
				 struct workspace *workspace, u64 filepos,
				 unsigned long length)
{
	u64 cur = filepos;

	/* It's only for hardware accelerated zlib code. */
	ASSERT(zlib_deflate_dfltcc_enabled());

	while (cur < filepos + length) {
		struct folio *folio;
		void *data_in;
		unsigned int offset;
		unsigned long copy_length;
		int ret;

		ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio);
		if (ret < 0)
			return ret;
		/* No large folio support yet. */
		ASSERT(!folio_test_large(folio));

		offset = offset_in_folio(folio, cur);
		copy_length = min(folio_size(folio) - offset,
				  filepos + length - cur);

		data_in = kmap_local_folio(folio, offset);
		memcpy(workspace->buf + cur - filepos, data_in, copy_length);
		kunmap_local(data_in);
		cur += copy_length;
	}
	return 0;
}

int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
			 u64 start, struct folio **folios, unsigned long *out_folios,
			 unsigned long *total_in, unsigned long *total_out)
@@ -105,8 +146,6 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
	int nr_folios = 0;
	struct folio *in_folio = NULL;
	struct folio *out_folio = NULL;
	unsigned long bytes_left;
	unsigned int in_buf_folios;
	unsigned long len = *total_out;
	unsigned long nr_dest_folios = *out_folios;
	const unsigned long max_out = nr_dest_folios * PAGE_SIZE;
@@ -150,34 +189,21 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
		 * the workspace buffer if required.
		 */
		if (workspace->strm.avail_in == 0) {
			bytes_left = len - workspace->strm.total_in;
			in_buf_folios = min(DIV_ROUND_UP(bytes_left, PAGE_SIZE),
					    workspace->buf_size / PAGE_SIZE);
			if (in_buf_folios > 1) {
				int i;

				/* S390 hardware acceleration path, not subpage. */
				ASSERT(!btrfs_is_subpage(
						inode_to_fs_info(mapping->host),
						mapping));
				for (i = 0; i < in_buf_folios; i++) {
					if (data_in) {
						kunmap_local(data_in);
						folio_put(in_folio);
						data_in = NULL;
					}
					ret = btrfs_compress_filemap_get_folio(mapping,
							start, &in_folio);
			unsigned long bytes_left = len - workspace->strm.total_in;
			unsigned int copy_length = min(bytes_left, workspace->buf_size);

			/*
			 * This can only happen when hardware zlib compression is
			 * enabled.
			 */
			if (copy_length > PAGE_SIZE) {
				ret = copy_data_into_buffer(mapping, workspace,
							    start, copy_length);
				if (ret < 0)
					goto out;
					data_in = kmap_local_folio(in_folio, 0);
					copy_page(workspace->buf + i * PAGE_SIZE,
						  data_in);
					start += PAGE_SIZE;
				}
				start += copy_length;
				workspace->strm.next_in = workspace->buf;
				workspace->strm.avail_in = min(bytes_left,
							       in_buf_folios << PAGE_SHIFT);
				workspace->strm.avail_in = copy_length;
			} else {
				unsigned int pg_off;
				unsigned int cur_len;