Commit 19235161 authored by Gao Xiang
Browse files

erofs: support I/O submission for sub-page compressed blocks



Add a basic I/O submission path first to support sub-page blocks:

 - Temporary short-lived pages will be used entirely;

 - In-place I/O pages can be used partially, but compressed pages need
   to be able to be mapped in contiguous virtual memory.

As a start, currently cache decompression is explicitly disabled for
sub-page blocks, which will be supported in the future.

Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20231206091057.87027-2-hsiangkao@linux.alibaba.com
parent 3c12466b
Loading
Loading
Loading
Loading
+74 −82
Original line number Diff line number Diff line
@@ -1435,86 +1435,85 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
	z_erofs_decompressqueue_work(&io->u.work);
}

static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
				 struct z_erofs_decompress_frontend *f,
				 struct z_erofs_pcluster *pcl,
				 unsigned int nr,
					       struct page **pagepool,
				 struct address_space *mc)
{
	const pgoff_t index = pcl->obj.index;
	gfp_t gfp = mapping_gfp_mask(mc);
	bool tocache = false;

	struct z_erofs_bvec *zbv = pcl->compressed_bvecs + nr;
	struct address_space *mapping;
	struct page *oldpage, *page;
	int justfound;
	struct page *page, *oldpage;
	int justfound, bs = i_blocksize(f->inode);

	/* Except for inplace pages, the entire page can be used for I/Os */
	bvec->bv_offset = 0;
	bvec->bv_len = PAGE_SIZE;
repeat:
	page = READ_ONCE(pcl->compressed_bvecs[nr].page);
	oldpage = page;

	if (!page)
	oldpage = READ_ONCE(zbv->page);
	if (!oldpage)
		goto out_allocpage;

	justfound = (unsigned long)page & 1UL;
	page = (struct page *)((unsigned long)page & ~1UL);
	justfound = (unsigned long)oldpage & 1UL;
	page = (struct page *)((unsigned long)oldpage & ~1UL);
	bvec->bv_page = page;

	DBG_BUGON(z_erofs_is_shortlived_page(page));
	/*
	 * preallocated cached pages, which is used to avoid direct reclaim
	 * otherwise, it will go inplace I/O path instead.
	 * Handle preallocated cached pages.  We tried to allocate such pages
	 * without triggering direct reclaim.  If allocation failed, inplace
	 * file-backed pages will be used instead.
	 */
	if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
		WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
		set_page_private(page, 0);
		WRITE_ONCE(zbv->page, page);
		tocache = true;
		goto out_tocache;
	}
	mapping = READ_ONCE(page->mapping);

	mapping = READ_ONCE(page->mapping);
	/*
	 * file-backed online pages in plcuster are all locked steady,
	 * therefore it is impossible for `mapping' to be NULL.
	 * File-backed pages for inplace I/Os are all locked steady,
	 * therefore it is impossible for `mapping` to be NULL.
	 */
	if (mapping && mapping != mc)
		/* ought to be unmanaged pages */
		goto out;

	/* directly return for shortlived page as well */
	if (z_erofs_is_shortlived_page(page))
		goto out;
	if (mapping && mapping != mc) {
		if (zbv->offset < 0)
			bvec->bv_offset = round_up(-zbv->offset, bs);
		bvec->bv_len = round_up(zbv->end, bs) - bvec->bv_offset;
		return;
	}

	lock_page(page);

	/* only true if page reclaim goes wrong, should never happen */
	DBG_BUGON(justfound && PagePrivate(page));

	/* the page is still in manage cache */
	/* the cached page is still in managed cache */
	if (page->mapping == mc) {
		WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);

		if (!PagePrivate(page)) {
		WRITE_ONCE(zbv->page, page);
		/*
			 * impossible to be !PagePrivate(page) for
			 * the current restriction as well if
			 * the page is already in compressed_bvecs[].
		 * The cached page is still available but without a valid
		 * `->private` pcluster hint.  Let's reconnect them.
		 */
		if (!PagePrivate(page)) {
			DBG_BUGON(!justfound);

			justfound = 0;
			set_page_private(page, (unsigned long)pcl);
			SetPagePrivate(page);
			/* compressed_bvecs[] already takes a ref */
			attach_page_private(page, pcl);
			put_page(page);
		}

		/* no need to submit io if it is already up-to-date */
		/* no need to submit if it is already up-to-date */
		if (PageUptodate(page)) {
			unlock_page(page);
			page = NULL;
			bvec->bv_page = NULL;
		}
		goto out;
		return;
	}

	/*
	 * the managed page has been truncated, it's unsafe to
	 * reuse this one, let's allocate a new cache-managed page.
	 * It has been truncated, so it's unsafe to reuse this one. Let's
	 * allocate a new page for compressed data.
	 */
	DBG_BUGON(page->mapping);
	DBG_BUGON(!justfound);
@@ -1523,25 +1522,23 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
	unlock_page(page);
	put_page(page);
out_allocpage:
	page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
	if (oldpage != cmpxchg(&pcl->compressed_bvecs[nr].page,
			       oldpage, page)) {
		erofs_pagepool_add(pagepool, page);
	page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
	if (oldpage != cmpxchg(&zbv->page, oldpage, page)) {
		erofs_pagepool_add(&f->pagepool, page);
		cond_resched();
		goto repeat;
	}
	bvec->bv_page = page;
out_tocache:
	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
		/* turn into temporary page if fails (1 ref) */
	if (!tocache || bs != PAGE_SIZE ||
	    add_to_page_cache_lru(page, mc, pcl->obj.index + nr, gfp)) {
		/* turn into a temporary shortlived page (1 ref) */
		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
		goto out;
		return;
	}
	attach_page_private(page, pcl);
	/* drop a refcount added by allocpage (then we have 2 refs here) */
	/* drop a refcount added by allocpage (then 2 refs in total here) */
	put_page(page);

out:	/* the only exit (for tracing and debugging) */
	return page;
}

static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb,
@@ -1596,7 +1593,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
	qtail[JQ_BYPASS] = &pcl->next;
}

static void z_erofs_decompressqueue_endio(struct bio *bio)
static void z_erofs_submissionqueue_endio(struct bio *bio)
{
	struct z_erofs_decompressqueue *q = bio->bi_private;
	blk_status_t err = bio->bi_status;
@@ -1608,7 +1605,6 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)

		DBG_BUGON(PageUptodate(page));
		DBG_BUGON(z_erofs_page_is_invalidated(page));

		if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
			if (!err)
				SetPageUptodate(page);
@@ -1631,17 +1627,14 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
	struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
	z_erofs_next_pcluster_t owned_head = f->owned_head;
	/* bio is NULL initially, so no need to initialize last_{index,bdev} */
	pgoff_t last_index;
	erofs_off_t last_pa;
	struct block_device *last_bdev;
	unsigned int nr_bios = 0;
	struct bio *bio = NULL;
	unsigned long pflags;
	int memstall = 0;

	/*
	 * if managed cache is enabled, bypass jobqueue is needed,
	 * no need to read from device for all pclusters in this queue.
	 */
	/* No need to read from device for pclusters in the bypass queue. */
	q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL);
	q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, force_fg);

@@ -1654,7 +1647,8 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
	do {
		struct erofs_map_dev mdev;
		struct z_erofs_pcluster *pcl;
		pgoff_t cur, end;
		erofs_off_t cur, end;
		struct bio_vec bvec;
		unsigned int i = 0;
		bool bypass = true;

@@ -1673,18 +1667,14 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
		};
		(void)erofs_map_dev(sb, &mdev);

		cur = erofs_blknr(sb, mdev.m_pa);
		end = cur + pcl->pclusterpages;

		cur = mdev.m_pa;
		end = cur + (pcl->pclusterpages << PAGE_SHIFT);
		do {
			struct page *page;

			page = pickup_page_for_submission(pcl, i++,
					&f->pagepool, mc);
			if (!page)
			z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
			if (!bvec.bv_page)
				continue;

			if (bio && (cur != last_index + 1 ||
			if (bio && (cur != last_pa ||
				    last_bdev != mdev.m_bdev)) {
submit_bio_retry:
				submit_bio(bio);
@@ -1695,7 +1685,8 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
				bio = NULL;
			}

			if (unlikely(PageWorkingset(page)) && !memstall) {
			if (unlikely(PageWorkingset(bvec.bv_page)) &&
			    !memstall) {
				psi_memstall_enter(&pflags);
				memstall = 1;
			}
@@ -1703,23 +1694,24 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
			if (!bio) {
				bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
						REQ_OP_READ, GFP_NOIO);
				bio->bi_end_io = z_erofs_decompressqueue_endio;

				last_bdev = mdev.m_bdev;
				bio->bi_iter.bi_sector = (sector_t)cur <<
					(sb->s_blocksize_bits - 9);
				bio->bi_end_io = z_erofs_submissionqueue_endio;
				bio->bi_iter.bi_sector = cur >> 9;
				bio->bi_private = q[JQ_SUBMIT];
				if (readahead)
					bio->bi_opf |= REQ_RAHEAD;
				++nr_bios;
				last_bdev = mdev.m_bdev;
			}

			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
			if (cur + bvec.bv_len > end)
				bvec.bv_len = end - cur;
			if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
					  bvec.bv_offset))
				goto submit_bio_retry;

			last_index = cur;
			last_pa = cur + bvec.bv_len;
			bypass = false;
		} while (++cur < end);
		} while ((cur += bvec.bv_len) < end);

		if (!bypass)
			qtail[JQ_SUBMIT] = &pcl->next;