Commit dcffc3b1 authored by Kent Overstreet
Browse files

bcachefs: Split up bch_dev.io_ref



We now have separate per device io_refs for read and write access.

This fixes a device removal bug where the discard workers were still
running while we're removing alloc info for that device.

It's also a bit of hardening; we no longer allow writes to devices that
are read-only.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent f1350c2c
Loading
Loading
Loading
Loading
+7 −7
Original line number Diff line number Diff line
@@ -1950,7 +1950,7 @@ static void bch2_do_discards_work(struct work_struct *work)
	trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
			      bch2_err_str(ret));

	percpu_ref_put(&ca->io_ref);
	percpu_ref_put(&ca->io_ref[WRITE]);
	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}

@@ -1967,7 +1967,7 @@ void bch2_dev_do_discards(struct bch_dev *ca)
	if (queue_work(c->write_ref_wq, &ca->discard_work))
		return;

	percpu_ref_put(&ca->io_ref);
	percpu_ref_put(&ca->io_ref[WRITE]);
put_write_ref:
	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}
@@ -2045,7 +2045,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
	trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));

	bch2_trans_put(trans);
	percpu_ref_put(&ca->io_ref);
	percpu_ref_put(&ca->io_ref[WRITE]);
	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}

@@ -2065,7 +2065,7 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
	if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
		return;

	percpu_ref_put(&ca->io_ref);
	percpu_ref_put(&ca->io_ref[WRITE]);
put_ref:
	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}
@@ -2256,7 +2256,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
	bch2_trans_iter_exit(trans, &iter);
err:
	bch2_trans_put(trans);
	percpu_ref_put(&ca->io_ref);
	percpu_ref_put(&ca->io_ref[WRITE]);
	bch2_bkey_buf_exit(&last_flushed, c);
	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}
@@ -2274,7 +2274,7 @@ void bch2_dev_do_invalidates(struct bch_dev *ca)
	if (queue_work(c->write_ref_wq, &ca->invalidate_work))
		return;

	percpu_ref_put(&ca->io_ref);
	percpu_ref_put(&ca->io_ref[WRITE]);
put_ref:
	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}
@@ -2506,7 +2506,7 @@ void bch2_recalc_capacity(struct bch_fs *c)

	bch2_set_ra_pages(c, ra_pages);

	for_each_rw_member(c, ca) {
	__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
		u64 dev_reserve = 0;

		/*
+1 −1
Original line number Diff line number Diff line
@@ -462,7 +462,7 @@ static int check_extent_checksum(struct btree_trans *trans,
	if (bio)
		bio_put(bio);
	kvfree(data_buf);
	percpu_ref_put(&ca->io_ref);
	percpu_ref_put(&ca->io_ref[READ]);
	printbuf_exit(&buf);
	return ret;
}
+2 −2
Original line number Diff line number Diff line
@@ -524,8 +524,8 @@ struct bch_dev {
	struct percpu_ref	ref;
#endif
	struct completion	ref_completion;
	struct percpu_ref	io_ref;
	struct completion	io_ref_completion;
	struct percpu_ref	io_ref[2];
	struct completion	io_ref_completion[2];

	struct bch_fs		*fs;

+9 −5
Original line number Diff line number Diff line
@@ -1353,7 +1353,7 @@ static void btree_node_read_work(struct work_struct *work)
					"btree read error %s for %s",
					bch2_blk_status_to_str(bio->bi_status), buf.buf);
		if (rb->have_ioref)
			percpu_ref_put(&ca->io_ref);
			percpu_ref_put(&ca->io_ref[READ]);
		rb->have_ioref = false;

		bch2_mark_io_failure(&failed, &rb->pick, false);
@@ -1609,7 +1609,7 @@ static void btree_node_read_all_replicas_endio(struct bio *bio)
		struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev);

		bch2_latency_acct(ca, rb->start_time, READ);
		percpu_ref_put(&ca->io_ref);
		percpu_ref_put(&ca->io_ref[READ]);
	}

	ra->err[rb->idx] = bio->bi_status;
@@ -1928,7 +1928,7 @@ static void btree_node_scrub_work(struct work_struct *work)
	printbuf_exit(&err);
	bch2_bkey_buf_exit(&scrub->key, c);;
	btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
	percpu_ref_put(&scrub->ca->io_ref);
	percpu_ref_put(&scrub->ca->io_ref[READ]);
	kfree(scrub);
	bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
}
@@ -1997,7 +1997,7 @@ int bch2_btree_node_scrub(struct btree_trans *trans,
	return 0;
err_free:
	btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf);
	percpu_ref_put(&ca->io_ref);
	percpu_ref_put(&ca->io_ref[READ]);
err:
	bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
	return ret;
@@ -2159,8 +2159,12 @@ static void btree_node_write_endio(struct bio *bio)
		spin_unlock_irqrestore(&c->btree_write_error_lock, flags);
	}

	/*
	 * XXX: we should be using io_ref[WRITE], but we aren't retrying failed
	 * btree writes yet (due to device removal/ro):
	 */
	if (wbio->have_ioref)
		percpu_ref_put(&ca->io_ref);
		percpu_ref_put(&ca->io_ref[READ]);

	if (parent) {
		bio_put(bio);
+4 −4
Original line number Diff line number Diff line
@@ -271,7 +271,7 @@ static int read_btree_nodes_worker(void *p)
err:
	bio_put(bio);
	free_page((unsigned long) buf);
	percpu_ref_put(&ca->io_ref);
	percpu_ref_put(&ca->io_ref[READ]);
	closure_put(w->cl);
	kfree(w);
	return 0;
@@ -291,7 +291,7 @@ static int read_btree_nodes(struct find_btree_nodes *f)

		struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
		if (!w) {
			percpu_ref_put(&ca->io_ref);
			percpu_ref_put(&ca->io_ref[READ]);
			ret = -ENOMEM;
			goto err;
		}
@@ -303,14 +303,14 @@ static int read_btree_nodes(struct find_btree_nodes *f)
		struct task_struct *t = kthread_create(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
		ret = PTR_ERR_OR_ZERO(t);
		if (ret) {
			percpu_ref_put(&ca->io_ref);
			percpu_ref_put(&ca->io_ref[READ]);
			kfree(w);
			bch_err_msg(c, ret, "starting kthread");
			break;
		}

		closure_get(&cl);
		percpu_ref_get(&ca->io_ref);
		percpu_ref_get(&ca->io_ref[READ]);
		wake_up_process(t);
	}
err:
Loading