Commit f6db1b8e authored by David Teigland, committed by Steven Whitehouse
Browse files

[DLM] abort recovery more quickly



When we abort one recovery to do another, break out of the ping_members()
routine more quickly, and wake up the dlm_recoverd thread more quickly
instead of waiting for it to time out.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
parent 5ff51911
Loading
Loading
Loading
Loading
+19 −5
Original line number Diff line number Diff line
@@ -162,11 +162,22 @@ static void make_member_array(struct dlm_ls *ls)

/* send a status request to all members just to establish comms connections */

static void ping_members(struct dlm_ls *ls)
static int ping_members(struct dlm_ls *ls)
{
	struct dlm_member *memb;
	list_for_each_entry(memb, &ls->ls_nodes, list)
		dlm_rcom_status(ls, memb->nodeid);
	int error = 0;

	list_for_each_entry(memb, &ls->ls_nodes, list) {
		error = dlm_recovery_stopped(ls);
		if (error)
			break;
		error = dlm_rcom_status(ls, memb->nodeid);
		if (error)
			break;
	}
	if (error)
		log_debug(ls, "ping_members aborted %d", error);
	return error;
}

int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
@@ -212,10 +223,13 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
	dlm_set_recover_status(ls, DLM_RS_NODES);
	*neg_out = neg;

	ping_members(ls);
	error = ping_members(ls);
	if (error)
		goto out;

	error = dlm_recover_members_wait(ls);
	log_debug(ls, "total members %d", ls->ls_num_nodes);
 out:
	log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error);
	return error;
}

+1 −0
Original line number Diff line number Diff line
@@ -275,6 +275,7 @@ void dlm_recoverd_stop(struct dlm_ls *ls)

/*
 * Wake up waiters on ls->ls_wait_general, then acquire ls->ls_recoverd_active.
 *
 * The wake-up is issued before taking the mutex; per the commit description
 * its purpose is to make the dlm_recoverd thread react to an aborted recovery
 * promptly instead of waiting for its wait to time out.
 *
 * The mutex is still held when this function returns.
 * NOTE(review): presumably a matching resume routine releases it -- not
 * visible here, confirm against the rest of the file.
 */
void dlm_recoverd_suspend(struct dlm_ls *ls)
{
	wake_up(&ls->ls_wait_general);
	mutex_lock(&ls->ls_recoverd_active);
}