Commit 076381c2 authored by Ilya Dryomov's avatar Ilya Dryomov
Browse files

libceph: fix potential use-after-free in have_mon_and_osd_map()



The wait loop in __ceph_open_session() can race with the client
receiving a new monmap or osdmap shortly after the initial map is
received.  Both ceph_monc_handle_map() and handle_one_map() install
a new map immediately after freeing the old one

    kfree(monc->monmap);
    monc->monmap = monmap;

    ceph_osdmap_destroy(osdc->osdmap);
    osdc->osdmap = newmap;

under client->monc.mutex and client->osdc.lock respectively, but
because neither is taken in have_mon_and_osd_map() it's possible for
client->monc.monmap->epoch and client->osdc.osdmap->epoch arms in

    client->monc.monmap && client->monc.monmap->epoch &&
        client->osdc.osdmap && client->osdc.osdmap->epoch;

condition to dereference an already freed map.  This happens to be
reproducible with generic/395 and generic/397 with KASAN enabled:

    BUG: KASAN: slab-use-after-free in have_mon_and_osd_map+0x56/0x70
    Read of size 4 at addr ffff88811012d810 by task mount.ceph/13305
    CPU: 2 UID: 0 PID: 13305 Comm: mount.ceph Not tainted 6.14.0-rc2-build2+ #1266
    ...
    Call Trace:
    <TASK>
    have_mon_and_osd_map+0x56/0x70
    ceph_open_session+0x182/0x290
    ceph_get_tree+0x333/0x680
    vfs_get_tree+0x49/0x180
    do_new_mount+0x1a3/0x2d0
    path_mount+0x6dd/0x730
    do_mount+0x99/0xe0
    __do_sys_mount+0x141/0x180
    do_syscall_64+0x9f/0x100
    entry_SYSCALL_64_after_hwframe+0x76/0x7e
    </TASK>

    Allocated by task 13305:
    ceph_osdmap_alloc+0x16/0x130
    ceph_osdc_init+0x27a/0x4c0
    ceph_create_client+0x153/0x190
    create_fs_client+0x50/0x2a0
    ceph_get_tree+0xff/0x680
    vfs_get_tree+0x49/0x180
    do_new_mount+0x1a3/0x2d0
    path_mount+0x6dd/0x730
    do_mount+0x99/0xe0
    __do_sys_mount+0x141/0x180
    do_syscall_64+0x9f/0x100
    entry_SYSCALL_64_after_hwframe+0x76/0x7e

    Freed by task 9475:
    kfree+0x212/0x290
    handle_one_map+0x23c/0x3b0
    ceph_osdc_handle_map+0x3c9/0x590
    mon_dispatch+0x655/0x6f0
    ceph_con_process_message+0xc3/0xe0
    ceph_con_v1_try_read+0x614/0x760
    ceph_con_workfn+0x2de/0x650
    process_one_work+0x486/0x7c0
    process_scheduled_works+0x73/0x90
    worker_thread+0x1c8/0x2a0
    kthread+0x2ec/0x300
    ret_from_fork+0x24/0x40
    ret_from_fork_asm+0x1a/0x30

Rewrite the wait loop to check the above condition directly with
client->monc.mutex and client->osdc.lock taken as appropriate.  While
at it, improve the timeout handling (previously mount_timeout could be
exceeded in case wait_event_interruptible_timeout() slept more than
once) and access client->auth_err under client->monc.mutex to match
how it's set in finish_auth().

monmap_show() and osdmap_show() now take the respective lock before
accessing the map as well.

Cc: stable@vger.kernel.org
Reported-by: default avatarDavid Howells <dhowells@redhat.com>
Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
Reviewed-by: default avatarViacheslav Dubeyko <Slava.Dubeyko@ibm.com>
parent ac3fd01e
Loading
Loading
Loading
Loading
+32 −21
Original line number Diff line number Diff line
@@ -785,42 +785,53 @@ void ceph_reset_client_addr(struct ceph_client *client)
}
EXPORT_SYMBOL(ceph_reset_client_addr);

/*
 * true if we have the mon map (and have thus joined the cluster)
 */
static bool have_mon_and_osd_map(struct ceph_client *client)
{
	return client->monc.monmap && client->monc.monmap->epoch &&
	       client->osdc.osdmap && client->osdc.osdmap->epoch;
}

/*
 * mount: join the ceph cluster, and open root directory.
 */
int __ceph_open_session(struct ceph_client *client, unsigned long started)
{
	unsigned long timeout = client->options->mount_timeout;
	long err;
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	long timeout = ceph_timeout_jiffies(client->options->mount_timeout);
	bool have_monmap, have_osdmap;
	int err;

	/* open session, and wait for mon and osd maps */
	err = ceph_monc_open_session(&client->monc);
	if (err < 0)
		return err;

	while (!have_mon_and_osd_map(client)) {
		if (timeout && time_after_eq(jiffies, started + timeout))
			return -ETIMEDOUT;
	add_wait_queue(&client->auth_wq, &wait);
	for (;;) {
		mutex_lock(&client->monc.mutex);
		err = client->auth_err;
		have_monmap = client->monc.monmap && client->monc.monmap->epoch;
		mutex_unlock(&client->monc.mutex);

		down_read(&client->osdc.lock);
		have_osdmap = client->osdc.osdmap && client->osdc.osdmap->epoch;
		up_read(&client->osdc.lock);

		if (err || (have_monmap && have_osdmap))
			break;

		if (signal_pending(current)) {
			err = -ERESTARTSYS;
			break;
		}

		if (!timeout) {
			err = -ETIMEDOUT;
			break;
		}

		/* wait */
		dout("mount waiting for mon_map\n");
		err = wait_event_interruptible_timeout(client->auth_wq,
			have_mon_and_osd_map(client) || (client->auth_err < 0),
			ceph_timeout_jiffies(timeout));
		if (err < 0)
			return err;
		if (client->auth_err < 0)
			return client->auth_err;
		timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
	}
	remove_wait_queue(&client->auth_wq, &wait);

	if (err)
		return err;

	pr_info("client%llu fsid %pU\n", ceph_client_gid(client),
		&client->fsid);
+10 −4
Original line number Diff line number Diff line
@@ -36,8 +36,9 @@ static int monmap_show(struct seq_file *s, void *p)
	int i;
	struct ceph_client *client = s->private;

	mutex_lock(&client->monc.mutex);
	if (client->monc.monmap == NULL)
		return 0;
		goto out_unlock;

	seq_printf(s, "epoch %d\n", client->monc.monmap->epoch);
	for (i = 0; i < client->monc.monmap->num_mon; i++) {
@@ -48,6 +49,9 @@ static int monmap_show(struct seq_file *s, void *p)
			   ENTITY_NAME(inst->name),
			   ceph_pr_addr(&inst->addr));
	}

out_unlock:
	mutex_unlock(&client->monc.mutex);
	return 0;
}

@@ -56,13 +60,14 @@ static int osdmap_show(struct seq_file *s, void *p)
	int i;
	struct ceph_client *client = s->private;
	struct ceph_osd_client *osdc = &client->osdc;
	struct ceph_osdmap *map = osdc->osdmap;
	struct ceph_osdmap *map;
	struct rb_node *n;

	down_read(&osdc->lock);
	map = osdc->osdmap;
	if (map == NULL)
		return 0;
		goto out_unlock;

	down_read(&osdc->lock);
	seq_printf(s, "epoch %u barrier %u flags 0x%x\n", map->epoch,
			osdc->epoch_barrier, map->flags);

@@ -131,6 +136,7 @@ static int osdmap_show(struct seq_file *s, void *p)
		seq_printf(s, "]\n");
	}

out_unlock:
	up_read(&osdc->lock);
	return 0;
}