Files
linux-net/drivers/infiniband/core/iwcm.h
Jacob Moroni 7874eeacfa RDMA/iwcm: Fix workqueue list corruption by removing work_list
The commit e1168f0 ("RDMA/iwcm: Simplify cm_event_handler()")
changed the work submission logic to unconditionally call
queue_work() with the expectation that queue_work() would
have no effect if work was already pending. The problem is
that a free list of struct iwcm_work is used (for which
struct work_struct is embedded), so each call to queue_work()
is basically unique and therefore does indeed queue the work.

This causes a problem in the work handler which walks the work_list
until it's empty to process entries. This means that a single
run of the work handler could process item N+1 and release it
back to the free list while the actual workqueue entry is still
queued. It could then get reused (INIT_WORK...) and lead to
list corruption in the workqueue logic.

Fix this by just removing the work_list. The workqueue already
does this for us.

This fixes the following error that was observed when stress
testing with ucmatose on an Intel E830 in iWARP mode:

[  151.465780] list_del corruption. next->prev should be ffff9f0915c69c08, but was ffff9f0a1116be08. (next=ffff9f0a15b11c08)
[  151.466639] ------------[ cut here ]------------
[  151.466986] kernel BUG at lib/list_debug.c:67!
[  151.467349] Oops: invalid opcode: 0000 [#1] SMP NOPTI
[  151.467753] CPU: 14 UID: 0 PID: 2306 Comm: kworker/u64:18 Not tainted 6.19.0-rc4+ #1 PREEMPT(voluntary)
[  151.468466] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[  151.469192] Workqueue:  0x0 (iw_cm_wq)
[  151.469478] RIP: 0010:__list_del_entry_valid_or_report+0xf0/0x100
[  151.469942] Code: c7 58 5f 4c b2 e8 10 50 aa ff 0f 0b 48 89 ef e8 36 57 cb ff 48 8b 55 08 48 89 e9 48 89 de 48 c7 c7 a8 5f 4c b2 e8 f0 4f aa ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 90 90 90 90 90 90
[  151.471323] RSP: 0000:ffffb15644e7bd68 EFLAGS: 00010046
[  151.471712] RAX: 000000000000006d RBX: ffff9f0915c69c08 RCX: 0000000000000027
[  151.472243] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9f0a37d9c600
[  151.472768] RBP: ffff9f0a15b11c08 R08: 0000000000000000 R09: c0000000ffff7fff
[  151.473294] R10: 0000000000000001 R11: ffffb15644e7bba8 R12: ffff9f092339ee68
[  151.473817] R13: ffff9f0900059c28 R14: ffff9f092339ee78 R15: 0000000000000000
[  151.474344] FS:  0000000000000000(0000) GS:ffff9f0a847b5000(0000) knlGS:0000000000000000
[  151.474934] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  151.475362] CR2: 0000559e233a9088 CR3: 000000020296b004 CR4: 0000000000770ef0
[  151.475895] PKRU: 55555554
[  151.476118] Call Trace:
[  151.476331]  <TASK>
[  151.476497]  move_linked_works+0x49/0xa0
[  151.476792]  __pwq_activate_work.isra.46+0x2f/0xa0
[  151.477151]  pwq_dec_nr_in_flight+0x1e0/0x2f0
[  151.477479]  process_scheduled_works+0x1c8/0x410
[  151.477823]  worker_thread+0x125/0x260
[  151.478108]  ? __pfx_worker_thread+0x10/0x10
[  151.478430]  kthread+0xfe/0x240
[  151.478671]  ? __pfx_kthread+0x10/0x10
[  151.478955]  ? __pfx_kthread+0x10/0x10
[  151.479240]  ret_from_fork+0x208/0x270
[  151.479523]  ? __pfx_kthread+0x10/0x10
[  151.479806]  ret_from_fork_asm+0x1a/0x30
[  151.480103]  </TASK>

Fixes: e1168f09b3 ("RDMA/iwcm: Simplify cm_event_handler()")
Signed-off-by: Jacob Moroni <jmoroni@google.com>
Link: https://patch.msgid.link/20260112020006.1352438-1-jmoroni@google.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2026-01-15 04:59:53 -05:00

62 lines
2.3 KiB
C

/*
* Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef IWCM_H
#define IWCM_H
enum iw_cm_state {
IW_CM_STATE_IDLE, /* unbound, inactive */
IW_CM_STATE_LISTEN, /* listen waiting for connect */
IW_CM_STATE_CONN_RECV, /* inbound waiting for user accept */
IW_CM_STATE_CONN_SENT, /* outbound waiting for peer accept */
IW_CM_STATE_ESTABLISHED, /* established */
IW_CM_STATE_CLOSING, /* disconnect */
IW_CM_STATE_DESTROYING /* object being deleted */
};
struct iwcm_id_private {
struct iw_cm_id id;
enum iw_cm_state state;
unsigned long flags;
struct ib_qp *qp;
struct completion destroy_comp;
wait_queue_head_t connect_wait;
spinlock_t lock;
refcount_t refcount;
struct list_head work_free_list;
};
#define IWCM_F_DROP_EVENTS 1
#define IWCM_F_CONNECT_WAIT 2
#endif /* IWCM_H */