Skip to content

Commit 2c6b640

Browse files
ohartoovjgunthorpe
authored andcommitted
RDMA/mlx5: Fix unsafe xarray access in implicit ODP handling
__xa_store() and __xa_erase() were used without holding the proper lock, which led to a lockdep warning due to unsafe RCU usage. This patch replaces them with xa_store() and xa_erase(), which perform the necessary locking internally. ============================= WARNING: suspicious RCPU usage 6.14.0-rc7_for_upstream_debug_2025_03_18_15_01 #1 Not tainted ----------------------------- ./include/linux/xarray.h:1211 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by kworker/u136:0/219: at: process_one_work+0xbe4/0x15f0 process_one_work+0x75c/0x15f0 pagefault_mr+0x9a5/0x1390 [mlx5_ib] stack backtrace: CPU: 14 UID: 0 PID: 219 Comm: kworker/u136:0 Not tainted 6.14.0-rc7_for_upstream_debug_2025_03_18_15_01 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5_ib_page_fault mlx5_ib_eqe_pf_action [mlx5_ib] Call Trace: dump_stack_lvl+0xa8/0xc0 lockdep_rcu_suspicious+0x1e6/0x260 xas_create+0xb8a/0xee0 xas_store+0x73/0x14c0 __xa_store+0x13c/0x220 ? xa_store_range+0x390/0x390 ? spin_bug+0x1d0/0x1d0 pagefault_mr+0xcb5/0x1390 [mlx5_ib] ? _raw_spin_unlock+0x1f/0x30 mlx5_ib_eqe_pf_action+0x3be/0x2620 [mlx5_ib] ? lockdep_hardirqs_on_prepare+0x400/0x400 ? mlx5_ib_invalidate_range+0xcb0/0xcb0 [mlx5_ib] process_one_work+0x7db/0x15f0 ? pwq_dec_nr_in_flight+0xda0/0xda0 ? assign_work+0x168/0x240 worker_thread+0x57d/0xcd0 ? rescuer_thread+0xc40/0xc40 kthread+0x3b3/0x800 ? kthread_is_per_cpu+0xb0/0xb0 ? lock_downgrade+0x680/0x680 ? do_raw_spin_lock+0x12d/0x270 ? spin_bug+0x1d0/0x1d0 ? finish_task_switch.isra.0+0x284/0x9e0 ? lockdep_hardirqs_on_prepare+0x284/0x400 ? kthread_is_per_cpu+0xb0/0xb0 ret_from_fork+0x2d/0x70 ? kthread_is_per_cpu+0xb0/0xb0 ret_from_fork_asm+0x11/0x20 Fixes: d3d9304 ("RDMA/mlx5: Fix implicit ODP use after free") Link: https://patch.msgid.link/r/a85ddd16f45c8cb2bc0a188c2b0fcedfce975eb8.1750061791.git.leon@kernel.org Signed-off-by: Or Har-Toov <ohartoov@nvidia.com> Reviewed-by: Patrisious Haddad <phaddad@nvidia.com> Signed-off-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
1 parent b26852d commit 2c6b640

File tree

1 file changed

+4
-4
lines changed
  • drivers/infiniband/hw/mlx5

1 file changed

+4
-4
lines changed

drivers/infiniband/hw/mlx5/odp.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
259259
}
260260

261261
if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
262-
__xa_erase(&mr_to_mdev(mr)->odp_mkeys,
263-
mlx5_base_mkey(mr->mmkey.key));
262+
xa_erase(&mr_to_mdev(mr)->odp_mkeys,
263+
mlx5_base_mkey(mr->mmkey.key));
264264
xa_unlock(&imr->implicit_children);
265265

266266
/* Freeing a MR is a sleeping operation, so bounce to a work queue */
@@ -532,8 +532,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
532532
}
533533

534534
if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
535-
ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
536-
&mr->mmkey, GFP_KERNEL);
535+
ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
536+
&mr->mmkey, GFP_KERNEL);
537537
if (xa_is_err(ret)) {
538538
ret = ERR_PTR(xa_err(ret));
539539
__xa_erase(&imr->implicit_children, idx);

0 commit comments

Comments
 (0)