syzbot

possible deadlock in rcu_gp_fqs_loop

Status: upstream: reported on 2025/01/15 08:59
Reported-by: syzbot+d43d1a6cf78fd3fa7596@syzkaller.appspotmail.com
First crash: 3h55m, last: 3h55m

Sample crash report:
======================================================
WARNING: possible circular locking dependency detected
5.15.176-syzkaller #0 Not tainted
------------------------------------------------------
rcu_preempt/15 is trying to acquire lock:
ffff8880b8f3a318 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock_nested+0x26/0x140 kernel/sched/core.c:475

but task is already holding lock:
ffffffff8cb23c98 (rcu_node_0){-.-.}-{2:2}, at: force_qs_rnp kernel/rcu/tree.c:2646 [inline]
ffffffff8cb23c98 (rcu_node_0){-.-.}-{2:2}, at: rcu_gp_fqs_loop+0x734/0x1080 kernel/rcu/tree.c:1986

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #3 (rcu_node_0){-.-.}-{2:2}:
       lock_acquire+0x1db/0x4f0 kernel/locking/lockdep.c:5623
       __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
       _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:154
       check_cb_ovld kernel/rcu/tree.c:2974 [inline]
       __call_rcu kernel/rcu/tree.c:3021 [inline]
       call_rcu+0x350/0xa70 kernel/rcu/tree.c:3087
       queue_rcu_work+0x8b/0xa0 kernel/workqueue.c:1788
       kfree_rcu_monitor+0x2f3/0x6c0 kernel/rcu/tree.c:3414
       process_one_work+0x8a1/0x10c0 kernel/workqueue.c:2310
       worker_thread+0xaca/0x1280 kernel/workqueue.c:2457
       kthread+0x3f6/0x4f0 kernel/kthread.c:334
       ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:287

-> #2 (krc.lock){..-.}-{2:2}:
       lock_acquire+0x1db/0x4f0 kernel/locking/lockdep.c:5623
       __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
       _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:154
       krc_this_cpu_lock kernel/rcu/tree.c:3199 [inline]
       add_ptr_to_bulk_krc_lock kernel/rcu/tree.c:3506 [inline]
       kvfree_call_rcu+0x1b5/0x8a0 kernel/rcu/tree.c:3597
       trie_delete_elem+0x520/0x690
       0xffffffffa004cf5e
       bpf_dispatcher_nop_func include/linux/bpf.h:790 [inline]
       __bpf_prog_run include/linux/filter.h:628 [inline]
       bpf_prog_run include/linux/filter.h:635 [inline]
       __bpf_trace_run kernel/trace/bpf_trace.c:1878 [inline]
       bpf_trace_run3+0x1d1/0x380 kernel/trace/bpf_trace.c:1916
       __bpf_trace_kmem_cache_free+0x99/0xc0 include/trace/events/kmem.h:138
       trace_kmem_cache_free include/trace/events/kmem.h:138 [inline]
       kmem_cache_free+0x1ce/0x1f0 mm/slub.c:3516
       __dentry_kill+0x4f4/0x650 fs/dcache.c:600
       dentry_kill+0xbb/0x290
       dput+0xd8/0x1a0 fs/dcache.c:893
       __fput+0x636/0x8e0 fs/file_table.c:288
       task_work_run+0x129/0x1a0 kernel/task_work.c:188
       exit_task_work include/linux/task_work.h:33 [inline]
       do_exit+0x6a3/0x2480 kernel/exit.c:874
       do_group_exit+0x144/0x310 kernel/exit.c:996
       get_signal+0xc66/0x14e0 kernel/signal.c:2900
       arch_do_signal_or_restart+0xc3/0x1890 arch/x86/kernel/signal.c:867
       handle_signal_work kernel/entry/common.c:154 [inline]
       exit_to_user_mode_loop+0x97/0x130 kernel/entry/common.c:178
       exit_to_user_mode_prepare+0xb1/0x140 kernel/entry/common.c:214
       __syscall_exit_to_user_mode_work kernel/entry/common.c:296 [inline]
       syscall_exit_to_user_mode+0x5d/0x240 kernel/entry/common.c:307
       do_syscall_64+0x47/0xb0 arch/x86/entry/common.c:86
       entry_SYSCALL_64_after_hwframe+0x66/0xd0

-> #1 (&trie->lock){..-.}-{2:2}:
       lock_acquire+0x1db/0x4f0 kernel/locking/lockdep.c:5623
       __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
       _raw_spin_lock_irqsave+0xd1/0x120 kernel/locking/spinlock.c:162
       trie_delete_elem+0x90/0x690 kernel/bpf/lpm_trie.c:467
       0xffffffffa0048d9e
       bpf_dispatcher_nop_func include/linux/bpf.h:790 [inline]
       __bpf_prog_run include/linux/filter.h:628 [inline]
       bpf_prog_run include/linux/filter.h:635 [inline]
       __bpf_trace_run kernel/trace/bpf_trace.c:1878 [inline]
       bpf_trace_run3+0x1d1/0x380 kernel/trace/bpf_trace.c:1916
       __traceiter_sched_switch+0x7d/0xb0 include/trace/events/sched.h:220
       trace_sched_switch include/trace/events/sched.h:220 [inline]
       __schedule+0x1e8d/0x45b0 kernel/sched/core.c:6370
       schedule+0x11b/0x1f0 kernel/sched/core.c:6456
       freezable_schedule include/linux/freezer.h:172 [inline]
       futex_wait_queue_me+0x25b/0x480 kernel/futex/core.c:2863
       futex_wait+0x2f8/0x740 kernel/futex/core.c:2964
       do_futex+0x1414/0x1810 kernel/futex/core.c:3982
       __do_sys_futex kernel/futex/core.c:4059 [inline]
       __se_sys_futex+0x407/0x490 kernel/futex/core.c:4040
       do_syscall_x64 arch/x86/entry/common.c:50 [inline]
       do_syscall_64+0x3b/0xb0 arch/x86/entry/common.c:80
       entry_SYSCALL_64_after_hwframe+0x66/0xd0

-> #0 (&rq->__lock){-.-.}-{2:2}:
       check_prev_add kernel/locking/lockdep.c:3053 [inline]
       check_prevs_add kernel/locking/lockdep.c:3172 [inline]
       validate_chain+0x1649/0x5930 kernel/locking/lockdep.c:3788
       __lock_acquire+0x1295/0x1ff0 kernel/locking/lockdep.c:5012
       lock_acquire+0x1db/0x4f0 kernel/locking/lockdep.c:5623
       _raw_spin_lock_nested+0x2d/0x40 kernel/locking/spinlock.c:368
       raw_spin_rq_lock_nested+0x26/0x140 kernel/sched/core.c:475
       raw_spin_rq_lock kernel/sched/sched.h:1326 [inline]
       _raw_spin_rq_lock_irqsave kernel/sched/sched.h:1345 [inline]
       resched_cpu+0x122/0x2c0 kernel/sched/core.c:991
       rcu_implicit_dynticks_qs+0x437/0xc00 kernel/rcu/tree.c:1329
       force_qs_rnp kernel/rcu/tree.c:2664 [inline]
       rcu_gp_fqs_loop+0x914/0x1080 kernel/rcu/tree.c:1986
       rcu_gp_kthread+0xa4/0x360 kernel/rcu/tree.c:2145
       kthread+0x3f6/0x4f0 kernel/kthread.c:334
       ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:287

other info that might help us debug this:

Chain exists of:
  &rq->__lock --> krc.lock --> rcu_node_0

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(rcu_node_0);
                               lock(krc.lock);
                               lock(rcu_node_0);
  lock(&rq->__lock);

 *** DEADLOCK ***

1 lock held by rcu_preempt/15:
 #0: ffffffff8cb23c98 (rcu_node_0){-.-.}-{2:2}, at: force_qs_rnp kernel/rcu/tree.c:2646 [inline]
 #0: ffffffff8cb23c98 (rcu_node_0){-.-.}-{2:2}, at: rcu_gp_fqs_loop+0x734/0x1080 kernel/rcu/tree.c:1986

stack backtrace:
CPU: 0 PID: 15 Comm: rcu_preempt Not tainted 5.15.176-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x1e3/0x2d0 lib/dump_stack.c:106
 check_noncircular+0x2f8/0x3b0 kernel/locking/lockdep.c:2133
 check_prev_add kernel/locking/lockdep.c:3053 [inline]
 check_prevs_add kernel/locking/lockdep.c:3172 [inline]
 validate_chain+0x1649/0x5930 kernel/locking/lockdep.c:3788
 __lock_acquire+0x1295/0x1ff0 kernel/locking/lockdep.c:5012
 lock_acquire+0x1db/0x4f0 kernel/locking/lockdep.c:5623
 _raw_spin_lock_nested+0x2d/0x40 kernel/locking/spinlock.c:368
 raw_spin_rq_lock_nested+0x26/0x140 kernel/sched/core.c:475
 raw_spin_rq_lock kernel/sched/sched.h:1326 [inline]
 _raw_spin_rq_lock_irqsave kernel/sched/sched.h:1345 [inline]
 resched_cpu+0x122/0x2c0 kernel/sched/core.c:991
 rcu_implicit_dynticks_qs+0x437/0xc00 kernel/rcu/tree.c:1329
 force_qs_rnp kernel/rcu/tree.c:2664 [inline]
 rcu_gp_fqs_loop+0x914/0x1080 kernel/rcu/tree.c:1986
 rcu_gp_kthread+0xa4/0x360 kernel/rcu/tree.c:2145
 kthread+0x3f6/0x4f0 kernel/kthread.c:334
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:287
 </TASK>
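
What lockdep is reporting, in short: the RCU grace-period kthread acquires rcu_node_0 in force_qs_rnp() and then needs &rq->__lock inside resched_cpu() (dependency #0). In the other direction, __schedule() fires the sched_switch tracepoint while holding &rq->__lock; a BPF program attached there calls trie_delete_elem(), which takes &trie->lock (#1), its kvfree_call_rcu() takes the per-CPU krc.lock (#2), and kfree_rcu_monitor() takes rcu_node_0 while holding krc.lock (#3). Collapsed to two locks, this is the classic AB-BA inversion. Below is a minimal userspace sketch of that inversion (hypothetical stand-ins only: pthread mutexes in place of the raw spinlocks; this is not the syzkaller reproducer):

/* Minimal AB-BA sketch of the cycle above. Hypothetical stand-ins:
 * lock_a ~ rcu_node_0, lock_b ~ &rq->__lock; pthread mutexes instead
 * of raw spinlocks. Build: cc -pthread abba.c. May hang by design. */
#include <pthread.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* ~ rcu_node_0  */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* ~ &rq->__lock */

/* Mirrors the GP kthread: holds rcu_node_0, then takes rq->__lock
 * via resched_cpu() (dependency #0). */
static void *gp_kthread_path(void *unused)
{
        pthread_mutex_lock(&lock_a);
        pthread_mutex_lock(&lock_b);        /* A -> B */
        pthread_mutex_unlock(&lock_b);
        pthread_mutex_unlock(&lock_a);
        return NULL;
}

/* Mirrors the tracepoint path: __schedule() holds rq->__lock while the
 * attached BPF program ends up in kvfree_call_rcu() and, via krc.lock,
 * back on rcu_node_0 (dependencies #1..#3 collapsed). */
static void *sched_switch_path(void *unused)
{
        pthread_mutex_lock(&lock_b);
        pthread_mutex_lock(&lock_a);        /* B -> A: the inversion */
        pthread_mutex_unlock(&lock_a);
        pthread_mutex_unlock(&lock_b);
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;

        pthread_create(&t1, NULL, gp_kthread_path, NULL);
        pthread_create(&t2, NULL, sched_switch_path, NULL);
        pthread_join(t1, NULL); /* hangs if each thread won its first lock */
        pthread_join(t2, NULL);
        return 0;
}

Any interleaving in which each thread acquires its first lock before either acquires its second reproduces the hang, which is exactly the two-CPU scenario in the "Possible unsafe locking scenario" section above.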

Crashes (1):
Time:      2025/01/15 08:58
Kernel:    linux-5.15.y
Commit:    4735586da88e
Syzkaller: 7315a7cf
Config:    .config
Log:       console log
Report:    report
VM info:   info
Assets:    disk image, vmlinux, kernel image
Manager:   ci2-linux-5-15-kasan
Title:     possible deadlock in rcu_gp_fqs_loop