syzbot


possible deadlock in __change_pid

Status: upstream: reported on 2024/04/28 03:03
Subsystems: kernel
Reported-by: syzbot+e7d7533fd9113a64a564@syzkaller.appspotmail.com
First crash: 17d, last: 12d
Discussions (1)
Title                                                  Replies (including bot)  Last reply
[syzbot] [kernel?] possible deadlock in __change_pid   0 (1)                    2024/04/28 03:03

Sample crash report:
======================================================
WARNING: possible circular locking dependency detected
6.9.0-rc6-syzkaller #0 Not tainted
------------------------------------------------------
syz-executor.0/6788 is trying to acquire lock:
ffff8880b943d998 (&pool->lock){-.-.}-{2:2}, at: __queue_work+0x23a/0x1020 kernel/workqueue.c:2346

but task is already holding lock:
ffff8880604b9d18 (&pid->wait_pidfd){....}-{2:2}, at: __wake_up_common_lock kernel/sched/wait.c:105 [inline]
ffff8880604b9d18 (&pid->wait_pidfd){....}-{2:2}, at: __wake_up+0x1c/0x60 kernel/sched/wait.c:127

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #4 (&pid->wait_pidfd){....}-{2:2}:
       __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
       _raw_spin_lock_irqsave+0x3a/0x60 kernel/locking/spinlock.c:162
       __wake_up_common_lock kernel/sched/wait.c:105 [inline]
       __wake_up+0x1c/0x60 kernel/sched/wait.c:127
       __change_pid+0x199/0x5a0 kernel/pid.c:360
       __unhash_process kernel/exit.c:130 [inline]
       __exit_signal kernel/exit.c:202 [inline]
       release_task+0xb96/0x1b10 kernel/exit.c:259
       exit_notify kernel/exit.c:774 [inline]
       do_exit+0x1680/0x2c10 kernel/exit.c:898
       call_usermodehelper_exec_async+0x379/0x4c0 kernel/umh.c:123
       ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
       ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244

-> #3 (&____s->seqcount#5){....}-{0:0}:
       seqcount_lockdep_reader_access include/linux/seqlock.h:72 [inline]
       read_seqbegin include/linux/seqlock.h:772 [inline]
       read_seqbegin_or_lock_irqsave include/linux/seqlock.h:1133 [inline]
       thread_group_cputime+0x164/0x820 kernel/sched/cputime.c:336
       thread_group_start_cputime kernel/time/posix-cpu-timers.c:311 [inline]
       cpu_clock_sample_group+0x44b/0x800 kernel/time/posix-cpu-timers.c:348
       posix_cpu_timer_set+0x39b/0x1350 kernel/time/posix-cpu-timers.c:689
       do_timer_settime+0x1e8/0x2f0 kernel/time/posix-timers.c:925
       __do_sys_timer_settime kernel/time/posix-timers.c:954 [inline]
       __se_sys_timer_settime kernel/time/posix-timers.c:940 [inline]
       __x64_sys_timer_settime+0x26a/0x2c0 kernel/time/posix-timers.c:940
       do_syscall_x64 arch/x86/entry/common.c:52 [inline]
       do_syscall_64+0xcf/0x260 arch/x86/entry/common.c:83
       entry_SYSCALL_64_after_hwframe+0x77/0x7f

-> #2 (&sighand->siglock){-.-.}-{2:2}:
       __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
       _raw_spin_lock_irqsave+0x3a/0x60 kernel/locking/spinlock.c:162
       __lock_task_sighand+0xc2/0x340 kernel/signal.c:1414
       lock_task_sighand include/linux/sched/signal.h:746 [inline]
       do_send_sig_info kernel/signal.c:1300 [inline]
       group_send_sig_info+0x290/0x300 kernel/signal.c:1453
       bpf_send_signal_common+0x2e8/0x3a0 kernel/trace/bpf_trace.c:881
       ____bpf_send_signal_thread kernel/trace/bpf_trace.c:898 [inline]
       bpf_send_signal_thread+0x16/0x20 kernel/trace/bpf_trace.c:896
       ___bpf_prog_run+0x3e51/0xabd0 kernel/bpf/core.c:1997
       __bpf_prog_run32+0xc1/0x100 kernel/bpf/core.c:2236
       bpf_dispatcher_nop_func include/linux/bpf.h:1234 [inline]
       __bpf_prog_run include/linux/filter.h:657 [inline]
       bpf_prog_run include/linux/filter.h:664 [inline]
       __bpf_trace_run kernel/trace/bpf_trace.c:2381 [inline]
       bpf_trace_run4+0x176/0x460 kernel/trace/bpf_trace.c:2422
       __bpf_trace_mmap_lock_acquire_returned+0x134/0x180 include/trace/events/mmap_lock.h:52
       trace_mmap_lock_acquire_returned include/trace/events/mmap_lock.h:52 [inline]
       __mmap_lock_do_trace_acquire_returned+0x456/0x790 mm/mmap_lock.c:237
       __mmap_lock_trace_acquire_returned include/linux/mmap_lock.h:36 [inline]
       mmap_read_trylock include/linux/mmap_lock.h:166 [inline]
       get_mmap_lock_carefully mm/memory.c:5633 [inline]
       lock_mm_and_find_vma+0xeb/0x580 mm/memory.c:5693
       do_user_addr_fault+0x29c/0x1080 arch/x86/mm/fault.c:1385
       handle_page_fault arch/x86/mm/fault.c:1505 [inline]
       exc_page_fault+0x5c/0xc0 arch/x86/mm/fault.c:1563
       asm_exc_page_fault+0x26/0x30 arch/x86/include/asm/idtentry.h:623

-> #1 (lock#10){+.+.}-{2:2}:
       local_lock_acquire include/linux/local_lock_internal.h:29 [inline]
       __mmap_lock_do_trace_acquire_returned+0x97/0x790 mm/mmap_lock.c:237
       __mmap_lock_trace_acquire_returned include/linux/mmap_lock.h:36 [inline]
       mmap_read_trylock include/linux/mmap_lock.h:166 [inline]
       stack_map_get_build_id_offset+0x5df/0x7d0 kernel/bpf/stackmap.c:141
       __bpf_get_stack+0x6bf/0x700 kernel/bpf/stackmap.c:449
       ____bpf_get_stack_raw_tp kernel/trace/bpf_trace.c:1985 [inline]
       bpf_get_stack_raw_tp+0x124/0x160 kernel/trace/bpf_trace.c:1975
       ___bpf_prog_run+0x3e51/0xabd0 kernel/bpf/core.c:1997
       __bpf_prog_run32+0xc1/0x100 kernel/bpf/core.c:2236
       bpf_dispatcher_nop_func include/linux/bpf.h:1234 [inline]
       __bpf_prog_run include/linux/filter.h:657 [inline]
       bpf_prog_run include/linux/filter.h:664 [inline]
       __bpf_trace_run kernel/trace/bpf_trace.c:2381 [inline]
       bpf_trace_run3+0x167/0x440 kernel/trace/bpf_trace.c:2421
       __bpf_trace_workqueue_queue_work+0x101/0x140 include/trace/events/workqueue.h:23
       trace_workqueue_queue_work include/trace/events/workqueue.h:23 [inline]
       __queue_work+0x627/0x1020 kernel/workqueue.c:2382
       queue_work_on+0xf4/0x120 kernel/workqueue.c:2435
       bpf_prog_load+0x19bb/0x2660 kernel/bpf/syscall.c:2944
       __sys_bpf+0x9b4/0x4b40 kernel/bpf/syscall.c:5660
       __do_sys_bpf kernel/bpf/syscall.c:5767 [inline]
       __se_sys_bpf kernel/bpf/syscall.c:5765 [inline]
       __x64_sys_bpf+0x78/0xc0 kernel/bpf/syscall.c:5765
       do_syscall_x64 arch/x86/entry/common.c:52 [inline]
       do_syscall_64+0xcf/0x260 arch/x86/entry/common.c:83
       entry_SYSCALL_64_after_hwframe+0x77/0x7f

-> #0 (&pool->lock){-.-.}-{2:2}:
       check_prev_add kernel/locking/lockdep.c:3134 [inline]
       check_prevs_add kernel/locking/lockdep.c:3253 [inline]
       validate_chain kernel/locking/lockdep.c:3869 [inline]
       __lock_acquire+0x2478/0x3b30 kernel/locking/lockdep.c:5137
       lock_acquire kernel/locking/lockdep.c:5754 [inline]
       lock_acquire+0x1b1/0x560 kernel/locking/lockdep.c:5719
       __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline]
       _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154
       __queue_work+0x23a/0x1020 kernel/workqueue.c:2346
       queue_work_on+0xf4/0x120 kernel/workqueue.c:2435
       queue_work include/linux/workqueue.h:605 [inline]
       schedule_work include/linux/workqueue.h:666 [inline]
       p9_pollwake+0xc1/0x1d0 net/9p/trans_fd.c:538
       __wake_up_common+0x131/0x1e0 kernel/sched/wait.c:89
       __wake_up_common_lock kernel/sched/wait.c:106 [inline]
       __wake_up+0x31/0x60 kernel/sched/wait.c:127
       exit_notify kernel/exit.c:747 [inline]
       do_exit+0x1448/0x2c10 kernel/exit.c:898
       do_group_exit+0xd3/0x2a0 kernel/exit.c:1027
       __do_sys_exit_group kernel/exit.c:1038 [inline]
       __se_sys_exit_group kernel/exit.c:1036 [inline]
       __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1036
       do_syscall_x64 arch/x86/entry/common.c:52 [inline]
       do_syscall_64+0xcf/0x260 arch/x86/entry/common.c:83
       entry_SYSCALL_64_after_hwframe+0x77/0x7f

other info that might help us debug this:

Chain exists of:
  &pool->lock --> &____s->seqcount#5 --> &pid->wait_pidfd

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&pid->wait_pidfd);
                               lock(&____s->seqcount#5);
                               lock(&pid->wait_pidfd);
  lock(&pool->lock);

 *** DEADLOCK ***

3 locks held by syz-executor.0/6788:
 #0: ffffffff8d40a098 (tasklist_lock){.+.+}-{2:2}, at: exit_notify kernel/exit.c:735 [inline]
 #0: ffffffff8d40a098 (tasklist_lock){.+.+}-{2:2}, at: do_exit+0xac2/0x2c10 kernel/exit.c:898
 #1: ffff8880604b9d18 (&pid->wait_pidfd){....}-{2:2}, at: __wake_up_common_lock kernel/sched/wait.c:105 [inline]
 #1: ffff8880604b9d18 (&pid->wait_pidfd){....}-{2:2}, at: __wake_up+0x1c/0x60 kernel/sched/wait.c:127
 #2: ffffffff8d7b0e20 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:329 [inline]
 #2: ffffffff8d7b0e20 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:781 [inline]
 #2: ffffffff8d7b0e20 (rcu_read_lock){....}-{1:2}, at: __queue_work+0xf2/0x1020 kernel/workqueue.c:2324

stack backtrace:
CPU: 1 PID: 6788 Comm: syz-executor.0 Not tainted 6.9.0-rc6-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:114
 check_noncircular+0x31a/0x400 kernel/locking/lockdep.c:2187
 check_prev_add kernel/locking/lockdep.c:3134 [inline]
 check_prevs_add kernel/locking/lockdep.c:3253 [inline]
 validate_chain kernel/locking/lockdep.c:3869 [inline]
 __lock_acquire+0x2478/0x3b30 kernel/locking/lockdep.c:5137
 lock_acquire kernel/locking/lockdep.c:5754 [inline]
 lock_acquire+0x1b1/0x560 kernel/locking/lockdep.c:5719
 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline]
 _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154
 __queue_work+0x23a/0x1020 kernel/workqueue.c:2346
 queue_work_on+0xf4/0x120 kernel/workqueue.c:2435
 queue_work include/linux/workqueue.h:605 [inline]
 schedule_work include/linux/workqueue.h:666 [inline]
 p9_pollwake+0xc1/0x1d0 net/9p/trans_fd.c:538
 __wake_up_common+0x131/0x1e0 kernel/sched/wait.c:89
 __wake_up_common_lock kernel/sched/wait.c:106 [inline]
 __wake_up+0x31/0x60 kernel/sched/wait.c:127
 exit_notify kernel/exit.c:747 [inline]
 do_exit+0x1448/0x2c10 kernel/exit.c:898
 do_group_exit+0xd3/0x2a0 kernel/exit.c:1027
 __do_sys_exit_group kernel/exit.c:1038 [inline]
 __se_sys_exit_group kernel/exit.c:1036 [inline]
 __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1036
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0xcf/0x260 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f214947dea9
Code: Unable to access opcode bytes at 0x7f214947de7f.
RSP: 002b:00007ffc485776d8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
RAX: ffffffffffffffda RBX: 000000000000001e RCX: 00007f214947dea9
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: 0000000000000001 R08: 000000000000051c R09: 0000000000000000
R10: 0000001b31520000 R11: 0000000000000246 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000001
 </TASK>
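
In short: the exiting task already holds &pid->wait_pidfd (taken by __wake_up() in exit_notify()) when p9_pollwake()'s schedule_work() tries to take &pool->lock, while the #1..#4 entries above show &pool->lock already ordered, transitively, before &pid->wait_pidfd. Collapsed to its two endpoints this is the classic AB/BA inversion. Below is a minimal, hypothetical userspace sketch (pthread mutexes standing in for the kernel spinlocks, all names illustrative only), not anything from the kernel source:

/*
 * Hypothetical userspace sketch of the inversion lockdep reports above,
 * with pthread mutexes standing in for the kernel spinlocks and the
 * five-lock chain collapsed to its two endpoints.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t wait_pidfd = PTHREAD_MUTEX_INITIALIZER; /* stands in for &pid->wait_pidfd */
static pthread_mutex_t pool_lock  = PTHREAD_MUTEX_INITIALIZER; /* stands in for &pool->lock */

/* CPU0 in the scenario: exit_notify() holds wait_pidfd via __wake_up(),
 * then p9_pollwake() -> schedule_work() -> __queue_work() wants pool->lock. */
static void *exit_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&wait_pidfd);
	pthread_mutex_lock(&pool_lock);   /* blocks if queue_path holds pool_lock */
	pthread_mutex_unlock(&pool_lock);
	pthread_mutex_unlock(&wait_pidfd);
	return NULL;
}

/* The pre-existing #1..#4 chain: pool->lock is held when the workqueue
 * tracepoint fires, and that path transitively reaches wait_pidfd. */
static void *queue_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&pool_lock);
	pthread_mutex_lock(&wait_pidfd);  /* blocks if exit_path holds wait_pidfd */
	pthread_mutex_unlock(&wait_pidfd);
	pthread_mutex_unlock(&pool_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;
	pthread_create(&a, NULL, exit_path, NULL);
	pthread_create(&b, NULL, queue_path, NULL);
	pthread_join(a, NULL);  /* with unlucky interleaving, both threads block forever */
	pthread_join(b, NULL);
	puts("no deadlock on this run");
	return 0;
}

Built with cc -pthread, most runs finish, but the interleaving where each thread grabs its first lock before either grabs its second hangs both threads. Lockdep's value is that it reports the cycle from the lock-order history alone, without the bad timing having to occur.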

Crashes (2):
Time Kernel Commit Syzkaller Config Log Report Syz repro C repro VM info Assets Manager Title
2024/04/28 23:05 upstream e67572cd2204 07b455f9 .config console log report info [disk image] [vmlinux] [kernel image] ci-upstream-kasan-gce-selinux-root possible deadlock in __change_pid
2024/04/24 02:55 upstream 9d1ddab261f3 21339d7b .config console log report info [disk image] [vmlinux] [kernel image] ci-upstream-kasan-gce-selinux-root possible deadlock in __change_pid