syzbot


possible deadlock in __schedule

Status: upstream: reported on 2024/07/26 09:13
Reported-by: syzbot+ba61d03fb79a4f10f2e0@syzkaller.appspotmail.com
First crash: 83d, last: 83d
Similar bugs (3)
Kernel Title Repro Cause bisect Fix bisect Count Last Reported Patched Status
upstream possible deadlock in __schedule (3) kernel 1 35d 31d 0/28 upstream: reported on 2024/09/16 05:27
upstream possible deadlock in __schedule kernel 1 1255d 1249d 0/28 auto-closed as invalid on 2021/09/09 01:04
upstream possible deadlock in __schedule (2) kernel 13 352d 363d 0/28 auto-obsoleted due to no activity on 2024/01/09 23:41

Sample crash report:
FAULT_INJECTION: forcing a failure.
name fail_page_alloc, interval 1, probability 0, space 0, times 0
======================================================
WARNING: possible circular locking dependency detected
5.15.163-syzkaller #0 Not tainted
------------------------------------------------------
syz.1.1199/8444 is trying to acquire lock:
ffff800014b6f4b8 ((console_sem).lock){-.-.}-{2:2}, at: down_trylock+0x28/0xd8 kernel/locking/semaphore.c:138

but task is already holding lock:
ffff0001b47e8958 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock_nested kernel/sched/core.c:475 [inline]
ffff0001b47e8958 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock kernel/sched/sched.h:1326 [inline]
ffff0001b47e8958 (&rq->__lock){-.-.}-{2:2}, at: rq_lock kernel/sched/sched.h:1621 [inline]
ffff0001b47e8958 (&rq->__lock){-.-.}-{2:2}, at: __schedule+0x328/0x1e48 kernel/sched/core.c:6290

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #2 (&rq->__lock){-.-.}-{2:2}:
       _raw_spin_lock_nested+0xb4/0x110 kernel/locking/spinlock.c:368
       raw_spin_rq_lock_nested+0x2c/0x44 kernel/sched/core.c:475
       raw_spin_rq_lock kernel/sched/sched.h:1326 [inline]
       rq_lock kernel/sched/sched.h:1621 [inline]
       task_fork_fair+0x7c/0x23c kernel/sched/fair.c:11495
       sched_cgroup_fork+0x334/0x3d8 kernel/sched/core.c:4466
       copy_process+0x24d4/0x3750 kernel/fork.c:2320
       kernel_clone+0x1d8/0xa58 kernel/fork.c:2604
       kernel_thread+0x148/0x1bc kernel/fork.c:2656
       rest_init+0x2c/0x38c init/main.c:706
       arch_call_rest_init+0x14/0x20 init/main.c:893
       start_kernel+0x440/0x600 init/main.c:1140
       __primary_switched+0xa8/0xb0 arch/arm64/kernel/head.S:468

-> #1 (&p->pi_lock){-.-.}-{2:2}:
       __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
       _raw_spin_lock_irqsave+0xc4/0x14c kernel/locking/spinlock.c:162
       try_to_wake_up+0xb0/0xc2c kernel/sched/core.c:4030
       wake_up_process+0x18/0x24 kernel/sched/core.c:4215
       __up+0x11c/0x148 kernel/locking/semaphore.c:265
       up+0x90/0xb0 kernel/locking/semaphore.c:190
       __up_console_sem+0x8c/0x100 kernel/printk/printk.c:256
       console_unlock+0x1160/0x1394 kernel/printk/printk.c:2760
       vprintk_emit+0x140/0x21c kernel/printk/printk.c:2274
       vprintk_default+0xa0/0xe4 kernel/printk/printk.c:2289
       vprintk+0x200/0x2d4 kernel/printk/printk_safe.c:45
       _printk+0xdc/0x128 kernel/printk/printk.c:2299
       bt_err+0xfc/0x144 net/bluetooth/lib.c:180
       hci_cmd_timeout+0x14c/0x204 net/bluetooth/hci_core.c:2779
       process_one_work+0x790/0x11b8 kernel/workqueue.c:2310
       worker_thread+0x910/0x1034 kernel/workqueue.c:2457
       kthread+0x37c/0x45c kernel/kthread.c:334
       ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:870

-> #0 ((console_sem).lock){-.-.}-{2:2}:
       check_prev_add kernel/locking/lockdep.c:3053 [inline]
       check_prevs_add kernel/locking/lockdep.c:3172 [inline]
       validate_chain kernel/locking/lockdep.c:3788 [inline]
       __lock_acquire+0x32d4/0x7638 kernel/locking/lockdep.c:5012
       lock_acquire+0x240/0x77c kernel/locking/lockdep.c:5623
       __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
       _raw_spin_lock_irqsave+0xc4/0x14c kernel/locking/spinlock.c:162
       down_trylock+0x28/0xd8 kernel/locking/semaphore.c:138
       __down_trylock_console_sem+0x80/0x144 kernel/printk/printk.c:239
       console_trylock+0xb8/0x1f8 kernel/printk/printk.c:2575
       console_trylock_spinning+0x1c/0x288 kernel/printk/printk.c:1867
       vprintk_emit+0x124/0x21c kernel/printk/printk.c:2273
       vprintk_default+0xa0/0xe4 kernel/printk/printk.c:2289
       vprintk+0x200/0x2d4 kernel/printk/printk_safe.c:45
       _printk+0xdc/0x128 kernel/printk/printk.c:2299
       fail_dump lib/fault-inject.c:45 [inline]
       should_fail+0x398/0x514 lib/fault-inject.c:146
       __should_fail_alloc_page mm/page_alloc.c:3777 [inline]
       should_fail_alloc_page+0x74/0xa8 mm/page_alloc.c:3814
       prepare_alloc_pages+0x160/0x460 mm/page_alloc.c:5200
       __alloc_pages+0x138/0x674 mm/page_alloc.c:5412
       alloc_pages+0x368/0x5d4
       stack_depot_save+0x368/0x4a4 lib/stackdepot.c:302
       kasan_save_stack+0x54/0x68 mm/kasan/common.c:40
       kasan_record_aux_stack+0xd4/0x11c mm/kasan/generic.c:348
       irq_work_queue_on+0xcc/0x224 kernel/irq_work.c:101
       tell_cpu_to_push+0x1d8/0x408 kernel/sched/rt.c:2155
       pull_rt_task kernel/sched/rt.c:2221 [inline]
       balance_rt+0x268/0x354 kernel/sched/rt.c:1560
       put_prev_task_balance kernel/sched/core.c:5624 [inline]
       __pick_next_task kernel/sched/core.c:5664 [inline]
       pick_next_task kernel/sched/core.c:6192 [inline]
       __schedule+0x714/0x1e48 kernel/sched/core.c:6340
       schedule+0x11c/0x1c8 kernel/sched/core.c:6459
       io_schedule+0x8c/0x194 kernel/sched/core.c:8484
       bit_wait_io+0x1c/0xac kernel/sched/wait_bit.c:209
       __wait_on_bit kernel/sched/wait_bit.c:49 [inline]
       out_of_line_wait_on_bit+0x208/0x33c kernel/sched/wait_bit.c:64
       wait_on_bit_io include/linux/wait_bit.h:101 [inline]
       __wait_on_buffer+0x80/0x90 fs/buffer.c:122
       wait_on_buffer include/linux/buffer_head.h:390 [inline]
       __ext4_find_entry+0xd20/0x1614 fs/ext4/namei.c:1680
       ext4_find_entry+0x200/0x330 fs/ext4/namei.c:1750
       ext4_get_parent+0xc4/0x2ec fs/ext4/namei.c:1893
       reconnect_one fs/exportfs/expfs.c:131 [inline]
       reconnect_path+0x18c/0x68c fs/exportfs/expfs.c:222
       exportfs_decode_fh_raw+0x1c4/0x4b4 fs/exportfs/expfs.c:459
       exportfs_decode_fh+0x50/0x80 fs/exportfs/expfs.c:576
       do_handle_to_path fs/fhandle.c:152 [inline]
       handle_to_path fs/fhandle.c:207 [inline]
       do_handle_open+0x5b8/0x9f0 fs/fhandle.c:223
       __do_sys_open_by_handle_at fs/fhandle.c:265 [inline]
       __se_sys_open_by_handle_at fs/fhandle.c:256 [inline]
       __arm64_sys_open_by_handle_at+0x80/0x94 fs/fhandle.c:256
       __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline]
       invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:52
       el0_svc_common+0x138/0x258 arch/arm64/kernel/syscall.c:142
       do_el0_svc+0x58/0x14c arch/arm64/kernel/syscall.c:181
       el0_svc+0x7c/0x1f0 arch/arm64/kernel/entry-common.c:608
       el0t_64_sync_handler+0x84/0xe4 arch/arm64/kernel/entry-common.c:626
       el0t_64_sync+0x1a0/0x1a4 arch/arm64/kernel/entry.S:584

other info that might help us debug this:

Chain exists of:
  (console_sem).lock --> &p->pi_lock --> &rq->__lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&rq->__lock);
                               lock(&p->pi_lock);
                               lock(&rq->__lock);
  lock((console_sem).lock);

 *** DEADLOCK ***

2 locks held by syz.1.1199/8444:
 #0: ffff0000e5f48de8 (&type->i_mutex_dir_key#3){++++}-{3:3}, at: inode_lock include/linux/fs.h:789 [inline]
 #0: ffff0000e5f48de8 (&type->i_mutex_dir_key#3){++++}-{3:3}, at: reconnect_one fs/exportfs/expfs.c:129 [inline]
 #0: ffff0000e5f48de8 (&type->i_mutex_dir_key#3){++++}-{3:3}, at: reconnect_path+0x134/0x68c fs/exportfs/expfs.c:222
 #1: ffff0001b47e8958 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock_nested kernel/sched/core.c:475 [inline]
 #1: ffff0001b47e8958 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock kernel/sched/sched.h:1326 [inline]
 #1: ffff0001b47e8958 (&rq->__lock){-.-.}-{2:2}, at: rq_lock kernel/sched/sched.h:1621 [inline]
 #1: ffff0001b47e8958 (&rq->__lock){-.-.}-{2:2}, at: __schedule+0x328/0x1e48 kernel/sched/core.c:6290

stack backtrace:
CPU: 1 PID: 8444 Comm: syz.1.1199 Not tainted 5.15.163-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/07/2024
Call trace:
 dump_backtrace+0x0/0x530 arch/arm64/kernel/stacktrace.c:152
 show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:216
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x108/0x170 lib/dump_stack.c:106
 dump_stack+0x1c/0x58 lib/dump_stack.c:113
 print_circular_bug+0x150/0x1b8 kernel/locking/lockdep.c:2011
 check_noncircular+0x2cc/0x378 kernel/locking/lockdep.c:2133
 check_prev_add kernel/locking/lockdep.c:3053 [inline]
 check_prevs_add kernel/locking/lockdep.c:3172 [inline]
 validate_chain kernel/locking/lockdep.c:3788 [inline]
 __lock_acquire+0x32d4/0x7638 kernel/locking/lockdep.c:5012
 lock_acquire+0x240/0x77c kernel/locking/lockdep.c:5623
 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
 _raw_spin_lock_irqsave+0xc4/0x14c kernel/locking/spinlock.c:162
 down_trylock+0x28/0xd8 kernel/locking/semaphore.c:138
 __down_trylock_console_sem+0x80/0x144 kernel/printk/printk.c:239
 console_trylock+0xb8/0x1f8 kernel/printk/printk.c:2575
 console_trylock_spinning+0x1c/0x288 kernel/printk/printk.c:1867
 vprintk_emit+0x124/0x21c kernel/printk/printk.c:2273
 vprintk_default+0xa0/0xe4 kernel/printk/printk.c:2289
 vprintk+0x200/0x2d4 kernel/printk/printk_safe.c:45
 _printk+0xdc/0x128 kernel/printk/printk.c:2299
 fail_dump lib/fault-inject.c:45 [inline]
 should_fail+0x398/0x514 lib/fault-inject.c:146
 __should_fail_alloc_page mm/page_alloc.c:3777 [inline]
 should_fail_alloc_page+0x74/0xa8 mm/page_alloc.c:3814
 prepare_alloc_pages+0x160/0x460 mm/page_alloc.c:5200
 __alloc_pages+0x138/0x674 mm/page_alloc.c:5412
 alloc_pages+0x368/0x5d4
 stack_depot_save+0x368/0x4a4 lib/stackdepot.c:302
 kasan_save_stack+0x54/0x68 mm/kasan/common.c:40
 kasan_record_aux_stack+0xd4/0x11c mm/kasan/generic.c:348
 irq_work_queue_on+0xcc/0x224 kernel/irq_work.c:101
 tell_cpu_to_push+0x1d8/0x408 kernel/sched/rt.c:2155
 pull_rt_task kernel/sched/rt.c:2221 [inline]
 balance_rt+0x268/0x354 kernel/sched/rt.c:1560
 put_prev_task_balance kernel/sched/core.c:5624 [inline]
 __pick_next_task kernel/sched/core.c:5664 [inline]
 pick_next_task kernel/sched/core.c:6192 [inline]
 __schedule+0x714/0x1e48 kernel/sched/core.c:6340
 schedule+0x11c/0x1c8 kernel/sched/core.c:6459
 io_schedule+0x8c/0x194 kernel/sched/core.c:8484
 bit_wait_io+0x1c/0xac kernel/sched/wait_bit.c:209
 __wait_on_bit kernel/sched/wait_bit.c:49 [inline]
 out_of_line_wait_on_bit+0x208/0x33c kernel/sched/wait_bit.c:64
 wait_on_bit_io include/linux/wait_bit.h:101 [inline]
 __wait_on_buffer+0x80/0x90 fs/buffer.c:122
 wait_on_buffer include/linux/buffer_head.h:390 [inline]
 __ext4_find_entry+0xd20/0x1614 fs/ext4/namei.c:1680
 ext4_find_entry+0x200/0x330 fs/ext4/namei.c:1750
 ext4_get_parent+0xc4/0x2ec fs/ext4/namei.c:1893
 reconnect_one fs/exportfs/expfs.c:131 [inline]
 reconnect_path+0x18c/0x68c fs/exportfs/expfs.c:222
 exportfs_decode_fh_raw+0x1c4/0x4b4 fs/exportfs/expfs.c:459
 exportfs_decode_fh+0x50/0x80 fs/exportfs/expfs.c:576
 do_handle_to_path fs/fhandle.c:152 [inline]
 handle_to_path fs/fhandle.c:207 [inline]
 do_handle_open+0x5b8/0x9f0 fs/fhandle.c:223
 __do_sys_open_by_handle_at fs/fhandle.c:265 [inline]
 __se_sys_open_by_handle_at fs/fhandle.c:256 [inline]
 __arm64_sys_open_by_handle_at+0x80/0x94 fs/fhandle.c:256
 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline]
 invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:52
 el0_svc_common+0x138/0x258 arch/arm64/kernel/syscall.c:142
 do_el0_svc+0x58/0x14c arch/arm64/kernel/syscall.c:181
 el0_svc+0x7c/0x1f0 arch/arm64/kernel/entry-common.c:608
 el0t_64_sync_handler+0x84/0xe4 arch/arm64/kernel/entry-common.c:626
 el0t_64_sync+0x1a0/0x1a4 arch/arm64/kernel/entry.S:584
CPU: 1 PID: 8444 Comm: syz.1.1199 Not tainted 5.15.163-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/07/2024
Call trace:
 dump_backtrace+0x0/0x530 arch/arm64/kernel/stacktrace.c:152
 show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:216
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x108/0x170 lib/dump_stack.c:106
 dump_stack+0x1c/0x58 lib/dump_stack.c:113
 fail_dump lib/fault-inject.c:52 [inline]
 should_fail+0x3b8/0x514 lib/fault-inject.c:146
 __should_fail_alloc_page mm/page_alloc.c:3777 [inline]
 should_fail_alloc_page+0x74/0xa8 mm/page_alloc.c:3814
 prepare_alloc_pages+0x160/0x460 mm/page_alloc.c:5200
 __alloc_pages+0x138/0x674 mm/page_alloc.c:5412
 alloc_pages+0x368/0x5d4
 stack_depot_save+0x368/0x4a4 lib/stackdepot.c:302
 kasan_save_stack+0x54/0x68 mm/kasan/common.c:40
 kasan_record_aux_stack+0xd4/0x11c mm/kasan/generic.c:348
 irq_work_queue_on+0xcc/0x224 kernel/irq_work.c:101
 tell_cpu_to_push+0x1d8/0x408 kernel/sched/rt.c:2155
 pull_rt_task kernel/sched/rt.c:2221 [inline]
 balance_rt+0x268/0x354 kernel/sched/rt.c:1560
 put_prev_task_balance kernel/sched/core.c:5624 [inline]
 __pick_next_task kernel/sched/core.c:5664 [inline]
 pick_next_task kernel/sched/core.c:6192 [inline]
 __schedule+0x714/0x1e48 kernel/sched/core.c:6340
 schedule+0x11c/0x1c8 kernel/sched/core.c:6459
 io_schedule+0x8c/0x194 kernel/sched/core.c:8484
 bit_wait_io+0x1c/0xac kernel/sched/wait_bit.c:209
 __wait_on_bit kernel/sched/wait_bit.c:49 [inline]
 out_of_line_wait_on_bit+0x208/0x33c kernel/sched/wait_bit.c:64
 wait_on_bit_io include/linux/wait_bit.h:101 [inline]
 __wait_on_buffer+0x80/0x90 fs/buffer.c:122
 wait_on_buffer include/linux/buffer_head.h:390 [inline]
 __ext4_find_entry+0xd20/0x1614 fs/ext4/namei.c:1680
 ext4_find_entry+0x200/0x330 fs/ext4/namei.c:1750
 ext4_get_parent+0xc4/0x2ec fs/ext4/namei.c:1893
 reconnect_one fs/exportfs/expfs.c:131 [inline]
 reconnect_path+0x18c/0x68c fs/exportfs/expfs.c:222
 exportfs_decode_fh_raw+0x1c4/0x4b4 fs/exportfs/expfs.c:459
 exportfs_decode_fh+0x50/0x80 fs/exportfs/expfs.c:576
 do_handle_to_path fs/fhandle.c:152 [inline]
 handle_to_path fs/fhandle.c:207 [inline]
 do_handle_open+0x5b8/0x9f0 fs/fhandle.c:223
 __do_sys_open_by_handle_at fs/fhandle.c:265 [inline]
 __se_sys_open_by_handle_at fs/fhandle.c:256 [inline]
 __arm64_sys_open_by_handle_at+0x80/0x94 fs/fhandle.c:256
 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline]
 invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:52
 el0_svc_common+0x138/0x258 arch/arm64/kernel/syscall.c:142
 do_el0_svc+0x58/0x14c arch/arm64/kernel/syscall.c:181
 el0_svc+0x7c/0x1f0 arch/arm64/kernel/entry-common.c:608
 el0t_64_sync_handler+0x84/0xe4 arch/arm64/kernel/entry-common.c:626
 el0t_64_sync+0x1a0/0x1a4 arch/arm64/kernel/entry.S:584

Crashes (1):
Time Kernel Commit Syzkaller Config Log Report Syz repro C repro VM info Assets (help?) Manager Title
2024/07/26 09:12 linux-5.15.y 7c6d66f0266f 3f86dfed .config console log report info [disk image] [vmlinux] [kernel image] ci2-linux-5-15-kasan-arm64 possible deadlock in __schedule
* Struck through repros no longer work on HEAD.