diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5f97a3d2a8d7..f7df7c0c8955 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2698,21 +2698,54 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct inode *inode = mapping->host; + struct btrfs_inode *inode = BTRFS_I(mapping->host); int ret = 0; struct btrfs_bio_ctrl bio_ctrl = { .wbc = wbc, .opf = REQ_OP_WRITE | wbc_to_write_flags(wbc), }; + /* + * If this inode is being used for a clone/reflink operation that + * copied an inline extent into a page of the destination inode, skip + * writeback to avoid a deadlock or a long blocked task. + * + * The clone operation holds the extent range locked in the inode's + * io_tree for its entire duration. Any writeback attempt on this + * inode will block trying to lock that same extent range inside + * writepage_delalloc() or btrfs_invalidate_folio(), causing a + * hung task. + * + * When writeback is triggered from the transaction commit path via + * btrfs_start_delalloc_flush() -> try_to_writeback_inodes_sb(), + * this becomes a true circular deadlock: + * + * clone -> waits for transaction commit to finish + * commit -> waits for kworker writeback to finish + * kworker -> waits for extent lock held by clone + * + * The flag BTRFS_INODE_NO_DELALLOC_FLUSH was already checked in + * start_delalloc_inodes() but only for the btrfs metadata reclaim + * path. The transaction commit path goes through + * try_to_writeback_inodes_sb() which bypasses that check entirely + * and calls btrfs_writepages() directly. + * + * By checking the flag here we catch all writeback paths. The inode + * will be safely written after the clone operation finishes and + * clears BTRFS_INODE_NO_DELALLOC_FLUSH, at which point all locks + * are released and writeback can proceed normally. + */ + if (test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags)) + return 0; + /* * Allow only a single thread to do the reloc work in zoned mode to * protect the write pointer updates. */ - btrfs_zoned_data_reloc_lock(BTRFS_I(inode)); + btrfs_zoned_data_reloc_lock(inode); ret = extent_write_cache_pages(mapping, &bio_ctrl); submit_write_bio(&bio_ctrl, ret); - btrfs_zoned_data_reloc_unlock(BTRFS_I(inode)); + btrfs_zoned_data_reloc_unlock(inode); return ret; }