diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9c94ed8c3ab0..519497bc1045 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -474,29 +474,31 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
 	vma_interval_tree_foreach(vma, root, start, end ? end - 1 : ULONG_MAX) {
 		unsigned long v_start;
 		unsigned long v_end;
+		bool have_shareable_lock;
+		zap_flags_t local_flags = zap_flags;
 
 		if (!hugetlb_vma_trylock_write(vma))
 			continue;
-
+
+		have_shareable_lock = __vma_shareable_lock(vma);
+
 		/*
-		 * Skip VMAs without shareable locks. Per the design in commit
-		 * 40549ba8f8e0, these will be handled by remove_inode_hugepages()
-		 * called after this function with proper locking.
+		 * VMAs without a shareable vma lock cannot serialize PMD
+		 * unsharing here, so set ZAP_FLAG_NO_UNSHARE to skip it.
+		 * Still proceed with unmapping so pages are freed right
+		 * away, which punch hole operations rely on.
 		 */
-		if (!__vma_shareable_lock(vma))
-			goto skip;
-
+		if (!have_shareable_lock)
+			local_flags |= ZAP_FLAG_NO_UNSHARE;
 		v_start = vma_offset_start(vma, start);
 		v_end = vma_offset_end(vma, end);
 
-		unmap_hugepage_range(vma, v_start, v_end, NULL, zap_flags);
-
+		unmap_hugepage_range(vma, v_start, v_end, NULL, local_flags);
 		/*
 		 * Note that vma lock only exists for shared/non-private
 		 * vmas. Therefore, lock is not held when calling
 		 * unmap_hugepage_range for private vmas.
 		 */
-skip:
 		hugetlb_vma_unlock_write(vma);
 	}
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 06978b4dbeb8..9126ab44320d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2395,6 +2395,8 @@ struct zap_details {
 #define ZAP_FLAG_DROP_MARKER        ((__force zap_flags_t) BIT(0))
 /* Set in unmap_vmas() to indicate a final unmap call.  Only used by hugetlb */
 #define ZAP_FLAG_UNMAP              ((__force zap_flags_t) BIT(1))
+/* Skip PMD unsharing when unmapping hugetlb ranges without shareable lock */
+#define ZAP_FLAG_NO_UNSHARE         ((__force zap_flags_t) BIT(2))
 
 #ifdef CONFIG_SCHED_MM_CID
 void sched_mm_cid_before_execve(struct task_struct *t);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6cac826cb61f..c4257aa568fe 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5885,7 +5885,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		}
 
 		ptl = huge_pte_lock(h, mm, ptep);
-		if (huge_pmd_unshare(mm, vma, address, ptep)) {
+		if (!(zap_flags & ZAP_FLAG_NO_UNSHARE) &&
+		    huge_pmd_unshare(mm, vma, address, ptep)) {
 			spin_unlock(ptl);
 			tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
 			force_flush = true;