--- a/fs/mount.h	2022-05-16 19:33:29.792582900 +0800
+++ b/fs/mount.h	2022-05-17 21:09:35.549527700 +0800
@@ -77,6 +77,8 @@ struct mount {
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	struct hlist_head mnt_pins;
 	struct hlist_head mnt_stuck_children;
+	struct wait_queue_head wwq;	/* writer wq */
+	struct list_head leg_list;
 } __randomize_layout;
 
 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
@@ -98,6 +100,7 @@ static inline int is_mounted(struct vfsm
 }
 
 extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
+extern struct mount *__lookup_mnt_leg(struct vfsmount *, struct dentry *);
 
 extern int __legitimize_mnt(struct vfsmount *, unsigned);
 extern bool legitimize_mnt(struct vfsmount *, unsigned);
--- a/fs/namei.c	2022-05-17 20:03:40.760743200 +0800
+++ b/fs/namei.c	2022-05-17 20:31:13.396868700 +0800
@@ -1497,7 +1497,7 @@ static bool __follow_mount_rcu(struct na
 		}
 
 		if (flags & DCACHE_MOUNTED) {
-			struct mount *mounted = __lookup_mnt(path->mnt, dentry);
+			struct mount *mounted = __lookup_mnt_leg(path->mnt, dentry);
 			if (mounted) {
 				path->mnt = &mounted->mnt;
 				dentry = path->dentry = mounted->mnt.mnt_root;
--- a/fs/namespace.c	2022-05-16 19:34:10.676163000 +0800
+++ b/fs/namespace.c	2022-05-18 06:33:54.713376200 +0800
@@ -233,6 +233,8 @@ static struct mount *alloc_vfsmnt(const
 		INIT_LIST_HEAD(&mnt->mnt_umounting);
 		INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
 		mnt->mnt.mnt_userns = &init_user_ns;
+		init_waitqueue_head(&mnt->wwq);
+		INIT_LIST_HEAD(&mnt->leg_list);
 	}
 	return mnt;
 
@@ -469,6 +471,12 @@ void mnt_drop_write(struct vfsmount *mnt
 {
 	__mnt_drop_write(mnt);
 	sb_end_write(mnt->mnt_sb);
+	if (mnt->mnt_flags & MNT_DOOMED) {
+		struct mount *m = real_mount(mnt);
+
+		if (!m->mnt_ns && !mnt_get_writers(m))
+			wake_up(&m->wwq);
+	}
 }
 EXPORT_SYMBOL_GPL(mnt_drop_write);
 
@@ -676,6 +684,57 @@ struct mount *__lookup_mnt(struct vfsmou
 	return NULL;
 }
 
+static LIST_HEAD(leg_put_list);
+static DEFINE_SPINLOCK(leg_put_lock);
+
+static void leg_put_workfn(struct work_struct *w)
+{
+	struct mount *mnt;
+again:
+	spin_lock_irq(&leg_put_lock);
+
+	if (!list_empty(&leg_put_list)) {
+		mnt = list_first_entry(&leg_put_list, struct mount, leg_list);
+		list_del_init(&mnt->leg_list);
+		spin_unlock_irq(&leg_put_lock);
+
+		mntput(&mnt->mnt);
+		goto again;
+	}
+	spin_unlock_irq(&leg_put_lock);
+}
+static DECLARE_WORK(leg_put_work, leg_put_workfn);
+
+struct mount *__lookup_mnt_leg(struct vfsmount *mnt, struct dentry *dentry)
+{
+	struct mount *child_mnt;
+	struct vfsmount *m;
+	unsigned seq;
+	int res;
+again:
+	seq = read_seqbegin(&mount_lock);
+	child_mnt = __lookup_mnt(mnt, dentry);
+	m = child_mnt ? &child_mnt->mnt : NULL;
+	res = __legitimize_mnt(m, seq);
+
+	if (res == 0)
+		return child_mnt;
+	if (res > 0)
+		if (read_seqretry(&mount_lock, seq))
+			goto again;
+		else
+			return NULL;
+
+	spin_lock_irq(&leg_put_lock);
+	if (list_empty(&child_mnt->leg_list)) {
+		list_add(&child_mnt->leg_list, &leg_put_list);
+		queue_work(system_unbound_wq, &leg_put_work);
+	} else
+		mnt_add_count(child_mnt, -1);
+	spin_unlock_irq(&leg_put_lock);
+	return NULL;
+}
+
 /*
  * lookup_mnt - Return the first child mount mounted at path
  *
@@ -1174,7 +1233,7 @@ static void cleanup_mnt(struct mount *mn
 	 * The locking used to deal with mnt_count decrement provides barriers,
 	 * so mnt_get_writers() below is safe.
 	 */
-	WARN_ON(mnt_get_writers(mnt));
+	wait_event(mnt->wwq, !mnt_get_writers(mnt));
 	if (unlikely(mnt->mnt_pins.first))
 		mnt_pin_kill(mnt);
 	hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
@@ -1221,10 +1280,16 @@ static void mntput_no_expire(struct moun
 		 * we are dropping is not the final one.
 		 */
 		mnt_add_count(mnt, -1);
+		count = mnt_get_count(mnt);
+		WARN_ON(count == 0);
+		WARN_ON(count < 0);
 		rcu_read_unlock();
 		return;
 	}
 	lock_mount_hash();
+	count = mnt_get_count(mnt);
+	WARN_ON(count == 0);
+	WARN_ON(count < 0);
 	/*
 	 * make sure that if __legitimize_mnt() has not seen us grab
 	 * mount_lock, we'll see their refcount increment here.
--- a/include/linux/cgroup-defs.h	2022-05-16 20:01:41.873691800 +0800
+++ b/include/linux/cgroup-defs.h	2022-05-16 20:05:57.239210800 +0800
@@ -179,7 +179,7 @@ struct cgroup_subsys_state {
 	atomic_t online_cnt;
 
 	/* percpu_ref killing and RCU release */
-	struct work_struct destroy_work;
+	struct work_struct destroy_work, release_work;
 	struct rcu_work destroy_rwork;
 
 	/*
--- a/kernel/cgroup/cgroup.c	2022-05-16 20:03:31.595702700 +0800
+++ b/kernel/cgroup/cgroup.c	2022-05-17 19:05:10.484641700 +0800
@@ -5154,7 +5154,7 @@ static void css_free_rwork_fn(struct wor
 static void css_release_work_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
-		container_of(work, struct cgroup_subsys_state, destroy_work);
+		container_of(work, struct cgroup_subsys_state, release_work);
 	struct cgroup_subsys *ss = css->ss;
 	struct cgroup *cgrp = css->cgroup;
 
@@ -5210,8 +5210,8 @@ static void css_release(struct percpu_re
 	struct cgroup_subsys_state *css =
 		container_of(ref, struct cgroup_subsys_state, refcnt);
 
-	INIT_WORK(&css->destroy_work, css_release_work_fn);
-	queue_work(cgroup_destroy_wq, &css->destroy_work);
+	INIT_WORK(&css->release_work, css_release_work_fn);
+	queue_work(cgroup_destroy_wq, &css->release_work);
 }
 
 static void init_and_link_css(struct cgroup_subsys_state *css,
@@ -5547,14 +5547,19 @@ static void css_killed_work_fn(struct wo
 {
 	struct cgroup_subsys_state *css =
 		container_of(work, struct cgroup_subsys_state, destroy_work);
+	int put = 1;
 
 	mutex_lock(&cgroup_mutex);
 
 	do {
+		struct cgroup_subsys_state *parent = css->parent;
+
 		offline_css(css);
-		css_put(css);
-		/* @css can't go away while we're holding cgroup_mutex */
-		css = css->parent;
+		if (put) {
+			css_put(css);
+			put = 0;
+		}
+		css = parent;
 	} while (css && atomic_dec_and_test(&css->online_cnt));
 
 	mutex_unlock(&cgroup_mutex);
--- a/net/ipv4/tcp_input.c	2022-05-16 19:59:50.885069300 +0800
+++ b/net/ipv4/tcp_input.c	2022-05-16 20:05:57.183788500 +0800
@@ -5926,6 +5926,7 @@ void tcp_rcv_established(struct sock *sk
 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
 
 			/* Bulk data transfer: receiver */
+			skb_dst_drop(skb);
 			__skb_pull(skb, tcp_header_len);
 			eaten = tcp_queue_rcv(sk, skb, &fragstolen);
 
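
A note on the fs/namespace.c hunks: __lookup_mnt_leg() cannot let the RCU path
walk drop the final mount reference itself when __legitimize_mnt() fails, so it
parks the doomed mount on leg_put_list and hands the mntput() to a work item
(the "already queued" branch instead drops the duplicate count that
__legitimize_mnt() took). The following stand-alone sketch shows that
deferred-put pattern in isolation; struct thing, thing_put_deferred() and the
other names are hypothetical illustrations, not kernel APIs, and this is not
the patch's actual code.

/*
 * Sketch: defer the final reference drop from a context that must not
 * run the destructor (e.g. under rcu_read_lock()) to process context.
 */
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct thing {
	struct kref ref;
	struct list_head defer_node;	/* links pending deferred puts */
};

static LIST_HEAD(defer_put_list);
static DEFINE_SPINLOCK(defer_put_lock);

static struct thing *thing_alloc(void)
{
	struct thing *t = kzalloc(sizeof(*t), GFP_KERNEL);

	if (t) {
		kref_init(&t->ref);
		INIT_LIST_HEAD(&t->defer_node);	/* list_empty() == not queued */
	}
	return t;
}

static void thing_release(struct kref *ref)
{
	kfree(container_of(ref, struct thing, ref));
}

static void defer_put_workfn(struct work_struct *w)
{
	struct thing *t;

	/* Drop the lock around kref_put(): the release callback frees. */
	spin_lock_irq(&defer_put_lock);
	while (!list_empty(&defer_put_list)) {
		t = list_first_entry(&defer_put_list, struct thing, defer_node);
		list_del_init(&t->defer_node);
		spin_unlock_irq(&defer_put_lock);
		kref_put(&t->ref, thing_release);
		spin_lock_irq(&defer_put_lock);
	}
	spin_unlock_irq(&defer_put_lock);
}
static DECLARE_WORK(defer_put_work, defer_put_workfn);

/* Safe from atomic context: park the object, put it in process context. */
static void thing_put_deferred(struct thing *t)
{
	unsigned long flags;

	spin_lock_irqsave(&defer_put_lock, flags);
	if (list_empty(&t->defer_node))
		list_add(&t->defer_node, &defer_put_list);
	spin_unlock_irqrestore(&defer_put_lock, flags);
	queue_work(system_unbound_wq, &defer_put_work);
}

The list_empty() check on the node guards against double-queueing, mirroring
the leg_list check in the patch; the patch additionally decrements the mount
count in that branch because each failed __legitimize_mnt() leaves an extra
reference behind.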
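
The cgroup hunks split one shared css work item in two. The hazard they
sidestep: queue_work() on an already-pending work_struct is silently skipped,
and INIT_WORK() on a pending item corrupts its state, so two teardown paths
sharing destroy_work can lose or mangle a callback. Below is a minimal sketch
of the two-work layout under assumed, hypothetical names (struct obj and the
obj_* functions are illustrative only):

/*
 * Sketch: each async teardown stage owns its work_struct, so queueing
 * one stage can never re-initialize or swallow the other's pending work.
 */
#include <linux/printk.h>
#include <linux/workqueue.h>

struct obj {
	struct work_struct kill_work;		/* offline stage */
	struct work_struct release_work;	/* last-reference stage */
};

static void obj_kill_workfn(struct work_struct *work)
{
	struct obj *o = container_of(work, struct obj, kill_work);

	pr_info("offlining obj %p\n", o);
}

static void obj_release_workfn(struct work_struct *work)
{
	struct obj *o = container_of(work, struct obj, release_work);

	pr_info("releasing obj %p\n", o);
}

/* Called when the object is killed; may overlap with obj_release(). */
static void obj_kill(struct obj *o)
{
	INIT_WORK(&o->kill_work, obj_kill_workfn);
	queue_work(system_unbound_wq, &o->kill_work);
}

/* Called when the last reference is dropped. */
static void obj_release(struct obj *o)
{
	INIT_WORK(&o->release_work, obj_release_workfn);
	queue_work(system_unbound_wq, &o->release_work);
}

With separate work items, obj_kill() and obj_release() can race freely, which
is the property the destroy_work/release_work split gives the css teardown
paths.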