diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d1a687444b27..798d60b3e2ad 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2084,6 +2084,8 @@ enum netdev_reg_state {
  *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
+ *
+ *	@netdev_trace_buffer_list: Linked list for debugging refcount leak.
  */
 
 struct net_device {
@@ -2238,6 +2240,9 @@ struct net_device {
 #if IS_ENABLED(CONFIG_TLS_DEVICE)
 	const struct tlsdev_ops *tlsdev_ops;
 #endif
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+	struct list_head	netdev_trace_buffer_list;
+#endif
 
 	unsigned int		operstate;
 	unsigned char		link_mode;
@@ -3166,6 +3171,7 @@ enum netdev_cmd {
 	NETDEV_OFFLOAD_XSTATS_REPORT_USED,
 	NETDEV_OFFLOAD_XSTATS_REPORT_DELTA,
 	NETDEV_XDP_FEAT_CHANGE,
+	NETDEV_DEBUG_UNREGISTER,
 };
 
 const char *netdev_cmd_to_name(enum netdev_cmd cmd);
@@ -4345,9 +4351,15 @@ static inline bool dev_nit_active(const struct net_device *dev)
 
 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
 
+void save_netdev_trace_buffer(struct net_device *dev, int delta);
+int trim_netdev_trace(unsigned long *entries, int nr_entries);
+
 static inline void __dev_put(struct net_device *dev)
 {
 	if (dev) {
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+		save_netdev_trace_buffer(dev, -1);
+#endif
 #ifdef CONFIG_PCPU_DEV_REFCNT
 		this_cpu_dec(*dev->pcpu_refcnt);
 #else
@@ -4359,6 +4371,9 @@ static inline void __dev_put(struct net_device *dev)
 static inline void __dev_hold(struct net_device *dev)
 {
 	if (dev) {
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+		save_netdev_trace_buffer(dev, 1);
+#endif
 #ifdef CONFIG_PCPU_DEV_REFCNT
 		this_cpu_inc(*dev->pcpu_refcnt);
 #else
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 77198911b8dd..5f435c1e48d8 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -576,6 +576,10 @@ static inline bool lockdep_softirq_start(void) { return false; }
 static inline void lockdep_softirq_end(bool in_hardirq) { }
 #endif
 
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static noinline void handle_softirqs(bool ksirqd);
+#endif
+
 static void handle_softirqs(bool ksirqd)
 {
 	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 45320e27a16c..e9c654a9d0bb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3145,6 +3145,10 @@ static bool manage_workers(struct worker *worker)
 	return true;
 }
 
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static noinline void process_one_work(struct worker *worker, struct work_struct *work);
+#endif
+
 /**
  * process_one_work - process single work
  * @worker: self
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index a93af55df5fd..66e6624abfa3 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -124,6 +124,16 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
 
 static DEFINE_MUTEX(j1939_netdev_lock);
 
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static void dump_priv_trace_buffer(const struct net_device *ndev);
+static void erase_priv_trace_buffer(struct j1939_priv *priv);
+static noinline void save_priv_trace_buffer(struct j1939_priv *priv, int delta);
+#else
+static inline void dump_priv_trace_buffer(const struct net_device *ndev) { }
+static inline void erase_priv_trace_buffer(struct j1939_priv *priv) { }
+static inline void save_priv_trace_buffer(struct j1939_priv *priv, int delta) { }
+#endif
+
 static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
 {
 	struct j1939_priv *priv;
@@ -137,6 +147,7 @@ static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
 	priv->ndev = ndev;
 	kref_init(&priv->kref);
 	kref_init(&priv->rx_kref);
+	save_priv_trace_buffer(priv, 1);
 	dev_hold(ndev);
 
 	netdev_dbg(priv->ndev, "%s : 0x%p\n", __func__, priv);
@@ -164,17 +175,20 @@ static void __j1939_priv_release(struct kref *kref)
 	WARN_ON_ONCE(!list_empty(&priv->j1939_socks));
 
 	dev_put(ndev);
+	erase_priv_trace_buffer(priv);
 	kfree(priv);
 }
 
 void j1939_priv_put(struct j1939_priv *priv)
 {
+	save_priv_trace_buffer(priv, -1);
 	kref_put(&priv->kref, __j1939_priv_release);
 }
 
 void j1939_priv_get(struct j1939_priv *priv)
 {
 	kref_get(&priv->kref);
+	save_priv_trace_buffer(priv, 1);
 }
 
 static int j1939_can_rx_register(struct j1939_priv *priv)
@@ -282,6 +296,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
 		kref_get(&priv_new->rx_kref);
 		mutex_unlock(&j1939_netdev_lock);
 		dev_put(ndev);
+		erase_priv_trace_buffer(priv);
 		kfree(priv);
 		return priv_new;
 	}
@@ -299,6 +314,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
 
 	mutex_unlock(&j1939_netdev_lock);
 	dev_put(ndev);
+	erase_priv_trace_buffer(priv);
 	kfree(priv);
 
 	return ERR_PTR(ret);
@@ -364,6 +380,9 @@ static int j1939_netdev_notify(struct notifier_block *nb,
 	struct can_ml_priv *can_ml = can_get_ml_priv(ndev);
 	struct j1939_priv *priv;
 
+	if (msg == NETDEV_DEBUG_UNREGISTER)
+		dump_priv_trace_buffer(ndev);
+
 	if (!can_ml)
 		goto notify_done;
 
@@ -428,3 +447,79 @@ static __exit void j1939_module_exit(void)
 
 module_init(j1939_module_init);
 module_exit(j1939_module_exit);
+
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+
+#define PRIV_TRACE_BUFFER_SIZE 1024
+static struct priv_trace_buffer {
+	struct j1939_priv *priv; // no-ref
+	struct net_device *ndev; // no-ref
+	atomic_t count;
+	int nr_entries;
+	unsigned long entries[20];
+} priv_trace_buffer[PRIV_TRACE_BUFFER_SIZE];
+static bool priv_trace_buffer_exhausted;
+
+static void dump_priv_trace_buffer(const struct net_device *ndev)
+{
+	struct priv_trace_buffer *ptr;
+	int count, balance = 0;
+	int i;
+
+	for (i = 0; i < PRIV_TRACE_BUFFER_SIZE; i++) {
+		ptr = &priv_trace_buffer[i];
+		if (!ptr->priv || ptr->ndev != ndev)
+			continue;
+		count = atomic_read(&ptr->count);
+		balance += count;
+		pr_info("Call trace for %s@%p %+d at\n", ndev->name, ptr->priv, count);
+		stack_trace_print(ptr->entries, ptr->nr_entries, 4);
+	}
+	if (!priv_trace_buffer_exhausted)
+		pr_info("balance for %s@j1939_priv is %d\n", ndev->name, balance);
+	else
+		pr_info("balance for %s@j1939_priv is unknown\n", ndev->name);
+}
+
+static void erase_priv_trace_buffer(struct j1939_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < PRIV_TRACE_BUFFER_SIZE; i++)
+		if (priv_trace_buffer[i].priv == priv)
+			priv_trace_buffer[i].priv = NULL;
+}
+
+static noinline void save_priv_trace_buffer(struct j1939_priv *priv, int delta)
+{
+	struct priv_trace_buffer *ptr;
+	unsigned long entries[ARRAY_SIZE(ptr->entries)];
+	unsigned long nr_entries;
+	int i;
+
+	if (in_nmi())
+		return;
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(ptr->entries), 1);
+	nr_entries = trim_netdev_trace(entries, nr_entries);
+	for (i = 0; i < PRIV_TRACE_BUFFER_SIZE; i++) {
+		ptr = &priv_trace_buffer[i];
+		if (ptr->priv == priv && ptr->nr_entries == nr_entries &&
+		    !memcmp(ptr->entries, entries, nr_entries * sizeof(unsigned long))) {
+			atomic_add(delta, &ptr->count);
+			return;
+		}
+	}
+	for (i = 0; i < PRIV_TRACE_BUFFER_SIZE; i++) {
+		ptr = &priv_trace_buffer[i];
+		if (!ptr->priv && !cmpxchg(&ptr->priv, NULL, priv)) {
+			ptr->ndev = priv->ndev;
+			atomic_set(&ptr->count, delta);
+			ptr->nr_entries = nr_entries;
+			memmove(ptr->entries, entries, nr_entries * sizeof(unsigned long));
+			return;
+		}
+	}
+	priv_trace_buffer_exhausted = true;
+}
+
+#endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 2acfa44927da..c3a62c16fa15 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1854,6 +1854,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
 	N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
 	N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
 	N(XDP_FEAT_CHANGE)
+	N(DEBUG_UNREGISTER)
 	}
 #undef N
 	return "UNKNOWN_NETDEV_EVENT";
@@ -11429,6 +11430,14 @@ int netdev_refcnt_read(const struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_refcnt_read);
 
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static void dump_netdev_trace_buffer(const struct net_device *dev);
+static void erase_netdev_trace_buffer(const struct net_device *dev);
+#else
+static inline void dump_netdev_trace_buffer(const struct net_device *dev) { }
+static inline void erase_netdev_trace_buffer(const struct net_device *dev) { }
+#endif
+
 int netdev_unregister_timeout_secs __read_mostly = 10;
 
 #define WAIT_REFS_MIN_MSECS 1
@@ -11502,11 +11511,16 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
 
 		if (time_after(jiffies, warning_time +
 			       READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
+			rtnl_lock();
 			list_for_each_entry(dev, list, todo_list) {
 				pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
 					 dev->name, netdev_refcnt_read(dev));
 				ref_tracker_dir_print(&dev->refcnt_tracker, 10);
+				call_netdevice_notifiers(NETDEV_DEBUG_UNREGISTER, dev);
+				dump_netdev_trace_buffer(dev);
 			}
+			__rtnl_unlock();
+			rcu_barrier();
 
 			warning_time = jiffies;
 		}
@@ -11904,6 +11918,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	dev->priv_len = sizeof_priv;
 
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+	INIT_LIST_HEAD(&dev->netdev_trace_buffer_list);
+#endif
 	ref_tracker_dir_init(&dev->refcnt_tracker, 128, "netdev");
 #ifdef CONFIG_PCPU_DEV_REFCNT
 	dev->pcpu_refcnt = alloc_percpu(int);
@@ -12076,6 +12093,8 @@ void free_netdev(struct net_device *dev)
 
 	mutex_destroy(&dev->lock);
 
+	erase_netdev_trace_buffer(dev);
+
 	/* Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED ||
 	    dev->reg_state == NETREG_DUMMY) {
@@ -13090,3 +13109,180 @@ static int __init net_dev_init(void)
 }
 
 subsys_initcall(net_dev_init);
+
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+
+#define NETDEV_TRACE_BUFFER_SIZE 32768
+static struct netdev_trace_buffer {
+	struct list_head list;
+	int prev_count;
+	atomic_t count;
+	int nr_entries;
+	unsigned long entries[20];
+} netdev_trace_buffer[NETDEV_TRACE_BUFFER_SIZE];
+static LIST_HEAD(netdev_trace_buffer_list);
+static DEFINE_SPINLOCK(netdev_trace_buffer_lock);
+static bool netdev_trace_buffer_exhausted;
+
+static int netdev_trace_buffer_init(void)
+{
+	int i;
+
+	for (i = 0; i < NETDEV_TRACE_BUFFER_SIZE; i++)
+		list_add_tail(&netdev_trace_buffer[i].list, &netdev_trace_buffer_list);
+	return 0;
+}
+pure_initcall(netdev_trace_buffer_init);
+
+static void dump_netdev_trace_buffer(const struct net_device *dev)
+{
+	struct netdev_trace_buffer *ptr;
+	int count, balance = 0, pos = 0;
+
+	list_for_each_entry_rcu(ptr, &dev->netdev_trace_buffer_list, list,
+				/* list elements can't go away. */ 1) {
+		pos++;
+		count = atomic_read(&ptr->count);
+		balance += count;
+		if (ptr->prev_count == count)
+			continue;
+		ptr->prev_count = count;
+		pr_info("Call trace for %s[%d] %+d at\n", dev->name, pos, count);
+		stack_trace_print(ptr->entries, ptr->nr_entries, 4);
+		cond_resched();
+	}
+	if (!netdev_trace_buffer_exhausted)
+		pr_info("balance as of %s[%d] is %d\n", dev->name, pos, balance);
+}
+
+static void erase_netdev_trace_buffer(const struct net_device *dev)
+{
+	struct netdev_trace_buffer *ptr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&netdev_trace_buffer_lock, flags);
+	while (!list_empty(&dev->netdev_trace_buffer_list)) {
+		ptr = list_first_entry(&dev->netdev_trace_buffer_list, typeof(*ptr), list);
+		list_del(&ptr->list);
+		list_add_tail(&ptr->list, &netdev_trace_buffer_list);
+	}
+	spin_unlock_irqrestore(&netdev_trace_buffer_lock, flags);
+}
+
+#ifdef CONFIG_KALLSYMS
+static noinline unsigned long __find_trim(unsigned long *entries, int nr_entries, const char *name)
+{
+	int i;
+	char buffer[KSYM_SYMBOL_LEN];
+	const int len = strlen(name);
+
+	for (i = 0; i < nr_entries; i++) {
+		snprintf(buffer, sizeof(buffer), "%pS", (void *)entries[i]);
+		if (!strncmp(buffer, name, len) && buffer[len] == '+')
+			return entries[i];
+	}
+	return 0;
+}
+
+static unsigned long caller_handle_softirqs;
+static unsigned long caller_process_one_work;
+static unsigned long caller_ksys_unshare;
+static unsigned long caller___sys_bind;
+static unsigned long caller___sock_sendmsg;
+
+static int __init net_check_symbols(void)
+{
+	if (!kallsyms_lookup_name("handle_softirqs"))
+		caller_handle_softirqs = -1;
+	if (!kallsyms_lookup_name("process_one_work"))
+		caller_process_one_work = -1;
+	if (!kallsyms_lookup_name("ksys_unshare"))
+		caller_ksys_unshare = -1;
+	if (!kallsyms_lookup_name("__sys_bind"))
+		caller___sys_bind = -1;
+	if (!kallsyms_lookup_name("sock_sendmsg_nosec") &&
+	    !kallsyms_lookup_name("__sock_sendmsg"))
+		caller___sock_sendmsg = -1;
+	return 0;
+}
+late_initcall(net_check_symbols);
+#endif
+
+int trim_netdev_trace(unsigned long *entries, int nr_entries)
+{
+#ifdef CONFIG_KALLSYMS
+	int i;
+
+	if (in_softirq()) {
+		if (unlikely(!caller_handle_softirqs))
+			caller_handle_softirqs = __find_trim(entries, nr_entries,
+							     "handle_softirqs");
+		for (i = 0; i < nr_entries; i++)
+			if (entries[i] == caller_handle_softirqs)
+				return i + 1;
+	} else if (current->flags & PF_WQ_WORKER) {
+		if (unlikely(!caller_process_one_work))
+			caller_process_one_work = __find_trim(entries, nr_entries,
+							      "process_one_work");
+		for (i = 0; i < nr_entries; i++)
+			if (entries[i] == caller_process_one_work)
+				return i + 1;
+	} else {
+		if (unlikely(!caller_ksys_unshare))
+			caller_ksys_unshare = __find_trim(entries, nr_entries, "ksys_unshare");
+		if (unlikely(!caller___sys_bind))
+			caller___sys_bind = __find_trim(entries, nr_entries, "__sys_bind");
+		if (unlikely(!caller___sock_sendmsg)) {
+			caller___sock_sendmsg = __find_trim(entries, nr_entries,
+							    "sock_sendmsg_nosec");
+			if (!caller___sock_sendmsg)
+				caller___sock_sendmsg = __find_trim(entries, nr_entries,
+								    "__sock_sendmsg");
+		}
+		for (i = 0; i < nr_entries; i++)
+			if (entries[i] == caller_ksys_unshare ||
+			    entries[i] == caller___sys_bind ||
+			    entries[i] == caller___sock_sendmsg)
+				return i + 1;
+	}
+#endif
+	return nr_entries;
+}
+EXPORT_SYMBOL(trim_netdev_trace);
+
+void save_netdev_trace_buffer(struct net_device *dev, int delta)
+{
+	struct netdev_trace_buffer *ptr;
+	unsigned long entries[ARRAY_SIZE(ptr->entries)];
+	unsigned long nr_entries;
+	unsigned long flags;
+
+	if (in_nmi())
+		return;
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(ptr->entries), 1);
+	nr_entries = trim_netdev_trace(entries, nr_entries);
+	list_for_each_entry_rcu(ptr, &dev->netdev_trace_buffer_list, list,
+				/* list elements can't go away. */ 1) {
+		if (ptr->nr_entries == nr_entries &&
+		    !memcmp(ptr->entries, entries, nr_entries * sizeof(unsigned long))) {
+			atomic_add(delta, &ptr->count);
+			return;
+		}
+	}
+	spin_lock_irqsave(&netdev_trace_buffer_lock, flags);
+	if (!list_empty(&netdev_trace_buffer_list)) {
+		ptr = list_first_entry(&netdev_trace_buffer_list, typeof(*ptr), list);
+		list_del(&ptr->list);
+		ptr->prev_count = 0;
+		atomic_set(&ptr->count, delta);
+		ptr->nr_entries = nr_entries;
+		memmove(ptr->entries, entries, nr_entries * sizeof(unsigned long));
+		list_add_tail_rcu(&ptr->list, &dev->netdev_trace_buffer_list);
+	} else {
+		netdev_trace_buffer_exhausted = true;
+	}
+	spin_unlock_irqrestore(&netdev_trace_buffer_lock, flags);
+}
+EXPORT_SYMBOL(save_netdev_trace_buffer);
+
+#endif
diff --git a/net/core/lock_debug.c b/net/core/lock_debug.c
index 9e9fb25314b9..78d611bb6d1c 100644
--- a/net/core/lock_debug.c
+++ b/net/core/lock_debug.c
@@ -29,6 +29,7 @@ int netdev_debug_event(struct notifier_block *nb, unsigned long event,
 	case NETDEV_DOWN:
 	case NETDEV_REBOOT:
 	case NETDEV_UNREGISTER:
+	case NETDEV_DEBUG_UNREGISTER:
 	case NETDEV_CHANGEMTU:
 	case NETDEV_CHANGEADDR:
 	case NETDEV_PRE_CHANGEADDR:
diff --git a/net/socket.c b/net/socket.c
index e8892b218708..fce536d2d8b9 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -734,6 +734,10 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
 	return ret;
 }
 
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static noinline int __sock_sendmsg(struct socket *sock, struct msghdr *msg);
+#endif
+
 static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
 {
 	int err = security_socket_sendmsg(sock, msg,