diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index d79acf5c5100..b7be2cc1efc3 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -50,8 +50,8 @@
  *
  * The kmemleak_object structures have a use_count incremented or decremented
  * using the get_object()/put_object() functions. When the use_count becomes
- * 0, this count can no longer be incremented and put_object() schedules the
- * kmemleak_object freeing via an RCU callback. All calls to the get_object()
+ * 0, this count can no longer be incremented and put_object() adds the
+ * kmemleak_object to a deferred free list. All calls to the get_object()
  * function must be protected by rcu_read_lock() to avoid accessing a freed
  * structure.
  */
@@ -93,6 +93,7 @@
 #include
 #include
 #include
+#include <linux/llist.h>
 #include
 #include
 
@@ -138,7 +139,7 @@ struct kmemleak_object {
 	struct list_head object_list;
 	struct list_head gray_list;
 	struct rb_node rb_node;
-	struct rcu_head rcu;		/* object_list lockless traversal */
+	struct llist_node free_node;	/* deferred freeing */
 	/* object usage count; object freed when use_count == 0 */
 	atomic_t use_count;
 	unsigned int del_state;		/* deletion state */
@@ -209,6 +210,13 @@
 static DEFINE_RAW_SPINLOCK(kmemleak_lock);
 static struct kmem_cache *object_cache;
 static struct kmem_cache *scan_area_cache;
+/* objects pending RCU-deferred freeing */
+static LLIST_HEAD(objects_to_free);
+static atomic_long_t objects_to_free_count;
+static void flush_deferred_frees_work(struct work_struct *work);
+static DECLARE_WORK(deferred_free_work, flush_deferred_frees_work);
+#define DEFERRED_FREE_BATCH 256
+
 /* set if tracing memory operations is enabled */
 static int kmemleak_enabled __read_mostly = 1;
 /* same as above but only for the kmemleak_free() callback */
@@ -522,14 +530,12 @@ static void mem_pool_free(struct kmemleak_object *object)
 }
 
 /*
- * RCU callback to free a kmemleak_object.
+ * Free a kmemleak_object and its associated scan areas.
  */
-static void free_object_rcu(struct rcu_head *rcu)
+static void free_object(struct kmemleak_object *object)
 {
 	struct hlist_node *tmp;
 	struct kmemleak_scan_area *area;
-	struct kmemleak_object *object =
-		container_of(rcu, struct kmemleak_object, rcu);
 
 	/*
 	 * Once use_count is 0 (guaranteed by put_object), there is no other
@@ -543,11 +549,19 @@
 }
 
 /*
- * Decrement the object use_count. Once the count is 0, free the object using
- * an RCU callback. Since put_object() may be called via the kmemleak_free() ->
- * delete_object() path, the delayed RCU freeing ensures that there is no
- * recursive call to the kernel allocator. Lock-less RCU object_list traversal
- * is also possible.
+ * Decrement the object use_count. Once the count is 0, add the object to the
+ * deferred free list. Since put_object() may be called via the
+ * kmemleak_free() -> delete_object() path, the deferred freeing ensures that
+ * there is no recursive call to the kernel allocator. Lock-less RCU
+ * object_list traversal is also possible. The actual freeing happens after
+ * an RCU grace period in flush_deferred_frees().
+ *
+ * Unlike the previous call_rcu()-based approach, this avoids embedding
+ * rcu_head in kmemleak_object. Objects from SLAB_NOLEAKTRACE caches (like
+ * kmemleak's own object_cache) are not tracked by kmemleak. When such
+ * objects were linked in the call_rcu callback chain via rcu_head->next,
+ * kmemleak could not scan through them, breaking the chain and causing
+ * false positive leak reports for objects queued after them.
  */
 static void put_object(struct kmemleak_object *object)
 {
@@ -558,14 +572,46 @@
 	WARN_ON(object->flags & OBJECT_ALLOCATED);
 
 	/*
-	 * It may be too early for the RCU callbacks, however, there is no
+	 * It may be too early for deferred freeing, however, there is no
 	 * concurrent object_list traversal when !object_cache and all objects
 	 * came from the memory pool. Free the object directly.
 	 */
-	if (object_cache)
-		call_rcu(&object->rcu, free_object_rcu);
-	else
-		free_object_rcu(&object->rcu);
+	if (object_cache) {
+		llist_add(&object->free_node, &objects_to_free);
+		if (atomic_long_inc_return(&objects_to_free_count) >=
+		    DEFERRED_FREE_BATCH)
+			schedule_work(&deferred_free_work);
+	} else {
+		free_object(object);
+	}
+}
+
+/*
+ * Flush all deferred object frees after an RCU grace period. This must be
+ * called from a context that can block.
+ */
+static void flush_deferred_frees(void)
+{
+	struct llist_node *list;
+	struct kmemleak_object *object, *tmp;
+	long count = 0;
+
+	list = llist_del_all(&objects_to_free);
+	if (!list)
+		return;
+
+	synchronize_rcu();
+
+	llist_for_each_entry_safe(object, tmp, list, free_node) {
+		free_object(object);
+		count++;
+	}
+	atomic_long_sub(count, &objects_to_free_count);
+}
+
+static void flush_deferred_frees_work(struct work_struct *work)
+{
+	flush_deferred_frees();
 }
 
 /*
@@ -809,7 +855,7 @@
 }
 
 /*
- * Mark the object as not allocated and schedule RCU freeing via put_object().
+ * Mark the object as not allocated and schedule deferred freeing via put_object().
  */
 static void __delete_object(struct kmemleak_object *object)
 {
@@ -2209,6 +2255,7 @@ static void __kmemleak_do_cleanup(void)
 		if (!(++cnt & 0x3f))
 			cond_resched();
 	}
+	flush_deferred_frees();
 }
 
 /*
diff --git a/mm/slub.c b/mm/slub.c
index 20cb4f3b636d..6bdf409d427e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -7537,6 +7537,7 @@ static void early_kmem_cache_node_alloc(int node)
 	n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
 	slab->freelist = get_freepointer(kmem_cache_node, n);
 	slab->inuse = 1;
+	kmemleak_alloc(n, sizeof(*n), 1, GFP_NOWAIT);
 	kmem_cache_node->node[node] = n;
 	init_kmem_cache_node(n, NULL);
 	inc_slabs_node(kmem_cache_node, node, slab->objects);