Skip to content

Commit 78d0b16

Browse files
anakryiko authored and mhiramat committed
objpool: cache nr_possible_cpus() and avoid caching nr_cpu_ids
Profiling shows that calling num_possible_cpus() in objpool_pop() takes a noticeable amount of CPU (when profiled on an 80-core machine), as we need to recalculate the number of set bits in a CPU bit mask. This number can't change, so there is no point in paying the price for recalculating it. As such, cache this value in struct objpool_head and use it in objpool_pop().

On the other hand, cached pool->nr_cpus isn't necessary, as it's not used in the hot path and is also a pretty trivial value to retrieve. So drop pool->nr_cpus in favor of using nr_cpu_ids everywhere. This way the size of struct objpool_head remains the same, which is a nice bonus.

Same BPF selftests benchmarks were used to evaluate the effect. Using changes in the previous patch (inlining of objpool_pop/objpool_push) as baseline, here are the differences:

BASELINE
========
kretprobe      :  9.937 ± 0.174M/s
kretprobe-multi: 10.440 ± 0.108M/s

AFTER
=====
kretprobe      : 10.106 ± 0.120M/s (+1.7%)
kretprobe-multi: 10.515 ± 0.180M/s (+0.7%)

Link: https://lore.kernel.org/all/20240424215214.3956041-3-andrii@kernel.org/

Cc: Matt (Qiang) Wu <wuqiang.matt@bytedance.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
1 parent a3b00f1 commit 78d0b16

File tree

2 files changed

+9
-9
lines changed

2 files changed

+9
-9
lines changed

include/linux/objpool.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context);
7373
* struct objpool_head - object pooling metadata
7474
* @obj_size: object size, aligned to sizeof(void *)
7575
* @nr_objs: total objs (to be pre-allocated with objpool)
76-
* @nr_cpus: local copy of nr_cpu_ids
76+
* @nr_possible_cpus: cached value of num_possible_cpus()
7777
* @capacity: max objs can be managed by one objpool_slot
7878
* @gfp: gfp flags for kmalloc & vmalloc
7979
* @ref: refcount of objpool
@@ -85,7 +85,7 @@ typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context);
8585
struct objpool_head {
8686
int obj_size;
8787
int nr_objs;
88-
int nr_cpus;
88+
int nr_possible_cpus;
8989
int capacity;
9090
gfp_t gfp;
9191
refcount_t ref;
@@ -176,7 +176,7 @@ static inline void *objpool_pop(struct objpool_head *pool)
176176
raw_local_irq_save(flags);
177177

178178
cpu = raw_smp_processor_id();
179-
for (i = 0; i < num_possible_cpus(); i++) {
179+
for (i = 0; i < pool->nr_possible_cpus; i++) {
180180
obj = __objpool_try_get_slot(pool, cpu);
181181
if (obj)
182182
break;

lib/objpool.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs,
5050
{
5151
int i, cpu_count = 0;
5252

53-
for (i = 0; i < pool->nr_cpus; i++) {
53+
for (i = 0; i < nr_cpu_ids; i++) {
5454

5555
struct objpool_slot *slot;
5656
int nodes, size, rc;
@@ -60,8 +60,8 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs,
6060
continue;
6161

6262
/* compute how many objects to be allocated with this slot */
63-
nodes = nr_objs / num_possible_cpus();
64-
if (cpu_count < (nr_objs % num_possible_cpus()))
63+
nodes = nr_objs / pool->nr_possible_cpus;
64+
if (cpu_count < (nr_objs % pool->nr_possible_cpus))
6565
nodes++;
6666
cpu_count++;
6767

@@ -103,7 +103,7 @@ static void objpool_fini_percpu_slots(struct objpool_head *pool)
103103
if (!pool->cpu_slots)
104104
return;
105105

106-
for (i = 0; i < pool->nr_cpus; i++)
106+
for (i = 0; i < nr_cpu_ids; i++)
107107
kvfree(pool->cpu_slots[i]);
108108
kfree(pool->cpu_slots);
109109
}
@@ -130,13 +130,13 @@ int objpool_init(struct objpool_head *pool, int nr_objs, int object_size,
130130

131131
/* initialize objpool pool */
132132
memset(pool, 0, sizeof(struct objpool_head));
133-
pool->nr_cpus = nr_cpu_ids;
133+
pool->nr_possible_cpus = num_possible_cpus();
134134
pool->obj_size = object_size;
135135
pool->capacity = capacity;
136136
pool->gfp = gfp & ~__GFP_ZERO;
137137
pool->context = context;
138138
pool->release = release;
139-
slot_size = pool->nr_cpus * sizeof(struct objpool_slot);
139+
slot_size = nr_cpu_ids * sizeof(struct objpool_slot);
140140
pool->cpu_slots = kzalloc(slot_size, pool->gfp);
141141
if (!pool->cpu_slots)
142142
return -ENOMEM;

0 commit comments

Comments
 (0)