Commit a3b00f1

anakryiko authored and mhiramat committed
objpool: enable inlining objpool_push() and objpool_pop() operations
objpool_push() and objpool_pop() are very performance-critical functions
and can be called very frequently in the kretprobe triggering path. As
such, it makes sense to allow the compiler to inline them completely to
eliminate function call overhead. Luckily, their logic is quite well
isolated and doesn't have any sprawling dependencies.

This patch moves both objpool_push() and objpool_pop() into
include/linux/objpool.h and marks them as static inline functions,
enabling inlining. To prevent anyone from using the internal helpers
(objpool_try_get_slot, objpool_try_add_slot), they are renamed with
leading underscores.

To evaluate the effect, we used the kretprobe microbenchmark from BPF
selftests (the bench trig-kprobe and trig-kprobe-multi benchmarks),
running no-op BPF kretprobe/kretprobe.multi programs in a tight loop.
BPF's own overhead in this case is minimal; the benchmark mostly
stresses the rest of the in-kernel kretprobe infrastructure. Results
are in millions of calls per second. This is not super scientific, but
it shows the trend nevertheless.

BEFORE
======
kretprobe      :  9.794 ± 0.086M/s
kretprobe-multi: 10.219 ± 0.032M/s

AFTER
=====
kretprobe      :  9.937 ± 0.174M/s (+1.5%)
kretprobe-multi: 10.440 ± 0.108M/s (+2.2%)

Link: https://lore.kernel.org/all/20240424215214.3956041-2-andrii@kernel.org/
Cc: Matt (Qiang) Wu <wuqiang.matt@bytedance.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
1 parent e03c05a commit a3b00f1
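
To make the hot path concrete, here is a minimal, hypothetical sketch of the kind of call site the commit message describes: one object popped per event and pushed back when the event is done. Only objpool_pop(), objpool_push(), and struct objpool_head come from this commit; struct my_obj, my_pool, and my_event_handler() are illustrative names, and the pool is assumed to have been set up with objpool_init() elsewhere.

#include <linux/errno.h>
#include <linux/objpool.h>

/* illustrative per-event scratch object; not part of the commit */
struct my_obj {
	unsigned long payload;
};

static struct objpool_head my_pool;	/* assumed initialized elsewhere */

/* hypothetical handler on a kretprobe-like hot path */
static int my_event_handler(unsigned long data)
{
	/* with this commit, the pop fast path is inlined into the caller */
	struct my_obj *obj = objpool_pop(&my_pool);

	if (!obj)
		return -ENOMEM;	/* pool temporarily exhausted */

	obj->payload = data;
	/* ... consume obj ... */

	/* likewise inlined: return the object to its per-CPU ring */
	return objpool_push(obj, &my_pool);
}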

File tree: 2 files changed, +99 −102 lines

include/linux/objpool.h

Lines changed: 99 additions & 2 deletions
@@ -5,6 +5,10 @@
 
 #include <linux/types.h>
 #include <linux/refcount.h>
+#include <linux/atomic.h>
+#include <linux/cpumask.h>
+#include <linux/irqflags.h>
+#include <linux/smp.h>
 
 /*
  * objpool: ring-array based lockless MPMC queue
@@ -118,13 +122,94 @@ int objpool_init(struct objpool_head *pool, int nr_objs, int object_size,
 		  gfp_t gfp, void *context, objpool_init_obj_cb objinit,
 		  objpool_fini_cb release);
 
+/* try to retrieve object from slot */
+static inline void *__objpool_try_get_slot(struct objpool_head *pool, int cpu)
+{
+	struct objpool_slot *slot = pool->cpu_slots[cpu];
+	/* load head snapshot, other cpus may change it */
+	uint32_t head = smp_load_acquire(&slot->head);
+
+	while (head != READ_ONCE(slot->last)) {
+		void *obj;
+
+		/*
+		 * data visibility of 'last' and 'head' could be out of
+		 * order since memory updating of 'last' and 'head' are
+		 * performed in push() and pop() independently
+		 *
+		 * before any retrieving attempts, pop() must guarantee
+		 * 'last' is behind 'head', that is to say, there must
+		 * be available objects in slot, which could be ensured
+		 * by condition 'last != head && last - head <= nr_objs'
+		 * that is equivalent to 'last - head - 1 < nr_objs' as
+		 * 'last' and 'head' are both unsigned int32
+		 */
+		if (READ_ONCE(slot->last) - head - 1 >= pool->nr_objs) {
+			head = READ_ONCE(slot->head);
+			continue;
+		}
+
+		/* obj must be retrieved before moving forward head */
+		obj = READ_ONCE(slot->entries[head & slot->mask]);
+
+		/* move head forward to mark it's consumption */
+		if (try_cmpxchg_release(&slot->head, &head, head + 1))
+			return obj;
+	}
+
+	return NULL;
+}
+
 /**
  * objpool_pop() - allocate an object from objpool
  * @pool: object pool
  *
  * return value: object ptr or NULL if failed
  */
-void *objpool_pop(struct objpool_head *pool);
+static inline void *objpool_pop(struct objpool_head *pool)
+{
+	void *obj = NULL;
+	unsigned long flags;
+	int i, cpu;
+
+	/* disable local irq to avoid preemption & interruption */
+	raw_local_irq_save(flags);
+
+	cpu = raw_smp_processor_id();
+	for (i = 0; i < num_possible_cpus(); i++) {
+		obj = __objpool_try_get_slot(pool, cpu);
+		if (obj)
+			break;
+		cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1);
+	}
+	raw_local_irq_restore(flags);
+
+	return obj;
+}
+
+/* adding object to slot, abort if the slot was already full */
+static inline int
+__objpool_try_add_slot(void *obj, struct objpool_head *pool, int cpu)
+{
+	struct objpool_slot *slot = pool->cpu_slots[cpu];
+	uint32_t head, tail;
+
+	/* loading tail and head as a local snapshot, tail first */
+	tail = READ_ONCE(slot->tail);
+
+	do {
+		head = READ_ONCE(slot->head);
+		/* fault caught: something must be wrong */
+		WARN_ON_ONCE(tail - head > pool->nr_objs);
+	} while (!try_cmpxchg_acquire(&slot->tail, &tail, tail + 1));
+
+	/* now the tail position is reserved for the given obj */
+	WRITE_ONCE(slot->entries[tail & slot->mask], obj);
+	/* update sequence to make this obj available for pop() */
+	smp_store_release(&slot->last, tail + 1);
+
+	return 0;
+}
 
 /**
  * objpool_push() - reclaim the object and return back to objpool
@@ -134,7 +219,19 @@ void *objpool_pop(struct objpool_head *pool);
  * return: 0 or error code (it fails only when user tries to push
  * the same object multiple times or wrong "objects" into objpool)
  */
-int objpool_push(void *obj, struct objpool_head *pool);
+static inline int objpool_push(void *obj, struct objpool_head *pool)
+{
+	unsigned long flags;
+	int rc;
+
+	/* disable local irq to avoid preemption & interruption */
+	raw_local_irq_save(flags);
+	rc = __objpool_try_add_slot(obj, pool, raw_smp_processor_id());
+	raw_local_irq_restore(flags);
+
+	return rc;
+}
+
 
 /**
  * objpool_drop() - discard the object and deref objpool
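
The availability check in __objpool_try_get_slot() relies on unsigned 32-bit modular arithmetic, as the in-code comment explains. Here is a small standalone userspace demonstration (hypothetical values, not kernel code) of why 'last - head - 1 >= nr_objs' keeps working across counter wraparound and also rejects a torn snapshot where 'last' lags behind 'head':

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t nr_objs = 4;
	uint32_t head = 0xFFFFFFFEu;	/* head is about to wrap */
	uint32_t last = head + 3;	/* 3 objects pushed; wraps to 0x1 */

	/* last - head == 3 in modular arithmetic, so 3 - 1 == 2 < nr_objs:
	 * the slot is correctly seen as holding available objects */
	printf("available across wraparound? %s\n",
	       (last - head - 1 < nr_objs) ? "yes" : "no");	/* yes */

	/* if a stale 'last' appears behind 'head' (e.g. last == head - 1),
	 * last - head - 1 == 0xFFFFFFFE >= nr_objs, forcing a reload */
	last = head - 1;
	printf("stale snapshot rejected? %s\n",
	       (last - head - 1 >= nr_objs) ? "yes" : "no");	/* yes */
	return 0;
}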

lib/objpool.c

Lines changed: 0 additions & 100 deletions
@@ -152,106 +152,6 @@ int objpool_init(struct objpool_head *pool, int nr_objs, int object_size,
 }
 EXPORT_SYMBOL_GPL(objpool_init);
 
-/* adding object to slot, abort if the slot was already full */
-static inline int
-objpool_try_add_slot(void *obj, struct objpool_head *pool, int cpu)
-{
-	struct objpool_slot *slot = pool->cpu_slots[cpu];
-	uint32_t head, tail;
-
-	/* loading tail and head as a local snapshot, tail first */
-	tail = READ_ONCE(slot->tail);
-
-	do {
-		head = READ_ONCE(slot->head);
-		/* fault caught: something must be wrong */
-		WARN_ON_ONCE(tail - head > pool->nr_objs);
-	} while (!try_cmpxchg_acquire(&slot->tail, &tail, tail + 1));
-
-	/* now the tail position is reserved for the given obj */
-	WRITE_ONCE(slot->entries[tail & slot->mask], obj);
-	/* update sequence to make this obj available for pop() */
-	smp_store_release(&slot->last, tail + 1);
-
-	return 0;
-}
-
-/* reclaim an object to object pool */
-int objpool_push(void *obj, struct objpool_head *pool)
-{
-	unsigned long flags;
-	int rc;
-
-	/* disable local irq to avoid preemption & interruption */
-	raw_local_irq_save(flags);
-	rc = objpool_try_add_slot(obj, pool, raw_smp_processor_id());
-	raw_local_irq_restore(flags);
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(objpool_push);
-
-/* try to retrieve object from slot */
-static inline void *objpool_try_get_slot(struct objpool_head *pool, int cpu)
-{
-	struct objpool_slot *slot = pool->cpu_slots[cpu];
-	/* load head snapshot, other cpus may change it */
-	uint32_t head = smp_load_acquire(&slot->head);
-
-	while (head != READ_ONCE(slot->last)) {
-		void *obj;
-
-		/*
-		 * data visibility of 'last' and 'head' could be out of
-		 * order since memory updating of 'last' and 'head' are
-		 * performed in push() and pop() independently
-		 *
-		 * before any retrieving attempts, pop() must guarantee
-		 * 'last' is behind 'head', that is to say, there must
-		 * be available objects in slot, which could be ensured
-		 * by condition 'last != head && last - head <= nr_objs'
-		 * that is equivalent to 'last - head - 1 < nr_objs' as
-		 * 'last' and 'head' are both unsigned int32
-		 */
-		if (READ_ONCE(slot->last) - head - 1 >= pool->nr_objs) {
-			head = READ_ONCE(slot->head);
-			continue;
-		}
-
-		/* obj must be retrieved before moving forward head */
-		obj = READ_ONCE(slot->entries[head & slot->mask]);
-
-		/* move head forward to mark it's consumption */
-		if (try_cmpxchg_release(&slot->head, &head, head + 1))
-			return obj;
-	}
-
-	return NULL;
-}
-
-/* allocate an object from object pool */
-void *objpool_pop(struct objpool_head *pool)
-{
-	void *obj = NULL;
-	unsigned long flags;
-	int i, cpu;
-
-	/* disable local irq to avoid preemption & interruption */
-	raw_local_irq_save(flags);
-
-	cpu = raw_smp_processor_id();
-	for (i = 0; i < num_possible_cpus(); i++) {
-		obj = objpool_try_get_slot(pool, cpu);
-		if (obj)
-			break;
-		cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1);
-	}
-	raw_local_irq_restore(flags);
-
-	return obj;
-}
-EXPORT_SYMBOL_GPL(objpool_pop);
-
 /* release whole objpool forcely */
 void objpool_free(struct objpool_head *pool)
 {
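
With pop and push moved to the header, lib/objpool.c retains only setup and teardown. Below is a hedged sketch of that remaining lifecycle, assuming only the objpool_init() and objpool_free() signatures visible in the diffs; the pool size, object type, and no-op objinit callback are illustrative:

#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/objpool.h>

struct my_obj {
	unsigned long payload;
};

static struct objpool_head my_pool;

/* per-object constructor passed to objpool_init(); nothing to do here */
static int my_objinit(void *obj, void *context)
{
	return 0;
}

static int __init my_setup(void)
{
	/* 128 objects of sizeof(struct my_obj); no context, no release cb */
	return objpool_init(&my_pool, 128, sizeof(struct my_obj),
			    GFP_KERNEL, NULL, my_objinit, NULL);
}

static void __exit my_teardown(void)
{
	/* release the whole pool; still implemented in lib/objpool.c */
	objpool_free(&my_pool);
}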
