2525
2626
2727class PythonProfilerBase (ProfilerBase ):
28- MAX_FREQUENCY = 100
28+ MAX_FREQUENCY : Optional [ int ] = None # must be set by subclasses
2929
3030 def __init__ (
3131 self ,
@@ -35,6 +35,7 @@ def __init__(
3535 storage_dir : str ,
3636 ):
3737 super ().__init__ ()
38+ assert isinstance (self .MAX_FREQUENCY , int )
3839 self ._frequency = min (frequency , self .MAX_FREQUENCY )
3940 self ._duration = duration
4041 self ._stop_event = stop_event or Event ()
@@ -119,7 +120,11 @@ def snapshot(self) -> Mapping[int, Mapping[str, int]]:
119120
120121
121122class PythonEbpfProfiler (PythonProfilerBase ):
123+ MAX_FREQUENCY = 1000
122124 PYPERF_RESOURCE = "python/pyperf/PyPerf"
125+ events_buffer_pages = 256 # 1mb and needs to be physically contiguous
126+ # 28mb (each symbol is 224 bytes), but needn't be physically contiguous so don't care
127+ symbols_map_size = 131072
123128 dump_signal = signal .SIGUSR2
124129 dump_timeout = 5 # seconds
125130 poll_timeout = 10 # seconds
@@ -195,6 +200,10 @@ def start(self):
195200 str (self .output_path ),
196201 "-F" ,
197202 str (self ._frequency ),
203+ "--events-buffer-pages" ,
204+ str (self .events_buffer_pages ),
205+ "--symbols-map-size" ,
206+ str (self .symbols_map_size ),
198207 # Duration is irrelevant here, we want to run continuously.
199208 ]
200209 process = start_process (cmd , via_staticx = True )
@@ -204,6 +213,7 @@ def start(self):
204213 wait_event (self .poll_timeout , self ._stop_event , lambda : os .path .exists (self .output_path ))
205214 except TimeoutError :
206215 process .kill ()
216+ logger .error (f"PyPerf failed to start. stdout { process .stdout .read ()!r} stderr { process .stderr .read ()!r} " )
207217 raise
208218 else :
209219 self .process = process
@@ -225,7 +235,15 @@ def _dump(self) -> Path:
225235 self .process .send_signal (self .dump_signal )
226236
227237 try :
228- return self ._wait_for_output_file (self .dump_timeout )
238+ output = self ._wait_for_output_file (self .dump_timeout )
239+ # PyPerf outputs sampling & error counters every interval (after writing the output file); log them.
240+ # Reading them also makes sure its output pipe doesn't fill up.
241+ # using read1() which performs just a single read() call and doesn't read until EOF
242+ # (unlike Popen.communicate())
243+ assert self .process is not None
244+ # Python 3.6 doesn't have read1() without size argument :/
245+ logger .debug (f"PyPerf output: { self .process .stderr .read1 (4096 )} " )
246+ return output
229247 except TimeoutError :
230248 # error flow :(
231249 try :
0 commit comments