Skip to content

Commit f0ee2fb

Browse files
authored
Upgrade async-profiler to v2.8.1 & use mcache (#363)
Upgrade async-profiler to the latest release, v2.8.1, and use its new mcache option to retain the jmethodID -> method name mapping across profiling sessions (in some JVM versions, this translation is expensive).
1 parent c41ddb4 commit f0ee2fb

File tree

7 files changed

+56
-16
lines changed

7 files changed

+56
-16
lines changed

gprofiler/gprofiler_types.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from collections import Counter
77
from dataclasses import dataclass
8-
from typing import Any, Dict, MutableMapping, Optional, Union
8+
from typing import Any, Callable, Dict, MutableMapping, Optional, Union
99

1010
import configargparse
1111

@@ -44,3 +44,15 @@ def nonnegative_integer(value_str: str) -> int:
4444
if value < 0:
4545
raise configargparse.ArgumentTypeError("invalid non-negative integer value: {!r}".format(value))
4646
return value
47+
48+
49+
def integer_range(min_range: int, max_range: int) -> Callable[[str], int]:
    """
    Build an argparse ``type=`` callable accepting integers in ``[min_range, max_range)``.

    :param min_range: smallest accepted value (inclusive).
    :param max_range: upper bound (exclusive); the largest accepted value is ``max_range - 1``.
    :return: a function that parses its string argument as an integer and returns it,
        raising ``configargparse.ArgumentTypeError`` if the string is not an integer or
        the value is outside the accepted range.
    """
    # The upper bound is exclusive, so report the largest *accepted* value in error
    # messages; this keeps them consistent with metavars such as "[0-127]".
    max_allowed = max_range - 1

    def integer_range_check(value_str: str) -> int:
        try:
            value = int(value_str)
        except ValueError as err:
            # Without this, argparse reports the inner function's name
            # ("invalid integer_range_check value"), which is meaningless to users.
            raise configargparse.ArgumentTypeError(f"invalid integer value: {value_str!r}") from err

        if value < min_range or value >= max_range:
            raise configargparse.ArgumentTypeError(
                f"invalid integer value {value!r} (out of range {min_range!r}-{max_allowed!r})"
            )
        return value

    return integer_range_check

gprofiler/profilers/java.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,13 @@
3838

3939
from gprofiler import merge
4040
from gprofiler.exceptions import CalledProcessError, CalledProcessTimeoutError, NoRwExecDirectoryFoundError
41-
from gprofiler.gprofiler_types import ProcessToProfileData, ProfileData, StackToSampleCount, positive_integer
41+
from gprofiler.gprofiler_types import (
42+
ProcessToProfileData,
43+
ProfileData,
44+
StackToSampleCount,
45+
integer_range,
46+
positive_integer,
47+
)
4248
from gprofiler.kernel_messages import get_kernel_messages_provider
4349
from gprofiler.log import get_logger_adapter
4450
from gprofiler.metadata import application_identifiers
@@ -261,6 +267,8 @@ class AsyncProfiledProcess:
261267
_FDTRANSFER_TIMEOUT = 10
262268
_JATTACH_TIMEOUT = 30 # higher than jattach's timeout
263269

270+
_DEFAULT_MCACHE = 30 # arbitrarily chosen, not too high & not too low.
271+
264272
def __init__(
265273
self,
266274
process: Process,
@@ -271,6 +279,7 @@ def __init__(
271279
ap_safemode: int,
272280
ap_args: str,
273281
jattach_timeout: int = _JATTACH_TIMEOUT,
282+
mcache: int = 0,
274283
):
275284
self.process = process
276285
self._stop_event = stop_event
@@ -317,6 +326,7 @@ def __init__(
317326
self._ap_safemode = ap_safemode
318327
self._ap_args = ap_args
319328
self._jattach_timeout = jattach_timeout
329+
self._mcache = mcache
320330

321331
def _find_rw_exec_dir(self, available_dirs: Sequence[str]) -> str:
322332
"""
@@ -439,7 +449,7 @@ def _get_start_cmd(self, interval: int, ap_timeout: int) -> List[str]:
439449

440450
def _get_stop_cmd(self, with_output: bool) -> List[str]:
441451
return self._get_base_cmd() + [
442-
f"stop,log={self._log_path_process}"
452+
f"stop,log={self._log_path_process},mcache={self._mcache}"
443453
+ (self._get_ap_output_args() if with_output else "")
444454
+ self._get_extra_ap_args()
445455
]
@@ -614,9 +624,8 @@ def parse_jvm_version(version_string: str) -> JvmVersion:
614624
ProfilerArgument(
615625
"--java-async-profiler-safemode",
616626
dest="java_async_profiler_safemode",
617-
type=int,
618627
default=JAVA_ASYNC_PROFILER_DEFAULT_SAFEMODE,
619-
choices=range(0, 128),
628+
type=integer_range(0, 128),
620629
metavar="[0-127]",
621630
help="Controls the 'safemode' parameter passed to async-profiler. This is parameter denotes multiple"
622631
" bits that describe different stack recovery techniques which async-profiler uses (see StackRecovery"
@@ -645,6 +654,15 @@ def parse_jvm_version(version_string: str) -> JvmVersion:
645654
default=AsyncProfiledProcess._JATTACH_TIMEOUT,
646655
help="Timeout for jattach operations (start/stop AP, etc)",
647656
),
657+
ProfilerArgument(
658+
"--java-async-profiler-mcache",
659+
dest="java_async_profiler_mcache",
660+
# this is "unsigned char" in AP's code
661+
type=integer_range(0, 256),
662+
metavar="[0-255]",
663+
default=AsyncProfiledProcess._DEFAULT_MCACHE,
664+
help="async-profiler mcache option (defaults to %(default)s)",
665+
),
648666
],
649667
)
650668
class JavaProfiler(ProcessProfilerBase):
@@ -679,6 +697,7 @@ def __init__(
679697
java_async_profiler_args: str,
680698
java_safemode: str,
681699
java_jattach_timeout: int,
700+
java_async_profiler_mcache: int,
682701
java_mode: str,
683702
):
684703
assert java_mode == "ap", "Java profiler should not be initialized, wrong java_mode value given"
@@ -694,6 +713,7 @@ def __init__(
694713
self._ap_safemode = java_async_profiler_safemode
695714
self._ap_args = java_async_profiler_args
696715
self._jattach_timeout = java_jattach_timeout
716+
self._ap_mcache = java_async_profiler_mcache
697717
self._init_java_safemode(java_safemode)
698718
self._should_profile = True
699719
# if set, profiling is disabled due to this safemode reason.
@@ -894,6 +914,7 @@ def _profile_process_stackcollapse(self, process: Process) -> StackToSampleCount
894914
self._ap_safemode,
895915
self._ap_args,
896916
self._jattach_timeout,
917+
self._ap_mcache,
897918
) as ap_proc:
898919
return self._profile_ap_process(ap_proc, comm)
899920

scripts/async_profiler_build_shared.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
#
66
set -euo pipefail
77

8-
VERSION=v2.7g2
9-
GIT_REV="6541b573294c3c9e8efb633bc830440a6f2e13d0"
8+
VERSION=v2.8.1g1
9+
GIT_REV="a54f956ad4f9ee7601fcfce1a968abdca9921a6d"
1010

1111
git clone --depth 1 -b "$VERSION" https://github.com/Granulate/async-profiler.git && cd async-profiler && git reset --hard "$GIT_REV"
1212
source "$1"

scripts/async_profiler_env_glibc.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@
66
set -euo pipefail
77

88
yum install -y centos-release-scl
9-
yum install -y devtoolset-7-toolchain make java-1.8.0-openjdk-devel glibc-static git
9+
yum install -y devtoolset-7-toolchain make java-11-openjdk-devel glibc-static git

scripts/async_profiler_env_musl.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@
55
#
66
set -euo pipefail
77

8-
apk add make gcc g++ openjdk8 musl-dev linux-headers bash git
8+
apk add make gcc g++ openjdk11 musl-dev linux-headers bash git

tests/test_java.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,13 @@ def get_libjvm_path(application_pid: int) -> str:
5050
return get_lib_path(application_pid, "/libjvm.so")
5151

5252

53+
def _read_pid_maps(pid: int) -> str:
54+
return Path(f"/proc/{pid}/maps").read_text()
55+
56+
5357
def is_libjvm_deleted(application_pid: int) -> bool:
5458
# can't use get_libjvm_path() - psutil removes "deleted" if the file actually exists...
55-
return "/libjvm.so (deleted)" in Path(f"/proc/{application_pid}/maps").read_text()
59+
return "/libjvm.so (deleted)" in _read_pid_maps(application_pid)
5660

5761

5862
# adds the "status" command to AsyncProfiledProcess from gProfiler.
@@ -340,13 +344,12 @@ def test_sanity_j9(
340344
assert_collapsed(process_collapsed)
341345

342346

343-
@pytest.mark.xfail(
344-
reason="AP 2.7 doesn't support, see https://github.com/jvm-profiling-tools/async-profiler/issues/572"
345-
" we will fix after that's closed."
346-
)
347347
# test only once. in a container, so that we don't mess up the environment :)
348348
@pytest.mark.parametrize("in_container", [True])
349-
def test_java_deleted_libjvm(tmp_path: Path, application_pid: int, assert_collapsed: AssertInCollapsed) -> None:
349+
@pytest.mark.xfail(reason="In CI, file doesn't appear as deleted for some reason... works on my machine :shrug:")
350+
def test_java_deleted_libjvm(
351+
tmp_path: Path, application_pid: int, application_docker_container: Container, assert_collapsed: AssertInCollapsed
352+
) -> None:
350353
"""
351354
Tests that we can profile processes whose libjvm was deleted, e.g because Java was upgraded.
352355
"""
@@ -357,7 +360,9 @@ def test_java_deleted_libjvm(tmp_path: Path, application_pid: int, assert_collap
357360
shutil.copy(libjvm, libjvm_tmp)
358361
os.unlink(libjvm)
359362
os.rename(libjvm_tmp, libjvm)
360-
assert is_libjvm_deleted(application_pid)
363+
assert is_libjvm_deleted(
364+
application_pid
365+
), f"Not (deleted) after deleting? libjvm={libjvm} maps={_read_pid_maps(application_pid)}"
361366

362367
with make_java_profiler(storage_dir=str(tmp_path), duration=3) as profiler:
363368
process_collapsed = snapshot_one_collapsed(profiler)

tests/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ def make_java_profiler(
137137
java_async_profiler_args: str = "",
138138
java_safemode: str = JAVA_SAFEMODE_ALL,
139139
java_jattach_timeout: int = AsyncProfiledProcess._JATTACH_TIMEOUT,
140+
java_async_profiler_mcache: int = AsyncProfiledProcess._DEFAULT_MCACHE,
140141
java_mode: str = "ap",
141142
) -> JavaProfiler:
142143
assert storage_dir is not None
@@ -152,6 +153,7 @@ def make_java_profiler(
152153
java_async_profiler_args=java_async_profiler_args,
153154
java_safemode=java_safemode,
154155
java_jattach_timeout=java_jattach_timeout,
156+
java_async_profiler_mcache=java_async_profiler_mcache,
155157
java_mode=java_mode,
156158
)
157159

0 commit comments

Comments
 (0)