Skip to content

Commit d6ab597

Browse files
authored
Dump stacktraces when tests fail to close context (#13020)
We are starting to experience flaky `runtime-integration-test` that fail to cleanup: ``` [info] org.enso.interpreter.test.instrument.RuntimeServerTest *** ABORTED *** (31 seconds, 896 milliseconds) [info] java.lang.IllegalStateException: The context is currently executing on another thread. Set cancelIfExecuting to true to stop the execution on this thread. [info] at org.graalvm.truffle/com.oracle.truffle.polyglot.PolyglotEngineException.illegalState(PolyglotEngineException.java:133) [info] at org.graalvm.truffle/com.oracle.truffle.polyglot.PolyglotContextImpl.closeAndMaybeWait(PolyglotContextImpl.java:2037) ... [info] at org.enso.runtime/org.enso.interpreter.test.instrument.InstrumentTestContext.close(InstrumentTestContext.scala:158) [info] at org.enso.runtime/org.enso.interpreter.test.instrument.RuntimeServerTest$TestContext.close(RuntimeServerTest.scala:120) [info] at org.enso.runtime/org.enso.interpreter.test.instrument.RuntimeServerTest.afterEach(RuntimeServerTest.scala:136) [info] at org.scalatest.BeforeAndAfterEach.$anonfun$runTest$1(BeforeAndAfterEach.scala:247) [info] ... ``` This change catches the exception and dumps all stacktraces in an attempt to debug the problem in CI (not reproducible otherwise). Since the logic is already repeated in numerous places, refactored it to a single `runtime-utils` project and re-used. # Important Notes Recent examples of failures: - https://github.com/enso-org/enso/actions/runs/14780427449/job/41497962037#step:10:3830 - https://github.com/enso-org/enso/actions/runs/14775739759/job/41483592236?pr=12987#step:10:3574
1 parent b7fb08a commit d6ab597

File tree

10 files changed

+86
-79
lines changed

10 files changed

+86
-79
lines changed

build.sbt

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,7 @@ lazy val componentModulesPaths =
759759
(`runtime-instrument-runtime-server` / Compile / exportedModuleBin).value,
760760
(`runtime-language-arrow` / Compile / exportedModuleBin).value,
761761
(`runtime-language-epb` / Compile / exportedModuleBin).value,
762+
(`runtime-utils` / Compile / exportedModuleBin).value,
762763
(`runtime-version-manager` / Compile / exportedModuleBin).value,
763764
(`persistance` / Compile / exportedModuleBin).value,
764765
(`cli` / Compile / exportedModuleBin).value,
@@ -1379,6 +1380,15 @@ lazy val `jna-wrapper` = project
13791380
}
13801381
)
13811382

1383+
lazy val `runtime-utils` = project
1384+
.in(file("lib/java/runtime-utils"))
1385+
.enablePlugins(JPMSPlugin)
1386+
.settings(
1387+
frgaalJavaCompilerSetting,
1388+
scalaModuleDependencySetting,
1389+
javaModuleName := "org.enso.runtime.utils"
1390+
)
1391+
13821392
lazy val `directory-watcher-wrapper` = project
13831393
.in(file("lib/java/directory-watcher-wrapper"))
13841394
.enablePlugins(JPMSPlugin)
@@ -1850,6 +1860,7 @@ lazy val `json-rpc-server` = project
18501860
(`akka-wrapper` / Compile / exportedModule).value
18511861
)
18521862
)
1863+
.dependsOn(`runtime-utils` % "test->compile")
18531864

18541865
// An automatic JPMS module
18551866
lazy val testkit = project
@@ -2387,6 +2398,7 @@ lazy val `language-server` = (project in file("engine/language-server"))
23872398
(`runtime-instrument-repl-debugger` / Compile / exportedModule).value,
23882399
(`runtime-instrument-id-execution` / Compile / exportedModule).value,
23892400
(`runtime-language-epb` / Compile / exportedModule).value,
2401+
(`runtime-utils` / Compile / exportedModule).value,
23902402
(`ydoc-polyfill` / Compile / exportedModule).value,
23912403
(`syntax-rust-definition` / Compile / exportedModule).value,
23922404
(`profiling-utils` / Compile / exportedModule).value,
@@ -2680,7 +2692,8 @@ lazy val `runtime-language-epb` =
26802692
"org.graalvm.sdk" % "nativeimage" % graalMavenPackagesVersion
26812693
),
26822694
Compile / internalModuleDependencies := Seq(
2683-
(`ydoc-polyfill` / Compile / exportedModule).value
2695+
(`ydoc-polyfill` / Compile / exportedModule).value,
2696+
(`runtime-utils` / Compile / exportedModule).value
26842697
)
26852698
)
26862699

@@ -2953,6 +2966,7 @@ lazy val `runtime-integration-tests` =
29532966
(`runtime-compiler` / Compile / exportedModule).value,
29542967
(`runtime-compiler-dump` / Compile / exportedModule).value,
29552968
(`runtime-compiler-dump-igv` / Compile / exportedModule).value,
2969+
(`runtime-utils` / Compile / exportedModule).value,
29562970
(`polyglot-api` / Compile / exportedModule).value,
29572971
(`polyglot-api-macros` / Compile / exportedModule).value,
29582972
(`pkg` / Compile / exportedModule).value,
@@ -3004,6 +3018,7 @@ lazy val `runtime-integration-tests` =
30043018
(`runtime-test-instruments` / javaModuleName).value,
30053019
(`ydoc-polyfill` / javaModuleName).value,
30063020
(`runtime-instrument-common` / javaModuleName).value,
3021+
(`runtime-utils` / javaModuleName).value,
30073022
(`text-buffer` / javaModuleName).value,
30083023
(`logging-service-logback` / Test / javaModuleName).value,
30093024
(`logging-service-telemetry` / Compile / javaModuleName).value,
@@ -3064,6 +3079,7 @@ lazy val `runtime-integration-tests` =
30643079
)
30653080
.dependsOn(`runtime`)
30663081
.dependsOn(`runtime-test-instruments`)
3082+
.dependsOn(`runtime-utils` % "test->compile")
30673083
.dependsOn(`logging-service-logback` % "test->test")
30683084
.dependsOn(`logging-service-telemetry` % "test->compile")
30693085
.dependsOn(`logging-utils` % Test)
@@ -3122,6 +3138,7 @@ lazy val `runtime-benchmarks` =
31223138
},
31233139
Compile / internalModuleDependencies := Seq(
31243140
(`runtime` / Compile / exportedModule).value,
3141+
(`runtime-utils` / Compile / exportedModule).value,
31253142
(`runtime-instrument-common` / Compile / exportedModule).value,
31263143
(`runtime-instrument-runtime-server` / Compile / exportedModule).value,
31273144
(`runtime-instrument-repl-debugger` / Compile / exportedModule).value,
@@ -3566,13 +3583,15 @@ lazy val `runtime-instrument-common` =
35663583
(`runtime-compiler-dump` / Compile / exportedModule).value,
35673584
(`runtime-parser` / Compile / exportedModule).value,
35683585
(`runtime-suggestions` / Compile / exportedModule).value,
3586+
(`runtime-utils` / Compile / exportedModule).value,
35693587
(`text-buffer` / Compile / exportedModule).value,
35703588
(`pkg` / Compile / exportedModule).value,
35713589
(`polyglot-api` / Compile / exportedModule).value,
35723590
(`scala-libs-wrapper` / Compile / exportedModule).value
35733591
)
35743592
)
35753593
.dependsOn(`refactoring-utils`)
3594+
.dependsOn(`runtime-utils`)
35763595
.dependsOn(`runtime` % "compile->compile;runtime->runtime")
35773596

35783597
lazy val `runtime-instrument-id-execution` =
@@ -4768,6 +4787,7 @@ lazy val `process-utils` = project
47684787
scalaModuleDependencySetting,
47694788
compileOrder := CompileOrder.ScalaThenJava
47704789
)
4790+
.dependsOn(`runtime-utils`)
47714791

47724792
lazy val `locking-test-helper` = project
47734793
.in(file("lib/scala/locking-test-helper"))

engine/runtime-instrument-common/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
requires org.enso.runtime.compiler;
1515
requires org.enso.runtime.parser;
1616
requires org.enso.runtime.suggestions;
17+
requires org.enso.runtime.utils;
1718
requires org.enso.text.buffer;
1819
requires org.enso.pkg;
1920
requires org.enso.polyglot.api;

engine/runtime-instrument-common/src/main/scala/org/enso/interpreter/instrument/execution/JobExecutionEngine.scala

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import org.enso.common.Asserts.assertInJvm
44
import org.enso.interpreter.instrument.InterpreterContext
55
import org.enso.interpreter.instrument.job.{BackgroundJob, Job, UniqueJob}
66
import org.enso.text.Sha3_224VersionCalculator
7+
import org.enso.runtime.utils.ThreadUtils
78

89
import org.slf4j.Logger
910
import org.slf4j.LoggerFactory
@@ -113,25 +114,11 @@ final class JobExecutionEngine(
113114
)
114115
} catch {
115116
case _: TimeoutException =>
116-
val sb = new StringBuilder(
117-
"Threaddump when timeout is reached while waiting for the job " + runningJob.id + " running in thread " + runningJob.job
118-
.threadNameExecutingJob() + " to cancel:\n"
117+
val msg = ThreadUtils.dumpAllStacktraces(
118+
"Thread dump when timeout is reached while waiting for the job " + runningJob.id + " running in thread " + runningJob.job
119+
.threadNameExecutingJob() + " to cancel:"
119120
)
120-
Thread.getAllStackTraces.entrySet.forEach { entry =>
121-
sb.append(entry.getKey.getName).append("\n")
122-
entry.getValue.foreach { e =>
123-
sb.append(" ")
124-
.append(e.getClassName)
125-
.append(".")
126-
.append(e.getMethodName)
127-
.append("(")
128-
.append(e.getFileName)
129-
.append(":")
130-
.append(e.getLineNumber)
131-
.append(")\n")
132-
}
133-
}
134-
logger.warn(sb.toString())
121+
logger.warn(msg)
135122
runningJob.future.cancel(runningJob.job.mayInterruptIfRunning)
136123
case _: CancellationException =>
137124
logger.debug(

engine/runtime-integration-tests/src/test/scala/org/enso/interpreter/test/instrument/InstrumentTestContext.scala

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import org.enso.pkg.{Package, PackageManager}
66
import org.enso.common.LanguageInfo
77
import org.enso.polyglot.PolyglotContext
88
import org.enso.polyglot.runtime.Runtime.Api
9+
import org.enso.runtime.utils.ThreadUtils
910
import org.graalvm.polyglot.Context
1011

1112
import java.io.File
@@ -155,7 +156,16 @@ abstract class InstrumentTestContext(packageName: String) {
155156

156157
def close(): Unit = {
157158
if (context() != null) {
158-
context().close()
159+
try {
160+
context().close()
161+
} catch {
162+
case e: IllegalStateException =>
163+
val msg = ThreadUtils.dumpAllStacktraces(
164+
"Thread dump on failure to close test Instrument Context:"
165+
)
166+
println(msg)
167+
throw e
168+
}
159169
}
160170
Await.ready(runtimeServerEmulator.terminate(), 5.seconds)
161171
lockManager.reset()

engine/runtime-language-epb/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
requires java.logging;
33
requires org.graalvm.polyglot;
44
requires org.graalvm.truffle;
5+
requires org.enso.runtime.utils;
56
requires org.enso.ydoc.polyfill;
67

78
provides com.oracle.truffle.api.provider.TruffleLanguageProvider with

engine/runtime-language-epb/src/main/java/org/enso/interpreter/epb/EpbContext.java

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import java.util.concurrent.ScheduledExecutorService;
1313
import java.util.function.Function;
1414
import java.util.logging.Level;
15+
import org.enso.runtime.utils.ThreadUtils;
1516
import org.enso.ydoc.polyfill.web.WebEnvironment;
1617
import org.graalvm.polyglot.Value;
1718

@@ -119,30 +120,10 @@ final void handleMultiAccess(String msg) {
119120
}
120121

121122
private boolean dumpStack(int ms) {
122-
var sb = new StringBuilder("Polyglot access failed. Waiting " + ms + " ms. Threaddump:\n");
123-
var traces = Thread.getAllStackTraces();
124-
for (var entry : traces.entrySet()) {
125-
var thread = new StringBuilder();
126-
thread.append(entry.getKey().getName()).append("\n");
127-
var keep = false;
128-
for (var e : entry.getValue()) {
129-
keep |= e.getClassName().contains("com.oracle.truffle");
130-
thread
131-
.append(" ")
132-
.append(e.getClassName())
133-
.append(".")
134-
.append(e.getMethodName())
135-
.append("(")
136-
.append(e.getFileName())
137-
.append(":")
138-
.append(e.getLineNumber())
139-
.append(")\n");
140-
}
141-
if (keep) {
142-
sb.append(thread.toString());
143-
}
144-
}
145-
log(Level.WARNING, sb.toString());
123+
var msg =
124+
ThreadUtils.dumpAllStacktraces(
125+
"Polyglot access failed. Waiting " + ms + " ms. Thread dump:");
126+
log(Level.WARNING, msg);
146127
return true;
147128
}
148129
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
module org.enso.runtime.utils {
2+
exports org.enso.runtime.utils;
3+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package org.enso.runtime.utils;
2+
3+
import java.util.Arrays;
4+
5+
public class ThreadUtils {
6+
7+
private ThreadUtils() {}
8+
9+
public static String dumpAllStacktraces(String prefix) {
10+
var sb = new StringBuilder(prefix);
11+
sb.append(System.lineSeparator());
12+
Thread.getAllStackTraces().entrySet().forEach(entry -> {
13+
sb.append(entry.getKey().getName()).append(System.lineSeparator());
14+
Arrays.stream(entry.getValue()).forEach(e ->
15+
sb.append(" ")
16+
.append(e.getClassName())
17+
.append(".")
18+
.append(e.getMethodName())
19+
.append("(")
20+
.append(e.getFileName())
21+
.append(":")
22+
.append(e.getLineNumber())
23+
.append(")")
24+
.append(System.lineSeparator())
25+
);
26+
});
27+
return sb.toString();
28+
}
29+
30+
}

lib/scala/json-rpc-server/src/test/scala/org/enso/jsonrpc/test/JsonRpcServerTestKit.scala

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import org.enso.jsonrpc.{
1414
JsonRpcServer,
1515
ProtocolFactory
1616
}
17+
import org.enso.runtime.utils.ThreadUtils
1718
import org.scalactic.source.Position
1819
import org.scalatest.matchers.{MatchResult, Matcher}
1920
import org.scalatest.matchers.should.Matchers
@@ -131,24 +132,10 @@ abstract class JsonRpcServerTestKit
131132
if e.getMessage.contains(
132133
"timeout"
133134
) && printStackTracesOnFailure =>
134-
val sb = new StringBuilder(
135-
"Thread dump when timeout is reached while waiting for the message:\n"
136-
)
137-
Thread.getAllStackTraces.entrySet.forEach { entry =>
138-
sb.append(entry.getKey.getName).append("\n")
139-
entry.getValue.foreach { e =>
140-
sb.append(" ")
141-
.append(e.getClassName)
142-
.append(".")
143-
.append(e.getMethodName)
144-
.append("(")
145-
.append(e.getFileName)
146-
.append(":")
147-
.append(e.getLineNumber)
148-
.append(")\n")
149-
}
150-
}
151-
println(sb.toString())
135+
val msg = ThreadUtils.dumpAllStacktraces(
136+
"Thread dump when timeout is reached while waiting for the message:"
137+
);
138+
println(msg)
152139
throw e
153140
}
154141
if (debugMessages) println(message)

lib/scala/process-utils/src/main/scala/org/enso/process/WrappedProcess.scala

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package org.enso.process
22

33
import java.io._
44
import java.util.concurrent.{Semaphore, TimeUnit}
5+
import org.enso.runtime.utils.ThreadUtils
56
import scala.collection.Factory
67
import scala.concurrent.TimeoutException
78
import scala.jdk.CollectionConverters._
@@ -205,24 +206,10 @@ class WrappedProcess(command: Seq[String], process: Process) {
205206
case e @ (_: InterruptedException | _: TimeoutException) =>
206207
if (process.isAlive) {
207208
println(s"Killing the timed-out process: ${command.mkString(" ")}")
208-
val sb = new StringBuilder(
209-
"Thread dump before forcefully killing the process:\n"
209+
val msg = ThreadUtils.dumpAllStacktraces(
210+
"Thread dump before forcefully killing the process:"
210211
)
211-
Thread.getAllStackTraces.entrySet.forEach { entry =>
212-
sb.append(entry.getKey.getName).append("\n")
213-
entry.getValue.foreach { e =>
214-
sb.append(" ")
215-
.append(e.getClassName)
216-
.append(".")
217-
.append(e.getMethodName)
218-
.append("(")
219-
.append(e.getFileName)
220-
.append(":")
221-
.append(e.getLineNumber)
222-
.append(")\n")
223-
}
224-
}
225-
println(sb.toString())
212+
println(msg)
226213
process.destroyForcibly()
227214
}
228215
for (processHandle <- descendants) {

0 commit comments

Comments
 (0)