Skip to content

Commit ec1744f

Browse files
authored
🍒 #8191 - TempLocationManager Fixes and Improvements (#8199)
* Remove CompletableFuture from TempLocationManager (cherry picked from commit d0eed45) * Do not run the cleanup if there is nothing to clean up (cherry picked from commit 58d5da8) * Exclude JFR repository from temp files self-cleanup (cherry picked from commit a770911)
1 parent d24ba9e commit ec1744f

File tree

2 files changed

+121
-39
lines changed

2 files changed

+121
-39
lines changed

dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java

Lines changed: 77 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import datadog.trace.api.config.ProfilingConfig;
44
import datadog.trace.bootstrap.config.provider.ConfigProvider;
5+
import datadog.trace.util.AgentTaskScheduler;
56
import datadog.trace.util.PidHelper;
67
import java.io.IOException;
78
import java.nio.file.FileVisitResult;
@@ -15,10 +16,10 @@
1516
import java.time.Instant;
1617
import java.time.temporal.ChronoUnit;
1718
import java.util.Set;
18-
import java.util.concurrent.CompletableFuture;
19-
import java.util.concurrent.ExecutionException;
19+
import java.util.concurrent.CountDownLatch;
2020
import java.util.concurrent.TimeUnit;
21-
import java.util.concurrent.TimeoutException;
21+
import java.util.regex.Pattern;
22+
import java.util.stream.Stream;
2223
import org.slf4j.Logger;
2324
import org.slf4j.LoggerFactory;
2425

@@ -30,6 +31,9 @@
3031
*/
3132
public final class TempLocationManager {
3233
private static final Logger log = LoggerFactory.getLogger(TempLocationManager.class);
34+
private static final Pattern JFR_DIR_PATTERN =
35+
Pattern.compile("\\d{4}_\\d{2}_\\d{2}_\\d{2}_\\d{2}_\\d{2}_\\d{6}");
36+
private static final String TEMPDIR_PREFIX = "pid_";
3337

3438
private static final class SingletonHolder {
3539
private static final TempLocationManager INSTANCE = new TempLocationManager();
@@ -62,7 +66,7 @@ default FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOE
6266
default void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) {}
6367
}
6468

65-
private class CleanupVisitor implements FileVisitor<Path> {
69+
private final class CleanupVisitor implements FileVisitor<Path> {
6670
private boolean shouldClean;
6771

6872
private final Set<String> pidSet = PidHelper.getJavaPids();
@@ -98,14 +102,19 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
98102
terminated = true;
99103
return FileVisitResult.TERMINATE;
100104
}
105+
if (cleanSelf && JFR_DIR_PATTERN.matcher(dir.getFileName().toString()).matches()) {
106+
// do not delete JFR repository on 'self-cleanup' - it conflicts with the JFR's own cleanup
107+
return FileVisitResult.SKIP_SUBTREE;
108+
}
109+
101110
cleanupTestHook.preVisitDirectory(dir, attrs);
102111

103112
if (dir.equals(baseTempDir)) {
104113
return FileVisitResult.CONTINUE;
105114
}
106115
String fileName = dir.getFileName().toString();
107116
// the JFR repository directories are under <basedir>/pid_<pid>
108-
String pid = fileName.startsWith("pid_") ? fileName.substring(4) : null;
117+
String pid = fileName.startsWith(TEMPDIR_PREFIX) ? fileName.substring(4) : null;
109118
boolean isSelfPid = pid != null && pid.equals(PidHelper.getPid());
110119
shouldClean |= cleanSelf ? isSelfPid : !isSelfPid && !pidSet.contains(pid);
111120
if (shouldClean) {
@@ -167,18 +176,43 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx
167176
}
168177
String fileName = dir.getFileName().toString();
169178
// reset the flag only if we are done cleaning the top-level directory
170-
shouldClean = !fileName.startsWith("pid_");
179+
shouldClean = !fileName.startsWith(TEMPDIR_PREFIX);
171180
}
172181
return FileVisitResult.CONTINUE;
173182
}
174183
}
175184

185+
private final class CleanupTask implements Runnable {
186+
private final CountDownLatch latch = new CountDownLatch(1);
187+
private volatile Throwable throwable = null;
188+
189+
@Override
190+
public void run() {
191+
try {
192+
cleanup(false);
193+
} catch (OutOfMemoryError oom) {
194+
throw oom;
195+
} catch (Throwable t) {
196+
throwable = t;
197+
} finally {
198+
latch.countDown();
199+
}
200+
}
201+
202+
boolean await(long timeout, TimeUnit unit) throws Throwable {
203+
boolean ret = latch.await(timeout, unit);
204+
if (throwable != null) {
205+
throw throwable;
206+
}
207+
return ret;
208+
}
209+
}
210+
176211
private final Path baseTempDir;
177212
private final Path tempDir;
178213
private final long cutoffSeconds;
179214

180-
private final CompletableFuture<Void> cleanupTask;
181-
215+
private final CleanupTask cleanupTask = new CleanupTask();
182216
private final CleanupHook cleanupTestHook;
183217

184218
/**
@@ -200,11 +234,7 @@ public static TempLocationManager getInstance() {
200234
static TempLocationManager getInstance(boolean waitForCleanup) {
201235
TempLocationManager instance = SingletonHolder.INSTANCE;
202236
if (waitForCleanup) {
203-
try {
204-
instance.waitForCleanup(5, TimeUnit.SECONDS);
205-
} catch (TimeoutException ignored) {
206-
207-
}
237+
instance.waitForCleanup(5, TimeUnit.SECONDS);
208238
}
209239
return instance;
210240
}
@@ -214,10 +244,11 @@ private TempLocationManager() {
214244
}
215245

216246
TempLocationManager(ConfigProvider configProvider) {
217-
this(configProvider, CleanupHook.EMPTY);
247+
this(configProvider, true, CleanupHook.EMPTY);
218248
}
219249

220-
TempLocationManager(ConfigProvider configProvider, CleanupHook testHook) {
250+
TempLocationManager(
251+
ConfigProvider configProvider, boolean runStartupCleanup, CleanupHook testHook) {
221252
cleanupTestHook = testHook;
222253

223254
// In order to avoid racy attempts to clean up files which are currently being processed in a
@@ -255,21 +286,21 @@ private TempLocationManager() {
255286
baseTempDir = configuredTempDir.resolve("ddprof");
256287
baseTempDir.toFile().deleteOnExit();
257288

258-
tempDir = baseTempDir.resolve("pid_" + pid);
259-
cleanupTask = CompletableFuture.runAsync(() -> cleanup(false));
289+
tempDir = baseTempDir.resolve(TEMPDIR_PREFIX + pid);
290+
if (runStartupCleanup) {
291+
// do not execute the background cleanup task when running in tests
292+
AgentTaskScheduler.INSTANCE.execute(() -> cleanup(false));
293+
}
260294

261295
Thread selfCleanup =
262296
new Thread(
263297
() -> {
264-
try {
265-
waitForCleanup(1, TimeUnit.SECONDS);
266-
} catch (TimeoutException e) {
298+
if (!waitForCleanup(1, TimeUnit.SECONDS)) {
267299
log.info(
268300
"Cleanup task timed out. {} temp directory might not have been cleaned up properly",
269301
tempDir);
270-
} finally {
271-
cleanup(true);
272302
}
303+
cleanup(true);
273304
},
274305
"Temp Location Manager Cleanup");
275306
Runtime.getRuntime().addShutdownHook(selfCleanup);
@@ -344,6 +375,19 @@ boolean cleanup(boolean cleanSelf) {
344375
*/
345376
boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) {
346377
try {
378+
if (!Files.exists(baseTempDir)) {
379+
// not event the main temp location exists; nothing to clean up
380+
return true;
381+
}
382+
try (Stream<Path> paths = Files.walk(baseTempDir)) {
383+
if (paths.noneMatch(
384+
path ->
385+
Files.isDirectory(path)
386+
&& path.getFileName().toString().startsWith(TEMPDIR_PREFIX))) {
387+
// nothing to clean up; bail out early
388+
return true;
389+
}
390+
}
347391
cleanupTestHook.onCleanupStart(cleanSelf, timeout, unit);
348392
CleanupVisitor visitor = new CleanupVisitor(cleanSelf, timeout, unit);
349393
Files.walkFileTree(baseTempDir, visitor);
@@ -359,21 +403,24 @@ boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) {
359403
}
360404

361405
// accessible for tests
362-
void waitForCleanup(long timeout, TimeUnit unit) throws TimeoutException {
406+
boolean waitForCleanup(long timeout, TimeUnit unit) {
363407
try {
364-
cleanupTask.get(timeout, unit);
408+
return cleanupTask.await(timeout, unit);
365409
} catch (InterruptedException e) {
366-
cleanupTask.cancel(true);
410+
log.debug("Temp directory cleanup was interrupted");
367411
Thread.currentThread().interrupt();
368-
} catch (TimeoutException e) {
369-
cleanupTask.cancel(true);
370-
throw e;
371-
} catch (ExecutionException e) {
412+
} catch (Throwable t) {
372413
if (log.isDebugEnabled()) {
373-
log.debug("Failed to cleanup temp directory: {}", tempDir, e);
414+
log.debug("Failed to cleanup temp directory: {}", tempDir, t);
374415
} else {
375416
log.debug("Failed to cleanup temp directory: {}", tempDir);
376417
}
377418
}
419+
return false;
420+
}
421+
422+
// accessible for tests
423+
void createDirStructure() throws IOException {
424+
Files.createDirectories(baseTempDir);
378425
}
379426
}

dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import java.util.UUID;
1919
import java.util.concurrent.Phaser;
2020
import java.util.concurrent.TimeUnit;
21+
import java.util.concurrent.atomic.AtomicBoolean;
2122
import java.util.concurrent.locks.LockSupport;
2223
import java.util.stream.Stream;
2324
import org.junit.jupiter.api.Test;
@@ -173,7 +174,7 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx
173174
}
174175
};
175176

176-
TempLocationManager mgr = instance(baseDir, blocker);
177+
TempLocationManager mgr = instance(baseDir, true, blocker);
177178

178179
// wait for the cleanup start
179180
phaser.arriveAndAwaitAdvance();
@@ -187,8 +188,9 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx
187188

188189
@ParameterizedTest
189190
@MethodSource("timeoutTestArguments")
190-
void testCleanupWithTimeout(boolean selfCleanup, String section) throws Exception {
191-
long timeoutMs = 500;
191+
void testCleanupWithTimeout(boolean selfCleanup, boolean shouldSucceed, String section)
192+
throws Exception {
193+
long timeoutMs = 10;
192194
TempLocationManager.CleanupHook delayer =
193195
new TempLocationManager.CleanupHook() {
194196
@Override
@@ -229,30 +231,63 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
229231
Files.createTempDirectory(
230232
"ddprof-test-",
231233
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
232-
TempLocationManager instance = instance(baseDir, delayer);
233-
boolean rslt = instance.cleanup(selfCleanup, timeoutMs, TimeUnit.MILLISECONDS);
234-
assertTrue(rslt);
234+
TempLocationManager instance = instance(baseDir, false, delayer);
235+
Path mytempdir = instance.getTempDir();
236+
Path otherTempdir = mytempdir.getParent().resolve("pid_0000");
237+
Files.createDirectories(otherTempdir);
238+
Files.createFile(mytempdir.resolve("dummy"));
239+
Files.createFile(otherTempdir.resolve("dummy"));
240+
boolean rslt =
241+
instance.cleanup(
242+
selfCleanup, (long) (timeoutMs * (shouldSucceed ? 10 : 0.5d)), TimeUnit.MILLISECONDS);
243+
assertEquals(shouldSucceed, rslt);
244+
}
245+
246+
@Test
247+
void testShortCircuit() throws Exception {
248+
Path baseDir =
249+
Files.createTempDirectory(
250+
"ddprof-test-",
251+
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
252+
AtomicBoolean executed = new AtomicBoolean();
253+
TempLocationManager.CleanupHook hook =
254+
new TempLocationManager.CleanupHook() {
255+
@Override
256+
public void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) {
257+
executed.set(true);
258+
}
259+
};
260+
TempLocationManager instance = instance(baseDir, false, hook);
261+
262+
instance.createDirStructure();
263+
264+
boolean ret = instance.cleanup(false);
265+
assertTrue(ret);
266+
assertFalse(executed.get());
235267
}
236268

237269
private static Stream<Arguments> timeoutTestArguments() {
238270
List<Arguments> argumentsList = new ArrayList<>();
239271
for (boolean selfCleanup : new boolean[] {true, false}) {
240272
for (String intercepted :
241273
new String[] {"preVisitDirectory", "visitFile", "postVisitDirectory"}) {
242-
argumentsList.add(Arguments.of(selfCleanup, intercepted));
274+
argumentsList.add(Arguments.of(selfCleanup, true, intercepted));
275+
argumentsList.add(Arguments.of(selfCleanup, false, intercepted));
243276
}
244277
}
245278
return argumentsList.stream();
246279
}
247280

248-
private TempLocationManager instance(Path baseDir, TempLocationManager.CleanupHook cleanupHook)
281+
private TempLocationManager instance(
282+
Path baseDir, boolean withStartupCleanup, TempLocationManager.CleanupHook cleanupHook)
249283
throws IOException {
250284
Properties props = new Properties();
251285
props.put(ProfilingConfig.PROFILING_TEMP_DIR, baseDir.toString());
252286
props.put(
253287
ProfilingConfig.PROFILING_UPLOAD_PERIOD,
254288
"0"); // to force immediate cleanup; must be a string value!
255289

256-
return new TempLocationManager(ConfigProvider.withPropertiesOverride(props), cleanupHook);
290+
return new TempLocationManager(
291+
ConfigProvider.withPropertiesOverride(props), withStartupCleanup, cleanupHook);
257292
}
258293
}

0 commit comments

Comments
 (0)