@@ -111,7 +111,7 @@ class TGlobalPagePool {
111
111
112
112
void FreePage (void * addr) {
113
113
auto res = T::Munmap (addr, PageSize);
114
- Y_DEBUG_ABORT_UNLESS (0 == res, " Munmap failed: %s" , LastSystemErrorText ());
114
+ Y_DEBUG_ABORT_UNLESS (0 == res, " Madvise failed: %s" , LastSystemErrorText ());
115
115
}
116
116
117
117
private:
@@ -251,7 +251,46 @@ inline int TSystemMmap::Munmap(void* addr, size_t size)
251
251
{
252
252
Y_DEBUG_ABORT_UNLESS (AlignUp (addr, SYS_PAGE_SIZE) == addr, " Got unaligned address" );
253
253
Y_DEBUG_ABORT_UNLESS (AlignUp (size, SYS_PAGE_SIZE) == size, " Got unaligned size" );
254
- return ::munmap (addr, size);
254
+
255
+ if (size > MaxMidSize) {
256
+ return ::munmap (addr, size);
257
+ }
258
+
259
+ // Unlock memory in case somewhere was called `mlockall(MCL_FUTURE)`.
260
+ if (::munlock (addr, size) == -1 ) {
261
+ switch (LastSystemError ()) {
262
+ case EAGAIN: [[fallthrough]];
263
+ // The memory region was probably not locked - skip,
264
+ // also since we can't distinguish from other kernel problems that may cause EAGAIN (not enough memory for structures?)
265
+ // we rely on the failure of the following `madvise()` call.
266
+
267
+ case EPERM: [[fallthrough]];
268
+ // The most common case we get this error if we have no privileges, but also ignored error when called `mlockall()`
269
+ // somewhere earlier. So ignore this.
270
+
271
+ case EINVAL:
272
+ // Something wrong with `addr` and `size` - we'll see the same error from the following `madvise()` call.
273
+ break ;
274
+
275
+ case ENOMEM:
276
+ // Locking or unlocking a region would result in the total number of mappings with distinct attributes
277
+ // (e.g., locked versus unlocked) exceeding the allowed maximum.
278
+ // NOTE: `madvise(MADV_DONTNEED)` can't return ENOMEM.
279
+ return -1 ;
280
+ }
281
+ }
282
+
283
+ /* *
284
+ There is at least a couple of drawbacks of using madvise instead of munmap:
285
+ - more potential for use-after-free and memory corruption since we still may access unneeded regions by mistake,
286
+ - actual RSS memory may be freed later after kernel gets some memory-pressure, and it may confuse system monitoring tools.
287
+
288
+ But also there is a huge advantage: the number of memory maps used by process doesn't increase because of the "holes".
289
+
290
+ The main source of the growth of number of memory regions is a clean-up of freed pages from page pools.
291
+ Now we can safely invoke `TAlignedPagePool::DoCleanupGlobalFreeList()` whenever we want it.
292
+ */
293
+ return ::madvise (addr, size, MADV_DONTNEED);
255
294
}
256
295
#endif
257
296
0 commit comments