Skip to content

Commit b874288

Browse files
author
capone212
committed
YT-22435: Using anonymous memory limit in tcmalloc
93d6930ce24f343ccd23c55e7264db80f2e0046b
1 parent 42ef08b commit b874288

File tree

8 files changed

+82
-14
lines changed

8 files changed

+82
-14
lines changed

yt/yt/core/misc/proc.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,20 @@ TCgroupMemoryStat GetCgroupMemoryStat(
523523
#endif
524524
}
525525

526+
std::optional<i64> GetCgroupAnonymousMemoryLimit(
527+
const TString& cgroupPath,
528+
const TString& cgroupMountPoint)
529+
{
530+
#ifdef _linux_
531+
TString path = cgroupMountPoint + "/memory" + cgroupPath + "/memory.anon.limit";
532+
auto content = Trim(TUnbufferedFileInput(path).ReadAll(), "\n");
533+
return FromString<i64>(content);
534+
#else
535+
Y_UNUSED(cgroupPath, cgroupMountPoint);
536+
return {};
537+
#endif
538+
}
539+
526540
THashMap<TString, i64> GetVmstat()
527541
{
528542
#ifdef _linux_

yt/yt/core/misc/proc.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,11 @@ TCgroupMemoryStat GetCgroupMemoryStat(
104104
const TString& cgroupPath,
105105
const TString& cgroupMountPoint = "/sys/fs/cgroup");
106106

107+
108+
//! Returns the anonymous memory limit of the memory cgroup #cgroupPath,
//! read from its `memory.anon.limit` file under #cgroupMountPoint.
//! The definition in proc.cpp returns an empty optional on non-Linux builds.
std::optional<i64> GetCgroupAnonymousMemoryLimit(
109+
const TString& cgroupPath,
110+
const TString& cgroupMountPoint = "/sys/fs/cgroup");
111+
107112
THashMap<TString, i64> GetVmstat();
108113

109114
ui64 GetProcessCumulativeMajorPageFaults(int pid = -1);

yt/yt/library/oom/tcmalloc_memory_limit_handler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ class TTCMallocLimitHandler
204204
//! Formats the path `<HeapDumpDirectory>/oom_profile_paths_<timestamp>.yson`,
//! where the directory comes from Options_.
TString GetProfilePaths(const TString& timestamp) const
{
    const auto& dumpDirectory = Options_.HeapDumpDirectory;
    return Format("%v/oom_profile_paths_%v.yson", dumpDirectory, timestamp);
}

yt/yt/library/profiling/resource_tracker/resource_tracker.cpp

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,10 @@ void TMemoryCgroupTracker::CollectSensors(ISensorWriter* writer)
119119
writer->AddGauge("/dirty", stat.Dirty);
120120
writer->AddGauge("/writeback", stat.Writeback);
121121

122-
TotalMemoryLimit.store(stat.HierarchicalMemoryLimit);
122+
TotalMemoryLimit_.store(stat.HierarchicalMemoryLimit);
123+
AnonymousMemoryLimit_.store(SafeGetAnonymousMemoryLimit(
124+
group.Path,
125+
stat.HierarchicalMemoryLimit));
123126

124127
return;
125128
}
@@ -133,9 +136,31 @@ void TMemoryCgroupTracker::CollectSensors(ISensorWriter* writer)
133136
}
134137
}
135138

136-
i64 TMemoryCgroupTracker::GetTotalMemoryLimit()
139+
//! Computes the effective anonymous memory limit for #cgroupPath, falling back
//! to #totalMemoryLimit whenever the cgroup value is unusable.
/*!
 *  The value reported by GetCgroupAnonymousMemoryLimit is capped by
 *  #totalMemoryLimit. #totalMemoryLimit itself is returned when
 *  - the cgroup reports no value,
 *  - the capped value is zero,
 *  - reading the limit throws an std::exception.
 *
 *  A read failure is logged at most once per tracker instance
 *  (guarded by AnonymousLimitErrorLogged_).
 */
i64 TMemoryCgroupTracker::SafeGetAnonymousMemoryLimit(const TString& cgroupPath, i64 totalMemoryLimit)
{
    try {
        const auto cgroupLimit = GetCgroupAnonymousMemoryLimit(cgroupPath);
        if (!cgroupLimit) {
            return totalMemoryLimit;
        }

        // The anonymous limit never exceeds the total one.
        const i64 cappedLimit = std::min(*cgroupLimit, totalMemoryLimit);
        if (cappedLimit == 0) {
            return totalMemoryLimit;
        }
        return cappedLimit;
    } catch (const std::exception& ex) {
        if (!AnonymousLimitErrorLogged_) {
            YT_LOG_INFO(ex, "Failed to collect cgroup anonymous memory limit");
            AnonymousLimitErrorLogged_ = true;
        }
        return totalMemoryLimit;
    }
}
155+
156+
//! Returns the current value of the atomic TotalMemoryLimit_.
i64 TMemoryCgroupTracker::GetTotalMemoryLimit() const
{
    const i64 totalLimit = TotalMemoryLimit_.load();
    return totalLimit;
}
160+
161+
//! Returns the current value of the atomic AnonymousMemoryLimit_.
i64 TMemoryCgroupTracker::GetAnonymousMemoryLimit() const
{
    const i64 anonymousLimit = AnonymousMemoryLimit_.load();
    return anonymousLimit;
}
140165

141166
TResourceTracker::TTimings TResourceTracker::TTimings::operator-(const TResourceTracker::TTimings& other) const
@@ -424,6 +449,11 @@ i64 TResourceTracker::GetTotalMemoryLimit()
424449
return MemoryCgroupTracker_->GetTotalMemoryLimit();
425450
}
426451

452+
//! Forwards to the memory cgroup tracker and returns its anonymous memory limit.
i64 TResourceTracker::GetAnonymousMemoryLimit()
{
    const auto& memoryTracker = MemoryCgroupTracker_;
    return memoryTracker->GetAnonymousMemoryLimit();
}
456+
427457
TResourceTrackerPtr GetResourceTracker()
428458
{
429459
return LeakyRefCountedSingleton<TResourceTracker>();

yt/yt/library/profiling/resource_tracker/resource_tracker.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,17 @@ class TMemoryCgroupTracker
3434
public:
3535
void CollectSensors(ISensorWriter* writer) override;
3636

37-
i64 GetTotalMemoryLimit();
37+
i64 GetTotalMemoryLimit() const;
38+
i64 GetAnonymousMemoryLimit() const;
3839

3940
private:
4041
bool CgroupErrorLogged_ = false;
42+
bool AnonymousLimitErrorLogged_ = false;
43+
44+
std::atomic<i64> TotalMemoryLimit_ = 0;
45+
std::atomic<i64> AnonymousMemoryLimit_ = 0;
4146

42-
std::atomic<i64> TotalMemoryLimit = 0;
47+
i64 SafeGetAnonymousMemoryLimit(const TString& cgroupPath, i64 totalMemoryLimit);
4348
};
4449

4550
////////////////////////////////////////////////////////////////////////////////
@@ -57,6 +62,7 @@ class TResourceTracker
5762
double GetCpuWait();
5863

5964
i64 GetTotalMemoryLimit();
65+
i64 GetAnonymousMemoryLimit();
6066

6167
void CollectSensors(ISensorWriter* writer) override;
6268

yt/yt/library/program/config.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ void THeapSizeLimitConfig::Register(TRegistrar registrar)
1212
{
1313
registrar.Parameter("container_memory_ratio", &TThis::ContainerMemoryRatio)
1414
.Optional();
15+
registrar.Parameter("container_memory_margin", &TThis::ContainerMemoryMargin)
16+
.Optional();
1517
registrar.Parameter("hard", &TThis::Hard)
1618
.Default(false);
1719
registrar.Parameter("dump_memory_profile_on_violation", &TThis::DumpMemoryProfileOnViolation)

yt/yt/library/program/config.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,13 @@ class THeapSizeLimitConfig
4343
// If program heap size exceeds the limit tcmalloc is instructed to release memory to the kernel.
4444
std::optional<double> ContainerMemoryRatio;
4545

46-
//! If true tcmalloc crashes when system allocates more memory than #ContainerMemoryRatio.
46+
//! Similar to #ContainerMemoryRatio, but is set in terms of absolute difference from
47+
//! the container memory limit.
48+
//! For example, if ContainerMemoryLimit=200Gb and ContainerMemoryMargin=1Gb
49+
//! then tcmalloc limit will be 199Gb.
50+
std::optional<double> ContainerMemoryMargin;
51+
52+
//! If true tcmalloc crashes when system allocates more memory than #ContainerMemoryRatio/#ContainerMemoryMargin.
4753
bool Hard;
4854

4955
bool DumpMemoryProfileOnViolation;

yt/yt/library/program/helpers.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class TCMallocLimitsAdjuster
6262
public:
6363
void Adjust(const TTCMallocConfigPtr& config)
6464
{
65-
i64 totalMemory = GetContainerMemoryLimit();
65+
i64 totalMemory = GetAnonymousMemoryLimit();
6666
AdjustPageHeapLimit(totalMemory, config);
6767
AdjustAggressiveReleaseThreshold(totalMemory, config);
6868
SetupMemoryLimitHandler(config);
@@ -120,27 +120,32 @@ class TCMallocLimitsAdjuster
120120
}
121121
}
122122

123-
i64 GetContainerMemoryLimit() const
123+
//! Returns the anonymous memory limit reported by the resource tracker,
//! or 0 when no resource tracker is available.
i64 GetAnonymousMemoryLimit() const
{
    if (auto tracker = NProfiling::GetResourceTracker()) {
        return tracker->GetAnonymousMemoryLimit();
    }
    return 0;
}
132132

133133
//! Proposes a tcmalloc heap limit derived from #totalMemory and the heap size config.
/*!
 *  A default-constructed limit is returned when
 *  - #totalMemory is zero,
 *  - neither ContainerMemoryRatio nor ContainerMemoryMargin is configured,
 *  - the configured value yields a negative (or NaN) limit, e.g. the margin
 *    exceeds #totalMemory.
 *
 *  When both options are configured, ContainerMemoryMargin takes precedence
 *  and the limit is `totalMemory - margin`; otherwise it is `ratio * totalMemory`.
 *  The `hard` flag is copied from the config.
 */
TAllocatorMemoryLimit ProposeHeapMemoryLimit(i64 totalMemory, const TTCMallocConfigPtr& config) const
{
    const auto& heapSizeConfig = config->HeapSizeLimit;

    const bool hasRatio = heapSizeConfig->ContainerMemoryRatio.has_value();
    const bool hasMargin = heapSizeConfig->ContainerMemoryMargin.has_value();

    if (totalMemory == 0 || (!hasRatio && !hasMargin)) {
        return {};
    }

    // Both options are floating-point, so the candidate limit is computed as double.
    const double candidateLimit = hasMargin
        ? totalMemory - *heapSizeConfig->ContainerMemoryMargin
        : *heapSizeConfig->ContainerMemoryRatio * totalMemory;

    // A negative value is not a meaningful byte limit, and converting it to an
    // unsigned integer would be undefined behavior; NaN fails this check too.
    // NOTE(review): assumes a default-constructed limit means "nothing proposed",
    // which the early return above already relies on.
    if (!(candidateLimit >= 0)) {
        return {};
    }

    TAllocatorMemoryLimit proposed;
    proposed.hard = heapSizeConfig->Hard;
    proposed.limit = candidateLimit;

    return proposed;
}
}

0 commit comments

Comments
 (0)