Skip to content

Commit ff68e99

Browse files
committed
Fix serialized batch stat calculation (#19106)
1 parent 774754f commit ff68e99

File tree

2 files changed

+28
-17
lines changed

2 files changed

+28
-17
lines changed

ydb/library/formats/arrow/splitter/stats.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,10 +45,10 @@ std::vector<i64> TSimpleSerializationStat::SplitRecords(
4545
}
4646

4747
std::vector<i64> TBatchSerializationStat::SplitRecordsForBlobSize(const i64 recordsCount, const ui64 blobSize) const {
48-
if (!SerializedBytesPerRecord) {
48+
if (!SerializedBytes) {
4949
return { recordsCount };
5050
}
51-
const ui32 recordsCountPerBlob = blobSize / SerializedBytesPerRecord;
51+
const ui32 recordsCountPerBlob = blobSize / GetSerializedBytesPerRecord();
5252
return TSimilarPacker::SplitWithExpected(recordsCount, recordsCountPerBlob);
5353
}
5454

ydb/library/formats/arrow/splitter/stats.h

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -69,49 +69,60 @@ class TSimpleSerializationStat {
6969

7070
class TBatchSerializationStat {
7171
protected:
72-
double SerializedBytesPerRecord = 0;
73-
double RawBytesPerRecord = 0;
72+
ui64 RecordCount = 0;
73+
double SerializedBytes = 0;
74+
double RawBytes = 0;
75+
protected:
76+
double GetSerializedBytesPerRecord() const {
77+
return SerializedBytes / RecordCount;
78+
}
79+
double GetRawBytesPerRecord() const {
80+
return RawBytes / RecordCount;
81+
}
7482
public:
7583
TBatchSerializationStat() = default;
7684
TBatchSerializationStat(const ui64 bytes, const ui64 recordsCount, const ui64 rawBytes) {
7785
Y_ABORT_UNLESS(recordsCount);
78-
SerializedBytesPerRecord = 1.0 * bytes / recordsCount;
79-
RawBytesPerRecord = 1.0 * rawBytes / recordsCount;
86+
RecordCount = recordsCount;
87+
SerializedBytes = bytes;
88+
RawBytes = rawBytes;
8089
}
8190

8291
TString DebugString() const {
83-
return TStringBuilder() << "{sbpr=" << SerializedBytesPerRecord << ";rbpr=" << RawBytesPerRecord << "}";
92+
return TStringBuilder() << "{sbpr=" << GetSerializedBytesPerRecord() << ";rbpr=" << GetRawBytesPerRecord() << "}";
8493
}
8594

8695
TBatchSerializationStat(const TSimpleSerializationStat& simple) {
87-
SerializedBytesPerRecord = simple.GetSerializedBytesPerRecord();
88-
RawBytesPerRecord = simple.GetRawBytesPerRecord();
96+
RecordCount = simple.GetRecordsCount();
97+
SerializedBytes = simple.GetSerializedBytes();
98+
RawBytes = simple.GetRawBytes();
8999
}
90100

91101
void Merge(const TSimpleSerializationStat& item) {
92-
SerializedBytesPerRecord += item.GetSerializedBytesPerRecord();
93-
RawBytesPerRecord += item.GetRawBytesPerRecord();
102+
RecordCount += item.GetRecordsCount();
103+
SerializedBytes += item.GetSerializedBytes();
104+
RawBytes += item.GetRawBytes();
94105
}
95106

96107
std::vector<i64> SplitRecordsForBlobSize(const i64 recordsCount, const ui64 blobSize) const;
97108

98-
std::optional<ui64> PredictOptimalPackRecordsCount(const ui64 recordsCount, const ui64 blobSize) const {
99-
if (!SerializedBytesPerRecord) {
109+
std::optional<ui64> PredictOptimalPackRecordsCount(const ui64 recordsCount, const ui64 blobSize) const {
110+
if (!SerializedBytes) {
100111
return {};
101112
}
102-
const ui64 fullSize = 1.0 * recordsCount * SerializedBytesPerRecord;
113+
const ui64 fullSize = recordsCount * GetSerializedBytesPerRecord();
103114
if (fullSize < blobSize) {
104115
return recordsCount;
105116
} else {
106-
return std::floor(1.0 * blobSize / SerializedBytesPerRecord);
117+
return std::floor(blobSize / GetSerializedBytesPerRecord());
107118
}
108119
}
109120

110121
std::optional<ui64> PredictOptimalSplitFactor(const ui64 recordsCount, const ui64 blobSize) const {
111-
if (!SerializedBytesPerRecord) {
122+
if (!SerializedBytes) {
112123
return {};
113124
}
114-
const ui64 fullSize = 1.0 * recordsCount * SerializedBytesPerRecord;
125+
const ui64 fullSize = recordsCount * GetSerializedBytesPerRecord();
115126
if (fullSize < blobSize) {
116127
return 1;
117128
} else {

0 commit comments

Comments
 (0)