Skip to content

Commit 3878fbc

Browse files
committed
Revert "Fix serialized batch stat calculation (#19106)" (#19184)
1 parent 76b249a commit 3878fbc

File tree

2 files changed

+17
-28
lines changed

2 files changed

+17
-28
lines changed

ydb/library/formats/arrow/splitter/stats.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,10 +45,10 @@ std::vector<i64> TSimpleSerializationStat::SplitRecords(
4545
}
4646

4747
std::vector<i64> TBatchSerializationStat::SplitRecordsForBlobSize(const i64 recordsCount, const ui64 blobSize) const {
48-
if (!SerializedBytes || blobSize < GetSerializedBytesPerRecord()) {
48+
if (!SerializedBytesPerRecord || blobSize < SerializedBytesPerRecord) {
4949
return { recordsCount };
5050
}
51-
const ui32 recordsCountPerBlob = blobSize / GetSerializedBytesPerRecord();
51+
const ui32 recordsCountPerBlob = blobSize / SerializedBytesPerRecord;
5252
return TSimilarPacker::SplitWithExpected(recordsCount, recordsCountPerBlob);
5353
}
5454

ydb/library/formats/arrow/splitter/stats.h

Lines changed: 15 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -69,60 +69,49 @@ class TSimpleSerializationStat {
6969

7070
class TBatchSerializationStat {
7171
protected:
72-
ui64 RecordCount = 0;
73-
double SerializedBytes = 0;
74-
double RawBytes = 0;
75-
protected:
76-
double GetSerializedBytesPerRecord() const {
77-
return SerializedBytes / RecordCount;
78-
}
79-
double GetRawBytesPerRecord() const {
80-
return RawBytes / RecordCount;
81-
}
72+
double SerializedBytesPerRecord = 0;
73+
double RawBytesPerRecord = 0;
8274
public:
8375
TBatchSerializationStat() = default;
8476
TBatchSerializationStat(const ui64 bytes, const ui64 recordsCount, const ui64 rawBytes) {
8577
Y_ABORT_UNLESS(recordsCount);
86-
RecordCount = recordsCount;
87-
SerializedBytes = bytes;
88-
RawBytes = rawBytes;
78+
SerializedBytesPerRecord = 1.0 * bytes / recordsCount;
79+
RawBytesPerRecord = 1.0 * rawBytes / recordsCount;
8980
}
9081

9182
TString DebugString() const {
92-
return TStringBuilder() << "{sbpr=" << GetSerializedBytesPerRecord() << ";rbpr=" << GetRawBytesPerRecord() << "}";
83+
return TStringBuilder() << "{sbpr=" << SerializedBytesPerRecord << ";rbpr=" << RawBytesPerRecord << "}";
9384
}
9485

9586
TBatchSerializationStat(const TSimpleSerializationStat& simple) {
96-
RecordCount = simple.GetRecordsCount();
97-
SerializedBytes = simple.GetSerializedBytes();
98-
RawBytes = simple.GetRawBytes();
87+
SerializedBytesPerRecord = simple.GetSerializedBytesPerRecord();
88+
RawBytesPerRecord = simple.GetRawBytesPerRecord();
9989
}
10090

10191
void Merge(const TSimpleSerializationStat& item) {
102-
RecordCount += item.GetRecordsCount();
103-
SerializedBytes += item.GetSerializedBytes();
104-
RawBytes += item.GetRawBytes();
92+
SerializedBytesPerRecord += item.GetSerializedBytesPerRecord();
93+
RawBytesPerRecord += item.GetRawBytesPerRecord();
10594
}
10695

10796
std::vector<i64> SplitRecordsForBlobSize(const i64 recordsCount, const ui64 blobSize) const;
10897

109-
std::optional<ui64> PredictOptimalPackRecordsCount(const ui64 recordsCount, const ui64 blobSize) const {
110-
if (!SerializedBytes) {
98+
std::optional<ui64> PredictOptimalPackRecordsCount(const ui64 recordsCount, const ui64 blobSize) const {
99+
if (!SerializedBytesPerRecord) {
111100
return {};
112101
}
113-
const ui64 fullSize = recordsCount * GetSerializedBytesPerRecord();
102+
const ui64 fullSize = 1.0 * recordsCount * SerializedBytesPerRecord;
114103
if (fullSize < blobSize) {
115104
return recordsCount;
116105
} else {
117-
return std::floor(blobSize / GetSerializedBytesPerRecord());
106+
return std::floor(1.0 * blobSize / SerializedBytesPerRecord);
118107
}
119108
}
120109

121110
std::optional<ui64> PredictOptimalSplitFactor(const ui64 recordsCount, const ui64 blobSize) const {
122-
if (!SerializedBytes) {
111+
if (!SerializedBytesPerRecord) {
123112
return {};
124113
}
125-
const ui64 fullSize = recordsCount * GetSerializedBytesPerRecord();
114+
const ui64 fullSize = 1.0 * recordsCount * SerializedBytesPerRecord;
126115
if (fullSize < blobSize) {
127116
return 1;
128117
} else {

0 commit comments

Comments
 (0)