@@ -10,62 +10,99 @@ namespace dfly::aggregate {
10
10
11
11
namespace {
12
12
13
- struct GroupStep {
14
- PipelineResult operator ()(PipelineResult result) {
15
- // Separate items into groups
16
- absl::flat_hash_map<absl::FixedArray<Value>, std::vector<DocValues>> groups;
17
- for (auto & value : result.values ) {
18
- groups[Extract (value)].push_back (std::move (value));
19
- }
13
+ using ValuesList = absl::FixedArray<Value>;
20
14
21
- // Restore DocValues and apply reducers
22
- std::vector<DocValues> out;
23
- while (!groups.empty ()) {
24
- auto node = groups.extract (groups.begin ());
25
- DocValues doc = Unpack (std::move (node.key ()));
26
- for (auto & reducer : reducers_) {
27
- doc[reducer.result_field ] = reducer.func ({reducer.source_field , node.mapped ()});
28
- }
29
- out.push_back (std::move (doc));
30
- }
15
+ ValuesList ExtractFieldsValues (const DocValues& dv, absl::Span<const std::string> fields) {
16
+ ValuesList out (fields.size ());
17
+ for (size_t i = 0 ; i < fields.size (); i++) {
18
+ auto it = dv.find (fields[i]);
19
+ out[i] = (it != dv.end ()) ? it->second : Value{};
20
+ }
21
+ return out;
22
+ }
31
23
32
- absl::flat_hash_set<std::string> fields_to_print;
33
- fields_to_print.reserve (fields_.size () + reducers_.size ());
24
+ DocValues PackFields (ValuesList values, absl::Span<const std::string> fields) {
25
+ DCHECK_EQ (values.size (), fields.size ());
26
+ DocValues out;
27
+ for (size_t i = 0 ; i < fields.size (); i++)
28
+ out[fields[i]] = std::move (values[i]);
29
+ return out;
30
+ }
34
31
35
- for (auto & field : fields_) {
36
- fields_to_print.insert (std::move (field));
37
- }
38
- for (auto & reducer : reducers_) {
39
- fields_to_print.insert (std::move (reducer.result_field ));
40
- }
32
+ const Value kEmptyValue = Value{};
33
+
34
+ } // namespace
41
35
42
- return {std::move (out), std::move (fields_to_print)};
36
+ void Aggregator::DoGroup (absl::Span<const std::string> fields, absl::Span<const Reducer> reducers) {
37
+ // Separate items into groups
38
+ absl::flat_hash_map<ValuesList, std::vector<DocValues>> groups;
39
+ for (auto & value : result.values ) {
40
+ groups[ExtractFieldsValues (value, fields)].push_back (std::move (value));
43
41
}
44
42
45
- absl::FixedArray<Value> Extract (const DocValues& dv) {
46
- absl::FixedArray<Value> out (fields_.size ());
47
- for (size_t i = 0 ; i < fields_.size (); i++) {
48
- auto it = dv.find (fields_[i]);
49
- out[i] = (it != dv.end ()) ? it->second : Value{};
43
+ // Restore DocValues and apply reducers
44
+ auto & values = result.values ;
45
+ values.clear ();
46
+ values.reserve (groups.size ());
47
+ while (!groups.empty ()) {
48
+ auto node = groups.extract (groups.begin ());
49
+ DocValues doc = PackFields (std::move (node.key ()), fields);
50
+ for (auto & reducer : reducers) {
51
+ doc[reducer.result_field ] = reducer.func ({reducer.source_field , node.mapped ()});
50
52
}
51
- return out ;
53
+ values. push_back ( std::move (doc)) ;
52
54
}
53
55
54
- DocValues Unpack (absl::FixedArray<Value>&& values) {
55
- DCHECK_EQ (values. size (), fields_. size () );
56
- DocValues out ;
57
- for ( size_t i = 0 ; i < fields_. size (); i++)
58
- out[fields_[i]] = std::move (values[i]);
59
- return out ;
56
+ auto & fields_to_print = result. fields_to_print ;
57
+ fields_to_print. clear ( );
58
+ fields_to_print. reserve (fields. size () + reducers. size ()) ;
59
+
60
+ for ( auto & field : fields) {
61
+ fields_to_print. insert (field) ;
60
62
}
63
+ for (auto & reducer : reducers) {
64
+ fields_to_print.insert (reducer.result_field );
65
+ }
66
+ }
61
67
62
- std::vector<std::string> fields_;
63
- std::vector<Reducer> reducers_;
64
- };
68
+ void Aggregator::DoSort (std::string_view field, bool descending) {
69
+ /*
70
+ Comparator for sorting DocValues by field.
71
+ If some of the fields is not present in the DocValues, comparator returns:
72
+ 1. l_it == l.end() && r_it != r.end()
73
+ asc -> false
74
+ desc -> false
75
+ 2. l_it != l.end() && r_it == r.end()
76
+ asc -> true
77
+ desc -> true
78
+ 3. l_it == l.end() && r_it == r.end()
79
+ asc -> false
80
+ desc -> false
81
+ */
82
+ auto comparator = [&](const DocValues& l, const DocValues& r) {
83
+ auto l_it = l.find (field);
84
+ auto r_it = r.find (field);
85
+
86
+ // If some of the values is not present
87
+ if (l_it == l.end () || r_it == r.end ()) {
88
+ return l_it != l.end ();
89
+ }
65
90
66
- const Value kEmptyValue = Value{};
91
+ auto & lv = l_it->second ;
92
+ auto & rv = r_it->second ;
93
+ return !descending ? lv < rv : lv > rv;
94
+ };
67
95
68
- } // namespace
96
+ std::sort (result.values .begin (), result.values .end (), std::move (comparator));
97
+
98
+ result.fields_to_print .insert (field);
99
+ }
100
+
101
+ void Aggregator::DoLimit (size_t offset, size_t num) {
102
+ auto & values = result.values ;
103
+ values.erase (values.begin (), values.begin () + std::min (offset, values.size ()));
104
+ values.resize (std::min (num, values.size ()));
105
+ }
69
106
70
107
const Value& ValueIterator::operator *() const {
71
108
auto it = values_.front ().find (field_);
@@ -109,48 +146,30 @@ Reducer::Func FindReducerFunc(ReducerFunc name) {
109
146
return nullptr ;
110
147
}
111
148
112
- PipelineStep MakeGroupStep (absl::Span<const std::string_view> fields,
113
- std::vector<Reducer> reducers) {
114
- return GroupStep{std::vector<std::string>(fields.begin (), fields.end ()), std::move (reducers)};
149
+ AggregationStep MakeGroupStep (std::vector<std::string> fields, std::vector<Reducer> reducers) {
150
+ return [fields = std::move (fields), reducers = std::move (reducers)](Aggregator* aggregator) {
151
+ aggregator->DoGroup (fields, reducers);
152
+ };
115
153
}
116
154
117
- PipelineStep MakeSortStep (std::string_view field, bool descending) {
118
- return [field = std::string (field), descending](PipelineResult result) -> PipelineResult {
119
- auto & values = result.values ;
120
-
121
- std::sort (values.begin (), values.end (), [field](const DocValues& l, const DocValues& r) {
122
- auto it1 = l.find (field);
123
- auto it2 = r.find (field);
124
- return it1 == l.end () || (it2 != r.end () && it1->second < it2->second );
125
- });
126
-
127
- if (descending) {
128
- std::reverse (values.begin (), values.end ());
129
- }
130
-
131
- result.fields_to_print .insert (field);
132
- return result;
155
+ AggregationStep MakeSortStep (std::string field, bool descending) {
156
+ return [field = std::move (field), descending](Aggregator* aggregator) {
157
+ aggregator->DoSort (field, descending);
133
158
};
134
159
}
135
160
136
- PipelineStep MakeLimitStep (size_t offset, size_t num) {
137
- return [offset, num](PipelineResult result) {
138
- auto & values = result.values ;
139
- values.erase (values.begin (), values.begin () + std::min (offset, values.size ()));
140
- values.resize (std::min (num, values.size ()));
141
- return result;
142
- };
161
+ AggregationStep MakeLimitStep (size_t offset, size_t num) {
162
+ return [=](Aggregator* aggregator) { aggregator->DoLimit (offset, num); };
143
163
}
144
164
145
- PipelineResult Process (std::vector<DocValues> values,
146
- absl::Span<const std::string_view> fields_to_print,
147
- absl::Span<const PipelineStep > steps) {
148
- PipelineResult result {std::move (values), {fields_to_print.begin (), fields_to_print.end ()}};
165
+ AggregationResult Process (std::vector<DocValues> values,
166
+ absl::Span<const std::string_view> fields_to_print,
167
+ absl::Span<const AggregationStep > steps) {
168
+ Aggregator aggregator {std::move (values), {fields_to_print.begin (), fields_to_print.end ()}};
149
169
for (auto & step : steps) {
150
- PipelineResult step_result = step (std::move (result));
151
- result = std::move (step_result);
170
+ step (&aggregator);
152
171
}
153
- return result;
172
+ return aggregator. result ;
154
173
}
155
174
156
175
} // namespace dfly::aggregate
0 commit comments