@@ -6,10 +6,13 @@ namespace olympia
6
6
{
7
7
constexpr char VectorUopGenerator::name[];
8
8
9
- VectorUopGenerator::VectorUopGenerator (sparta::TreeNode* node, const VectorUopGeneratorParameterSet* p) :
10
- sparta::Unit (node)
9
+ VectorUopGenerator::VectorUopGenerator (sparta::TreeNode* node,
10
+ const VectorUopGeneratorParameterSet* p) :
11
+ sparta::Unit (node),
12
+ vuops_generated_ (&unit_stat_set_, " vector_uops_generated" ,
13
+ " Number of vector uops generated" , sparta::Counter::COUNT_NORMAL)
11
14
{
12
- // Vector arithmetic uop generator, increment all src and dest register numbers
15
+ // Vector uop generator, increment all src and dest register numbers
13
16
// For a "vadd.vv v12, v4,v8" with an LMUL of 4:
14
17
// Uop 1: vadd.vv v12, v4, v8
15
18
// Uop 2: vadd.vv v13, v5, v9
@@ -19,26 +22,28 @@ namespace olympia
19
22
constexpr bool SINGLE_DEST = false ;
20
23
constexpr bool WIDE_DEST = false ;
21
24
constexpr bool ADD_DEST_AS_SRC = false ;
22
- uop_gen_function_map_.emplace (InstArchInfo::UopGenType::ARITH,
23
- &VectorUopGenerator::generateArithUop<SINGLE_DEST, WIDE_DEST, ADD_DEST_AS_SRC>);
25
+ uop_gen_function_map_.emplace (
26
+ InstArchInfo::UopGenType::ELEMENTWISE,
27
+ &VectorUopGenerator::generateUops<SINGLE_DEST, WIDE_DEST, ADD_DEST_AS_SRC>);
24
28
}
25
29
26
- // Vector arithmetic single dest uop generator, only increment all src register numbers
27
- // For a "vmseq.vv v12, v4,v8" with an LMUL of 4:
28
- // Uop 1: vadd .vv v12, v4, v8
29
- // Uop 2: vadd .vv v12, v5, v9
30
- // Uop 3: vadd .vv v12, v6, v10
31
- // Uop 4: vadd .vv v12, v7, v11
30
+ // Vector single dest uop generator, only increment all src register numbers
31
+ // For a "vmseq.vv v12, v4, v8" with an LMUL of 4:
32
+ // Uop 1: vmseq.vv v12, v4, v8
33
+ // Uop 2: vmseq.vv v12, v5, v9
34
+ // Uop 3: vmseq.vv v12, v6, v10
35
+ // Uop 4: vmseq.vv v12, v7, v11
32
36
{
33
37
constexpr bool SINGLE_DEST = true ;
34
38
constexpr bool WIDE_DEST = false ;
35
39
constexpr bool ADD_DEST_AS_SRC = false ;
36
- uop_gen_function_map_.emplace (InstArchInfo::UopGenType::ARITH_SINGLE_DEST,
37
- &VectorUopGenerator::generateArithUop<SINGLE_DEST, WIDE_DEST, ADD_DEST_AS_SRC>);
40
+ uop_gen_function_map_.emplace (
41
+ InstArchInfo::UopGenType::SINGLE_DEST,
42
+ &VectorUopGenerator::generateUops<SINGLE_DEST, WIDE_DEST, ADD_DEST_AS_SRC>);
38
43
}
39
44
40
- // Vector arithmetic wide dest uop generator, only increment src register numbers for even uops
41
- // For a "vwmul.vv v12, v4, v8" with an LMUL of 4:
45
+ // Vector wide dest uop generator, only increment src register numbers for even uops
46
+ // For a "vwmul.vv v12, v4, v8" with an LMUL of 4:
42
47
// Uop 1: vwmul.vv v12, v4, v8
43
48
// Uop 2: vwmul.vv v13, v4, v8
44
49
// Uop 3: vwmul.vv v14, v6, v10
@@ -49,14 +54,30 @@ namespace olympia
49
54
// Uop 8: vwmul.vv v19, v10, v14
50
55
{
51
56
constexpr bool SINGLE_DEST = false ;
52
- constexpr bool WIDE_DEST = true ;
57
+ constexpr bool WIDENING = true ;
53
58
constexpr bool ADD_DEST_AS_SRC = false ;
54
- uop_gen_function_map_.emplace (InstArchInfo::UopGenType::ARITH_WIDE_DEST,
55
- &VectorUopGenerator::generateArithUop<SINGLE_DEST, WIDE_DEST, ADD_DEST_AS_SRC>);
59
+ uop_gen_function_map_.emplace (
60
+ InstArchInfo::UopGenType::WIDENING,
61
+ &VectorUopGenerator::generateUops<SINGLE_DEST, WIDENING, ADD_DEST_AS_SRC>);
56
62
}
57
63
58
- // Vector arithmetic multiplay -add wide dest uop generator, add dest as source
64
+ // Vector arithmetic multiply-add uop generator, add dest as source
59
65
// For a "vmacc.vv v12, v4, v8" with an LMUL of 4:
66
+ // Uop 1: vmacc.vv v12, v4, v8, v12
67
+ // Uop 2: vmacc.vv v13, v5, v9, v13
68
+ // Uop 3: vmacc.vv v14, v6, v10, v14
69
+ // Uop 4: vmacc.vv v15, v7, v11, v15
70
+ {
71
+ constexpr bool SINGLE_DEST = false ;
72
+ constexpr bool WIDE_DEST = false ;
73
+ constexpr bool ADD_DEST_AS_SRC = true ;
74
+ uop_gen_function_map_.emplace (
75
+ InstArchInfo::UopGenType::MAC,
76
+ &VectorUopGenerator::generateUops<SINGLE_DEST, WIDE_DEST, ADD_DEST_AS_SRC>);
77
+ }
78
+
79
+ // Vector multiply-add wide dest uop generator, add dest as source
80
+ // For a "vwmacc.vv v12, v4, v8" with an LMUL of 4:
60
81
// Uop 1: vwmacc.vv v12, v4, v8, v12
61
82
// Uop 2: vwmacc.vv v13, v4, v8, v13
62
83
// Uop 3: vwmacc.vv v14, v5, v9, v14
@@ -65,69 +86,54 @@ namespace olympia
65
86
// Uop 6: vwmacc.vv v17, v6, v10, v17
66
87
// Uop 7: vwmacc.vv v18, v7, v11, v18
67
88
// Uop 8: vwmacc.vv v19, v7, v11, v19
68
- {
69
- constexpr bool SINGLE_DEST = false ;
70
- constexpr bool WIDE_DEST = false ;
71
- constexpr bool ADD_DEST_AS_SRC = true ;
72
- uop_gen_function_map_.emplace (InstArchInfo::UopGenType::ARITH_MAC,
73
- &VectorUopGenerator::generateArithUop<SINGLE_DEST, WIDE_DEST, ADD_DEST_AS_SRC>);
74
- }
75
-
76
- // Vector arithmetic multiplay-add uop generator, add dest as source
77
- // For a "vmacc.vv v12, v4, v8" with an LMUL of 4:
78
- // Uop 1: vmacc.vv v12, v4, v8, v12
79
- // Uop 2: vmacc.vv v13, v5, v9, v13
80
- // Uop 3: vmacc.vv v14, v6, v10, v14
81
- // Uop 4: vmacc.vv v15, v7, v11, v15
82
89
{
83
90
constexpr bool SINGLE_DEST = false ;
84
91
constexpr bool WIDE_DEST = true ;
85
92
constexpr bool ADD_DEST_AS_SRC = true ;
86
- uop_gen_function_map_.emplace (InstArchInfo::UopGenType::ARITH_MAC_WIDE_DEST,
87
- &VectorUopGenerator::generateArithUop<SINGLE_DEST, WIDE_DEST, ADD_DEST_AS_SRC>);
93
+ uop_gen_function_map_.emplace (
94
+ InstArchInfo::UopGenType::MAC_WIDE,
95
+ &VectorUopGenerator::generateUops<SINGLE_DEST, WIDE_DEST, ADD_DEST_AS_SRC>);
88
96
}
89
97
}
90
98
91
- void VectorUopGenerator::onBindTreeLate_ ()
92
- {
93
- mavis_facade_ = getMavis (getContainer ());
94
- }
99
+ void VectorUopGenerator::onBindTreeLate_ () { mavis_facade_ = getMavis (getContainer ()); }
95
100
96
101
void VectorUopGenerator::setInst (const InstPtr & inst)
97
102
{
98
103
sparta_assert (current_inst_ == nullptr ,
99
- " Cannot start generating uops for a new vector instruction, "
100
- " current instruction has not finished: " << current_inst_);
104
+ " Cannot start generating uops for a new vector instruction, "
105
+ " current instruction has not finished: "
106
+ << current_inst_);
101
107
102
108
const auto uop_gen_type = inst->getUopGenType ();
103
109
sparta_assert (uop_gen_type != InstArchInfo::UopGenType::UNKNOWN,
104
- " Inst: " << current_inst_ << " uop gen type is unknown" );
110
+ " Inst: " << current_inst_ << " uop gen type is unknown" );
105
111
sparta_assert (uop_gen_type != InstArchInfo::UopGenType::NONE,
106
- " Inst: " << current_inst_ << " uop gen type is none" );
112
+ " Inst: " << current_inst_ << " uop gen type is none" );
107
113
108
114
// Number of vector elements processed by each uop
109
115
const VectorConfigPtr & vector_config = inst->getVectorConfig ();
110
116
const uint64_t num_elems_per_uop = VectorConfig::VLEN / vector_config->getSEW ();
111
117
// TODO: For now, generate uops for all elements even if there is a tail
112
118
num_uops_to_generate_ = std::ceil (vector_config->getVLMAX () / num_elems_per_uop);
113
119
114
- if ((uop_gen_type == InstArchInfo::UopGenType::ARITH_WIDE_DEST) ||
115
- (uop_gen_type == InstArchInfo::UopGenType::ARITH_MAC_WIDE_DEST ))
120
+ if ((uop_gen_type == InstArchInfo::UopGenType::WIDENING)
121
+ || (uop_gen_type == InstArchInfo::UopGenType::MAC_WIDE ))
116
122
{
117
123
// TODO: Add parameter to support dual dests
118
124
num_uops_to_generate_ *= 2 ;
119
125
}
120
126
121
127
current_inst_ = inst;
122
- ILOG (" Inst: " << current_inst_ <<
123
- " is being split into " << num_uops_to_generate_ << " UOPs" );
128
+ ILOG (" Inst: " << current_inst_ << " is being split into " << num_uops_to_generate_
129
+ << " UOPs" );
124
130
}
125
131
126
132
const InstPtr VectorUopGenerator::generateUop ()
127
133
{
128
134
const auto uop_gen_type = current_inst_->getUopGenType ();
129
135
sparta_assert (uop_gen_type <= InstArchInfo::UopGenType::NONE,
130
- " Inst: " << current_inst_ << " uop gen type is unknown" );
136
+ " Inst: " << current_inst_ << " uop gen type is unknown" );
131
137
132
138
// Generate uop
133
139
auto uop_gen_func = uop_gen_function_map_.at (uop_gen_type);
@@ -141,6 +147,7 @@ namespace olympia
141
147
uop->setVectorConfig (vector_config);
142
148
uop->setUOpID (num_uops_generated_);
143
149
++num_uops_generated_;
150
+ ++vuops_generated_;
144
151
145
152
// Set weak pointer to parent vector instruction (first uop)
146
153
sparta::SpartaWeakPointer<olympia::Inst> parent_weak_ptr = current_inst_;
@@ -151,7 +158,7 @@ namespace olympia
151
158
uop->setTail ((num_elems_per_uop * num_uops_generated_) > vector_config->getVL ());
152
159
153
160
// Handle last uop
154
- if (num_uops_generated_ == num_uops_to_generate_)
161
+ if (num_uops_generated_ == num_uops_to_generate_)
155
162
{
156
163
reset_ ();
157
164
}
@@ -161,8 +168,8 @@ namespace olympia
161
168
return uop;
162
169
}
163
170
164
- template <bool SINGLE_DEST, bool WIDE_DEST, bool ADD_DEST_AS_SRC>
165
- const InstPtr VectorUopGenerator::generateArithUop ()
171
+ template <bool SINGLE_DEST, bool WIDE_DEST, bool ADD_DEST_AS_SRC>
172
+ const InstPtr VectorUopGenerator::generateUops ()
166
173
{
167
174
// Increment source and destination register values
168
175
auto srcs = current_inst_->getSourceOpInfoList ();
@@ -177,8 +184,7 @@ namespace olympia
177
184
if constexpr (WIDE_DEST == true )
178
185
{
179
186
// Only increment source values for even uops
180
- src.field_value += (num_uops_generated_ % 2 ) ? num_uops_generated_ - 1
181
- : num_uops_generated_;
187
+ src.field_value += num_uops_generated_ / 2 ;
182
188
}
183
189
else
184
190
{
@@ -188,16 +194,18 @@ namespace olympia
188
194
189
195
// Add a destination to the list of sources
190
196
auto add_dest_as_src = [](auto & srcs, auto & dest)
191
- {
192
- // OperandFieldID is an enum with RS1 = 0, RS2 = 1, etc. with a max RS of RS4
193
- using OperandFieldID = mavis::InstMetaData::OperandFieldID;
194
- const OperandFieldID field_id = static_cast <OperandFieldID>(srcs.size ());
195
- sparta_assert (field_id <= OperandFieldID::RS_MAX,
196
- " Mavis does not support instructions with more than " << std::dec <<
197
- static_cast <std::underlying_type_t <OperandFieldID>>(OperandFieldID::RS_MAX) <<
198
- " sources" );
199
- srcs.emplace_back (field_id, dest.operand_type , dest.field_value );
200
- };
197
+ {
198
+ // OperandFieldID is an enum with RS1 = 0, RS2 = 1, etc. with a max RS of RS4
199
+ using OperandFieldID = mavis::InstMetaData::OperandFieldID;
200
+ const OperandFieldID field_id = static_cast <OperandFieldID>(srcs.size ());
201
+ sparta_assert (
202
+ field_id <= OperandFieldID::RS_MAX,
203
+ " Mavis does not support instructions with more than "
204
+ << std::dec
205
+ << static_cast <std::underlying_type_t <OperandFieldID>>(OperandFieldID::RS_MAX)
206
+ << " sources" );
207
+ srcs.emplace_back (field_id, dest.operand_type , dest.field_value );
208
+ };
201
209
202
210
auto dests = current_inst_->getDestOpInfoList ();
203
211
if constexpr (SINGLE_DEST == false )
@@ -219,7 +227,8 @@ namespace olympia
219
227
{
220
228
const VectorConfigPtr & vector_config = current_inst_->getVectorConfig ();
221
229
const uint32_t num_elems_per_uop = vector_config->getVLMAX () / vector_config->getSEW ();
222
- const bool uop_contains_tail_elems = (num_elems_per_uop * num_uops_generated_) > vector_config->getVL ();
230
+ const bool uop_contains_tail_elems =
231
+ (num_elems_per_uop * num_uops_generated_) > vector_config->getVL ();
223
232
224
233
if (uop_contains_tail_elems && (vector_config->getVTA () == false ))
225
234
{
@@ -234,17 +243,13 @@ namespace olympia
234
243
InstPtr uop;
235
244
if (current_inst_->hasImmediate ())
236
245
{
237
- mavis::ExtractorDirectOpInfoList ex_info (current_inst_->getMnemonic (),
238
- srcs,
239
- dests,
246
+ mavis::ExtractorDirectOpInfoList ex_info (current_inst_->getMnemonic (), srcs, dests,
240
247
current_inst_->getImmediate ());
241
248
uop = mavis_facade_->makeInstDirectly (ex_info, getClock ());
242
249
}
243
250
else
244
251
{
245
- mavis::ExtractorDirectOpInfoList ex_info (current_inst_->getMnemonic (),
246
- srcs,
247
- dests);
252
+ mavis::ExtractorDirectOpInfoList ex_info (current_inst_->getMnemonic (), srcs, dests);
248
253
uop = mavis_facade_->makeInstDirectly (ex_info, getClock ());
249
254
}
250
255
@@ -253,7 +258,7 @@ namespace olympia
253
258
254
259
void VectorUopGenerator::handleFlush (const FlushManager::FlushingCriteria & flush_criteria)
255
260
{
256
- if (current_inst_ && flush_criteria.includedInFlush (current_inst_))
261
+ if (current_inst_ && flush_criteria.includedInFlush (current_inst_))
257
262
{
258
263
reset_ ();
259
264
}
0 commit comments