@@ -38,6 +38,7 @@ module fpnew_divsqrt_multi #(
38
38
input TagType tag_i,
39
39
input logic mask_i,
40
40
input AuxType aux_i,
41
+ input logic vectorial_op_i,
41
42
// Input Handshake
42
43
input logic in_valid_i,
43
44
output logic in_ready_o,
@@ -95,6 +96,7 @@ module fpnew_divsqrt_multi #(
95
96
TagType [0 : NUM_INP_REGS ] inp_pipe_tag_q;
96
97
logic [0 : NUM_INP_REGS ] inp_pipe_mask_q;
97
98
AuxType [0 : NUM_INP_REGS ] inp_pipe_aux_q;
99
+ logic [0 : NUM_INP_REGS ] inp_pipe_vec_op_q;
98
100
logic [0 : NUM_INP_REGS ] inp_pipe_valid_q;
99
101
// Ready signal is combinatorial for all stages
100
102
logic [0 : NUM_INP_REGS ] inp_pipe_ready;
@@ -107,8 +109,9 @@ module fpnew_divsqrt_multi #(
107
109
assign inp_pipe_tag_q[0 ] = tag_i;
108
110
assign inp_pipe_mask_q[0 ] = mask_i;
109
111
assign inp_pipe_aux_q[0 ] = aux_i;
112
+ assign inp_pipe_vec_op_q[0 ] = vectorial_op_i;
110
113
assign inp_pipe_valid_q[0 ] = in_valid_i;
111
- // Input stage: Propagate pipeline ready signal to updtream circuitry
114
+ // Input stage: Propagate pipeline ready signal to upstream circuitry
112
115
assign in_ready_o = inp_pipe_ready[0 ];
113
116
// Generate the register stages
114
117
for (genvar i = 0 ; i < NUM_INP_REGS ; i++ ) begin : gen_input_pipeline
@@ -130,6 +133,7 @@ module fpnew_divsqrt_multi #(
130
133
`FFL (inp_pipe_tag_q[i+ 1 ], inp_pipe_tag_q[i], reg_ena, TagType ' ('0 ))
131
134
`FFL (inp_pipe_mask_q[i+ 1 ], inp_pipe_mask_q[i], reg_ena, '0 )
132
135
`FFL (inp_pipe_aux_q[i+ 1 ], inp_pipe_aux_q[i], reg_ena, AuxType ' ('0 ))
136
+ `FFL (inp_pipe_vec_op_q[i+ 1 ], inp_pipe_vec_op_q[i], reg_ena, '0 ) // 1-bit logic flag: plain '0 reset (no AuxType cast), same as inp_pipe_mask_q
133
137
end
134
138
// Output stage: assign selected pipe outputs to signals for later use
135
139
assign operands_q = inp_pipe_operands_q[NUM_INP_REGS ];
@@ -173,27 +177,45 @@ module fpnew_divsqrt_multi #(
173
177
logic op_starting; // high in the cycle a new operation starts
174
178
logic out_valid, out_ready; // output handshake with downstream
175
179
logic unit_busy; // valid data in flight
180
+ logic simd_synch_done;
176
181
// FSM states
177
182
typedef enum logic [1 : 0 ] { IDLE , BUSY , HOLD } fsm_state_e ;
178
183
fsm_state_e state_q, state_d;
179
184
180
- // Ready synch with other lanes
181
- // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes
182
- assign divsqrt_ready_o = in_ready;
183
- // Upstream ready comes from sanitization FSM, and it is synched among all the lanes
184
- assign inp_pipe_ready[NUM_INP_REGS ] = simd_synch_rdy_i;
185
+ // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr.
186
+ assign div_valid = in_valid_q & (op_q == fpnew_pkg :: DIV ) & in_ready & ~ flush_i;
187
+ assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg :: DIV ) & in_ready & ~ flush_i;
188
+ assign op_starting = div_valid | sqrt_valid;
189
+
190
+ // Hold additional information while the operation is in progress
191
+ logic result_is_fp8_q;
192
+ TagType result_tag_q;
193
+ logic result_mask_q;
194
+ AuxType result_aux_q;
195
+ logic result_vec_op_q;
196
+
197
+ // Fill the registers every time a valid operation arrives (load FF, active low asynch rst)
198
+ `FFL (result_is_fp8_q, input_is_fp8, op_starting, '0 )
199
+ `FFL (result_tag_q, inp_pipe_tag_q[NUM_INP_REGS ], op_starting, '0 )
200
+ `FFL (result_mask_q, inp_pipe_mask_q[NUM_INP_REGS ],op_starting, '0 )
201
+ `FFL (result_aux_q, inp_pipe_aux_q[NUM_INP_REGS ], op_starting, '0 )
202
+ `FFL (result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS ], op_starting, '0 )
203
+
204
+ // Wait for other lanes only if the operation is vectorial
205
+ assign simd_synch_done = simd_synch_done_i || ~ result_vec_op_q;
185
206
186
207
// Valid synch with other lanes
187
208
// When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes
188
209
// As soon as all the lanes are over, we can clear this FF and start with a new operation
189
- `FFLARNC (unit_done_q, unit_done, unit_done, simd_synch_done_i , 1'b0 , clk_i, rst_ni);
210
+ `FFLARNC (unit_done_q, unit_done, unit_done, simd_synch_done , 1'b0 , clk_i, rst_ni);
190
211
// Tell the other units that this unit has finished now or in the past
191
- assign divsqrt_done_o = unit_done_q | unit_done;
212
+ assign divsqrt_done_o = ( unit_done_q | unit_done) & result_vec_op_q ;
192
213
193
- // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr.
194
- assign div_valid = in_valid_q & (op_q == fpnew_pkg :: DIV ) & in_ready & ~ flush_i;
195
- assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg :: DIV ) & in_ready & ~ flush_i;
196
- assign op_starting = div_valid | sqrt_valid;
214
+ // Ready synch with other lanes
215
+ // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes
216
+ assign divsqrt_ready_o = in_ready;
217
+ // Upstream ready: lane-synched ready from the sanitization FSM if vectorial, local in_ready otherwise
218
+ assign inp_pipe_ready[NUM_INP_REGS ] = result_vec_op_q ? simd_synch_rdy_i : in_ready;
197
219
198
220
// FSM to safely apply and receive data from DIVSQRT unit
199
221
always_comb begin : flag_fsm
@@ -215,13 +237,13 @@ module fpnew_divsqrt_multi #(
215
237
BUSY : begin
216
238
unit_busy = 1'b1 ; // data in flight
217
239
// If all the lanes are done with processing
218
- if (simd_synch_done_i) begin
240
+ if (simd_synch_done_i || ( ~ result_vec_op_q && unit_done) ) begin
219
241
out_valid = 1'b1 ; // try to commit result downstream
220
242
// If downstream accepts our result
221
243
if (out_ready) begin
222
244
state_d = IDLE ; // we anticipate going back to idling..
245
+ in_ready = 1'b1 ; // we acknowledge the instruction
223
246
if (in_valid_q && unit_ready) begin // ..unless new work comes in
224
- in_ready = 1'b1 ; // we acknowledge the instruction
225
247
state_d = BUSY ; // and stay busy with it
226
248
end
227
249
// Otherwise if downstream is not ready for the result
@@ -258,18 +280,6 @@ module fpnew_divsqrt_multi #(
258
280
// FSM status register (asynch active low reset)
259
281
`FF (state_q, state_d, IDLE )
260
282
261
- // Hold additional information while the operation is in progress
262
- logic result_is_fp8_q;
263
- TagType result_tag_q;
264
- logic result_mask_q;
265
- AuxType result_aux_q;
266
-
267
- // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst)
268
- `FFL (result_is_fp8_q, input_is_fp8, op_starting, '0 )
269
- `FFL (result_tag_q, inp_pipe_tag_q[NUM_INP_REGS ], op_starting, '0 )
270
- `FFL (result_mask_q, inp_pipe_mask_q[NUM_INP_REGS ],op_starting, '0 )
271
- `FFL (result_aux_q, inp_pipe_aux_q[NUM_INP_REGS ], op_starting, '0 )
272
-
273
283
// -----------------
274
284
// DIVSQRT instance
275
285
// -----------------
@@ -298,9 +308,9 @@ module fpnew_divsqrt_multi #(
298
308
// Adjust result width and fix FP8
299
309
assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result;
300
310
301
- // Hold the result when one lane has finished execution, except when all the lanes finish together
302
- // and the result can be accepted downstream
303
- assign hold_en = unit_done & (~ simd_synch_done_i | ~ out_ready);
311
+ // Hold the result when this lane has finished but cannot commit yet (other lanes not done or
312
+ // downstream not ready); a non-vectorial result that downstream accepts right away is not held
313
+ assign hold_en = unit_done & (~ simd_synch_done_i | ~ out_ready) & ~ ( ~ result_vec_op_q & out_ready) ;
304
314
// The Hold register (load, no reset)
305
315
`FFLNR (held_result_q, adjusted_result, hold_en, clk_i)
306
316
`FFLNR (held_status_q, unit_status, hold_en, clk_i)
0 commit comments