Skip to content

Commit 98986bc

Browse files
Fix DivSqrt lanes synchronization (#90)
* Fix DivSqrt lanes synchronization
* Tie further unused signals in fpnew_opgroup_multifmt to zero
1 parent f1846d6 commit 98986bc

File tree

2 files changed

+59
-32
lines changed

2 files changed

+59
-32
lines changed

src/fpnew_divsqrt_multi.sv

Lines changed: 39 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ module fpnew_divsqrt_multi #(
3838
input TagType tag_i,
3939
input logic mask_i,
4040
input AuxType aux_i,
41+
input logic vectorial_op_i,
4142
// Input Handshake
4243
input logic in_valid_i,
4344
output logic in_ready_o,
@@ -95,6 +96,7 @@ module fpnew_divsqrt_multi #(
9596
TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
9697
logic [0:NUM_INP_REGS] inp_pipe_mask_q;
9798
AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
99+
logic [0:NUM_INP_REGS] inp_pipe_vec_op_q;
98100
logic [0:NUM_INP_REGS] inp_pipe_valid_q;
99101
// Ready signal is combinatorial for all stages
100102
logic [0:NUM_INP_REGS] inp_pipe_ready;
@@ -107,8 +109,9 @@ module fpnew_divsqrt_multi #(
107109
assign inp_pipe_tag_q[0] = tag_i;
108110
assign inp_pipe_mask_q[0] = mask_i;
109111
assign inp_pipe_aux_q[0] = aux_i;
112+
assign inp_pipe_vec_op_q[0] = vectorial_op_i;
110113
assign inp_pipe_valid_q[0] = in_valid_i;
111-
// Input stage: Propagate pipeline ready signal to updtream circuitry
114+
// Input stage: Propagate pipeline ready signal to upstream circuitry
112115
assign in_ready_o = inp_pipe_ready[0];
113116
// Generate the register stages
114117
for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
@@ -130,6 +133,7 @@ module fpnew_divsqrt_multi #(
130133
`FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
131134
`FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0)
132135
`FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
136+
`FFL(inp_pipe_vec_op_q[i+1], inp_pipe_vec_op_q[i], reg_ena, AuxType'('0))
133137
end
134138
// Output stage: assign selected pipe outputs to signals for later use
135139
assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
@@ -173,27 +177,45 @@ module fpnew_divsqrt_multi #(
173177
logic op_starting; // high in the cycle a new operation starts
174178
logic out_valid, out_ready; // output handshake with downstream
175179
logic unit_busy; // valid data in flight
180+
logic simd_synch_done;
176181
// FSM states
177182
typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
178183
fsm_state_e state_q, state_d;
179184

180-
// Ready synch with other lanes
181-
// Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes
182-
assign divsqrt_ready_o = in_ready;
183-
// Upstream ready comes from sanitization FSM, and it is synched among all the lanes
184-
assign inp_pipe_ready[NUM_INP_REGS] = simd_synch_rdy_i;
185+
// Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr.
186+
assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i;
187+
assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i;
188+
assign op_starting = div_valid | sqrt_valid;
189+
190+
// Hold additional information while the operation is in progress
191+
logic result_is_fp8_q;
192+
TagType result_tag_q;
193+
logic result_mask_q;
194+
AuxType result_aux_q;
195+
logic result_vec_op_q;
196+
197+
// Fill the registers every time a valid operation arrives (load FF, active low asynch rst)
198+
`FFL(result_is_fp8_q, input_is_fp8, op_starting, '0)
199+
`FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0)
200+
`FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0)
201+
`FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0)
202+
`FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0)
203+
204+
// Wait for other lanes only if the operation is vectorial
205+
assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q;
185206

186207
// Valid synch with other lanes
187208
// When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes
188209
// As soon as all the lanes are over, we can clear this FF and start with a new operation
189-
`FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done_i, 1'b0, clk_i, rst_ni);
210+
`FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni);
190211
// Tell the other units that this unit has finished now or in the past
191-
assign divsqrt_done_o = unit_done_q | unit_done;
212+
assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q;
192213

193-
// Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr.
194-
assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i;
195-
assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i;
196-
assign op_starting = div_valid | sqrt_valid;
214+
// Ready synch with other lanes
215+
// Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes
216+
assign divsqrt_ready_o = in_ready;
217+
// Upstream ready comes from sanitization FSM, and it is synched among all the lanes
218+
assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? simd_synch_rdy_i : in_ready;
197219

198220
// FSM to safely apply and receive data from DIVSQRT unit
199221
always_comb begin : flag_fsm
@@ -215,13 +237,13 @@ module fpnew_divsqrt_multi #(
215237
BUSY: begin
216238
unit_busy = 1'b1; // data in flight
217239
// If all the lanes are done with processing, or a non-vectorial operation has finished in this unit
218-
if (simd_synch_done_i) begin
240+
if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin
219241
out_valid = 1'b1; // try to commit result downstream
220242
// If downstream accepts our result
221243
if (out_ready) begin
222244
state_d = IDLE; // we anticipate going back to idling..
245+
in_ready = 1'b1; // we acknowledge the instruction
223246
if (in_valid_q && unit_ready) begin // ..unless new work comes in
224-
in_ready = 1'b1; // we acknowledge the instruction
225247
state_d = BUSY; // and stay busy with it
226248
end
227249
// Otherwise if downstream is not ready for the result
@@ -258,18 +280,6 @@ module fpnew_divsqrt_multi #(
258280
// FSM status register (asynch active low reset)
259281
`FF(state_q, state_d, IDLE)
260282

261-
// Hold additional information while the operation is in progress
262-
logic result_is_fp8_q;
263-
TagType result_tag_q;
264-
logic result_mask_q;
265-
AuxType result_aux_q;
266-
267-
// Fill the registers everytime a valid operation arrives (load FF, active low asynch rst)
268-
`FFL(result_is_fp8_q, input_is_fp8, op_starting, '0)
269-
`FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0)
270-
`FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0)
271-
`FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0)
272-
273283
// -----------------
274284
// DIVSQRT instance
275285
// -----------------
@@ -298,9 +308,9 @@ module fpnew_divsqrt_multi #(
298308
// Adjust result width and fix FP8
299309
assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result;
300310

301-
// Hold the result when one lane has finished execution, except when all the lanes finish together
302-
// and the result can be accepted downstream
303-
assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready);
311+
// Hold the result when one lane has finished execution, except when all the lanes finish together,
312+
// or the operation is not vectorial, and the result can be accepted downstream
313+
assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready);
304314
// The Hold register (load, no reset)
305315
`FFLNR(held_result_q, adjusted_result, hold_en, clk_i)
306316
`FFLNR(held_status_q, unit_status, hold_en, clk_i)

src/fpnew_opgroup_multifmt_slice.sv

Lines changed: 20 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,8 @@ or set Features.FpFmtMask to support only FP32");
137137
// CONV passes one operand for assembly after the unit: opC for cpk, opB for others
138138
if (OpGroup == fpnew_pkg::CONV) begin : conv_target
139139
assign conv_target_d = dst_is_cpk ? operands_i[2] : operands_i[1];
140+
end else begin : not_conv_target
141+
assign conv_target_d = '0;
140142
end
141143

142144
// For 2-operand units, prepare boxing info
@@ -302,6 +304,7 @@ or set Features.FpFmtMask to support only FP32");
302304
.tag_i,
303305
.mask_i ( simd_mask_i[lane] ),
304306
.aux_i ( aux_data ),
307+
.vectorial_op_i ( vectorial_op ),
305308
.in_valid_i ( in_valid ),
306309
.in_ready_o ( lane_in_ready[lane] ),
307310
.divsqrt_done_o ( divsqrt_done[lane] ),
@@ -373,7 +376,13 @@ or set Features.FpFmtMask to support only FP32");
373376
end else begin : inactive_lane
374377
assign lane_out_valid[lane] = 1'b0; // unused lane
375378
assign lane_in_ready[lane] = 1'b0; // unused lane
376-
assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box
379+
assign lane_aux[lane] = 1'b0; // unused lane
380+
assign lane_masks[lane] = 1'b1; // unused lane
381+
assign lane_tags[lane] = 1'b0; // unused lane
382+
assign divsqrt_done[lane] = 1'b0; // unused lane
383+
assign divsqrt_ready[lane] = 1'b0; // unused lane
384+
assign lane_ext_bit[lane] = 1'b1; // NaN-box unused lane
385+
assign local_result = {(LANE_WIDTH){lane_ext_bit[0]}}; // sign-extend/nan box
377386
assign lane_status[lane] = '0;
378387
assign lane_busy[lane] = 1'b0;
379388
end
@@ -420,10 +429,17 @@ or set Features.FpFmtMask to support only FP32");
420429
assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]};
421430
end
422431

423-
// Mute int results if unused
424-
for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled
432+
for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : extend_or_mute_int_result
433+
// Mute int results if unused
425434
if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result
426435
assign ifmt_slice_result[ifmt] = '0;
436+
437+
// Extend slice result if needed
438+
end else begin : extend_int_result
439+
// Set up some constants
440+
localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
441+
if (NUM_LANES*INT_WIDTH < Width)
442+
assign ifmt_slice_result[ifmt][Width-1:NUM_LANES*INT_WIDTH] = '0;
427443
end
428444
end
429445

@@ -465,6 +481,7 @@ or set Features.FpFmtMask to support only FP32");
465481
assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs];
466482
end else begin : no_conv
467483
assign {result_vec_op, result_is_cpk} = '0;
484+
assign conv_target_q = '0;
468485
end
469486

470487
if (PulpDivsqrt) begin

0 commit comments

Comments
 (0)