[Cryptech-Commits] [core/math/modexpng] 53/92: Small change to the reductor module to try to get past 180 MHz. Previously BRAM outputs were going directly into a LUT-based ternary adder which was causing timing problems. Added a layer of flip-flops, so instead of BRAM -> LUT -> FF we have BRAM -> FF -> LUT -> FF. This increases core latency by (number_of_supporting_modular_multiplications + number_of_exponent_bits) ticks.
git at cryptech.is
git at cryptech.is
Sat Mar 14 18:19:32 UTC 2020
This is an automated email from the git hooks/post-receive script.
paul at psgd.org pushed a commit to branch master
in repository core/math/modexpng.
commit 157d5dedd90fede9ea392e2aeda6562d839a30e1
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Wed Nov 20 14:34:36 2019 +0300
Small change to the reductor module to try to get past 180 MHz. Previously BRAM
outputs were going directly into a LUT-based ternary adder which was causing
timing problems. Added a layer of flip-flops, so instead of BRAM -> LUT -> FF
we have BRAM -> FF -> LUT -> FF. This increases core latency by
(number_of_supporting_modular_multiplications + number_of_exponent_bits) ticks.
---
rtl/modexpng_reductor.v | 168 ++++++++++++++++++++++++------------------------
1 file changed, 83 insertions(+), 85 deletions(-)
diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v
index dd9cfd9..7404eba 100644
--- a/rtl/modexpng_reductor.v
+++ b/rtl/modexpng_reductor.v
@@ -174,116 +174,114 @@ module modexpng_reductor
//
- // Pipeline (Delay Match)
+ // Pipeline rd_wide_*
//
- reg rcmb_xy_valid_dly1 = 1'b0;
- reg rcmb_xy_valid_dly2 = 1'b0;
- reg rcmb_xy_valid_dly3 = 1'b0;
+ reg [WORD_EXT_W -1:0] rd_wide_x_din_aux_pipe;
+ reg [WORD_EXT_W -1:0] rd_wide_y_din_aux_pipe;
- reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1;
- reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2;
- reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3;
+ always @(posedge clk)
+ //
+ {rd_wide_x_din_aux_pipe, rd_wide_y_din_aux_pipe} <=
+ {rd_wide_x_din_aux, rd_wide_y_din_aux } ;
- reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly1;
- reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly2;
- reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly3;
- reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly1;
- reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly2;
- reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly3;
- reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly1;
- reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly2;
- reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly3;
+ //
+ // Delay rcmb_final_* to match rd_wide_*
+ //
+ reg rcmb_xy_valid_dly1_x = 1'b0;
+ reg rcmb_xy_valid_dly2_x = 1'b0;
+ reg rcmb_xy_valid_dly3_x = 1'b0;
+ reg rcmb_xy_valid_dly4_x = 1'b0;
+
+ reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1_x;
+ reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2_x;
+ reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3_x;
+ reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly4_x;
+
+ reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly1_x;
+ reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly2_x;
+ reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly3_x;
+ reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly4_x;
+
+ reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly1_x;
+ reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly2_x;
+ reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly3_x;
+ reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly4_x;
+
+ reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly1_x;
+ reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly2_x;
+ reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly3_x;
+ reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly4_x;
always @(posedge clk or negedge rst_n)
//
- if (!rst_n) begin
- rcmb_xy_valid_dly1 <= 1'b0;
- rcmb_xy_valid_dly2 <= 1'b0;
- rcmb_xy_valid_dly3 <= 1'b0;
- end else begin
- rcmb_xy_valid_dly1 <= rcmb_final_xy_valid;
- rcmb_xy_valid_dly2 <= rcmb_xy_valid_dly1;
- rcmb_xy_valid_dly3 <= rcmb_xy_valid_dly2;
- end
+ if (!rst_n) {rcmb_xy_valid_dly4_x, rcmb_xy_valid_dly3_x, rcmb_xy_valid_dly2_x, rcmb_xy_valid_dly1_x} <= 4'b0000;
+ else {rcmb_xy_valid_dly4_x, rcmb_xy_valid_dly3_x, rcmb_xy_valid_dly2_x, rcmb_xy_valid_dly1_x} <=
+ {rcmb_xy_valid_dly3_x, rcmb_xy_valid_dly2_x, rcmb_xy_valid_dly1_x, rcmb_final_xy_valid } ;
always @(posedge clk) begin
//
- if (rcmb_final_xy_valid) begin
- rcmb_xy_bank_dly1 <= rcmb_final_xy_bank;
- rcmb_xy_addr_dly1 <= rcmb_final_xy_addr;
- rcmb_x_dout_dly1 <= rcmb_final_x_din;
- rcmb_y_dout_dly1 <= rcmb_final_y_din;
- end
- //
- if (rcmb_xy_valid_dly1) begin
- rcmb_xy_bank_dly2 <= rcmb_xy_bank_dly1;
- rcmb_xy_addr_dly2 <= rcmb_xy_addr_dly1;
- rcmb_x_dout_dly2 <= rcmb_x_dout_dly1;
- rcmb_y_dout_dly2 <= rcmb_y_dout_dly1;
- end
- //
- if (rcmb_xy_valid_dly2) begin
- rcmb_xy_bank_dly3 <= rcmb_xy_bank_dly2;
- rcmb_xy_addr_dly3 <= rcmb_xy_addr_dly2;
- rcmb_x_dout_dly3 <= rcmb_x_dout_dly2;
- rcmb_y_dout_dly3 <= rcmb_y_dout_dly2;
- end
+ if (rcmb_final_xy_valid) {rcmb_xy_bank_dly1_x, rcmb_xy_addr_dly1_x, rcmb_x_dout_dly1_x, rcmb_y_dout_dly1_x} <=
+ {rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_din, rcmb_final_y_din } ;
+ if (rcmb_xy_valid_dly1_x) {rcmb_xy_bank_dly2_x, rcmb_xy_addr_dly2_x, rcmb_x_dout_dly2_x, rcmb_y_dout_dly2_x} <=
+ {rcmb_xy_bank_dly1_x, rcmb_xy_addr_dly1_x, rcmb_x_dout_dly1_x, rcmb_y_dout_dly1_x} ;
+ if (rcmb_xy_valid_dly2_x) {rcmb_xy_bank_dly3_x, rcmb_xy_addr_dly3_x, rcmb_x_dout_dly3_x, rcmb_y_dout_dly3_x} <=
+ {rcmb_xy_bank_dly2_x, rcmb_xy_addr_dly2_x, rcmb_x_dout_dly2_x, rcmb_y_dout_dly2_x} ;
+ if (rcmb_xy_valid_dly3_x) {rcmb_xy_bank_dly4_x, rcmb_xy_addr_dly4_x, rcmb_x_dout_dly4_x, rcmb_y_dout_dly4_x} <=
+ {rcmb_xy_bank_dly3_x, rcmb_xy_addr_dly3_x, rcmb_x_dout_dly3_x, rcmb_y_dout_dly3_x} ;
//
end
//
+ // LSB Carry Logic
//
- //
- reg [ CARRY_W -1:0] rcmb_x_lsb_carry;
- reg [ WORD_W -1:0] rcmb_x_lsb_dummy;
- reg [WORD_EXT_W -1:0] rcmb_x_lsb_dout;
-
- reg [ CARRY_W -1:0] rcmb_y_lsb_carry;
- reg [ WORD_W -1:0] rcmb_y_lsb_dummy;
- reg [WORD_EXT_W -1:0] rcmb_y_lsb_dout;
+ reg [ CARRY_W -1:0] rcmb_x_lsb_carry;
+ reg [ CARRY_W -1:0] rcmb_y_lsb_carry;
+ reg [ WORD_W -1:0] rcmb_x_lsb_dummy;
+ reg [ WORD_W -1:0] rcmb_y_lsb_dummy;
+ wire [WORD_EXT_W -1:0] rcmb_x_lsb_carry_ext = {WORD_ZERO, rcmb_x_lsb_carry};
+ wire [WORD_EXT_W -1:0] rcmb_y_lsb_carry_ext = {WORD_ZERO, rcmb_y_lsb_carry};
+
+ task calc_rcmb_xy_lsb_carry;
+ begin
+ {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly4_x + rd_wide_x_din_aux_pipe + rcmb_x_lsb_carry_ext;
+ {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly4_x + rd_wide_y_din_aux_pipe + rcmb_y_lsb_carry_ext;
+ end
+ endtask
//
- // Carry Computation
+ // LSB Carry Computation
//
always @(posedge clk)
//
if (ena) begin
+ //
rcmb_x_lsb_carry <= CARRY_ZERO;
rcmb_y_lsb_carry <= CARRY_ZERO;
- end else if (rcmb_xy_valid_dly3)
//
- case (rcmb_xy_bank_dly3)
-
- BANK_RCMB_ML: begin
- {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry;
- {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry;
- end
-
- BANK_RCMB_MH:
- if (rcmb_xy_addr_dly3 == OP_ADDR_ZERO) begin
- {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry;
- {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry;
- end
-
+ end else if (rcmb_xy_valid_dly4_x)
+ //
+ case (rcmb_xy_bank_dly4_x)
+ BANK_RCMB_ML: calc_rcmb_xy_lsb_carry;
+ BANK_RCMB_MH: if (rcmb_xy_addr_dly4_x == OP_ADDR_ZERO) calc_rcmb_xy_lsb_carry;
endcase
//
+ // MSB Sum Logic
//
- //
- wire [WORD_EXT_W -1:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_din_aux;
- wire [WORD_EXT_W -1:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_din_aux;
+ wire [WORD_EXT_W -1:0] sum_rdct_x = rcmb_x_dout_dly4_x + rd_wide_x_din_aux_pipe;
+ wire [WORD_EXT_W -1:0] sum_rdct_y = rcmb_y_dout_dly4_x + rd_wide_y_din_aux_pipe;
- wire [WORD_EXT_W -1:0] sum_rdct_x_carry = sum_rdct_x + {WORD_ZERO, rcmb_x_lsb_carry};
- wire [WORD_EXT_W -1:0] sum_rdct_y_carry = sum_rdct_y + {WORD_ZERO, rcmb_y_lsb_carry};
+ wire [WORD_EXT_W -1:0] sum_rdct_x_carry = sum_rdct_x + rcmb_x_lsb_carry_ext;
+ wire [WORD_EXT_W -1:0] sum_rdct_y_carry = sum_rdct_y + rcmb_y_lsb_carry_ext;
//
- //
+ // MSB Sum Computation
//
always @(posedge clk or negedge rst_n)
//
@@ -295,22 +293,22 @@ module modexpng_reductor
clear_rdct_wide;
clear_rdct_narrow;
//
- if (rcmb_xy_valid_dly3)
+ if (rcmb_xy_valid_dly4_x)
//
- case (rcmb_xy_bank_dly3)
+ case (rcmb_xy_bank_dly4_x)
BANK_RCMB_MH:
- if (rcmb_xy_addr_dly3 == OP_ADDR_ONE) begin
+ if (rcmb_xy_addr_dly4_x == OP_ADDR_ONE) begin
set_rdct_wide (sel_wide_out, OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry);
set_rdct_narrow(sel_narrow_out, OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry);
- end else if (rcmb_xy_addr_dly3 > OP_ADDR_ONE) begin
- set_rdct_wide (sel_wide_out, rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);
- set_rdct_narrow(sel_narrow_out, rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);
+ end else if (rcmb_xy_addr_dly4_x > OP_ADDR_ONE) begin
+ set_rdct_wide (sel_wide_out, rcmb_xy_addr_dly4_x - 1'b1, sum_rdct_x, sum_rdct_y);
+ set_rdct_narrow(sel_narrow_out, rcmb_xy_addr_dly4_x - 1'b1, sum_rdct_x, sum_rdct_y);
end
BANK_RCMB_EXT: begin
- set_rdct_wide (sel_wide_out, word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
- set_rdct_narrow(sel_narrow_out, word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
+ set_rdct_wide (sel_wide_out, word_index_last, rcmb_x_dout_dly4_x, rcmb_y_dout_dly4_x);
+ set_rdct_narrow(sel_narrow_out, word_index_last, rcmb_x_dout_dly4_x, rcmb_y_dout_dly4_x);
end
endcase
@@ -335,10 +333,10 @@ module modexpng_reductor
always @(posedge clk or negedge rst_n)
//
- if (!rst_n) busy_next <= 1'b0;
+ if (!rst_n) busy_next <= 1'b0;
else begin
- if (rdy && ena) busy_next <= 1'b1;
- if (!rdy && rcmb_xy_valid_dly3 && (rcmb_xy_bank_dly3 == BANK_RCMB_EXT)) busy_next <= 1'b0;
+ if (rdy && ena) busy_next <= 1'b1;
+ if (!rdy && rcmb_xy_valid_dly4_x && (rcmb_xy_bank_dly4_x == BANK_RCMB_EXT)) busy_next <= 1'b0;
end
More information about the Commits
mailing list