[Cryptech-Commits] [user/shatov/modexpng] 11/14: New partial product recombination algorithm.
git at cryptech.is
git at cryptech.is
Tue Feb 11 13:04:33 UTC 2020
This is an automated email from the git hooks/post-receive script.
meisterpaul1 at yandex.ru pushed a commit to branch master in repository user/shatov/modexpng.
commit 77d11487d8987e13403f426537dc9bc59141f3f3
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Mon Feb 3 22:49:25 2020 +0300
New partial product recombination algorithm.
---
rtl/modexpng_recombinator_block.v | 219 ++++++++++++++++++++++----------------
rtl/modexpng_recombinator_cell.v | 188 +++++++++++++++++++++++++-------
2 files changed, 278 insertions(+), 129 deletions(-)
diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
index cc89db0..e3cb50f 100644
--- a/rtl/modexpng_recombinator_block.v
+++ b/rtl/modexpng_recombinator_block.v
@@ -153,7 +153,7 @@ module modexpng_recombinator_block
// index - latch
reg [MAC_INDEX_W-1:0] xy_index_latch_lsb;
- // purge - index
+ // purge - latch
reg xy_purge_latch_lsb = 1'b0;
reg xy_purge_latch_msb = 1'b0;
@@ -496,21 +496,25 @@ module modexpng_recombinator_block
reg rcmb_xy_lsb_ce = 1'b0;
reg rcmb_xy_lsb_ce_aux = 1'b0;
+ reg rcmb_xy_lsb_ce_aux_dly = 1'b0;
reg [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000;
- wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0];
+ wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0];
+ wire rcmb_xy_lsb_ce_combined_ext = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0] | rcmb_xy_lsb_ce_aux_dly;
reg rcmb_xy_lsb_clr;
wire rcmb_xy_lsb_cry = !xy_valid_latch_lsb && rcmb_xy_lsb_ce_purge[1];
- reg [ MAC_W -1:0] rcmb_x_lsb_din;
- reg [ MAC_W -1:0] rcmb_y_lsb_din;
- wire [WORD_W -1:0] rcmb_x_lsb_dout;
- wire [WORD_W -1:0] rcmb_y_lsb_dout;
- wire [WORD_EXT_W -2:0] rcmb_x_lsb_dout_ext;
- wire [WORD_EXT_W -2:0] rcmb_y_lsb_dout_ext;
+ reg [ MAC_W -1:0] rcmb_x_lsb_din;
+ reg [ MAC_W -1:0] rcmb_y_lsb_din;
+ wire [WORD_W -1:0] rcmb_x_lsb_dout;
+ wire [WORD_W -1:0] rcmb_y_lsb_dout;
+ wire [WORD_W :0] rcmb_x_lsb_doutw;
+ wire [WORD_W :0] rcmb_y_lsb_doutw;
reg rcmb_xy_msb_ce = 1'b0;
reg [ 1:0] rcmb_xy_msb_ce_purge = 2'b00;
- wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0];
+ reg rcmb_xy_msb_ce_purge0_rectangle_dly = 1'b0;
+ wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0];
+ wire rcmb_xy_msb_ce_combined_ext = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0] | rcmb_xy_msb_ce_purge0_rectangle_dly;
reg rcmb_xy_msb_clr;
reg [ MAC_W -1:0] rcmb_x_msb_din;
@@ -518,42 +522,45 @@ module modexpng_recombinator_block
wire [WORD_W -1:0] rcmb_x_msb_dout;
wire [WORD_W -1:0] rcmb_y_msb_dout;
- modexpng_recombinator_cell recomb_x_lsb
+ always @(posedge clk) rcmb_xy_lsb_ce_aux_dly <= rcmb_xy_lsb_ce_aux;
+ always @(posedge clk) rcmb_xy_msb_ce_purge0_rectangle_dly <= rcmb_mode == RCMB_MODE_RECTANGLE ? rcmb_xy_msb_ce_purge[0] : 1'b0;
+
+ modexpng_recombinator_cell recomb_x_lsb_new
(
- .clk (clk),
- .ce (rcmb_xy_lsb_ce_combined),
- .clr (rcmb_xy_lsb_clr),
- .din (rcmb_x_lsb_din),
- .dout (rcmb_x_lsb_dout),
- .dout_ext (rcmb_x_lsb_dout_ext)
+ .clk (clk),
+ .ce (rcmb_xy_lsb_ce_combined_ext),
+ .clr (rcmb_xy_lsb_clr),
+ .din (rcmb_x_lsb_din),
+ .dout (rcmb_x_lsb_dout),
+ .doutw (rcmb_x_lsb_doutw)
);
- modexpng_recombinator_cell recomb_y_lsb
+ modexpng_recombinator_cell recomb_y_lsb_new
(
- .clk (clk),
- .ce (rcmb_xy_lsb_ce_combined),
- .clr (rcmb_xy_lsb_clr),
- .din (rcmb_y_lsb_din),
- .dout (rcmb_y_lsb_dout),
- .dout_ext (rcmb_y_lsb_dout_ext)
+ .clk (clk),
+ .ce (rcmb_xy_lsb_ce_combined_ext),
+ .clr (rcmb_xy_lsb_clr),
+ .din (rcmb_y_lsb_din),
+ .dout (rcmb_y_lsb_dout),
+ .doutw (rcmb_y_lsb_doutw)
);
- modexpng_recombinator_cell recomb_x_msb
+ modexpng_recombinator_cell recomb_x_msb_new
(
- .clk (clk),
- .ce (rcmb_xy_msb_ce_combined),
- .clr (rcmb_xy_msb_clr),
- .din (rcmb_x_msb_din),
- .dout (rcmb_x_msb_dout),
- .dout_ext ()
+ .clk (clk),
+ .ce (rcmb_xy_msb_ce_combined_ext),
+ .clr (rcmb_xy_msb_clr),
+ .din (rcmb_x_msb_din),
+ .dout (rcmb_x_msb_dout),
+ .doutw ()
);
- modexpng_recombinator_cell recomb_y_msb
+ modexpng_recombinator_cell recomb_y_msb_new
(
- .clk (clk),
- .ce (rcmb_xy_msb_ce_combined),
- .clr (rcmb_xy_msb_clr),
- .din (rcmb_y_msb_din),
- .dout (rcmb_y_msb_dout),
- .dout_ext ()
+ .clk (clk),
+ .ce (rcmb_xy_msb_ce_combined_ext),
+ .clr (rcmb_xy_msb_clr),
+ .din (rcmb_y_msb_din),
+ .dout (rcmb_y_msb_dout),
+ .doutw ()
);
always @(posedge clk) begin
@@ -704,7 +711,6 @@ module modexpng_recombinator_block
xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[NUM_MULTS-1:1]};
end
//
- //
for (i=1; i<6; i=i+1) begin
xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
xy_aux_lsb_adv [i] <= xy_aux_lsb_adv [i+1];
@@ -753,17 +759,52 @@ module modexpng_recombinator_block
end
- reg rcmb_xy_lsb_ce_combined_dly = 1'b0;
- reg rcmb_xy_msb_ce_combined_dly = 1'b0;
+ reg rcmb_xy_lsb_ce_combined_dly1 = 1'b0;
+ reg rcmb_xy_msb_ce_combined_dly1 = 1'b0;
+
+ reg rcmb_xy_lsb_ce_combined_dly2 = 1'b0;
+ reg rcmb_xy_msb_ce_combined_dly2 = 1'b0;
+
+ reg rcmb_xy_lsb_ce_combined_dly3 = 1'b0;
+ reg rcmb_xy_msb_ce_combined_dly3 = 1'b0;
+
+ reg rcmb_xy_lsb_ce_combined_dly4 = 1'b0;
+ reg rcmb_xy_msb_ce_combined_dly4 = 1'b0;
+
+ reg rcmb_xy_lsb_ce_combined_dly5 = 1'b0;
+ reg rcmb_xy_msb_ce_combined_dly5 = 1'b0;
+
+ reg rcmb_xy_lsb_ce_combined_dly6 = 1'b0;
+ reg rcmb_xy_msb_ce_combined_dly6 = 1'b0;
always @(posedge clk or negedge rst_n)
//
if (!rst_n) begin
- rcmb_xy_lsb_ce_combined_dly <= 1'b0;
- rcmb_xy_msb_ce_combined_dly <= 1'b0;
+ rcmb_xy_lsb_ce_combined_dly1 <= 1'b0;
+ rcmb_xy_msb_ce_combined_dly1 <= 1'b0;
+ rcmb_xy_lsb_ce_combined_dly2 <= 1'b0;
+ rcmb_xy_msb_ce_combined_dly2 <= 1'b0;
+ rcmb_xy_lsb_ce_combined_dly3 <= 1'b0;
+ rcmb_xy_msb_ce_combined_dly3 <= 1'b0;
+ rcmb_xy_lsb_ce_combined_dly4 <= 1'b0;
+ rcmb_xy_msb_ce_combined_dly4 <= 1'b0;
+ rcmb_xy_lsb_ce_combined_dly5 <= 1'b0;
+ rcmb_xy_msb_ce_combined_dly5 <= 1'b0;
+ rcmb_xy_lsb_ce_combined_dly6 <= 1'b0;
+ rcmb_xy_msb_ce_combined_dly6 <= 1'b0;
end else begin
- rcmb_xy_lsb_ce_combined_dly <= rcmb_xy_lsb_ce_combined;
- rcmb_xy_msb_ce_combined_dly <= rcmb_xy_msb_ce_combined;
+ rcmb_xy_lsb_ce_combined_dly1 <= rcmb_xy_lsb_ce_combined;
+ rcmb_xy_msb_ce_combined_dly1 <= rcmb_xy_msb_ce_combined;
+ rcmb_xy_lsb_ce_combined_dly2 <= rcmb_xy_lsb_ce_combined_dly1;
+ rcmb_xy_msb_ce_combined_dly2 <= rcmb_xy_msb_ce_combined_dly1;
+ rcmb_xy_lsb_ce_combined_dly3 <= rcmb_xy_lsb_ce_combined_dly2;
+ rcmb_xy_msb_ce_combined_dly3 <= rcmb_xy_msb_ce_combined_dly2;
+ rcmb_xy_lsb_ce_combined_dly4 <= rcmb_xy_lsb_ce_combined_dly3;
+ rcmb_xy_msb_ce_combined_dly4 <= rcmb_xy_msb_ce_combined_dly3;
+ rcmb_xy_lsb_ce_combined_dly5 <= rcmb_xy_lsb_ce_combined_dly4;
+ rcmb_xy_msb_ce_combined_dly5 <= rcmb_xy_msb_ce_combined_dly4;
+ rcmb_xy_lsb_ce_combined_dly6 <= rcmb_xy_lsb_ce_combined_dly5;
+ rcmb_xy_msb_ce_combined_dly6 <= rcmb_xy_msb_ce_combined_dly5;
end
reg rcmb_xy_lsb_valid = 1'b0;
@@ -775,8 +816,8 @@ module modexpng_recombinator_block
rcmb_xy_lsb_valid <= 1'b0;
rcmb_xy_msb_valid <= 1'b0;
end else begin
- rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly;
- rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly;
+ rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly6;
+ rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly6;
end
@@ -811,17 +852,17 @@ module modexpng_recombinator_block
reg cnt_lsb_wrapped;
reg cnt_msb_wrapped;
- reg [31:0] rcmb_xy_msb_delay_0;
- reg [31:0] rcmb_xy_msb_delay_1;
- reg [31:0] rcmb_xy_msb_delay_2;
+ reg [WORD_W-1:0] rcmb_xy_msb_dly_0_x, rcmb_xy_msb_dly_0_y;
+ reg [WORD_W-1:0] rcmb_xy_msb_dly_1_x, rcmb_xy_msb_dly_1_y;
+ reg [WORD_W-1:0] rcmb_xy_msb_dly_2_x, rcmb_xy_msb_dly_2_y;
- reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_0 = OP_ADDR_ZERO;
- reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_1 = OP_ADDR_ZERO;
- reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_2 = OP_ADDR_ZERO;
+ reg [OP_ADDR_W -1:0] rcmb_msb_cnt_dly_0 = OP_ADDR_ZERO;
+ reg [OP_ADDR_W -1:0] rcmb_msb_cnt_dly_1 = OP_ADDR_ZERO;
+ reg [OP_ADDR_W -1:0] rcmb_msb_cnt_dly_2 = OP_ADDR_ZERO;
- reg rcmb_msb_flag_delay_0 = 1'b0;
- reg rcmb_msb_flag_delay_1 = 1'b0;
- reg rcmb_msb_flag_delay_2 = 1'b0;
+ reg rcmb_msb_flag_dly_0 = 1'b0;
+ reg rcmb_msb_flag_dly_1 = 1'b0;
+ reg rcmb_msb_flag_dly_2 = 1'b0;
//
@@ -870,24 +911,24 @@ module modexpng_recombinator_block
input flag;
begin
//
- rcmb_xy_msb_delay_0 <= {dout_y, dout_x};
- rcmb_xy_msb_delay_1 <= rcmb_xy_msb_delay_0;
- rcmb_xy_msb_delay_2 <= rcmb_xy_msb_delay_1;
+ {rcmb_xy_msb_dly_0_x, rcmb_xy_msb_dly_0_y} <= {dout_x, dout_y};
+ {rcmb_xy_msb_dly_1_x, rcmb_xy_msb_dly_1_y} <= {rcmb_xy_msb_dly_0_x, rcmb_xy_msb_dly_0_y};
+ {rcmb_xy_msb_dly_2_x, rcmb_xy_msb_dly_2_y} <= {rcmb_xy_msb_dly_1_x, rcmb_xy_msb_dly_1_y};
//
- rcmb_msb_cnt_delay_0 <= cnt;
- rcmb_msb_cnt_delay_1 <= rcmb_msb_cnt_delay_0;
- rcmb_msb_cnt_delay_2 <= rcmb_msb_cnt_delay_1;
+ rcmb_msb_cnt_dly_0 <= cnt;
+ rcmb_msb_cnt_dly_1 <= rcmb_msb_cnt_dly_0;
+ rcmb_msb_cnt_dly_2 <= rcmb_msb_cnt_dly_1;
//
- rcmb_msb_flag_delay_0 <= flag;
- rcmb_msb_flag_delay_1 <= rcmb_msb_flag_delay_0;
- rcmb_msb_flag_delay_2 <= rcmb_msb_flag_delay_1;
+ rcmb_msb_flag_dly_0 <= flag;
+ rcmb_msb_flag_dly_1 <= rcmb_msb_flag_dly_0;
+ rcmb_msb_flag_dly_2 <= rcmb_msb_flag_dly_1;
//
end
endtask
task _update_rcmb_msb_carry;
- input [WORD_W -1:0] dout_x;
- input [WORD_W -1:0] dout_y;
+ input [WORD_W-1:0] dout_x;
+ input [WORD_W-1:0] dout_y;
begin
rcmb_x_msb_carry_0 <= dout_x;
rcmb_y_msb_carry_0 <= dout_y;
@@ -1031,8 +1072,8 @@ module modexpng_recombinator_block
always @(posedge clk)
//
if (ena) begin
- clr_cnt_lsb();
- clr_cnt_msb();
+ clr_cnt_lsb;
+ clr_cnt_msb;
end else if (!rdy)
//
case (rcmb_mode)
@@ -1040,22 +1081,22 @@ module modexpng_recombinator_block
RCMB_MODE_TRIANGLE: recombine_triangle();
RCMB_MODE_RECTANGLE: recombine_rectangle();
endcase
-
-
+
+
//
// Padding
//
wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_pad = {CARRY_ZERO, rcmb_x_lsb_dout};
wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_pad = {CARRY_ZERO, rcmb_y_lsb_dout};
- wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_ext_pad = {1'b0, rcmb_x_lsb_dout_ext};
- wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_ext_pad = {1'b0, rcmb_y_lsb_dout_ext};
+ wire [WORD_EXT_W-1:0] rcmb_x_lsb_doutw_pad = {1'b0, rcmb_x_lsb_doutw};
+ wire [WORD_EXT_W-1:0] rcmb_y_lsb_doutw_pad = {1'b0, rcmb_y_lsb_doutw};
wire [WORD_EXT_W-1:0] rcmb_x_msb_dout_pad = {CARRY_ZERO, rcmb_x_msb_dout};
wire [WORD_EXT_W-1:0] rcmb_y_msb_dout_pad = {CARRY_ZERO, rcmb_y_msb_dout};
- wire [WORD_EXT_W-1:0] rcmb_x_msb_delay_2_pad = {CARRY_ZERO, rcmb_xy_msb_delay_2[15: 0]};
- wire [WORD_EXT_W-1:0] rcmb_y_msb_delay_2_pad = {CARRY_ZERO, rcmb_xy_msb_delay_2[31:16]};
+ wire [WORD_EXT_W-1:0] rcmb_xy_msb_dly_2_x_pad = {CARRY_ZERO, rcmb_xy_msb_dly_2_x};
+ wire [WORD_EXT_W-1:0] rcmb_xy_msb_dly_2_y_pad = {CARRY_ZERO, rcmb_xy_msb_dly_2_y};
//
@@ -1079,25 +1120,25 @@ module modexpng_recombinator_block
//
case (rcmb_xy_valid)
//
- 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
+ 2'b00: if (rcmb_msb_flag_dly_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_dly_2, rcmb_xy_msb_dly_2_x_pad, rcmb_xy_msb_dly_2_y_pad);
else clear_wide;
//
- 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+ 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
//
2'b10: if (cnt_msb < OP_ADDR_TWO) clear_wide;
- else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
+ else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
//
- 2'b11: if (!cnt_lsb_wrapped) set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+ 2'b11: if (!cnt_lsb_wrapped) set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
else begin
- if (cnt_lsb == OP_ADDR_ZERO) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
- else set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_ext_pad, rcmb_y_lsb_dout_ext_pad);
+ if (cnt_lsb == OP_ADDR_ZERO) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+ else set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_doutw_pad, rcmb_y_lsb_doutw_pad);
end
//
endcase
//
case (rcmb_xy_valid)
//
- 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
+ 2'b00: if (rcmb_msb_flag_dly_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
//
2'b01: if (rcmb_xy_lsb_cry) pop_rcmb_msb_carry;
//
@@ -1120,7 +1161,7 @@ module modexpng_recombinator_block
begin
//
case (rcmb_xy_valid)
- 2'b01: inc_cnt_lsb();
+ 2'b01: inc_cnt_lsb;
endcase
//
case (rcmb_xy_valid)
@@ -1156,7 +1197,7 @@ module modexpng_recombinator_block
//
case (rcmb_xy_valid)
//
- 2'b00: if (rcmb_msb_flag_delay_2) set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
+ 2'b00: if (rcmb_msb_flag_dly_2) set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_dly_2, rcmb_xy_msb_dly_2_x_pad, rcmb_xy_msb_dly_2_y_pad);
else clear_rdct;
//
2'b01: set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
@@ -1166,14 +1207,14 @@ module modexpng_recombinator_block
else set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
end else set_rdct(BANK_RCMB_EXT, OP_ADDR_ZERO, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
//
- 2'b11: if (cnt_lsb == OP_ADDR_ZERO) set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
- else set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_ext_pad, rcmb_y_lsb_dout_ext_pad);
+ 2'b11: if (cnt_lsb == OP_ADDR_ZERO) set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+ else set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_doutw_pad, rcmb_y_lsb_doutw_pad);
//
endcase
//
case (rcmb_xy_valid)
//
- 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
+ 2'b00: if (rcmb_msb_flag_dly_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
//
2'b01: if (rcmb_xy_lsb_cry) pop_rcmb_msb_carry;
//
@@ -1200,9 +1241,9 @@ module modexpng_recombinator_block
else if (!rdy_reg)
//
case (rcmb_mode)
- RCMB_MODE_SQUARE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_delay_2) rdy_adv <= ~rcmb_msb_flag_delay_1; endcase
- RCMB_MODE_TRIANGLE: case (rcmb_xy_valid) 2'b01: rdy_adv <= cnt_lsb_wrapped; endcase
- RCMB_MODE_RECTANGLE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_delay_2) rdy_adv <= ~rcmb_msb_flag_delay_1; endcase
+ RCMB_MODE_SQUARE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_dly_2) rdy_adv <= ~rcmb_msb_flag_dly_1; endcase
+ RCMB_MODE_TRIANGLE: case (rcmb_xy_valid) 2'b01: rdy_adv <= cnt_lsb_wrapped; endcase
+ RCMB_MODE_RECTANGLE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_dly_2) rdy_adv <= ~rcmb_msb_flag_dly_1; endcase
endcase
diff --git a/rtl/modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v
index 45fc68c..0c9ab00 100644
--- a/rtl/modexpng_recombinator_cell.v
+++ b/rtl/modexpng_recombinator_cell.v
@@ -34,7 +34,7 @@ module modexpng_recombinator_cell
(
clk,
ce, clr,
- din, dout, dout_ext
+ din, dout, doutw
);
@@ -54,63 +54,172 @@ module modexpng_recombinator_cell
input clr;
input [ MAC_W -1:0] din;
output [WORD_W -1:0] dout;
- output [WORD_W :0] dout_ext;
+ output [WORD_W :0] doutw;
//
// din <=> {z[13:0], y[15:0], x[15:0]}
//
- wire [WORD_W -3:0] din_z = din[3 * WORD_W -3 : 2 * WORD_W]; // [45:32]
- wire [WORD_W -1:0] din_y = din[2 * WORD_W -1 : WORD_W]; // [31:16]
- wire [WORD_W -1:0] din_x = din[ WORD_W -1 : 0]; // [15: 0]
-
+ wire [WORD_W -1:0] din_z = {2'b00, din[3 * WORD_W -3 : 2 * WORD_W]}; // [47:46][45:32]
+ wire [WORD_W -1:0] din_y = { din[2 * WORD_W -1 : WORD_W]}; // [31:16]
+ wire [WORD_W -1:0] din_x = { din[ WORD_W -1 : 0]}; // [15: 0]
+
//
- // Delayed Clock Enable
+ // Delayed Clock Enables
+ //
+ reg ce_dly1 = 1'b0, ce_dly2 = 1'b0, ce_dly3 = 1'b0, ce_dly4 = 1'b0, ce_dly5 = 1'b0, ce_dly6 = 1'b0;
+ always @(posedge clk) {ce_dly1, ce_dly2, ce_dly3, ce_dly4, ce_dly5, ce_dly6} <= {ce, ce_dly1, ce_dly2, ce_dly3, ce_dly4, ce_dly5};
+
+
+ //
+ // Delayed Clear
//
- reg ce_dly = 1'b0;
- always @(posedge clk) ce_dly <= ce;
+ reg clr_dly1, clr_dly2, clr_dly3, clr_dly4;
+ always @(posedge clk) {clr_dly1, clr_dly2, clr_dly3, clr_dly4} <= {clr, clr_dly1, clr_dly2, clr_dly3};
+
+ //
+ // Phase Flip-Flop
+ //
+ reg phase_ff, phase_ff_dly1, phase_ff_dly2, phase_ff_dly3, phase_ff_dly4, phase_ff_dly5;
+ always @(posedge clk)
+ if (ce) phase_ff <= ~phase_ff;
+ else if (clr) phase_ff <= 1'b0;
+
+ always @(posedge clk)
+ {phase_ff_dly1, phase_ff_dly2, phase_ff_dly3, phase_ff_dly4, phase_ff_dly5} <= {phase_ff, phase_ff_dly1, phase_ff_dly2, phase_ff_dly3, phase_ff_dly4};
+
//
- // DSP Slice Buses
+ // Shift Registers
//
- wire [DSP48E1_A_W-1:0] a_int;
- wire [DSP48E1_B_W-1:0] b_int;
- wire [DSP48E1_C_W-1:0] c_int;
- wire [DSP48E1_P_W-1:0] p_int;
+ reg [WORD_W-1:0] din_x_dly1;
+ reg [WORD_W-1:0] din_y_dly1;
+ reg [WORD_W-1:0] din_z_dly1;
+ reg [WORD_W-1:0] din_z_dly2;
- assign {a_int, b_int} = {{(DSP48E1_C_W-(2*WORD_W+1)){1'b0}}, din_z, din_y[WORD_W-1], din_y};
- assign {c_int} = {{(DSP48E1_C_W-(2*WORD_W+1)){1'b0}}, WORD_ZERO, din_x[WORD_W-1], din_x};
+ always @(posedge clk) begin
+ //
+ if (ce) {din_x_dly1, din_y_dly1, din_z_dly1} <= {din_x, din_y, din_z};
+ else if (clr) {din_x_dly1, din_y_dly1, din_z_dly1} <= {WORD_ZERO, WORD_ZERO, WORD_ZERO};
+ //
+ if (ce) {din_z_dly2} <= {din_z_dly1};
+ else if (clr) {din_z_dly2} <= {WORD_ZERO};
+ //
+ end
+
+ //
+ // DSP Input Registers
//
- // Combinational OPMODE Switch
+ reg [2 * WORD_W-1:0] master_ab_reg;
+ reg [2 * WORD_W-1:0] master_c_reg;
+
+ reg [ WORD_W+1:0] slave_ab_reg;
+ reg [ WORD_W+1:0] slave_ab_next_reg;
+
+
+ //
+ // DSP Cascade Bus
+ //
+ wire [DSP48E1_P_W-1:0] master_slave_p_int;
+
+
+ //
+ // DSP Output Buses
//
- reg [DSP48E1_OPMODE_W-1:0] opmode;
+ wire [DSP48E1_P_W-1:0] master_p_int;
+ wire [DSP48E1_P_W-1:0] slave_p_int;
+
- always @(clr)
- //
- case (clr)
- 1'b1: opmode = DSP48E1_OPMODE_Z0_YC_X0;
- 1'b0: opmode = DSP48E1_OPMODE_ZP17_YC_XAB;
- endcase
+ //
+ // DSP Input Mapping
+ //
+ wire [DSP48E1_C_W-1:0] master_ab_int = {{(DSP48E1_C_W - 2 * WORD_W){1'b0}}, master_ab_reg};
+ wire [DSP48E1_C_W-1:0] master_c_int = {{(DSP48E1_C_W - 2 * WORD_W){1'b0}}, master_c_reg};
+
+ wire [DSP48E1_C_W-1:0] slave_ab_int = {{(DSP48E1_C_W - (WORD_W+3)){1'b0}}, slave_ab_reg[WORD_W+1:WORD_W], 1'b1, slave_ab_reg[WORD_W-1:0]};
+ wire [DSP48E1_C_W-1:0] slave_c_int = {DSP48E1_C_W{1'b0}};
//
- // DSP Slice Instance
+ // Master DSP Input Logic
//
- `MODEXPNG_DSP_SLICE_ADDSUB #(.AB_REG(2)) dsp_inst
+ always @(posedge clk)
+ //
+ if (ce) begin
+ master_ab_reg <= !phase_ff ? {din_y, din_y_dly1} : {din_x, din_x_dly1};
+ master_c_reg <= !phase_ff ? {din_z_dly1, din_z_dly2} : {WORD_DNC, WORD_DNC};
+ end else begin
+ master_ab_reg <= {WORD_DNC, WORD_DNC};
+ master_c_reg <= {WORD_DNC, WORD_DNC};
+ end
+
+
+ //
+ // Slave DSP Input Logic
+ //
+ always @(posedge clk) begin
+ //
+ slave_ab_reg <= {(WORD_W+2){1'bX}};
+ slave_ab_next_reg <= {(WORD_W+2){1'bX}};
+ //
+ if (ce_dly3 && phase_ff_dly3) slave_ab_next_reg <= {master_p_int[2*WORD_W+1:WORD_W]};
+ //
+ if (ce_dly3 && phase_ff_dly3) slave_ab_reg <= {2'b00, master_p_int[WORD_W-1:0]};
+ if (ce_dly4 && phase_ff_dly4) slave_ab_reg <= slave_ab_next_reg;
+ //
+ end
+
+
+ //
+ // OPMODE Logic
+ //
+ reg [DSP48E1_OPMODE_W-1:0] master_opmode;
+ reg [DSP48E1_OPMODE_W-1:0] slave_opmode;
+
+ always @(posedge clk) begin
+ //
+ if (ce) master_opmode <= !phase_ff ? DSP48E1_OPMODE_Z0_YC_XAB : DSP48E1_OPMODE_ZP_Y0_XAB;
+ else master_opmode <= DSP48E1_OPMODE_DNC;
+ //
+ if (ce_dly4) slave_opmode <= clr_dly4 ? DSP48E1_OPMODE_Z0_Y0_XAB : DSP48E1_OPMODE_ZP17_Y0_XAB;
+ else slave_opmode <= DSP48E1_OPMODE_DNC;
+ //
+ end
+
+
+ //
+ // DSP Slice Instances
+ //
+ `MODEXPNG_DSP_SLICE_ADDSUB dsp_master_inst
+ (
+ .clk (clk),
+ .ce_abc (ce_dly1),
+ .ce_p (ce_dly2),
+ .ce_ctrl (ce_dly1),
+ .ab (master_ab_int),
+ .c (master_c_int),
+ .p (master_p_int),
+ .op_mode (master_opmode),
+ .alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN),
+ .carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN),
+ .casc_p_in (),
+ .casc_p_out (),
+ .carry_out ()
+ );
+
+ `MODEXPNG_DSP_SLICE_ADDSUB dsp_slave_inst
(
.clk (clk),
- .ce_ab1 (ce),
- .ce_ab2 (ce_dly),
- .ce_c (ce),
- .ce_p (ce_dly),
- .ce_ctrl (ce),
- .ab ({a_int, b_int}),
- .c (c_int),
- .p (p_int),
- .op_mode (opmode),
+ .ce_abc (ce_dly5),
+ .ce_p (ce_dly6),
+ .ce_ctrl (ce_dly5),
+ .ab (slave_ab_int),
+ .c (slave_c_int),
+ .p (slave_p_int),
+ .op_mode (slave_opmode),
.alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN),
.carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN),
.casc_p_in (),
@@ -120,10 +229,9 @@ module modexpng_recombinator_cell
//
- // Output Mapping
- //
- assign dout = {p_int[WORD_W-1:0]};
- assign dout_ext = {p_int[WORD_W+1], dout};
-
+ // Output Register
+ //
+ assign dout = {slave_p_int[WORD_W-1:0]};
+ assign doutw = {slave_p_int[WORD_W+1], dout};
endmodule
More information about the Commits mailing list