[Cryptech-Commits] [core/math/modexpng] 26/92: Further work on the Montgomery modular multiplier. Added the third "rectangular" stage of the multiplication process, i.e. computation of how many copies of the modulus N to add to the intermediate product AB to zeroize the lower half: M = Q * N.
git at cryptech.is
git at cryptech.is
Sat Mar 14 18:19:05 UTC 2020
This is an automated email from the git hooks/post-receive script.
paul at psgd.org pushed a commit to branch master
in repository core/math/modexpng.
commit ecf0374b7bbf1c1ea56fea8f1acaeea85c3612d2
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Tue Oct 1 15:07:56 2019 +0300
Further work on the Montgomery modular multiplier. Added the third
"rectangular" stage of the multiplication process, i.e. computation of how many
copies of the modulus N to add to the intermediate product AB to zeroize the
lower half: M = Q * N.
---
bench/tb_square.v | 362 ++++++++++++++++++++++++++-------
rtl/modexpng_mmm_fsm.vh | 11 +-
rtl/modexpng_parameters.vh | 26 +--
rtl/modexpng_part_recombinator.v | 425 ++++++++++++++++++++++++++++++++-------
4 files changed, 668 insertions(+), 156 deletions(-)
diff --git a/bench/tb_square.v b/bench/tb_square.v
index 23831db..d35a5cc 100644
--- a/bench/tb_square.v
+++ b/bench/tb_square.v
@@ -41,6 +41,8 @@ module tb_square;
reg [17:0] AB[0:63];
reg [17:0] N_COEFF[0:32];
reg [17:0] Q[0:32];
+ reg [17:0] N[0:31];
+ reg [17:0] M[0:64];
//
@@ -103,6 +105,33 @@ module tb_square;
Q[28] = 18'h0bf39; Q[29] = 18'h0929d; Q[30] = 18'h05273; Q[31] = 18'h0c30a;
Q[32] = 18'h0eef3;
//
+ N[ 0] = 18'h03ad9; N[ 1] = 18'h046b4; N[ 2] = 18'h0e181; N[ 3] = 18'h0fac7;
+ N[ 4] = 18'h0be72; N[ 5] = 18'h029ab; N[ 6] = 18'h07e51; N[ 7] = 18'h037a8;
+ N[ 8] = 18'h0880c; N[ 9] = 18'h05a7d; N[10] = 18'h043c2; N[11] = 18'h038c9;
+ N[12] = 18'h01275; N[13] = 18'h0aa0d; N[14] = 18'h0c0c1; N[15] = 18'h0d035;
+ N[16] = 18'h04082; N[17] = 18'h0543c; N[18] = 18'h0dcb0; N[19] = 18'h0497c;
+ N[20] = 18'h0b12c; N[21] = 18'h013d4; N[22] = 18'h0b80a; N[23] = 18'h051cf;
+ N[24] = 18'h0286c; N[25] = 18'h0b600; N[26] = 18'h0d838; N[27] = 18'h0af4b;
+ N[28] = 18'h08274; N[29] = 18'h06a07; N[30] = 18'h0beea; N[31] = 18'h0f000;
+ //
+ M[ 0] = 18'h041b2; M[ 1] = 18'h00128; M[ 2] = 18'h06b69; M[ 3] = 18'h08e7e;
+ M[ 4] = 18'h0118c; M[ 5] = 18'h0b96d; M[ 6] = 18'h0ebe5; M[ 7] = 18'h0f873;
+ M[ 8] = 18'h0cf14; M[ 9] = 18'h0de83; M[10] = 18'h09690; M[11] = 18'h05e9a;
+ M[12] = 18'h048ac; M[13] = 18'h0b506; M[14] = 18'h01283; M[15] = 18'h08631;
+ M[16] = 18'h0179c; M[17] = 18'h06820; M[18] = 18'h0867b; M[19] = 18'h0b750;
+ M[20] = 18'h0e680; M[21] = 18'h0df95; M[22] = 18'h0d818; M[23] = 18'h0b4c5;
+ M[24] = 18'h0cced; M[25] = 18'h0c4a9; M[26] = 18'h0bb78; M[27] = 18'h04295;
+ M[28] = 18'h0b1b4; M[29] = 18'h09635; M[30] = 18'h0066b; M[31] = 18'h022b1;
+ M[32] = 18'h04fdb; M[33] = 18'h0efc8; M[34] = 18'h00a14; M[35] = 18'h04bef;
+ M[36] = 18'h006a1; M[37] = 18'h0f1a6; M[38] = 18'h0fc40; M[39] = 18'h0adb5;
+ M[40] = 18'h06e8f; M[41] = 18'h02c60; M[42] = 18'h083e1; M[43] = 18'h0f862;
+ M[44] = 18'h0da61; M[45] = 18'h0dd3d; M[46] = 18'h03381; M[47] = 18'h09db0;
+ M[48] = 18'h05454; M[49] = 18'h07525; M[50] = 18'h0d9c7; M[51] = 18'h0a361;
+ M[52] = 18'h049e0; M[53] = 18'h0a671; M[54] = 18'h0242e; M[55] = 18'h07cb2;
+ M[56] = 18'h02021; M[57] = 18'h0bde1; M[58] = 18'h025aa; M[59] = 18'h0c615;
+ M[60] = 18'h05645; M[61] = 18'h03b46; M[62] = 18'h065d6; M[63] = 18'h0390d;
+ M[64] = 18'h0e005;
+ //
end
@@ -134,6 +163,12 @@ module tb_square;
reg [ 7:0] tb_slim_bram_xy_addr;
reg [17:0] tb_slim_bram_x_din;
reg [17:0] tb_slim_bram_y_din;
+
+ reg mgr_slim_bram_xy_ena = 1'b0;
+ reg [ 1:0] mgr_slim_bram_xy_bank;
+ reg [ 7:0] mgr_slim_bram_xy_addr;
+ reg [17:0] mgr_slim_bram_x_din;
+ reg [17:0] mgr_slim_bram_y_din;
reg mac_slim_bram_xy_ena = 1'b0;
reg mac_slim_bram_xy_reg_ena = 1'b0;
@@ -195,10 +230,10 @@ module tb_square;
ip_bram_18k slim_bram_x
(
.clka (clk),
- .ena (tb_slim_bram_xy_ena),
- .wea (tb_slim_bram_xy_ena),
- .addra ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}),
- .dina (tb_slim_bram_x_din),
+ .ena (mgr_slim_bram_xy_ena),
+ .wea (mgr_slim_bram_xy_ena),
+ .addra ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}),
+ .dina (mgr_slim_bram_x_din),
.clkb (clk),
.enb (mac_slim_bram_xy_ena),
@@ -210,10 +245,10 @@ module tb_square;
ip_bram_18k slim_bram_y
(
.clka (clk),
- .ena (tb_slim_bram_xy_ena),
- .wea (tb_slim_bram_xy_ena),
- .addra ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}),
- .dina (tb_slim_bram_y_din),
+ .ena (mgr_slim_bram_xy_ena),
+ .wea (mgr_slim_bram_xy_ena),
+ .addra ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}),
+ .dina (mgr_slim_bram_y_din),
.clkb (clk),
.enb (mac_slim_bram_xy_ena),
@@ -266,14 +301,23 @@ module tb_square;
wait_clock_tick;
end
for (i=32; i<33; i=i+1) begin
- tb_slim_bram_xy_bank = BANK_SLIM_N_COEFF_EXT;
- tb_slim_bram_xy_addr = 0;
+ tb_slim_bram_xy_bank = BANK_SLIM_EXT;
+ tb_slim_bram_xy_addr = 0; // !
tb_slim_bram_x_din = N_COEFF[i];
tb_slim_bram_y_din = N_COEFF[i];
wait_clock_tick;
end
+ for (i=0; i<32; i=i+1) begin
+ tb_fat_bram_xy_bank = BANK_FAT_N;
+ tb_fat_bram_xy_addr = i[7:0];
+ tb_fat_bram_x_din = N[i];
+ tb_fat_bram_y_din = N[i];
+
+ wait_clock_tick;
+ end
+
tb_fat_bram_xy_ena = 1'b0;
tb_slim_bram_xy_ena = 1'b0;
@@ -299,6 +343,7 @@ module tb_square;
verify_ab;
verify_q;
+ verify_m;
end
@@ -418,25 +463,23 @@ module tb_square;
wire mult_square_addr_almost_done_comb;
reg mult_square_addr_almost_done_flop;
-
- //wire mult_square_addr_surely_done_comb;
reg mult_square_addr_surely_done_flop;
- reg mult_triangle_addr_almost_done_comb;
- reg mult_triangle_addr_almost_done_flop;
-
- //wire mult_triangle_addr_surely_done_comb;
+ wire mult_triangle_addr_almost_done_comb;
+ reg mult_triangle_addr_almost_done_flop;
reg mult_triangle_addr_surely_done_flop;
reg mult_triangle_addr_tardy_done_flop;
+
+ wire mult_rectangle_addr_almost_done_comb;
+ reg mult_rectangle_addr_almost_done_flop;
+ reg mult_rectangle_addr_surely_done_flop;
+ reg mult_rectangle_addr_tardy_done_flop;
+
assign mult_square_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1;
+ assign mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last_minus1[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
+ assign mult_rectangle_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1;
- always @*
- //
- //if (!col_is_last)
- mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last_minus1[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
- //else
- //mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
@@ -482,6 +525,29 @@ module tb_square;
//
end
+
+ always @(posedge clk)
+ // Registered copy of the rectangle "almost done" comparison; only tracked while in a RECTANGLE *_BUSY state.
+ case (fsm_state)
+
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
+ mult_rectangle_addr_almost_done_flop <= mult_rectangle_addr_almost_done_comb; // comb flag: mac_slim_bram_xy_addr == index_last_minus1
+ //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <=
+ //{mult_triangle_addr_surely_done_comb, mult_triangle_addr_almost_done_comb};
+
+ default:
+ mult_rectangle_addr_almost_done_flop <= 1'b0; // every other state forces the flag low
+ //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <= 2'b00;
+
+ endcase
+
+ always @(posedge clk) begin
+ // Two-stage delay line: almost_done -> surely_done -> tardy_done, one clock per stage.
+ mult_rectangle_addr_surely_done_flop <= mult_rectangle_addr_almost_done_flop; // almost_done delayed by one clock
+ mult_rectangle_addr_tardy_done_flop <= mult_rectangle_addr_surely_done_flop; // almost_done delayed by two clocks; this is the flag aliased as mult_rectangle_addr_done
+ //
+ end
//
@@ -489,6 +555,7 @@ module tb_square;
//
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
+ wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;
always @(posedge clk)
@@ -510,6 +577,14 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_addr <= mult_triangle_addr_almost_done_flop || (col_is_last && mult_triangle_addr_surely_done_flop) ?
8'd0 : mac_slim_bram_xy_addr + 1'b1;
//
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0;
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_addr <= mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop ?
+ 8'd1 : mac_slim_bram_xy_addr + 1'b1;
+ //
default: mac_slim_bram_xy_addr <= 8'dX;
endcase
@@ -543,7 +618,14 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
+ //
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT: mac_fat_bram_xy_addr[j] <= {5'd0, fat_bram_offset_rom[j]};
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_fat_bram_xy_addr[j] <= {col_index_next1, fat_bram_offset_rom[j]};
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
//
default: mac_fat_bram_xy_addr[j] <= 8'dX;
endcase
@@ -564,7 +646,14 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
+ //
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
//
default: mac_fat_bram_xy_addr[4] <= 8'dX;
endcase
@@ -574,19 +663,30 @@ module tb_square;
always @(posedge clk)
//
case (fsm_state_next)
+ //
FSM_STATE_MULT_SQUARE_COL_0_INIT,
FSM_STATE_MULT_SQUARE_COL_N_INIT,
FSM_STATE_MULT_SQUARE_COL_0_TRIG,
FSM_STATE_MULT_SQUARE_COL_N_TRIG,
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_bank <= BANK_SLIM_T1T2;
+ //
FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_bank <= col_is_last && (mult_triangle_addr_almost_done_flop || mult_triangle_addr_surely_done_flop) ?
- BANK_SLIM_N_COEFF_EXT : BANK_SLIM_N_COEFF;
+ BANK_SLIM_EXT : BANK_SLIM_N_COEFF;
+ //
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_bank <= mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop ?
+ BANK_SLIM_EXT : BANK_SLIM_Q;
+ //
default: mac_slim_bram_xy_bank <= 2'bXX;
endcase
@@ -605,6 +705,12 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {BANK_FAT_ABH, BANK_FAT_ABL};
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_ABL}};
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_N}};
default: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{3'bXXX}};
endcase
@@ -625,6 +731,12 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1;
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_ena <= !col_is_last ? ~mult_triangle_addr_almost_done_flop : ~mult_triangle_addr_surely_done_flop;
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1;
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_ena <= ~mult_rectangle_addr_surely_done_flop;
default: mac_slim_bram_xy_ena <= 1'b0;
endcase
@@ -642,7 +754,13 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_ena <= 1'b1;
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_ena <= 1'b1;
default: mac_fat_bram_xy_ena <= 1'b0;
endcase
@@ -654,12 +772,30 @@ module tb_square;
always @(posedge clk)
//
mac_fat_bram_xy_reg_ena <= mac_fat_bram_xy_ena;
-
+
+ reg ladder_mode = 1'b0; // 0 = X:T1*T2, Y:T2*T2
+ // 1 = X:T1*T2, Y:T2*T1
+
+ reg dsp_swap_xy;
+
+ always @(posedge clk)
+ // dsp_swap_xy is set when the square stage triggers and cleared when the triangle stage triggers.
+ case (fsm_state)
+ FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_swap_xy <= 1'b1; // set at the first-column trigger of the square stage
+ FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_swap_xy <= 1'b0; // cleared at the first-column trigger of the triangle stage
+ endcase // no default item: the register holds its value in all other states (including the rectangle states)
+
always @(posedge clk)
//
- if (mac_slim_bram_xy_reg_ena_dly)
- {dsp_y_b, dsp_x_b} <= {mac_slim_bram_x_dout[16:0], mac_slim_bram_y_dout[16:0]};
+ if (mac_slim_bram_xy_reg_ena_dly) begin // rewrite
+ if (!dsp_swap_xy)
+ {dsp_y_b, dsp_x_b} <= {mac_slim_bram_y_dout[16:0], mac_slim_bram_x_dout[16:0]};
+ else begin
+ if (!ladder_mode) {dsp_y_b, dsp_x_b} <= {mac_slim_bram_x_dout[16:0], mac_slim_bram_y_dout[16:0]};
+ else {dsp_y_b, dsp_x_b} <= {mac_slim_bram_y_dout[16:0], mac_slim_bram_x_dout[16:0]};
+ end
+ end
else
{dsp_y_b, dsp_x_b} <= {2{{17{1'bX}}}};
@@ -711,7 +847,8 @@ module tb_square;
case (fsm_state_next)
//
FSM_STATE_MULT_SQUARE_COL_0_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT: begin
+ FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
col_index <= 5'd0;
col_index_last <= index_last[7:3];
col_index_next1 <= 5'd1;
@@ -721,7 +858,8 @@ module tb_square;
end
//
FSM_STATE_MULT_SQUARE_COL_N_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT: begin
+ FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
col_index <= col_index_next1;
col_is_last <= col_index_next1 == col_index_last;
col_index_next1 <= col_index_next1 == col_index_last ? 5'd0 : col_index_next1 + 5'd1;
@@ -730,8 +868,9 @@ module tb_square;
//
endcase
- assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
- assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
+ assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
+ assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
+ assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
always @(posedge clk)
//
@@ -741,9 +880,13 @@ module tb_square;
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly);
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}};
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}};
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, mac_slim_bram_xy_addr_dly);
default: dsp_xy_mode_z_adv4 <= {9{1'b1}};
endcase
@@ -774,26 +917,26 @@ module tb_square;
calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
end
endfunction
- /*
- function [NUM_MULTS:0] calc_mac_mode_z_triangle;
+
+ function [NUM_MULTS:0] calc_mac_mode_z_rectangle;
input [ 4:0] col_index_value;
input [ 7:0] mac_slim_bram_xy_addr_value;
begin
if (mac_slim_bram_xy_addr_value[7:3] == col_index_value)
case (mac_slim_bram_xy_addr_value[2:0])
- 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110};
- 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
- 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
- 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
- 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
- 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
- 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
- 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
+ 3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110};
+ 3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101};
+ 3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011};
+ 3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111};
+ 3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111};
+ 3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111};
+ 3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111};
+ 3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111};
endcase
else
- calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
+ calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}};
end
- endfunction*/
+ endfunction
reg recomb_x_ena = 1'b0;
reg recomb_y_ena = 1'b0;
@@ -810,44 +953,66 @@ module tb_square;
wire [17:0] recomb_fat_bram_x_dout;
wire [17:0] recomb_fat_bram_y_dout;
wire recomb_fat_bram_xy_dout_valid;
+ wire [ 2:0] recomb_slim_bram_xy_bank;
+ wire [ 7:0] recomb_slim_bram_xy_addr;
+ wire [17:0] recomb_slim_bram_x_dout;
+ wire [17:0] recomb_slim_bram_y_dout;
+ wire recomb_slim_bram_xy_dout_valid;
wire recomb_rdy;
modexpng_part_recombinator recomb
(
- .clk (clk),
- .rdy (recomb_rdy),
- .fsm_state_next (fsm_state_next),
- .index_last (index_last),
- .dsp_x_ce_p (dsp_x_ce_p),
- .dsp_y_ce_p (dsp_y_ce_p),
- .ena_x (recomb_x_ena),
- .ena_y (recomb_y_ena),
- .dsp_x_p (dsp_x_p),
- .dsp_y_p (dsp_y_p),
- .col_index (col_index),
- .col_index_last (col_index_last),
- .slim_bram_xy_addr (mac_slim_bram_xy_addr),
- .slim_bram_xy_bank (mac_slim_bram_xy_bank),
- .fat_bram_xy_bank (recomb_fat_bram_xy_bank),
- .fat_bram_xy_addr (recomb_fat_bram_xy_addr),
- .fat_bram_x_dout (recomb_fat_bram_x_dout),
- .fat_bram_y_dout (recomb_fat_bram_y_dout),
- .fat_bram_xy_dout_valid (recomb_fat_bram_xy_dout_valid)
+ .clk (clk),
+ .rdy (recomb_rdy),
+ .fsm_state_next (fsm_state_next),
+ .index_last (index_last),
+ .dsp_x_ce_p (dsp_x_ce_p),
+ .dsp_y_ce_p (dsp_y_ce_p),
+ .ena_x (recomb_x_ena),
+ .ena_y (recomb_y_ena),
+ .dsp_x_p (dsp_x_p),
+ .dsp_y_p (dsp_y_p),
+ .col_index (col_index),
+ .col_index_last (col_index_last),
+ .slim_bram_xy_addr (mac_slim_bram_xy_addr),
+ .slim_bram_xy_bank (mac_slim_bram_xy_bank),
+ .rcmb_fat_bram_xy_bank (recomb_fat_bram_xy_bank),
+ .rcmb_fat_bram_xy_addr (recomb_fat_bram_xy_addr),
+ .rcmb_fat_bram_x_dout (recomb_fat_bram_x_dout),
+ .rcmb_fat_bram_y_dout (recomb_fat_bram_y_dout),
+ .rcmb_fat_bram_xy_dout_valid (recomb_fat_bram_xy_dout_valid),
+ .rcmb_slim_bram_xy_bank (recomb_slim_bram_xy_bank),
+ .rcmb_slim_bram_xy_addr (recomb_slim_bram_xy_addr),
+ .rcmb_slim_bram_x_dout (recomb_slim_bram_x_dout),
+ .rcmb_slim_bram_y_dout (recomb_slim_bram_y_dout),
+ .rcmb_slim_bram_xy_dout_valid (recomb_slim_bram_xy_dout_valid)
);
reg [17:0] AB_READ[0:63];
reg [17:0] Q_READ[0:32];
+ reg [17:0] M_READ[0:64];
- always @(posedge clk)
+ always @(posedge clk) begin
//
if (recomb_fat_bram_xy_dout_valid)
//
case (recomb_fat_bram_xy_bank)
- 3'd1: AB_READ[ (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
- 3'd2: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
- 3'd3: Q_READ [ (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
- 3'd4: Q_READ [32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+ BANK_FAT_ABL: AB_READ[recomb_fat_bram_xy_addr % 32] <= recomb_fat_bram_x_dout;
+ BANK_FAT_ABH: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+ BANK_FAT_ML: M_READ[recomb_fat_bram_xy_addr % 32] <= recomb_fat_bram_x_dout;
+ BANK_FAT_MH: M_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+ BANK_FAT_EXT: M_READ[64 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
endcase
+ //
+ if (recomb_slim_bram_xy_dout_valid)
+ //
+ case (recomb_slim_bram_xy_bank)
+ BANK_SLIM_Q: Q_READ[recomb_slim_bram_xy_addr] <= recomb_slim_bram_x_dout;
+ BANK_SLIM_EXT: if (recomb_slim_bram_xy_addr == 8'd1)
+ Q_READ[32] <= recomb_slim_bram_x_dout;
+ endcase
+ //
+ end
always @(posedge clk)
@@ -873,6 +1038,29 @@ module tb_square;
end
+ always @(posedge clk)
+ // Write-port source select for the slim BRAMs: testbench preload has priority, then recombinator write-back, otherwise idle.
+ if (tb_slim_bram_xy_ena) begin // testbench is loading operands
+ mgr_slim_bram_xy_ena <= 1'b1;
+ mgr_slim_bram_xy_bank <= tb_slim_bram_xy_bank;
+ mgr_slim_bram_xy_addr <= tb_slim_bram_xy_addr;
+ mgr_slim_bram_x_din <= tb_slim_bram_x_din;
+ mgr_slim_bram_y_din <= tb_slim_bram_y_din;
+ end else if (recomb_slim_bram_xy_dout_valid) begin // recombinator has a word to store
+ mgr_slim_bram_xy_ena <= 1'b1;
+ mgr_slim_bram_xy_bank <= recomb_slim_bram_xy_bank[1:0]; // source wire is 3 bits wide, the slim bank register is 2 bits: truncate explicitly
+ mgr_slim_bram_xy_addr <= recomb_slim_bram_xy_addr;
+ mgr_slim_bram_x_din <= recomb_slim_bram_x_dout;
+ mgr_slim_bram_y_din <= recomb_slim_bram_y_dout;
+ end else begin // nothing to write: disable the port and drive X on the unused fields
+ mgr_slim_bram_xy_ena <= 1'b0;
+ mgr_slim_bram_xy_bank <= 2'bXX; // literal sized to match the 2-bit bank register
+ mgr_slim_bram_xy_addr <= 8'hXX;
+ mgr_slim_bram_x_din <= {18{1'bX}};
+ mgr_slim_bram_y_din <= {18{1'bX}};
+ end
+
+
task verify_ab;
reg verify_ab_ok;
begin
@@ -911,9 +1099,29 @@ module tb_square;
endtask
+ task verify_m; // compares all 65 words of M_READ against the reference array M and prints a per-word and an overall verdict
+ reg verify_m_ok; // stays 1 only if every word matches
+ begin
+ verify_m_ok = 1;
+ for (i=0; i<65; i=i+1) // loop index i is declared outside this task
+ if (M_READ[i] === M[i]) // case equality, so X/Z bits in M_READ are compared literally rather than yielding an unknown result
+ $display("M / M_READ [%02d] = 0x%05x / 0x%05x", i, M[i], M_READ[i]);
+ else begin
+ $display("M / M_READ [%02d] = 0x%05x / 0x%05x <???>", i, M[i], M_READ[i]);
+ verify_m_ok = 0; // remember the mismatch but keep printing the remaining words
+ end
+ if (verify_m_ok)
+ $display("M is OK.");
+ else
+ $display("M is WRONG!");
+ end
+ endtask
+
+
wire mult_square_addr_done = mult_square_addr_surely_done_flop;
-
wire mult_triangle_addr_done = !col_is_last ? mult_triangle_addr_surely_done_flop : mult_triangle_addr_tardy_done_flop;
+ wire mult_rectangle_addr_done = mult_rectangle_addr_tardy_done_flop;
+
always @* begin
//
@@ -940,7 +1148,17 @@ module tb_square;
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = mult_triangle_addr_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
- FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = FSM_STATE_MULT_TRIANGLE_HOLDOFF;//recomb_rdy ? FSM_STATE_IDLE : FSM_STATE_MULT_SQUARE_HOLDOFF;
+ FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF;
+
+ FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = mult_rectangle_addr_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
+
+ FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = mult_rectangle_addr_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
+
+ FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF;
default: fsm_state_next = FSM_STATE_IDLE ;
diff --git a/rtl/modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_fsm.vh
index 2700a42..3bdae66 100644
--- a/rtl/modexpng_mmm_fsm.vh
+++ b/rtl/modexpng_mmm_fsm.vh
@@ -30,5 +30,14 @@ localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26;
localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33;
+
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36;
+
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37;
+
localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999;
-
\ No newline at end of file
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index f846119..57eef35 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -3,19 +3,19 @@
//localparam BANK_ADDR_WIDTH = 3; // TODO: Replace everywhere!
-localparam [2:0] BANK_FAT_T1T2 = 3'd0;
-localparam [2:0] BANK_FAT_ABL = 3'd1;
-localparam [2:0] BANK_FAT_ABH = 3'd2;
-localparam [2:0] BANK_FAT_Q = 3'd3;
-localparam [2:0] BANK_FAT_Q_EXT = 3'd4;
-localparam [2:0] BANK_FAT_ML = 3'd5;
-localparam [2:0] BANK_FAT_MH = 3'd6;
-localparam [2:0] BANK_FAT_MH_EXT = 3'd7;
-
-localparam [1:0] BANK_SLIM_T1T2 = 2'd0;
-localparam [1:0] BANK_SLIM_N = 2'd1;
-localparam [1:0] BANK_SLIM_N_COEFF = 2'd2;
-localparam [1:0] BANK_SLIM_N_COEFF_EXT = 2'd3;
+localparam [2:0] BANK_FAT_T1T2 = 3'd0;
+localparam [2:0] BANK_FAT_ABL = 3'd1;
+localparam [2:0] BANK_FAT_ABH = 3'd2;
+localparam [2:0] BANK_FAT_N = 3'd3;
+localparam [2:0] BANK_FAT_ML = 3'd4;
+localparam [2:0] BANK_FAT_MH = 3'd5;
+localparam [2:0] BANK_FAT_EXT = 3'd6; // 0 -> MH'
+localparam [2:0] BANK_FAT_UNUSED = 3'd7;
+
+localparam [1:0] BANK_SLIM_T1T2 = 2'd0;
+localparam [1:0] BANK_SLIM_N_COEFF = 2'd1;
+localparam [1:0] BANK_SLIM_Q = 2'd2;
+localparam [1:0] BANK_SLIM_EXT = 2'd3; // 0 -> N_COEFF', 1 -> Q'
//localparam BANK_Y_T2 = 3'd0;
diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v
index c51e7ef..567ecd5 100644
--- a/rtl/modexpng_part_recombinator.v
+++ b/rtl/modexpng_part_recombinator.v
@@ -9,7 +9,8 @@ module modexpng_part_recombinator
dsp_x_p, dsp_y_p,
col_index, col_index_last,
slim_bram_xy_addr, slim_bram_xy_bank,
- fat_bram_xy_bank, fat_bram_xy_addr, fat_bram_x_dout, fat_bram_y_dout, fat_bram_xy_dout_valid
+ rcmb_fat_bram_xy_bank, rcmb_fat_bram_xy_addr, rcmb_fat_bram_x_dout, rcmb_fat_bram_y_dout, rcmb_fat_bram_xy_dout_valid,
+ rcmb_slim_bram_xy_bank, rcmb_slim_bram_xy_addr, rcmb_slim_bram_x_dout, rcmb_slim_bram_y_dout, rcmb_slim_bram_xy_dout_valid
);
@@ -36,11 +37,17 @@ module modexpng_part_recombinator
input [ 7:0] slim_bram_xy_addr;
input [ 1:0] slim_bram_xy_bank;
- output [ 2:0] fat_bram_xy_bank;
- output [ 7:0] fat_bram_xy_addr;
- output [ 17:0] fat_bram_x_dout;
- output [ 17:0] fat_bram_y_dout;
- output fat_bram_xy_dout_valid;
+ output [ 2:0] rcmb_fat_bram_xy_bank;
+ output [ 7:0] rcmb_fat_bram_xy_addr;
+ output [ 17:0] rcmb_fat_bram_x_dout;
+ output [ 17:0] rcmb_fat_bram_y_dout;
+ output rcmb_fat_bram_xy_dout_valid;
+
+ output [ 2:0] rcmb_slim_bram_xy_bank;
+ output [ 7:0] rcmb_slim_bram_xy_addr;
+ output [ 17:0] rcmb_slim_bram_x_dout;
+ output [ 17:0] rcmb_slim_bram_y_dout;
+ output rcmb_slim_bram_xy_dout_valid;
//
@@ -148,10 +155,10 @@ module modexpng_part_recombinator
if (ena_x && ena_y)
//
case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1;
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2;
- //FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3;
- default: rcmb_mode <= 2'd0;
+ FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1;
+ FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2;
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3;
+ default: rcmb_mode <= 2'd0;
endcase
@@ -203,7 +210,7 @@ module modexpng_part_recombinator
input [1:0] slim_bram_xy_bank_value;
begin
//
- if (slim_bram_xy_bank_value == BANK_SLIM_N_COEFF_EXT)
+ if (slim_bram_xy_bank_value == BANK_SLIM_EXT)
calc_triangle_aux_lsb = 1'b1;
else
calc_triangle_aux_lsb = 1'b0;
@@ -216,6 +223,21 @@ module modexpng_part_recombinator
end
endfunction
+ function calc_rectangle_valid_lsb; // returns 1 when addr[7:3] equals the column index and the bank is not BANK_SLIM_EXT, else 0
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value; // not referenced in the body
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ begin
+ //
+ if (slim_bram_xy_addr_value[7:3] == col_index_value) // upper five address bits match the column index
+ calc_rectangle_valid_lsb = slim_bram_xy_bank_value != BANK_SLIM_EXT;
+ else
+ calc_rectangle_valid_lsb = 1'b0;
+ //
+ end
+ endfunction
+
function [7:0] calc_square_bitmap_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
@@ -265,6 +287,32 @@ module modexpng_part_recombinator
//
end
endfunction
+
+ function [7:0] calc_rectangle_bitmap_lsb; // one-hot decode of addr[2:0] when addr[7:3] equals the column index and the bank is not BANK_SLIM_EXT, else all zeroes
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value; // not referenced in the body
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ begin
+ //
+ if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT))
+ // bit n of the result is set for addr[2:0] == n
+ case (slim_bram_xy_addr_value[2:0])
+ 3'b000: calc_rectangle_bitmap_lsb = 8'b00000001;
+ 3'b001: calc_rectangle_bitmap_lsb = 8'b00000010;
+ 3'b010: calc_rectangle_bitmap_lsb = 8'b00000100;
+ 3'b011: calc_rectangle_bitmap_lsb = 8'b00001000;
+ 3'b100: calc_rectangle_bitmap_lsb = 8'b00010000;
+ 3'b101: calc_rectangle_bitmap_lsb = 8'b00100000;
+ 3'b110: calc_rectangle_bitmap_lsb = 8'b01000000;
+ 3'b111: calc_rectangle_bitmap_lsb = 8'b10000000;
+ endcase
+ //
+ else
+ calc_rectangle_bitmap_lsb = {8{1'b0}};
+ //
+ end
+ endfunction
function [2:0] calc_square_index_lsb;
input [4:0] col_index_value;
@@ -315,6 +363,32 @@ module modexpng_part_recombinator
//
end
endfunction
+
+ function [2:0] calc_rectangle_index_lsb; // returns addr[2:0] when addr[7:3] equals the column index and the bank is not BANK_SLIM_EXT, else X
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value; // not referenced in the body
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ begin
+ //
+ if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT))
+ // the case below is an identity mapping of the three low address bits
+ case (slim_bram_xy_addr_value[2:0])
+ 3'b000: calc_rectangle_index_lsb = 3'd0;
+ 3'b001: calc_rectangle_index_lsb = 3'd1;
+ 3'b010: calc_rectangle_index_lsb = 3'd2;
+ 3'b011: calc_rectangle_index_lsb = 3'd3;
+ 3'b100: calc_rectangle_index_lsb = 3'd4;
+ 3'b101: calc_rectangle_index_lsb = 3'd5;
+ 3'b110: calc_rectangle_index_lsb = 3'd6;
+ 3'b111: calc_rectangle_index_lsb = 3'd7;
+ endcase
+ //
+ else
+ calc_rectangle_index_lsb = 3'dX; // result is deliberately unknown outside the matching condition
+ //
+ end
+ endfunction
function calc_square_purge_lsb;
input [4:0] col_index_value;
@@ -330,6 +404,20 @@ module modexpng_part_recombinator
end
endfunction
+ function calc_rectangle_purge_lsb; // returns 1 only when addr[7:3] equals both the column index and the last column index
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [7:0] slim_bram_xy_addr_value;
+ begin
+ //
+ if (slim_bram_xy_addr_value[7:3] == col_index_value) // upper five address bits match the column index
+ calc_rectangle_purge_lsb = slim_bram_xy_addr_value[7:3] == col_index_last_value; // ...and that column is the last one
+ else
+ calc_rectangle_purge_lsb = 1'b0;
+ //
+ end
+ endfunction
+
function calc_square_valid_msb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
@@ -344,6 +432,22 @@ module modexpng_part_recombinator
//
end
endfunction
+
+ function calc_rectangle_valid_msb; // returns 1 exactly when the slim address is 1 and the bank is BANK_SLIM_EXT
+ input [4:0] col_index_value; // not referenced in the body
+ input [4:0] col_index_last_value; // not referenced in the body
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ input [7:0] index_last_value; // not referenced in the body
+ begin
+ //
+ if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT)) // full 8-bit address compare, not just the column bits
+ calc_rectangle_valid_msb = 1'b1;
+ else
+ calc_rectangle_valid_msb = 1'b0;
+ //
+ end
+ endfunction
function [7:0] calc_square_bitmap_msb;
input [4:0] col_index_value;
@@ -361,6 +465,22 @@ module modexpng_part_recombinator
end
endfunction
+ function [7:0] calc_rectangle_bitmap_msb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ input [7:0] index_last_value;
+ begin
+ // MSB bitmap: all eight bits set for address 1 in bank BANK_SLIM_EXT, all eight bits clear otherwise.
+ if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT)) begin
+ calc_rectangle_bitmap_msb[7:0] = 8'b11111111;
+ end else
+ calc_rectangle_bitmap_msb[7:0] = 8'b00000000;
+ // NOTE(review): col_index_value, col_index_last_value and index_last_value are not used by this function.
+ end
+ endfunction
+
function calc_square_purge_msb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
@@ -376,6 +496,22 @@ module modexpng_part_recombinator
end
endfunction
+ function calc_rectangle_purge_msb;
+ input [4:0] col_index_value;
+ input [4:0] col_index_last_value;
+ input [7:0] slim_bram_xy_addr_value;
+ input [1:0] slim_bram_xy_bank_value;
+ input [7:0] index_last_value;
+ begin
+ // MSB purge flag: for address 1 in bank BANK_SLIM_EXT it is 1 iff the current column index equals the last column index; 0 otherwise.
+ if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT))
+ calc_rectangle_purge_msb = col_index_value == col_index_last_value;
+ else
+ calc_rectangle_purge_msb = 1'b0;
+ // NOTE(review): index_last_value is not used by this function.
+ end
+ endfunction
+
reg recomb_lsb_ce = 1'b0;
reg recomb_lsb_ce_aux;
@@ -494,7 +630,24 @@ module modexpng_part_recombinator
xy_bitmap_msb_adv[6] <= {8{1'b0}};
xy_purge_msb_adv [6] <= 1'b0;
//
- end
+ end
+ //
+ FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
+ //
+ xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
+ xy_aux_lsb_adv [6] <= 1'b0;
+ xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
+ xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
+ xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, slim_bram_xy_addr);
+ //
+ xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
+ xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
+ xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
+ //
+ end
//
default: begin
//
@@ -586,15 +739,24 @@ module modexpng_part_recombinator
end
-
reg [ 2:0] fat_bram_xy_bank_reg;
reg [ 7:0] fat_bram_xy_addr_reg;
- reg [ 7:0] fat_bram_xy_cnt_lsb;
- reg [ 7:0] fat_bram_xy_cnt_msb;
reg [17:0] fat_bram_x_dout_reg;
reg [17:0] fat_bram_y_dout_reg;
reg fat_bram_xy_dout_valid_reg = 1'b0;
+ reg [ 2:0] slim_bram_xy_bank_reg;
+ reg [ 7:0] slim_bram_xy_addr_reg;
+ reg [17:0] slim_bram_x_dout_reg;
+ reg [17:0] slim_bram_y_dout_reg;
+ reg slim_bram_xy_dout_valid_reg = 1'b0;
+
+ reg [ 7:0] bram_xy_cnt_lsb;
+ reg [ 7:0] bram_xy_cnt_msb;
+
+ reg bram_xy_cnt_lsb_wrapped;
+ reg bram_xy_cnt_msb_wrapped;
+
reg [15:0] recomb_msb_dout_carry_0;
reg [15:0] recomb_msb_dout_carry_1;
@@ -606,11 +768,21 @@ module modexpng_part_recombinator
reg [ 7:0] recomb_msb_cnt_delay_1 = 8'd0;
reg [ 7:0] recomb_msb_cnt_delay_2 = 8'd0;
- assign fat_bram_xy_bank = fat_bram_xy_bank_reg;
- assign fat_bram_xy_addr = fat_bram_xy_addr_reg;
- assign fat_bram_x_dout = fat_bram_x_dout_reg;
- assign fat_bram_y_dout = fat_bram_y_dout_reg;
- assign fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg;
+ reg recomb_msb_flag_delay_0;
+ reg recomb_msb_flag_delay_1;
+ reg recomb_msb_flag_delay_2;
+
+ assign rcmb_fat_bram_xy_bank = fat_bram_xy_bank_reg;
+ assign rcmb_fat_bram_xy_addr = fat_bram_xy_addr_reg;
+ assign rcmb_fat_bram_x_dout = fat_bram_x_dout_reg;
+ assign rcmb_fat_bram_y_dout = fat_bram_y_dout_reg;
+ assign rcmb_fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg;
+
+ assign rcmb_slim_bram_xy_bank = slim_bram_xy_bank_reg;
+ assign rcmb_slim_bram_xy_addr = slim_bram_xy_addr_reg;
+ assign rcmb_slim_bram_x_dout = slim_bram_x_dout_reg;
+ assign rcmb_slim_bram_y_dout = slim_bram_y_dout_reg;
+ assign rcmb_slim_bram_xy_dout_valid = slim_bram_xy_dout_valid_reg;
reg rdy_reg = 1'b1;
reg rdy_adv = 1'b1;
@@ -629,7 +801,9 @@ module modexpng_part_recombinator
task advance_recomb_msb_dout_delay;
input [15:0] dout;
input [ 7:0] cnt;
+ input flag;
begin
+ //
recomb_msb_dout_delay_0 <= dout;
recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
@@ -637,6 +811,11 @@ module modexpng_part_recombinator
recomb_msb_cnt_delay_0 <= cnt;
recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
+ //
+ recomb_msb_flag_delay_0 <= flag;
+ recomb_msb_flag_delay_1 <= recomb_msb_flag_delay_0;
+ recomb_msb_flag_delay_2 <= recomb_msb_flag_delay_1;
+ //
end
endtask
@@ -659,10 +838,24 @@ module modexpng_part_recombinator
fat_bram_xy_addr_reg <= addr;
fat_bram_x_dout_reg <= dout_x;
fat_bram_y_dout_reg <= dout_y;
- fat_bram_xy_dout_valid_reg <= 1'b1;
+ fat_bram_xy_dout_valid_reg <= valid;
end
endtask
+ task _update_slim_bram_regs; // Loads all five slim BRAM output registers (bank, address, X/Y data, valid) in one place.
+ input [ 2:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ input valid;
+ begin
+ slim_bram_xy_bank_reg <= bank;
+ slim_bram_xy_addr_reg <= addr;
+ slim_bram_x_dout_reg <= dout_x;
+ slim_bram_y_dout_reg <= dout_y;
+ slim_bram_xy_dout_valid_reg <= valid; // set_slim_bram_regs passes 1 here, clear_slim_bram_regs passes 0
+ end
+ endtask
task set_fat_bram_regs;
input [ 2:0] bank;
@@ -674,44 +867,73 @@ module modexpng_part_recombinator
end
endtask
+ task set_slim_bram_regs; // Loads the slim BRAM output registers with the given bank/address/data and marks the output valid.
+ input [ 2:0] bank;
+ input [ 7:0] addr;
+ input [17:0] dout_x;
+ input [17:0] dout_y;
+ begin
+ _update_slim_bram_regs(bank, addr, dout_x, dout_y, 1'b1); // last argument is the valid flag
+ end
+ endtask
+
task clear_fat_bram_regs; // Deasserts the fat BRAM output valid flag; bank, address and data are driven as don't-care (X).
begin
_update_fat_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); // last argument is the valid flag
end
endtask
+
+ task clear_slim_bram_regs; // Deasserts the slim BRAM output valid flag; bank, address and data are driven as don't-care (X).
+ begin
+ _update_slim_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0); // last argument is the valid flag
+ end
+ endtask
- task _set_fat_bram_cnt_lsb;
+ task _set_bram_cnt_lsb; // Loads the LSB word counter and its wrapped flag together (non-blocking, so readers in the same clock see the old values).
input [7:0] cnt;
+ input wrapped; // new value for bram_xy_cnt_lsb_wrapped
begin
- fat_bram_xy_cnt_lsb <= cnt;
+ bram_xy_cnt_lsb <= cnt;
+ bram_xy_cnt_lsb_wrapped <= wrapped;
end
endtask
- task _set_fat_bram_cnt_msb;
+
+ task _set_bram_cnt_msb; // Loads the MSB word counter and its wrapped flag together (non-blocking, so readers in the same clock see the old values).
input [7:0] cnt;
+ input wrapped; // new value for bram_xy_cnt_msb_wrapped
begin
- fat_bram_xy_cnt_msb <= cnt;
+ bram_xy_cnt_msb <= cnt;
+ bram_xy_cnt_msb_wrapped <= wrapped;
end
endtask
- task inc_fat_bram_cnt_lsb;
+ task inc_bram_cnt_lsb; // Advances the LSB word counter; at index_last it returns to 0 and the wrapped flag is set.
begin
- _set_fat_bram_cnt_lsb(fat_bram_xy_cnt_lsb + 1'b1);
+ if (bram_xy_cnt_lsb == index_last)
+ _set_bram_cnt_lsb(8'd0, 1'b1);
+ else
+ _set_bram_cnt_lsb(bram_xy_cnt_lsb + 1'b1, bram_xy_cnt_lsb_wrapped); // count up, wrapped flag keeps its current value
end
endtask
- task inc_fat_bram_cnt_msb;
+
+ task inc_bram_cnt_msb; // Advances the MSB word counter; at index_last it returns to 0 and the wrapped flag is set.
begin
- _set_fat_bram_cnt_msb(fat_bram_xy_cnt_msb + 1'b1);
+ if (bram_xy_cnt_msb == index_last)
+ _set_bram_cnt_msb(8'd0, 1'b1);
+ else
+ _set_bram_cnt_msb(bram_xy_cnt_msb + 1'b1, bram_xy_cnt_msb_wrapped); // count up, wrapped flag keeps its current value
end
endtask
- task clr_fat_bram_cnt_lsb;
+ task clr_bram_cnt_lsb; // Resets the LSB word counter to 0 and clears its wrapped flag.
begin
- _set_fat_bram_cnt_lsb(8'd0);
+ _set_bram_cnt_lsb(8'd0, 1'b0);
end
endtask
- task clr_fat_bram_cnt_msb;
+
+ task clr_bram_cnt_msb; // Resets the MSB word counter to 0 and clears its wrapped flag.
begin
- _set_fat_bram_cnt_msb(8'd0);
+ _set_bram_cnt_msb(8'd0, 1'b0);
end
endtask
@@ -724,51 +946,53 @@ module modexpng_part_recombinator
always @(posedge clk)
// Counter reset and per-mode dispatch: both word counters are cleared while ena_x and ena_y are high together;
if (ena_x & ena_y) begin
- clr_fat_bram_cnt_lsb();
- clr_fat_bram_cnt_msb();
+ clr_bram_cnt_lsb();
+ clr_bram_cnt_msb();
end else begin // if not ready???
// otherwise rcmb_mode selects the recombination task (1 = square, 2 = triangle, 3 = rectangle); any other value runs nothing.
case (rcmb_mode)
2'd1: recombine_square();
2'd2: recombine_triangle();
+ 2'd3: recombine_rectangle();
endcase
//
end
task recombine_square;
+ //
begin
//
case (rcmb_xy_dout_valid)
//
- 2'b01: inc_fat_bram_cnt_lsb();
- 2'b10: inc_fat_bram_cnt_msb();
+ 2'b01: inc_bram_cnt_lsb();
+ 2'b10: inc_bram_cnt_msb();
2'b11: begin
- if (fat_bram_xy_cnt_lsb == index_last) clr_fat_bram_cnt_lsb();
- else inc_fat_bram_cnt_lsb();
- inc_fat_bram_cnt_msb();
+ inc_bram_cnt_lsb();
+ inc_bram_cnt_msb();
end
//
endcase
//
case (rcmb_xy_dout_valid)
//
- 2'b00: if (recomb_msb_cnt_delay_2 > 8'd0) set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
- else clear_fat_bram_regs();
- 2'b01: set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
- 2'b10: if (fat_bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs();
- else set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});
- 2'b11: if (fat_bram_xy_cnt_lsb < index_last) set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}});
- else set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ 2'b00: if (recomb_msb_flag_delay_2) set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
+ else clear_fat_bram_regs();
+ 2'b01: set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ 2'b10: if (bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs();
+ else set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});
+ 2'b11: if (bram_xy_cnt_lsb_wrapped) set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}});
+ else set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ default: clear_fat_bram_regs(); // DEBUG!!!
//
endcase
//
case (rcmb_xy_dout_valid)
//
- 2'b00: if (recomb_msb_cnt_delay_2 > 8'd0) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0);
- 2'b10: if (fat_bram_xy_cnt_msb < 8'd2) shift_recomb_msb_dout_carry(recomb_msb_dout);
- //
- 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, fat_bram_xy_cnt_msb);
- if (fat_bram_xy_cnt_lsb < index_last) shift_recomb_msb_dout_carry({16{1'bX}});
+ 2'b00: if (recomb_msb_flag_delay_2) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
+ 2'b10: if (bram_xy_cnt_msb < 8'd2) shift_recomb_msb_dout_carry(recomb_msb_dout);
+// //
+ 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1);
+ if (bram_xy_cnt_lsb_wrapped) shift_recomb_msb_dout_carry({16{1'bX}});
end
//
endcase
@@ -779,50 +1003,111 @@ module modexpng_part_recombinator
task recombine_triangle;
+ // Triangle-stage recombination: only LSB words (rcmb_xy_dout_valid == 2'b01) are stored; every other valid pattern clears the slim BRAM output registers.
begin
// Step 1: advance the LSB word counter for each valid LSB word.
case (rcmb_xy_dout_valid)
//
- 2'b01: begin inc_fat_bram_cnt_lsb();
- if (fat_bram_xy_cnt_lsb == index_last) inc_fat_bram_cnt_msb();
- end
- //
+ 2'b01: inc_bram_cnt_lsb();
+ //
endcase
// Step 2: route the LSB word to bank BANK_SLIM_Q at the counter address until the counter has wrapped, then to BANK_SLIM_EXT address 1.
case (rcmb_xy_dout_valid)
// The counter and wrapped flag read here are the pre-increment values, because step 1 updates them with non-blocking assignments.
- 2'b00: clear_fat_bram_regs();
- 2'b01: if (fat_bram_xy_cnt_msb == 8'd0) set_fat_bram_regs(BANK_FAT_Q, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
- else set_fat_bram_regs(BANK_FAT_Q_EXT, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ 2'b00: clear_slim_bram_regs();
+ 2'b01: if (!bram_xy_cnt_lsb_wrapped) set_slim_bram_regs(BANK_SLIM_Q, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ else set_slim_bram_regs(BANK_SLIM_EXT, 8'd1, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ 2'b10: clear_slim_bram_regs();
+ 2'b11: clear_slim_bram_regs();
//
endcase
//
end
+ //
endtask
+
+ task recombine_rectangle;
+ // Rectangle-stage recombination: writes recombined words to the fat BRAM banks BANK_FAT_ML, BANK_FAT_MH and BANK_FAT_EXT.
+ begin
+ // Step 1: advance the LSB and/or MSB word counter according to which recombinator outputs are valid.
+ case (rcmb_xy_dout_valid)
+ //
+ 2'b01: inc_bram_cnt_lsb();
+ 2'b10: inc_bram_cnt_msb();
+ 2'b11: begin
+ inc_bram_cnt_lsb();
+ inc_bram_cnt_msb();
+ end
+ //
+ endcase
+// Step 2: choose bank, address and data for the fat BRAM output (counters/flags are read with their pre-increment values).
+ case (rcmb_xy_dout_valid)
+// 00: flush delay stage 2 to MH while its flag is set; 01: LSB word -> ML; 10: MSB word -> MH (counts 0..1 not written) or EXT[0] after wrap; 11: LSB + carry_1 -> MH.
+ 2'b00: if (recomb_msb_flag_delay_2) set_fat_bram_regs(BANK_FAT_MH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
+ else clear_fat_bram_regs();
+ 2'b01: set_fat_bram_regs(BANK_FAT_ML, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+ 2'b10: if (!bram_xy_cnt_msb_wrapped) begin
+ if (bram_xy_cnt_msb < 8'd2) clear_fat_bram_regs();
+ else set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});
+ end else
+ set_fat_bram_regs(BANK_FAT_EXT, 8'd0, {2'b00, recomb_msb_dout}, {18{1'bX}});
+
+ 2'b11: set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}});
+// //
+ endcase
+// Step 3: maintain the MSB delay line (advance_recomb_msb_dout_delay) and the carry registers (shift_recomb_msb_dout_carry).
+ case (rcmb_xy_dout_valid)
+// 00 and wrapped-10 push an unflagged X entry into the delay line; un-wrapped 10 with count < 2 feeds the MSB word to the carry shifter instead.
+ 2'b00: if (recomb_msb_flag_delay_2) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
+ 2'b10: begin
+ if ((bram_xy_cnt_msb < 8'd2) && !bram_xy_cnt_msb_wrapped) shift_recomb_msb_dout_carry(recomb_msb_dout);
+ if (bram_xy_cnt_msb_wrapped) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
+ end
+// 11: the MSB word enters the delay line flagged, tagged with the MSB counter value, and the carry shifter is fed X.
+ 2'b11: begin advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1);
+ shift_recomb_msb_dout_carry({16{1'bX}});
+ end
+// //
+ endcase
+ //
+ end
+ //
+ endtask
always @(posedge clk)
// Ready generation: rdy_adv is dropped while ena_x and ena_y are high together and is re-evaluated only while rdy_reg is low.
if (ena_x & ena_y) begin
rdy_adv <= 1'b0;
- end else begin
+ end else if (!rdy_reg) begin
//
- case ({recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid})
+ case (rcmb_mode)
+ // Mode 1 (square): with no valid input and delay stage 2 still flagged, rdy_adv takes the inverse of delay stage 1's flag.
+ 2'd1: case (rcmb_xy_dout_valid)
+ //
+ 2'b00: begin
+ //
+ if (recomb_msb_flag_delay_2) begin
+ //
+ rdy_adv <= ~recomb_msb_flag_delay_1;
+ //
+ end
+ //
+ end
+ endcase
+ // Mode 2 (triangle): on each valid LSB word, rdy_adv takes the current value of the LSB counter's wrapped flag.
+ 2'd2: case (rcmb_xy_dout_valid)
+ //
+ 2'b01: rdy_adv <= bram_xy_cnt_lsb_wrapped; //
+ //
+ endcase
//
- 2'b00: begin
- //
- if (recomb_msb_cnt_delay_2 > 8'd0) begin
- //
- rdy_adv <= recomb_msb_cnt_delay_1 == 8'd0;
- //
- end
- //
- end
endcase
//
end
+ // TODO: add ready generation for mode 3 (rectangle); the block above never updates rdy_adv in that mode.
endmodule
More information about the Commits
mailing list