[Cryptech-Commits] [user/shatov/modexpng] branch master updated: Redesigned core architecture, unified bank structure. All storage blocks now have eight 4kbit entries and occupy one 36K BRAM tile.
git at cryptech.is
git at cryptech.is
Tue Oct 1 13:20:31 UTC 2019
This is an automated email from the git hooks/post-receive script.
meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexpng.
The following commit(s) were added to refs/heads/master by this push:
new 71f7025 Redesigned core architecture, unified bank structure. All storage blocks now have eight 4kbit entries and occupy one 36K BRAM tile.
71f7025 is described below
commit 71f70252dfc7e41103dde420a721be8aa48486d5
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Tue Oct 1 16:18:33 2019 +0300
Redesigned core architecture, unified bank structure. All storage blocks now
have eight 4kbit entries and occupy one 36K BRAM tile.
---
rtl/_modexpng_mmm_dual_x8.v | 961 +++++++++++++++++++++
rtl/_modexpng_mmm_fsm.vh | 43 +
...ator_block.v => _modexpng_recombinator_block.v} | 306 +++----
rtl/_modexpng_recombinator_cell.v | 40 +
rtl/{modexpng_reductor.v => _modexpng_reductor.v} | 158 ++--
...g_storage_block.v => _modexpng_storage_block.v} | 107 ++-
...orage_manager.v => _modexpng_storage_manager.v} | 129 ++-
rtl/dev/temp.txt | 384 --------
rtl/dsp/dsp_array.v | 143 ---
rtl/dsp/dsp_slice.v | 125 ---
rtl/modexpng_dsp48e1.vh | 8 +
rtl/modexpng_dsp_array_block.v | 72 +-
rtl/modexpng_dsp_slice_wrapper.v | 72 +-
...{modexpng_mmm_dual_x8.v => modexpng_mmm_dual.v} | 51 +-
rtl/modexpng_mmm_fsm_old.vh | 43 -
rtl/modexpng_parameters.vh | 71 ++
rtl/modexpng_parameters_old.vh | 40 -
rtl/modexpng_parameters_x8.vh | 4 +
rtl/modexpng_parameters_x8_old.vh | 1 -
rtl/modexpng_recombinator_block.v | 90 +-
rtl/modexpng_recombinator_cell.v | 35 -
rtl/modexpng_reductor.v | 36 +-
rtl/modexpng_sdp_36k_wrapper.v | 72 ++
rtl/modexpng_storage_block.v | 136 ++-
rtl/modexpng_storage_manager.v | 109 ++-
25 files changed, 1831 insertions(+), 1405 deletions(-)
diff --git a/rtl/_modexpng_mmm_dual_x8.v b/rtl/_modexpng_mmm_dual_x8.v
new file mode 100644
index 0000000..ffd5ccf
--- /dev/null
+++ b/rtl/_modexpng_mmm_dual_x8.v
@@ -0,0 +1,961 @@
+module modexpng_mmm_dual_x8
+(
+ clk, rst,
+
+ ena, rdy,
+
+ ladder_mode,
+ word_index_last,
+ word_index_last_minus1,
+
+ sel_wide_in,
+ sel_narrow_in,
+ sel_wide_out,
+ sel_narrow_out,
+
+ rd_wide_xy_ena,
+ rd_wide_xy_ena_aux,
+ rd_wide_xy_bank,
+ rd_wide_xy_bank_aux,
+ rd_wide_xy_addr,
+ rd_wide_xy_addr_aux,
+ rd_wide_x_dout,
+ rd_wide_y_dout,
+ rd_wide_x_dout_aux,
+ rd_wide_y_dout_aux,
+
+ rd_narrow_xy_ena,
+ rd_narrow_xy_bank,
+ rd_narrow_xy_addr,
+ rd_narrow_x_dout,
+ rd_narrow_y_dout,
+
+ rcmb_wide_xy_bank,
+ rcmb_wide_xy_addr,
+ rcmb_wide_x_dout,
+ rcmb_wide_y_dout,
+ rcmb_wide_xy_valid,
+
+ rcmb_narrow_xy_bank,
+ rcmb_narrow_xy_addr,
+ rcmb_narrow_x_dout,
+ rcmb_narrow_y_dout,
+ rcmb_narrow_xy_valid,
+
+ rcmb_xy_bank,
+ rcmb_xy_addr,
+ rcmb_x_dout,
+ rcmb_y_dout,
+ rcmb_xy_valid,
+
+ rdct_ena, rdct_rdy
+);
+
+
+ //
+ // Headers
+ //
+ `include "../rtl/modexpng_mmm_fsm.vh"
+ `include "../rtl/modexpng_parameters.vh"
+
+
+ //
+ // Ports
+ //
+ input clk;
+ input rst;
+
+ input ena;
+ output rdy;
+
+ input ladder_mode;
+ input [ OP_ADDR_W -1:0] word_index_last;
+ input [ OP_ADDR_W -1:0] word_index_last_minus1;
+
+ input [ BANK_ADDR_W -1:0] sel_wide_in;
+ input [ BANK_ADDR_W -1:0] sel_narrow_in;
+ input [ BANK_ADDR_W -1:0] sel_wide_out;
+ input [ BANK_ADDR_W -1:0] sel_narrow_out;
+
+ output rd_wide_xy_ena;
+ output rd_wide_xy_ena_aux;
+ output [ BANK_ADDR_W -1:0] rd_wide_xy_bank;
+ output [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
+ output [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr;
+ output [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux;
+ input [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout;
+ input [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout;
+ input [ WORD_EXT_W -1:0] rd_wide_x_dout_aux;
+ input [ WORD_EXT_W -1:0] rd_wide_y_dout_aux;
+
+ output rd_narrow_xy_ena;
+ output [ BANK_ADDR_W -1:0] rd_narrow_xy_bank;
+ output [ OP_ADDR_W -1:0] rd_narrow_xy_addr;
+ input [ WORD_EXT_W -1:0] rd_narrow_x_dout;
+ input [ WORD_EXT_W -1:0] rd_narrow_y_dout;
+
+ output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
+ output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr;
+ output [ WORD_EXT_W -1:0] rcmb_wide_x_dout;
+ output [ WORD_EXT_W -1:0] rcmb_wide_y_dout;
+ output rcmb_wide_xy_valid;
+
+ output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
+ output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
+ output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout;
+ output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout;
+ output rcmb_narrow_xy_valid;
+
+ output [ BANK_ADDR_W -1:0] rcmb_xy_bank;
+ output [ OP_ADDR_W -1:0] rcmb_xy_addr;
+ output [ WORD_EXT_W -1:0] rcmb_x_dout;
+ output [ WORD_EXT_W -1:0] rcmb_y_dout;
+ output rcmb_xy_valid;
+
+ output rdct_ena;
+ input rdct_rdy;
+
+
+ //
+ // FSM Declaration
+ //
+ reg [MMM_FSM_STATE_W-1:0] fsm_state = MMM_FSM_STATE_IDLE;
+ reg [MMM_FSM_STATE_W-1:0] fsm_state_next;
+
+ wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_square;
+ wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_triangle;
+ wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_rectangle;
+
+
+ //
+ // FSM Process
+ //
+ // State register: a synchronous reset forces IDLE, otherwise the
+ // combinationally computed next state is loaded on each rising clock edge.
+ always @(posedge clk) begin
+     if (rst) begin
+         fsm_state <= MMM_FSM_STATE_IDLE;
+     end else begin
+         fsm_state <= fsm_state_next;
+     end
+ end
+
+
+ //
+ // Storage Control Interface
+ //
+ reg wide_xy_ena = 1'b0;
+ reg wide_xy_ena_aux = 1'b0;
+ reg [ BANK_ADDR_W -1:0] wide_xy_bank;
+ reg [ BANK_ADDR_W -1:0] wide_xy_bank_aux;
+ reg [OP_ADDR_W -1:0] wide_xy_addr[0:3];
+ reg [OP_ADDR_W -1:0] wide_xy_addr_aux;
+
+ reg narrow_xy_ena = 1'b0;
+ reg [ BANK_ADDR_W -1:0] narrow_xy_bank;
+ reg [OP_ADDR_W -1:0] narrow_xy_addr;
+ reg [OP_ADDR_W -1:0] narrow_xy_addr_dly;
+
+ assign rd_wide_xy_ena = wide_xy_ena;
+ assign rd_wide_xy_ena_aux = wide_xy_ena_aux;
+ assign rd_wide_xy_bank = wide_xy_bank;
+ assign rd_wide_xy_bank_aux = wide_xy_bank_aux;
+ assign rd_wide_xy_addr_aux = wide_xy_addr_aux;
+
+ assign rd_narrow_xy_ena = narrow_xy_ena;
+ assign rd_narrow_xy_bank = narrow_xy_bank;
+ assign rd_narrow_xy_addr = narrow_xy_addr;
+
+ genvar z;
+ generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
+ begin : gen_rd_wide_xy_addr
+ assign rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W] = wide_xy_addr[z];
+ end
+ endgenerate
+
+ //
+ // Column Counter
+ //
+ reg [4:0] col_index; // current column index
+ reg [4:0] col_index_prev; // delayed column index value
+ reg [4:0] col_index_last; // index of the very last column
+ reg [4:0] col_index_next; // precomputed next column index
+ reg col_is_last; // flag set during the very last column
+
+ always @(posedge clk)
+ //
+ col_index_prev <= col_index;
+
+ //
+ // Column Counter Increment Logic
+ //
+ // Column bookkeeping, keyed on the state the FSM is about to enter.
+ // NOTE(review): the COL_0_INIT arm always clears col_is_last, so an operand
+ // that fits in a single column looks unsupported here -- confirm with caller.
+ always @(posedge clk) begin
+     case (fsm_state_next)
+
+         // first column of a pass: restart the counter and capture the index
+         // of the last column from the upper bits of word_index_last
+         MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
+             col_is_last    <= 1'b0;
+             col_index_last <= word_index_last[7:3];
+             col_index      <= 5'd0;
+             col_index_next <= 5'd1;
+         end
+
+         // every following column: step to the precomputed index, flag the
+         // last column, and precompute the next index (wrapping to zero)
+         MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
+             col_index_next <= (col_index_next == col_index_last) ? 5'd0 : (col_index_next + 5'd1);
+             col_is_last    <= (col_index_next == col_index_last);
+             col_index      <= col_index_next;
+         end
+
+         // any other state: all column registers hold their values
+         default: ;
+
+     endcase
+ end
+
+
+ //
+ // Completion Flags
+ //
+ wire square_almost_done_comb;
+ reg square_almost_done_flop = 1'b0;
+ reg square_surely_done_flop = 1'b0;
+
+ wire triangle_almost_done_comb;
+ reg triangle_almost_done_flop = 1'b0;
+ reg triangle_surely_done_flop = 1'b0;
+ reg triangle_tardy_done_flop = 1'b0;
+
+ wire rectangle_almost_done_comb;
+ reg rectangle_almost_done_flop = 1'b0;
+ reg rectangle_surely_done_flop = 1'b0;
+ reg rectangle_tardy_done_flop = 1'b0;
+
+ assign square_almost_done_comb = narrow_xy_addr == word_index_last_minus1;
+ assign triangle_almost_done_comb = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index);
+ assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1;
+
+ //
+ // Square Completion Flags
+ //
+ // "almost done" samples the combinational compare only while a square
+ // column is busy; "surely done" is the same flag one clock later.
+ always @(posedge clk) begin
+     // one-cycle delayed copy
+     square_surely_done_flop <= square_almost_done_flop;
+     //
+     case (fsm_state)
+         MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+         MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: square_almost_done_flop <= square_almost_done_comb;
+         default:                              square_almost_done_flop <= 1'b0;
+     endcase
+ end
+
+ //
+ // Triangle Completion Flags
+ //
+ // "almost done" samples the combinational compare only while a triangle
+ // column is busy; "surely" and "tardy" are one and two clocks behind it.
+ always @(posedge clk) begin
+     // two-stage delay chain
+     triangle_tardy_done_flop  <= triangle_surely_done_flop;
+     triangle_surely_done_flop <= triangle_almost_done_flop;
+     //
+     case (fsm_state)
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: triangle_almost_done_flop <= triangle_almost_done_comb;
+         default:                                triangle_almost_done_flop <= 1'b0;
+     endcase
+ end
+
+ //
+ // Rectangle Completion Flags
+ //
+ // "almost done" samples the combinational compare only while a rectangle
+ // column is busy; "surely" and "tardy" are one and two clocks behind it.
+ always @(posedge clk) begin
+     // two-stage delay chain
+     rectangle_tardy_done_flop  <= rectangle_surely_done_flop;
+     rectangle_surely_done_flop <= rectangle_almost_done_flop;
+     //
+     case (fsm_state)
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: rectangle_almost_done_flop <= rectangle_almost_done_comb;
+         default:                                 rectangle_almost_done_flop <= 1'b0;
+     endcase
+ end
+
+
+ //
+ // Narrow Storage Control Logic
+ //
+ // Registers the narrow-storage read address, bank and enable, all keyed on
+ // the state the FSM is about to enter. Only the enable is cleared by reset;
+ // address and bank are left untouched while rst is high.
+ always @(posedge clk)
+     //
+     if (rst) narrow_xy_ena <= 1'b0;
+     else begin
+         //
+         // Narrow Address: cleared on *_INIT, then incremented each cycle until
+         // the phase-specific completion flag restarts it.
+         //
+         case (fsm_state_next)
+             //
+             MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+             MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: narrow_xy_addr <= 8'd0;
+             MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+             MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+             MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+             MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0;
+             //
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ?
+                                                     8'd0 : narrow_xy_addr + 1'b1;
+             //
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
+                                                      8'd1 : narrow_xy_addr + 1'b1;
+             //
+             default: narrow_xy_addr <= 8'dX;
+             //
+         endcase
+         //
+         // Narrow Bank: the square phase reads the caller-selected bank; the
+         // triangle and rectangle phases read a fixed bank and switch to
+         // BANK_NARROW_EXT once their completion flags are raised.
+         //
+         case (fsm_state_next)
+             //
+             MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+             MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+             MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+             MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+             MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+             MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= sel_narrow_in;
+             //
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ?
+                                                     BANK_NARROW_EXT : BANK_NARROW_COEFF;
+             //
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
+                                                      BANK_NARROW_EXT : BANK_NARROW_Q;
+             //
+             // don't-care sized from BANK_ADDR_W: a shorter sized literal would be
+             // zero-extended and force the upper bank bit(s) to 0 instead of X
+             default: narrow_xy_bank <= {BANK_ADDR_W{1'bX}};
+             //
+         endcase
+         //
+         // Narrow Enable: asserted through INIT/TRIG, then dropped in BUSY by
+         // the phase-specific completion flag; off in every other state.
+         //
+         case (fsm_state_next)
+             //
+             MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+             MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+             MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+             MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
+             MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+             MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop;
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+             MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop;
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop;
+             //
+             default: narrow_xy_ena <= 1'b0;
+             //
+         endcase
+         //
+     end
+
+
+ //
+ // Wide Storage Control Logic
+ //
+
+ wire [2:0] wide_offset_rom[0:3];
+
+ generate for (z=1; z<NUM_MULTS; z=z+2)
+ begin : gen_wide_offset_rom
+ assign wide_offset_rom[(z-1)/2] = z[2:0];
+ end
+ endgenerate
+
+ // Next wide read address: counts down by one, and reloads from
+ // wide_xy_addr_last once the current address is no longer above zero.
+ function [7:0] wide_xy_addr_next;
+     input [7:0] wide_xy_addr_current;
+     input [7:0] wide_xy_addr_last;
+     begin
+         // start from the wrap-around value ...
+         wide_xy_addr_next = wide_xy_addr_last;
+         // ... and override it with the decrement while there is room to count down
+         if (wide_xy_addr_current > 8'd0) begin
+             wide_xy_addr_next = wide_xy_addr_current - 1'b1;
+         end
+     end
+ endfunction
+
+ integer j;
+ // Registers the wide-storage read controls (per-multiplier addresses, aux
+ // address, bank, aux bank, enable, aux enable), all keyed on the state the
+ // FSM is about to enter. Reset clears only the two enables; addresses and
+ // banks are not updated while rst is high.
+ always @(posedge clk)
+ //
+ if (rst) begin
+ wide_xy_ena <= 1'b0;
+ wide_xy_ena_aux <= 1'b0;
+ end else begin
+ //
+ // Wide Address
+ //
+ // One address register per loop index j: *_INIT loads {column, offset}
+ // from wide_offset_rom, TRIG/BUSY step it through wide_xy_addr_next().
+ for (j=0; j<(NUM_MULTS/2); j=j+1)
+ //
+ case (fsm_state_next)
+ //
+ // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
+ //
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
+ //
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
+ //
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
+ //
+ default: wide_xy_addr[j] <= 8'dX;
+ endcase
+ //
+ // Wide Aux Address
+ //
+ // Square/triangle: starts at word 1 on every *_INIT regardless of column,
+ // then steps like the main addresses. Rectangle: follows rcmb_xy_addr
+ // whenever rcmb_xy_valid is high, don't-care otherwise.
+ case (fsm_state_next)
+ //
+ // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
+ //
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1};
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1};
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
+ //
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1};
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1};
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
+ //
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0};
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
+ MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX;
+ //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ?
+ //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4];
+ //
+ default: wide_xy_addr_aux <= 8'dX;
+ endcase
+ //
+ // Wide Bank
+ //
+ // Square reads the caller-selected bank, triangle reads BANK_WIDE_L,
+ // rectangle reads BANK_WIDE_N.
+ case (fsm_state_next)
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= sel_wide_in;
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_L; // ? combine ?
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_L;
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N;
+ default: wide_xy_bank <= 3'bXXX;
+ endcase
+ //
+ // Wide Aux Bank
+ //
+ // Triangle uses BANK_WIDE_H during INIT/TRIG and BANK_WIDE_L during BUSY.
+ // Rectangle maps the recombinator's output bank (ML -> L, MH -> H) while
+ // rcmb_xy_valid is high, don't-care otherwise.
+ case (fsm_state_next)
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= sel_wide_in;
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H;
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_L;
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
+ MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's)
+ case (rcmb_xy_bank)
+ BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L;
+ BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H;
+ //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX
+ default: wide_xy_bank_aux <= 3'bXXX;
+ endcase
+ else wide_xy_bank_aux <= 3'bXXX;
+ default: wide_xy_bank_aux <= 3'bXXX;
+ endcase
+ //
+ // Wide Enable
+ //
+ // High in every INIT/TRIG/BUSY state of all three phases, low elsewhere
+ // (including the HOLDOFF states).
+ case (fsm_state_next)
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1;
+ default: wide_xy_ena <= 1'b0;
+ endcase
+ //
+ // Wide Aux Enable
+ //
+ // Not asserted during the square phase (falls to default). Always on in
+ // the triangle phase; in the rectangle phase it tracks rcmb_xy_valid.
+ case (fsm_state_next)
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1;
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1;
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
+ MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML);
+ default: wide_xy_ena_aux <= 1'b0;
+ endcase
+ //
+ end
+
+
+ //
+ // Delay Lines
+ //
+ always @(posedge clk)
+ //
+ narrow_xy_addr_dly <= narrow_xy_addr;
+
+
+ //
+ // DSP Array Logic
+ //
+ reg dsp_xy_ce_a = 1'b0;
+ reg dsp_xy_ce_b = 1'b0;
+ reg dsp_xy_ce_b_dly = 1'b0;
+ reg dsp_xy_ce_m = 1'b0;
+ reg dsp_xy_ce_p = 1'b0;
+ reg dsp_xy_ce_mode = 1'b0;
+
+ reg [9 -1:0] dsp_xy_mode_z = {9{1'b1}};
+
+ wire [5*18-1:0] dsp_x_a;
+ wire [5*18-1:0] dsp_y_a;
+
+ reg [1*16-1:0] dsp_x_b;
+ reg [1*16-1:0] dsp_y_b;
+
+ reg [ 1:0] dsp_xy_b_carry;
+
+ wire [9*47-1:0] dsp_x_p;
+ wire [9*47-1:0] dsp_y_p;
+
+ //generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+ //begin : gen_dsp_xy_a_split
+ //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z];
+ //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z];
+ //end
+ //endgenerate
+
+ assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout};
+ assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout};
+
+ //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux;
+ //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux;
+
+ always @(posedge clk)
+ //
+ dsp_xy_ce_b_dly <= dsp_xy_ce_b;
+
+
+ modexpng_dsp_array_block dsp_array_block_x
+ (
+ .clk (clk),
+
+ .ce_a (dsp_xy_ce_a),
+ .ce_b (dsp_xy_ce_b),
+ .ce_m (dsp_xy_ce_m),
+ .ce_p (dsp_xy_ce_p),
+ .ce_mode (dsp_xy_ce_mode),
+
+ .mode_z (dsp_xy_mode_z),
+
+ .a (dsp_x_a),
+ .b (dsp_x_b),
+ .p (dsp_x_p)
+ );
+
+ modexpng_dsp_array_block dsp_array_block_y
+ (
+ .clk (clk),
+
+ .ce_a (dsp_xy_ce_a),
+ .ce_b (dsp_xy_ce_b),
+ .ce_m (dsp_xy_ce_m),
+ .ce_p (dsp_xy_ce_p),
+ .ce_mode (dsp_xy_ce_mode),
+
+ .mode_z (dsp_xy_mode_z),
+
+ .a (dsp_y_a),
+ .b (dsp_y_b),
+ .p (dsp_y_p)
+ );
+
+
+
+
+ //
+ // DSP Control Logic
+ //
+ reg narrow_xy_ena_dly1 = 1'b0;
+ reg narrow_xy_ena_dly2 = 1'b0;
+
+ // Clock-enable pipeline for the DSP arrays: narrow_xy_ena is delayed by one
+ // and two clocks, and the A/B/M/P/MODE enables are derived from those taps.
+ always @(posedge clk) begin
+     if (rst) begin
+         // enables
+         dsp_xy_ce_mode     <= 1'b0;
+         dsp_xy_ce_p        <= 1'b0;
+         dsp_xy_ce_m        <= 1'b0;
+         dsp_xy_ce_b        <= 1'b0;
+         dsp_xy_ce_a        <= 1'b0;
+         // delay taps
+         narrow_xy_ena_dly2 <= 1'b0;
+         narrow_xy_ena_dly1 <= 1'b0;
+     end else begin
+         // delay taps
+         narrow_xy_ena_dly2 <= narrow_xy_ena_dly1;
+         narrow_xy_ena_dly1 <= narrow_xy_ena;
+         // A is enabled while either tap is high, B only on the second tap
+         dsp_xy_ce_a        <= (narrow_xy_ena_dly1 | narrow_xy_ena_dly2);
+         dsp_xy_ce_b        <= narrow_xy_ena_dly2;
+         // M and MODE follow the delayed B enable, P follows M
+         dsp_xy_ce_mode     <= dsp_xy_ce_b_dly;
+         dsp_xy_ce_m        <= dsp_xy_ce_b_dly;
+         dsp_xy_ce_p        <= dsp_xy_ce_m;
+     end
+ end
+
+ //
+ // DSP Feed Logic
+ //
+ reg dsp_merge_xy_b;
+
+ // Merge flag for the B operand feed: set when the square phase triggers its
+ // first column, cleared when the triangle phase does; held in between.
+ always @(posedge clk) begin
+     case (fsm_state)
+         MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG:   dsp_merge_xy_b <= 1'b1;
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0;
+         default:                                dsp_merge_xy_b <= dsp_merge_xy_b;
+     endcase
+ end
+
+ //
+ // On-the-fly Carry Recombination
+ //
+ wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry};
+ wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry};
+ wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry;
+
+ // B operand registers for the two DSP arrays. While the twice-delayed narrow
+ // enable is high, either each lane takes the low 16 bits of its own narrow
+ // word (merge flag clear), or both lanes take the low 16 bits of the
+ // ladder_mode-selected word plus running carry, with bits [17:16] of that sum
+ // kept as the next carry (merge flag set). Otherwise the operands are
+ // don't-care and the carry is cleared.
+ always @(posedge clk) begin
+     if (narrow_xy_ena_dly2) begin // rewrite
+         if (!dsp_merge_xy_b) begin
+             // independent lanes, no carry chain
+             dsp_xy_b_carry <= 2'b00;
+             dsp_y_b        <= rd_narrow_y_dout[15:0];
+             dsp_x_b        <= rd_narrow_x_dout[15:0];
+         end else begin
+             // shared word with on-the-fly carry propagation
+             dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16];
+             dsp_y_b        <= rd_narrow_xy_dout_carry_mux[15:0];
+             dsp_x_b        <= rd_narrow_xy_dout_carry_mux[15:0];
+         end
+     end else begin
+         // idle: operands are don't-care, carry restarts from zero
+         dsp_xy_b_carry <= 2'b00;
+         dsp_y_b        <= 16'hXXXX;
+         dsp_x_b        <= 16'hXXXX;
+     end
+ end
+
+
+ reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}};
+ reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}};
+ reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}};
+ reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}};
+
+ // Returns a (NUM_MULTS+1)-bit vector of ones. When the upper five address
+ // bits equal the column index, the bit selected by the lower three address
+ // bits is cleared instead (bit 8 always stays set).
+ function [NUM_MULTS:0] calc_mac_mode_z_square;
+     input [ 4:0] col_index_value;
+     input [ 7:0] narrow_xy_addr_value;
+     begin
+         if (narrow_xy_addr_value[7:3] == col_index_value)
+             // address lies in this column: clear exactly one of the low eight bits
+             case (narrow_xy_addr_value[2:0])
+                 3'd0: calc_mac_mode_z_square = 9'b1_1111_1110;
+                 3'd1: calc_mac_mode_z_square = 9'b1_1111_1101;
+                 3'd2: calc_mac_mode_z_square = 9'b1_1111_1011;
+                 3'd3: calc_mac_mode_z_square = 9'b1_1111_0111;
+                 3'd4: calc_mac_mode_z_square = 9'b1_1110_1111;
+                 3'd5: calc_mac_mode_z_square = 9'b1_1101_1111;
+                 3'd6: calc_mac_mode_z_square = 9'b1_1011_1111;
+                 3'd7: calc_mac_mode_z_square = 9'b1_0111_1111;
+             endcase
+         else
+             // address lies in another column: every bit stays set
+             calc_mac_mode_z_square = {(NUM_MULTS+1){1'b1}};
+     end
+ endfunction
+
+ // Returns a (NUM_MULTS+1)-bit vector of ones. When the upper five address
+ // bits equal the column index, the bit selected by the lower three address
+ // bits is cleared instead (bit 8 always stays set).
+ function [NUM_MULTS:0] calc_mac_mode_z_rectangle;
+     input [ 4:0] col_index_value;
+     input [ 7:0] narrow_xy_addr_value;
+     begin
+         if (narrow_xy_addr_value[7:3] == col_index_value)
+             // address lies in this column: clear exactly one of the low eight bits
+             case (narrow_xy_addr_value[2:0])
+                 3'd0: calc_mac_mode_z_rectangle = 9'b1_1111_1110;
+                 3'd1: calc_mac_mode_z_rectangle = 9'b1_1111_1101;
+                 3'd2: calc_mac_mode_z_rectangle = 9'b1_1111_1011;
+                 3'd3: calc_mac_mode_z_rectangle = 9'b1_1111_0111;
+                 3'd4: calc_mac_mode_z_rectangle = 9'b1_1110_1111;
+                 3'd5: calc_mac_mode_z_rectangle = 9'b1_1101_1111;
+                 3'd6: calc_mac_mode_z_rectangle = 9'b1_1011_1111;
+                 3'd7: calc_mac_mode_z_rectangle = 9'b1_0111_1111;
+             endcase
+         else
+             // address lies in another column: every bit stays set
+             calc_mac_mode_z_rectangle = {(NUM_MULTS+1){1'b1}};
+     end
+ endfunction
+
+ // Earliest stage of the mode_z pipeline, keyed on the upcoming FSM state:
+ // all zeroes on every *_TRIG, all ones during triangle BUSY and in any other
+ // state, and the per-address value from the calc_mac_mode_z_* helpers during
+ // square/rectangle BUSY.
+ always @(posedge clk) begin
+     case (fsm_state_next)
+         // square
+         MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+         MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:    dsp_xy_mode_z_adv4 <= 9'b0_0000_0000;
+         MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+         MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:    dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly);
+         // triangle
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:  dsp_xy_mode_z_adv4 <= 9'b0_0000_0000;
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  dsp_xy_mode_z_adv4 <= 9'b1_1111_1111;
+         // rectangle
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= 9'b0_0000_0000;
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly);
+         // everything else
+         default:                                 dsp_xy_mode_z_adv4 <= 9'b1_1111_1111;
+     endcase
+ end
+
+ // Four-stage delay line: adv4 -> adv3 -> adv2 -> adv1 -> dsp_xy_mode_z,
+ // one stage per clock.
+ always @(posedge clk) begin
+     dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4;
+     dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3;
+     dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2;
+     dsp_xy_mode_z      <= dsp_xy_mode_z_adv1;
+ end
+
+
+
+
+
+ //
+ // Recombinator
+ //
+ reg rcmb_ena = 1'b0;
+ wire rcmb_rdy;
+
+ modexpng_recombinator_block recombinator_block
+ (
+ .clk (clk),
+ .rst (rst),
+
+ .ena (rcmb_ena),
+ .rdy (rcmb_rdy),
+
+ .mmm_fsm_state_next (fsm_state_next),
+
+ .word_index_last (word_index_last),
+
+ .dsp_xy_ce_p (dsp_xy_ce_p),
+ .dsp_x_p (dsp_x_p),
+ .dsp_y_p (dsp_y_p),
+
+ .col_index (col_index),
+ .col_index_last (col_index_last),
+
+ .rd_narrow_xy_addr (narrow_xy_addr),
+ .rd_narrow_xy_bank (narrow_xy_bank),
+
+ .rcmb_wide_xy_bank (rcmb_wide_xy_bank),
+ .rcmb_wide_xy_addr (rcmb_wide_xy_addr),
+ .rcmb_wide_x_dout (rcmb_wide_x_dout),
+ .rcmb_wide_y_dout (rcmb_wide_y_dout),
+ .rcmb_wide_xy_valid (rcmb_wide_xy_valid),
+
+ .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank),
+ .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr),
+ .rcmb_narrow_x_dout (rcmb_narrow_x_dout),
+ .rcmb_narrow_y_dout (rcmb_narrow_y_dout),
+ .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid),
+
+ .rdct_narrow_xy_bank (rcmb_xy_bank),
+ .rdct_narrow_xy_addr (rcmb_xy_addr),
+ .rdct_narrow_x_dout (rcmb_x_dout),
+ .rdct_narrow_y_dout (rcmb_y_dout),
+ .rdct_narrow_xy_valid (rcmb_xy_valid)
+
+ );
+
+
+ //
+ // Recombinator Enable Logic
+ //
+ // Recombinator enable: registered, high for the cycle after the A enable
+ // is up while none of the B, M and P enables are.
+ always @(posedge clk) begin
+     if (rst) begin
+         rcmb_ena <= 1'b0;
+     end else begin
+         rcmb_ena <= dsp_xy_ce_a && !(dsp_xy_ce_b || dsp_xy_ce_m || dsp_xy_ce_p);
+     end
+ end
+
+
+ //
+ // Handy Completion Flags
+ //
+ wire square_done = square_surely_done_flop;
+ wire triangle_done = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop;
+ wire rectangle_done = rectangle_tardy_done_flop;
+
+
+ //
+ // FSM Transition Logic
+ //
+ assign fsm_state_after_mult_square = col_is_last ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT;
+ assign fsm_state_after_mult_triangle = col_is_last ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
+ assign fsm_state_after_mult_rectangle = col_is_last ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
+
+ // Next-state function. Each phase (square, triangle, rectangle) walks
+ // INIT -> TRIG -> BUSY for column 0, repeats INIT -> TRIG -> BUSY for the
+ // remaining columns, then waits in HOLDOFF for the recombinator. Any state
+ // without an arm below (for example MMM_FSM_STATE_STOP) keeps the IDLE
+ // default assigned first.
+ // NOTE(review): no assignment to the `rdy` output is visible in this module.
+ always @* begin
+     //
+     // default for every state not listed in the case below
+     //
+     fsm_state_next = MMM_FSM_STATE_IDLE;
+     //
+     case (fsm_state)
+
+         MMM_FSM_STATE_IDLE:
+             fsm_state_next = ena ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT : MMM_FSM_STATE_IDLE;
+
+         // ---- square phase ----
+         MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:
+             fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG;
+         MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG:
+             fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY;
+         MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY:
+             fsm_state_next = square_done ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY;
+         MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:
+             fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG;
+         MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:
+             fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY;
+         MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:
+             fsm_state_next = square_done ? fsm_state_after_mult_square : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY;
+         MMM_FSM_STATE_MULT_SQUARE_HOLDOFF:
+             fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF;
+
+         // ---- triangle phase ----
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:
+             fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG;
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG:
+             fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:
+             fsm_state_next = triangle_done ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:
+             fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG;
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:
+             fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
+         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
+             fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
+         MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF:
+             fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF;
+
+         // ---- rectangle phase ----
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:
+             fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG;
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG:
+             fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY:
+             fsm_state_next = rectangle_done ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT:
+             fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG;
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:
+             fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
+         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
+             fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
+         MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:
+             fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF;
+
+     endcase
+     //
+ end
+
+
+ //
+ // Reductor Control Logic
+ //
+ // Registered enable for the reductor: high for the clock following each
+ // cycle the FSM spends in MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT (a state
+ // the FSM leaves unconditionally after one cycle), low otherwise.
+ reg rdct_ena_reg = 1'b0;
+
+ assign rdct_ena = rdct_ena_reg;
+
+ // Synchronous reset, matching the other control registers in this module:
+ // without it the enable could still assert while rst is high if fsm_state
+ // had not yet returned to IDLE.
+ always @(posedge clk)
+     //
+     if (rst) rdct_ena_reg <= 1'b0;
+     else
+         case (fsm_state)
+             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1;
+             default:                                 rdct_ena_reg <= 1'b0;
+         endcase
+
+
+
+endmodule
diff --git a/rtl/_modexpng_mmm_fsm.vh b/rtl/_modexpng_mmm_fsm.vh
new file mode 100644
index 0000000..1c2a57b
--- /dev/null
+++ b/rtl/_modexpng_mmm_fsm.vh
@@ -0,0 +1,43 @@
+localparam MMM_FSM_STATE_W = 32; // width in bits of every state constant below; states are plain integers 0..40, not one-hot
+// Numbering is grouped by phase: 1-6 load, 11-17 square, 21-27 triangle, 31-37 rectangle, 40 stop; values in between are unused.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_IDLE = 0;
+// Load of T1/T2, three consecutive state codes.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_1 = 1;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_2 = 2;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_3 = 3;
+// Load of N and N_COEFF, three consecutive state codes.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_1 = 4;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_2 = 5;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_3 = 6;
+// "Square" multiplication phase, first column (COL_0): INIT / TRIG / BUSY.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT = 11;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG = 12;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY = 13;
+// "Square" phase, COL_N states: INIT / TRIG / BUSY.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT = 14;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16;
+// "Square" phase hold-off state.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_HOLDOFF = 17;
+// "Triangle" multiplication phase, first column (COL_0): INIT / TRIG / BUSY.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23;
+// "Triangle" phase, COL_N states: INIT / TRIG / BUSY.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26;
+// "Triangle" phase hold-off state.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27;
+// "Rectangle" multiplication phase, first column (COL_0): INIT / TRIG / BUSY.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33;
+// "Rectangle" phase, COL_N states: INIT / TRIG / BUSY.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36;
+// "Rectangle" phase hold-off state.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37;
+// Final state code of the sequence.
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_STOP = 40;
diff --git a/rtl/modexpng_recombinator_block.v b/rtl/_modexpng_recombinator_block.v
similarity index 81%
copy from rtl/modexpng_recombinator_block.v
copy to rtl/_modexpng_recombinator_block.v
index d6b1ad1..61bf734 100644
--- a/rtl/modexpng_recombinator_block.v
+++ b/rtl/_modexpng_recombinator_block.v
@@ -2,7 +2,7 @@ module modexpng_recombinator_block
(
clk, rst,
ena, rdy,
- fsm_state_next,
+ mmm_fsm_state_next,
word_index_last,
dsp_xy_ce_p,
dsp_x_p, dsp_y_p,
@@ -17,63 +17,62 @@ module modexpng_recombinator_block
//
// Headers
//
- `include "../rtl_1/modexpng_mmm_fsm_old.vh"
- `include "../rtl_1/modexpng_parameters_old.vh"
- `include "../rtl_1/modexpng_parameters_x8_old.vh"
-
-
- input clk;
- input rst;
- input ena;
- output rdy;
- input [FSM_STATE_WIDTH-1:0] fsm_state_next;
- input [7:0] word_index_last;
- input dsp_xy_ce_p;
- input [9*47-1:0] dsp_x_p;
- input [9*47-1:0] dsp_y_p;
- input [ 4:0] col_index;
- input [ 4:0] col_index_last;
+ `include "../rtl/modexpng_parameters.vh"
+ `include "../rtl/modexpng_mmm_fsm.vh"
+
+
+ input clk;
+ input rst;
+ input ena;
+ output rdy;
+ input [ MMM_FSM_STATE_W -1:0] mmm_fsm_state_next;
+ input [ OP_ADDR_W -1:0] word_index_last;
+ input dsp_xy_ce_p;
+ input [(NUM_MULTS+1) * MAC_W -1:0] dsp_x_p;
+ input [(NUM_MULTS+1) * MAC_W -1:0] dsp_y_p;
+ input [ COL_INDEX_W -1:0] col_index;
+ input [ COL_INDEX_W -1:0] col_index_last;
- input [ 7:0] rd_narrow_xy_addr;
- input [ 1:0] rd_narrow_xy_bank;
+ input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] rd_narrow_xy_addr;
- output [ 1:0] rcmb_wide_xy_bank;
- output [ 7:0] rcmb_wide_xy_addr;
- output [ 17:0] rcmb_wide_x_dout;
- output [ 17:0] rcmb_wide_y_dout;
- output rcmb_wide_xy_valid;
+ output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
+ output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr;
+ output [ WORD_EXT_W -1:0] rcmb_wide_x_dout;
+ output [ WORD_EXT_W -1:0] rcmb_wide_y_dout;
+ output rcmb_wide_xy_valid;
- output [ 1:0] rcmb_narrow_xy_bank;
- output [ 7:0] rcmb_narrow_xy_addr;
- output [ 17:0] rcmb_narrow_x_dout;
- output [ 17:0] rcmb_narrow_y_dout;
- output rcmb_narrow_xy_valid;
+ output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
+ output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
+ output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout;
+ output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout;
+ output rcmb_narrow_xy_valid;
- output [ 1:0] rdct_narrow_xy_bank;
- output [ 7:0] rdct_narrow_xy_addr;
- output [ 17:0] rdct_narrow_x_dout;
- output [ 17:0] rdct_narrow_y_dout;
- output rdct_narrow_xy_valid;
+ output [ BANK_ADDR_W -1:0] rdct_narrow_xy_bank;
+ output [ OP_ADDR_W -1:0] rdct_narrow_xy_addr;
+ output [ WORD_EXT_W -1:0] rdct_narrow_x_dout;
+ output [ WORD_EXT_W -1:0] rdct_narrow_y_dout;
+ output rdct_narrow_xy_valid;
//
// Latches
//
- reg [1*47-1:0] dsp_x_p_latch[0:8];
- reg [1*47-1:0] dsp_y_p_latch[0:8];
+ reg [MAC_W-1:0] dsp_x_p_latch[0:NUM_MULTS];
+ reg [MAC_W-1:0] dsp_y_p_latch[0:NUM_MULTS];
//
// Mapping
//
- wire [46:0] dsp_x_p_split[0:8];
- wire [46:0] dsp_y_p_split[0:8];
+ wire [MAC_W-1:0] dsp_x_p_split[0:NUM_MULTS];
+ wire [MAC_W-1:0] dsp_y_p_split[0:NUM_MULTS];
genvar z;
- generate for (z=0; z<(NUM_MULTS+1); z=z+1)
+ generate for (z=0; z<NUM_MULTS_AUX; z=z+1)
begin : gen_dsp_xy_p_split
- assign dsp_x_p_split[z] = dsp_x_p[47*z+:47];
- assign dsp_y_p_split[z] = dsp_y_p[47*z+:47];
+ assign dsp_x_p_split[z] = dsp_x_p[z*MAC_W +: MAC_W];
+ assign dsp_y_p_split[z] = dsp_y_p[z*MAC_W +: MAC_W];
end
endgenerate
@@ -99,8 +98,8 @@ module modexpng_recombinator_block
reg xy_valid_msb = 1'b0;
// bitmap
- reg [7:0] xy_bitmap_lsb = {8{1'b0}};
- reg [7:0] xy_bitmap_msb = {8{1'b0}};
+ reg [NUM_MULTS-1:0] xy_bitmap_lsb = {NUM_MULTS{1'b0}};
+ reg [NUM_MULTS-1:0] xy_bitmap_msb = {NUM_MULTS{1'b0}};
// index
reg [2:0] xy_index_lsb = 3'dX;
@@ -116,11 +115,11 @@ module modexpng_recombinator_block
reg xy_aux_latch_lsb = 1'b0;
// bitmap - latch
- reg [7:0] xy_bitmap_latch_lsb = {8{1'b0}};
- reg [7:0] xy_bitmap_latch_msb = {8{1'b0}};
+ reg [NUM_MULTS-1:0] xy_bitmap_latch_lsb = MULT_BITMAP_ZEROES;
+ reg [NUM_MULTS-1:0] xy_bitmap_latch_msb = MULT_BITMAP_ZEROES;
// index - latch
- reg [2:0] xy_index_latch_lsb = 3'dX;
+ reg [MAC_INDEX_W-1:0] xy_index_latch_lsb = MAC_INDEX_DONT_CARE;
// purge - index
reg xy_purge_latch_lsb = 1'b0;
@@ -130,10 +129,10 @@ module modexpng_recombinator_block
reg xy_valid_lsb_adv[1:6];
reg xy_valid_msb_adv[1:6];
reg xy_aux_lsb_adv[1:6];
- reg [7:0] xy_bitmap_lsb_adv[1:6];
- reg [7:0] xy_bitmap_msb_adv[1:6];
- reg [2:0] xy_index_lsb_adv[1:6];
- reg [2:0] xy_index_msb_adv[1:6];
+ reg [NUM_MULTS-1:0] xy_bitmap_lsb_adv[1:6];
+ reg [NUM_MULTS-1:0] xy_bitmap_msb_adv[1:6];
+ reg [MAC_INDEX_W-1:0] xy_index_lsb_adv[1:6];
+ reg [MAC_INDEX_W-1:0] xy_index_msb_adv[1:6];
reg xy_purge_lsb_adv[1:6];
reg xy_purge_msb_adv[1:6];
@@ -143,10 +142,10 @@ module modexpng_recombinator_block
//
if (ena)
//
- case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1;
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2;
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3;
+ case (mmm_fsm_state_next)
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1;
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2;
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3;
default: rcmb_mode <= 2'd0;
endcase
@@ -165,10 +164,11 @@ module modexpng_recombinator_block
end
function calc_square_triangle_valid_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
+ //
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
+ input [ OP_ADDR_W-1:0] narrow_xy_addr_value;
begin
//
if (narrow_xy_addr_value[7:3] == col_index_value)
@@ -179,20 +179,20 @@ module modexpng_recombinator_block
end
endfunction
- function calc_square_valid_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
+ function calc_square_valid_lsb;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
+ input [ OP_ADDR_W-1:0] narrow_xy_addr_value;
begin
calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
end
endfunction
function calc_triangle_valid_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -200,9 +200,9 @@ module modexpng_recombinator_block
endfunction
function calc_rectangle_valid_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -215,9 +215,9 @@ module modexpng_recombinator_block
endfunction
function calc_triangle_aux_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -230,9 +230,9 @@ module modexpng_recombinator_block
endfunction
function [7:0] calc_square_triangle_bitmap_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -256,9 +256,9 @@ module modexpng_recombinator_block
endfunction
function [7:0] calc_square_bitmap_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -266,9 +266,9 @@ module modexpng_recombinator_block
endfunction
function [7:0] calc_triangle_bitmap_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -276,9 +276,9 @@ module modexpng_recombinator_block
endfunction
function [7:0] calc_rectangle_bitmap_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -311,9 +311,9 @@ module modexpng_recombinator_block
*/
function [2:0] calc_square_triangle_index_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -337,9 +337,9 @@ module modexpng_recombinator_block
endfunction
function [2:0] calc_square_index_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -347,9 +347,9 @@ module modexpng_recombinator_block
endfunction
function [2:0] calc_triangle_index_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -357,9 +357,9 @@ module modexpng_recombinator_block
endfunction
function [2:0] calc_rectangle_index_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] slim_bram_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] slim_bram_xy_bank_value;
input [7:0] slim_bram_xy_addr_value;
begin
//
@@ -383,9 +383,9 @@ module modexpng_recombinator_block
endfunction
function calc_square_rectangle_purge_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -398,9 +398,9 @@ module modexpng_recombinator_block
endfunction
function calc_square_purge_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -408,9 +408,9 @@ module modexpng_recombinator_block
endfunction
function calc_rectangle_purge_lsb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -418,9 +418,9 @@ module modexpng_recombinator_block
endfunction
function calc_square_valid_msb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
input [7:0] index_last_value;
begin
@@ -434,9 +434,9 @@ module modexpng_recombinator_block
endfunction
function calc_rectangle_valid_msb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
input [7:0] index_last_value;
begin
@@ -450,9 +450,9 @@ module modexpng_recombinator_block
endfunction
function [7:0] calc_square_bitmap_msb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
input [7:0] index_last_value;
begin
@@ -467,9 +467,9 @@ module modexpng_recombinator_block
endfunction
function [7:0] calc_rectangle_bitmap_msb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
input [7:0] index_last_value;
begin
@@ -483,9 +483,9 @@ module modexpng_recombinator_block
endfunction
function calc_square_purge_msb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
input [7:0] index_last_value;
begin
@@ -499,9 +499,9 @@ module modexpng_recombinator_block
endfunction
function calc_rectangle_purge_msb;
- input [4:0] col_index_value;
- input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [COL_INDEX_W-1:0] col_index_value;
+ input [COL_INDEX_W-1:0] col_index_last_value;
+ input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
input [7:0] index_last_value;
begin
@@ -626,12 +626,12 @@ module modexpng_recombinator_block
always @(posedge clk)
//
- case (fsm_state_next)
+ case (mmm_fsm_state_next)
//
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
//
xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
xy_aux_lsb_adv [6] <= 1'b0;
@@ -645,10 +645,10 @@ module modexpng_recombinator_block
//
end
//
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
//
xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
xy_aux_lsb_adv [6] <= calc_triangle_aux_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
@@ -662,10 +662,10 @@ module modexpng_recombinator_block
//
end
//
- FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+ MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
//
xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
xy_aux_lsb_adv [6] <= 1'b0;
@@ -783,19 +783,19 @@ module modexpng_recombinator_block
end
- reg [ 1:0] wide_xy_bank;
+ reg [BANK_ADDR_W-1:0] wide_xy_bank;
reg [ 7:0] wide_xy_addr;
reg [17:0] wide_x_dout;
reg [17:0] wide_y_dout;
reg wide_xy_valid = 1'b0;
- reg [ 1:0] narrow_xy_bank;
+ reg [BANK_ADDR_W-1:0] narrow_xy_bank;
reg [ 7:0] narrow_xy_addr;
reg [17:0] narrow_x_dout;
reg [17:0] narrow_y_dout;
reg narrow_xy_valid = 1'b0;
- reg [ 1:0] rdct_xy_bank;
+ reg [BANK_ADDR_W-1:0] rdct_xy_bank;
reg [ 7:0] rdct_xy_addr;
reg [17:0] rdct_x_dout;
reg [17:0] rdct_y_dout;
@@ -883,7 +883,7 @@ module modexpng_recombinator_block
endtask
task _update_wide;
- input [ 1:0] bank;
+ input [BANK_ADDR_W-1:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
@@ -898,7 +898,7 @@ module modexpng_recombinator_block
endtask
task _update_narrow;
- input [ 1:0] bank;
+ input [BANK_ADDR_W-1:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
@@ -913,7 +913,7 @@ module modexpng_recombinator_block
endtask
task _update_rdct;
- input [ 1:0] bank;
+ input [BANK_ADDR_W-1:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
@@ -928,7 +928,7 @@ module modexpng_recombinator_block
endtask
task set_wide;
- input [ 1:0] bank;
+ input [BANK_ADDR_W-1:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
@@ -938,7 +938,7 @@ module modexpng_recombinator_block
endtask
task set_narrow;
- input [ 1:0] bank;
+ input [BANK_ADDR_W-1:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
@@ -948,7 +948,7 @@ module modexpng_recombinator_block
endtask
task set_rdct;
- input [ 1:0] bank;
+ input [BANK_ADDR_W-1:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
@@ -959,19 +959,19 @@ module modexpng_recombinator_block
task clear_wide;
begin
- _update_wide(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ _update_wide(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
end
endtask
task clear_narrow;
begin
- _update_narrow(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ _update_narrow(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
end
endtask
task clear_rdct;
begin
- _update_rdct(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ _update_rdct(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
end
endtask
@@ -1074,16 +1074,16 @@ module modexpng_recombinator_block
//
case (rcmb_xy_valid)
//
- 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_ABH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
+ 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
else clear_wide;
//
- 2'b01: set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+ 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
//
2'b10: if (cnt_msb < 8'd2) clear_wide;
- else set_wide(BANK_WIDE_ABH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
+ else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
//
- 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_ABH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad);
- else set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+ 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad);
+ else set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
//
endcase
//
diff --git a/rtl/_modexpng_recombinator_cell.v b/rtl/_modexpng_recombinator_cell.v
new file mode 100644
index 0000000..b72395e
--- /dev/null
+++ b/rtl/_modexpng_recombinator_cell.v
@@ -0,0 +1,40 @@
+module modexpng_recombinator_cell // folds the three slices of successive din values into one running word-wide sum with carry feedback
+(
+ clk,
+ ce, clr,
+ din, dout
+);
+// With clr low and ce high each cycle: x[t] = din_x[t] + din_y[t-1] + din_z[t-2] + (upper bits of x[t-1]).
+ //
+ // Headers
+ //
+ `include "modexpng_parameters.vh"
+// MAC_W, WORD_W, WORD_EXT_W and WORD_NULL are not declared in this module; presumably they come from the header above.
+ //
+ // Ports
+ //
+ input clk; // all registers update on the rising edge
+ input ce; // clock enable: z, y and x all hold their values while ce is low
+ input clr; // when high, y and x load the bare din slices (no accumulation); z is not affected by clr
+ input [ MAC_W -1:0] din; // only bits [3*WORD_W-2:0] are used below
+ output [WORD_W -1:0] dout; // low WORD_W bits of the x accumulator
+// Pipeline registers; none has an initializer or reset, so they are undefined until the first ce cycle.
+ reg [WORD_W -2:0] z; // WORD_W-1 bits: registered top slice of din
+ reg [WORD_W :0] y; // WORD_W+1 bits: one bit of headroom for din_y + z
+ reg [WORD_W +1:0] x; // WORD_W+2 bits: the bits above WORD_W-1 are fed back into the next sum
+// Only the word-sized part of x is visible outside; the upper bits stay internal as carry.
+ assign dout = x[WORD_W-1:0];
+// Slice din into three fields; the bit ranges in the trailing comments correspond to WORD_W = 16.
+ wire [WORD_W -2:0] din_z = din[3*WORD_W -2 :2*WORD_W]; // [46:32]
+ wire [WORD_W -1:0] din_y = din[2*WORD_W -1 : WORD_W]; // [31:16]
+ wire [WORD_W -1:0] din_x = din[ WORD_W -1 : 0]; // [15: 0]
+// Non-blocking assignments: every right-hand side below reads the register values from before this clock edge.
+ always @(posedge clk)
+ // z -> y -> x form a chain, so each din slice reaches x one stage later than the slice below it
+ if (ce) begin
+ z <= din_z;
+ y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z};
+ x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {WORD_NULL, x[WORD_EXT_W-1:WORD_W]}; // NOTE(review): assumes WORD_EXT_W == WORD_W+2 and WORD_NULL is WORD_W zero bits - confirm in modexpng_parameters.vh
+ end
+// No else branch: with ce low nothing changes, including when clr is high.
+endmodule
diff --git a/rtl/modexpng_reductor.v b/rtl/_modexpng_reductor.v
similarity index 58%
copy from rtl/modexpng_reductor.v
copy to rtl/_modexpng_reductor.v
index 0f5e461..25cf394 100644
--- a/rtl/modexpng_reductor.v
+++ b/rtl/_modexpng_reductor.v
@@ -2,58 +2,41 @@ module modexpng_reductor
(
clk, rst,
ena, rdy,
- //fsm_state_next,
word_index_last,
- //dsp_xy_ce_p,
- //dsp_x_p, dsp_y_p,
- //col_index, col_index_last,
rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_dout_aux, rd_wide_y_dout_aux,
- //rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid,
rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_dout, rcmb_final_y_dout, rcmb_final_xy_valid,
rdct_final_xy_addr, rdct_final_x_dout, rdct_final_y_dout, rdct_final_xy_valid
);
-
//
// Headers
//
- //`include "../rtl_1/modexpng_mmm_fsm.vh"
- `include "../rtl_1/modexpng_parameters_old.vh"
- //`include "../rtl_1/modexpng_parameters_x8.vh"
+ `include "../rtl/modexpng_parameters.vh"
- input clk;
- input rst;
- input ena;
- output rdy;
- /*
- input [FSM_STATE_WIDTH-1:0] fsm_state_next;*/
- input [7:0] word_index_last;/*
- input dsp_xy_ce_p;
- *//*
- input [9*47-1:0] dsp_x_p;
- input [9*47-1:0] dsp_y_p;
- input [ 4:0] col_index;
- input [ 4:0] col_index_last;
- *//*
- input [ 7:0] rd_narrow_xy_addr;
- input [ 1:0] rd_narrow_xy_bank;
- */
- input [ 1:0] rd_wide_xy_bank_aux;
- input [ 7:0] rd_wide_xy_addr_aux;
- input [ 17:0] rd_wide_x_dout_aux;
- input [ 17:0] rd_wide_y_dout_aux;
+ input clk;
+ input rst;
//
- input [ 1:0] rcmb_final_xy_bank;
- input [ 7:0] rcmb_final_xy_addr;
- input [ 17:0] rcmb_final_x_dout;
- input [ 17:0] rcmb_final_y_dout;
- input rcmb_final_xy_valid;
-
- output [ 7:0] rdct_final_xy_addr;
- output [ 17:0] rdct_final_x_dout;
- output [ 17:0] rdct_final_y_dout;
- output rdct_final_xy_valid;
+ input ena;
+ output rdy;
+ //
+ input [ OP_ADDR_W -1:0] word_index_last;
+ //
+ input [BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
+ input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux;
+ input [ WORD_EXT_W -1:0] rd_wide_x_dout_aux;
+ input [ WORD_EXT_W -1:0] rd_wide_y_dout_aux;
+ //
+ input [BANK_ADDR_W -1:0] rcmb_final_xy_bank;
+ input [ OP_ADDR_W -1:0] rcmb_final_xy_addr;
+ input [ WORD_EXT_W -1:0] rcmb_final_x_dout;
+ input [ WORD_EXT_W -1:0] rcmb_final_y_dout;
+ input rcmb_final_xy_valid;
+ //
+ output [ OP_ADDR_W -1:0] rdct_final_xy_addr;
+ output [ WORD_EXT_W -1:0] rdct_final_x_dout;
+ output [ WORD_EXT_W -1:0] rdct_final_y_dout;
+ output rdct_final_xy_valid;
//
@@ -71,8 +54,7 @@ module modexpng_reductor
if (rdy && ena) rdy_reg <= 1'b0;
if (!rdy && !busy_now) rdy_reg <= 1'b1;
end
-
-
+
//
// Pipeline (Delay Match)
@@ -81,21 +63,21 @@ module modexpng_reductor
reg rcmb_xy_valid_dly2 = 1'b0;
reg rcmb_xy_valid_dly3 = 1'b0;
- reg [2:0] rcmb_xy_bank_dly1;
- reg [2:0] rcmb_xy_bank_dly2;
- reg [2:0] rcmb_xy_bank_dly3;
+ reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1;
+ reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2;
+ reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3;
- reg [7:0] rcmb_xy_addr_dly1;
- reg [7:0] rcmb_xy_addr_dly2;
- reg [7:0] rcmb_xy_addr_dly3;
+ reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly1;
+ reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly2;
+ reg [ OP_ADDR_W -1:0] rcmb_xy_addr_dly3;
- reg [17:0] rcmb_x_dout_dly1;
- reg [17:0] rcmb_x_dout_dly2;
- reg [17:0] rcmb_x_dout_dly3;
+ reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly1;
+ reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly2;
+ reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly3;
- reg [17:0] rcmb_y_dout_dly1;
- reg [17:0] rcmb_y_dout_dly2;
- reg [17:0] rcmb_y_dout_dly3;
+ reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly1;
+ reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly2;
+ reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly3;
always @(posedge clk)
//
@@ -134,19 +116,19 @@ module modexpng_reductor
end
//
end
-
-
- reg [ 1:0] rcmb_x_lsb_carry;
- reg [15:0] rcmb_x_lsb_dummy;
- reg [17:0] rcmb_x_lsb_dout;
- reg [ 1:0] rcmb_y_lsb_carry;
- reg [15:0] rcmb_y_lsb_dummy;
- reg [17:0] rcmb_y_lsb_dout;
- //reg [17:0] reductor_fat_bram_x_msb_dout;
- //reg reductor_fat_bram_x_msb_dout_valid = 1'b0;
- //reg [ 7:0] reductor_fat_bram_x_msb_addr;
+ //
+ // Carry Logic
+ //
+ reg [RDCT_CARRY_W -1:0] rcmb_x_lsb_carry;
+ reg [WORD_W -1:0] rcmb_x_lsb_dummy;
+ reg [WORD_EXT_W -1:0] rcmb_x_lsb_dout;
+
+ reg [RDCT_CARRY_W -1:0] rcmb_y_lsb_carry;
+ reg [WORD_W -1:0] rcmb_y_lsb_dummy;
+ reg [WORD_EXT_W -1:0] rcmb_y_lsb_dout;
+
//
// Carry Computation
@@ -154,8 +136,8 @@ module modexpng_reductor
always @(posedge clk)
//
if (ena) begin
- rcmb_x_lsb_carry <= 2'b00;
- rcmb_y_lsb_carry <= 2'b00;
+ rcmb_x_lsb_carry <= RDCT_CARRY_ZEROES;
+ rcmb_y_lsb_carry <= RDCT_CARRY_ZEROES;
end else if (rcmb_xy_valid_dly3)
//
case (rcmb_xy_bank_dly3)
@@ -166,7 +148,7 @@ module modexpng_reductor
end
BANK_RCMB_MH:
- if (rcmb_xy_addr_dly3 == 8'd0) begin
+ if (rcmb_xy_addr_dly3 == OP_ADDR_ZERO) begin
{rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry;
{rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry;
end
@@ -177,10 +159,10 @@ module modexpng_reductor
//
// Reduction
//
- reg [ 7:0] rdct_xy_addr;
- reg [ 17:0] rdct_x_dout;
- reg [ 17:0] rdct_y_dout;
- reg rdct_xy_valid = 1'b0;
+ reg [ OP_ADDR_W -1:0] rdct_xy_addr;
+ reg [WORD_EXT_W -1:0] rdct_x_dout;
+ reg [WORD_EXT_W -1:0] rdct_y_dout;
+ reg rdct_xy_valid = 1'b0;
assign rdct_final_xy_addr = rdct_xy_addr;
assign rdct_final_x_dout = rdct_x_dout;
@@ -188,10 +170,10 @@ module modexpng_reductor
assign rdct_final_xy_valid = rdct_xy_valid;
task _update_rdct;
- input [ 7:0] addr;
- input [17:0] dout_x;
- input [17:0] dout_y;
- input valid;
+ input [ OP_ADDR_W -1:0] addr;
+ input [WORD_EXT_W -1:0] dout_x;
+ input [WORD_EXT_W -1:0] dout_y;
+ input valid;
begin
rdct_xy_addr <= addr;
rdct_x_dout <= dout_x;
@@ -201,9 +183,9 @@ module modexpng_reductor
endtask
task set_rdct;
- input [ 7:0] addr;
- input [17:0] dout_x;
- input [17:0] dout_y;
+ input [ OP_ADDR_W -1:0] addr;
+ input [WORD_EXT_W -1:0] dout_x;
+ input [WORD_EXT_W -1:0] dout_y;
begin
_update_rdct(addr, dout_x, dout_y, 1'b1);
end
@@ -211,19 +193,19 @@ module modexpng_reductor
task clear_rdct;
begin
- _update_rdct(8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ _update_rdct(OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0);
end
endtask
//
+ // Helper Wires
//
- //
- wire [17:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_dout_aux;
- wire [17:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_dout_aux;
+ wire [WORD_EXT_W -1:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_dout_aux;
+ wire [WORD_EXT_W -1:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_dout_aux;
- wire [17:0] sum_rdct_x_carry = sum_rdct_x + {16'h0000, rcmb_x_lsb_carry};
- wire [17:0] sum_rdct_y_carry = sum_rdct_y + {16'h0000, rcmb_y_lsb_carry};
+ wire [WORD_EXT_W -1:0] sum_rdct_x_carry = sum_rdct_x + {WORD_NULL, rcmb_x_lsb_carry};
+ wire [WORD_EXT_W -1:0] sum_rdct_y_carry = sum_rdct_y + {WORD_NULL, rcmb_y_lsb_carry};
//
@@ -241,9 +223,9 @@ module modexpng_reductor
case (rcmb_xy_bank_dly3)
BANK_RCMB_MH:
- if (rcmb_xy_addr_dly3 == 8'd1)
- set_rdct(8'd0, sum_rdct_x_carry, sum_rdct_y_carry);
- else if (rcmb_xy_addr_dly3 > 8'd1)
+ if (rcmb_xy_addr_dly3 == OP_ADDR_ONE)
+ set_rdct(OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry);
+ else if (rcmb_xy_addr_dly3 > OP_ADDR_ONE)
set_rdct(rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);
BANK_RCMB_EXT:
diff --git a/rtl/modexpng_storage_block.v b/rtl/_modexpng_storage_block.v
similarity index 61%
copy from rtl/modexpng_storage_block.v
copy to rtl/_modexpng_storage_block.v
index d6f9fb1..d6ef1ee 100644
--- a/rtl/modexpng_storage_block.v
+++ b/rtl/_modexpng_storage_block.v
@@ -36,43 +36,43 @@ module modexpng_storage_block
//
// Headers
//
- `include "../rtl_1/modexpng_parameters_x8_old.vh"
+ `include "modexpng_parameters.vh"
//
// Ports
//
- input clk;
- input rst;
-
- input wr_wide_xy_ena;
- input [ 1:0] wr_wide_xy_bank;
- input [ 7:0] wr_wide_xy_addr;
- input [17:0] wr_wide_x_din;
- input [17:0] wr_wide_y_din;
+ input clk;
+ input rst;
+
+ input wr_wide_xy_ena;
+ input [BANK_ADDR_W -1:0] wr_wide_xy_bank;
+ input [ OP_ADDR_W -1:0] wr_wide_xy_addr;
+ input [ WORD_EXT_W -1:0] wr_wide_x_din;
+ input [ WORD_EXT_W -1:0] wr_wide_y_din;
- input wr_narrow_xy_ena;
- input [ 1:0] wr_narrow_xy_bank;
- input [ 7:0] wr_narrow_xy_addr;
- input [17:0] wr_narrow_x_din;
- input [17:0] wr_narrow_y_din;
-
- input rd_wide_xy_ena;
- input rd_wide_xy_ena_aux;
- input [ 1:0] rd_wide_xy_bank;
- input [ 1:0] rd_wide_xy_bank_aux;
- input [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr;
- input [ 8-1:0] rd_wide_xy_addr_aux;
- output [18*NUM_MULTS/2-1:0] rd_wide_x_dout;
- output [18*NUM_MULTS/2-1:0] rd_wide_y_dout;
- output [ 18-1:0] rd_wide_x_dout_aux;
- output [ 18-1:0] rd_wide_y_dout_aux;
+ input wr_narrow_xy_ena;
+ input [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] wr_narrow_xy_addr;
+ input [ WORD_EXT_W -1:0] wr_narrow_x_din;
+ input [ WORD_EXT_W -1:0] wr_narrow_y_din;
+
+ input rd_wide_xy_ena;
+ input rd_wide_xy_ena_aux;
+ input [ BANK_ADDR_W -1:0] rd_wide_xy_bank;
+ input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
+ input [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr;
+ input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux;
+ output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout;
+ output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout;
+ output [ WORD_EXT_W -1:0] rd_wide_x_dout_aux;
+ output [ WORD_EXT_W -1:0] rd_wide_y_dout_aux;
input rd_narrow_xy_ena;
- input [ 1:0] rd_narrow_xy_bank;
- input [ 7:0] rd_narrow_xy_addr;
- output [18-1:0] rd_narrow_x_dout;
- output [18-1:0] rd_narrow_y_dout;
+ input [BANK_ADDR_W -1:0] rd_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] rd_narrow_xy_addr;
+ output [ WORD_EXT_W -1:0] rd_narrow_x_dout;
+ output [ WORD_EXT_W -1:0] rd_narrow_y_dout;
//
@@ -94,31 +94,29 @@ module modexpng_storage_block
//
// Helper Signals
//
- wire [2+8-1:0] wr_wide_xy_offset;
- wire [2+8-1:0] rd_wide_xy_offset[0:NUM_MULTS/2-1];
- wire [2+8-1:0] rd_wide_xy_offset_aux;
- wire [2+8-1:0] wr_narrow_xy_offset;
- wire [2+8-1:0] rd_narrow_xy_offset;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset[0:NUM_MULTS_HALF-1];
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset_aux;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset;
assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr};
assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux};
assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr};
assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr};
-
//
// "Wide" Storage
//
genvar z;
- generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+ generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
begin : gen_wide_bram
//
- assign rd_wide_xy_offset[z] = {rd_wide_xy_bank, rd_wide_xy_addr[8*z+:8]};
+ assign rd_wide_xy_offset[z] = {rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]};
//
- ip_bram_18k wide_bram_x
+ modexpng_sdp_36k_wrapper wide_bram_x
(
- .clka (clk),
- .clkb (clk),
+ .clk (clk),
.ena (wr_wide_xy_ena),
.wea (wr_wide_xy_ena),
@@ -128,13 +126,12 @@ module modexpng_storage_block
.enb (rd_wide_xy_ena),
.regceb (rd_wide_xy_reg_ena),
.addrb (rd_wide_xy_offset[z]),
- .doutb (rd_wide_x_dout[18*z+:18])
+ .doutb (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W])
);
//
- ip_bram_18k wide_bram_y
+ modexpng_sdp_36k_wrapper wide_bram_y
(
- .clka (clk),
- .clkb (clk),
+ .clk (clk),
.ena (wr_wide_xy_ena),
.wea (wr_wide_xy_ena),
@@ -144,7 +141,7 @@ module modexpng_storage_block
.enb (rd_wide_xy_ena),
.regceb (rd_wide_xy_reg_ena),
.addrb (rd_wide_xy_offset[z]),
- .doutb (rd_wide_y_dout[18*z+:18])
+ .doutb (rd_wide_y_dout[z*WORD_EXT_W +: WORD_EXT_W])
);
//
end
@@ -154,10 +151,9 @@ module modexpng_storage_block
//
// Auxilary Storage
//
- ip_bram_18k wide_bram_x_aux
+ modexpng_sdp_36k_wrapper wide_bram_x_aux
(
- .clka (clk),
- .clkb (clk),
+ .clk (clk),
.ena (wr_wide_xy_ena),
.wea (wr_wide_xy_ena),
@@ -170,10 +166,9 @@ module modexpng_storage_block
.doutb (rd_wide_x_dout_aux)
);
//
- ip_bram_18k wide_bram_y_aux
+ modexpng_sdp_36k_wrapper wide_bram_y_aux
(
- .clka (clk),
- .clkb (clk),
+ .clk (clk),
.ena (wr_wide_xy_ena),
.wea (wr_wide_xy_ena),
@@ -190,10 +185,9 @@ module modexpng_storage_block
//
// "Narrow" Storage
//
- ip_bram_18k narrow_bram_x
+ modexpng_sdp_36k_wrapper narrow_bram_x
(
- .clka (clk),
- .clkb (clk),
+ .clk (clk),
.ena (wr_narrow_xy_ena),
.wea (wr_narrow_xy_ena),
@@ -206,10 +200,9 @@ module modexpng_storage_block
.doutb (rd_narrow_x_dout)
);
- ip_bram_18k narrow_bram_y
+ modexpng_sdp_36k_wrapper narrow_bram_y
(
- .clka (clk),
- .clkb (clk),
+ .clk (clk),
.ena (wr_narrow_xy_ena),
.wea (wr_narrow_xy_ena),
diff --git a/rtl/modexpng_storage_manager.v b/rtl/_modexpng_storage_manager.v
similarity index 51%
copy from rtl/modexpng_storage_manager.v
copy to rtl/_modexpng_storage_manager.v
index fa1e4a1..958596a 100644
--- a/rtl/modexpng_storage_manager.v
+++ b/rtl/_modexpng_storage_manager.v
@@ -43,70 +43,69 @@ module modexpng_storage_manager
//
// Headers
//
- `include "../rtl_1/modexpng_parameters_x8_old.vh"
+ `include "../rtl/modexpng_parameters.vh"
//
// Ports
//
- input clk;
- input rst;
+ input clk;
+ input rst;
- output wr_wide_xy_ena;
- output [ 1:0] wr_wide_xy_bank;
- output [ 7:0] wr_wide_xy_addr;
- output [17:0] wr_wide_x_din;
- output [17:0] wr_wide_y_din;
+ output wr_wide_xy_ena;
+ output [BANK_ADDR_W -1:0] wr_wide_xy_bank;
+ output [ OP_ADDR_W -1:0] wr_wide_xy_addr;
+ output [ WORD_EXT_W -1:0] wr_wide_x_din;
+ output [ WORD_EXT_W -1:0] wr_wide_y_din;
- output wr_narrow_xy_ena;
- output [ 1:0] wr_narrow_xy_bank;
- output [ 7:0] wr_narrow_xy_addr;
- output [17:0] wr_narrow_x_din;
- output [17:0] wr_narrow_y_din;
+ output wr_narrow_xy_ena;
+ output [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
+ output [ OP_ADDR_W -1:0] wr_narrow_xy_addr;
+ output [ WORD_EXT_W -1:0] wr_narrow_x_din;
+ output [ WORD_EXT_W -1:0] wr_narrow_y_din;
- input ext_wide_xy_ena;
- input [ 1:0] ext_wide_xy_bank;
- input [ 7:0] ext_wide_xy_addr;
- input [17:0] ext_wide_x_din;
- input [17:0] ext_wide_y_din;
+ input ext_wide_xy_ena;
+ input [BANK_ADDR_W -1:0] ext_wide_xy_bank;
+ input [ OP_ADDR_W -1:0] ext_wide_xy_addr;
+ input [ WORD_EXT_W -1:0] ext_wide_x_din;
+ input [ WORD_EXT_W -1:0] ext_wide_y_din;
- input ext_narrow_xy_ena;
- input [ 1:0] ext_narrow_xy_bank;
- input [ 7:0] ext_narrow_xy_addr;
- input [17:0] ext_narrow_x_din;
- input [17:0] ext_narrow_y_din;
+ input ext_narrow_xy_ena;
+ input [BANK_ADDR_W -1:0] ext_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] ext_narrow_xy_addr;
+ input [ WORD_EXT_W -1:0] ext_narrow_x_din;
+ input [ WORD_EXT_W -1:0] ext_narrow_y_din;
- input rcmb_wide_xy_ena;
- input [ 1:0] rcmb_wide_xy_bank;
- input [ 7:0] rcmb_wide_xy_addr;
- input [17:0] rcmb_wide_x_din;
- input [17:0] rcmb_wide_y_din;
+ input rcmb_wide_xy_ena;
+ input [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
+ input [ OP_ADDR_W -1:0] rcmb_wide_xy_addr;
+ input [ WORD_EXT_W -1:0] rcmb_wide_x_din;
+ input [ WORD_EXT_W -1:0] rcmb_wide_y_din;
- input rcmb_narrow_xy_ena;
- input [ 1:0] rcmb_narrow_xy_bank;
- input [ 7:0] rcmb_narrow_xy_addr;
- input [17:0] rcmb_narrow_x_din;
- input [17:0] rcmb_narrow_y_din;
-
-
- reg wr_wide_xy_ena_reg = 1'b0;
- reg [ 1:0] wr_wide_xy_bank_reg;
- reg [ 7:0] wr_wide_xy_addr_reg;
- reg [17:0] wr_wide_x_din_reg;
- reg [17:0] wr_wide_y_din_reg;
+ input rcmb_narrow_xy_ena;
+ input [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
+ input [ WORD_EXT_W -1:0] rcmb_narrow_x_din;
+ input [ WORD_EXT_W -1:0] rcmb_narrow_y_din;
+
+ reg wr_wide_xy_ena_reg = 1'b0;
+ reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_reg;
+ reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_reg;
+ reg [ WORD_EXT_W -1:0] wr_wide_x_din_reg;
+ reg [ WORD_EXT_W -1:0] wr_wide_y_din_reg;
- reg wr_narrow_xy_ena_reg = 1'b0;
- reg [ 1:0] wr_narrow_xy_bank_reg;
- reg [ 7:0] wr_narrow_xy_addr_reg;
- reg [17:0] wr_narrow_x_din_reg;
- reg [17:0] wr_narrow_y_din_reg;
+ reg wr_narrow_xy_ena_reg = 1'b0;
+ reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_reg;
+ reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_reg;
+ reg [ WORD_EXT_W -1:0] wr_narrow_x_din_reg;
+ reg [ WORD_EXT_W -1:0] wr_narrow_y_din_reg;
task _update_wide;
- input xy_ena;
- input [ 1:0] xy_bank;
- input [ 7:0] xy_addr;
- input [17:0] x_din;
- input [17:0] y_din;
+ input xy_ena;
+ input [BANK_ADDR_W -1:0] xy_bank;
+ input [ OP_ADDR_W -1:0] xy_addr;
+ input [ WORD_EXT_W -1:0] x_din;
+ input [ WORD_EXT_W -1:0] y_din;
begin
wr_wide_xy_ena_reg <= xy_ena;
wr_wide_xy_bank_reg <= xy_bank;
@@ -117,11 +116,11 @@ module modexpng_storage_manager
endtask
task _update_narrow;
- input xy_ena;
- input [ 1:0] xy_bank;
- input [ 7:0] xy_addr;
- input [17:0] x_din;
- input [17:0] y_din;
+ input xy_ena;
+ input [BANK_ADDR_W -1:0] xy_bank;
+ input [ OP_ADDR_W -1:0] xy_addr;
+ input [ WORD_EXT_W -1:0] x_din;
+ input [ WORD_EXT_W -1:0] y_din;
begin
wr_narrow_xy_ena_reg <= xy_ena;
wr_narrow_xy_bank_reg <= xy_bank;
@@ -132,20 +131,20 @@ module modexpng_storage_manager
endtask
task enable_wide;
- input [ 1:0] xy_bank;
- input [ 7:0] xy_addr;
- input [17:0] x_din;
- input [17:0] y_din;
+ input [BANK_ADDR_W -1:0] xy_bank;
+ input [ OP_ADDR_W -1:0] xy_addr;
+ input [ WORD_EXT_W -1:0] x_din;
+ input [ WORD_EXT_W -1:0] y_din;
begin
_update_wide(1'b1, xy_bank, xy_addr, x_din, y_din);
end
endtask
task enable_narrow;
- input [ 1:0] xy_bank;
- input [ 7:0] xy_addr;
- input [17:0] x_din;
- input [17:0] y_din;
+ input [BANK_ADDR_W -1:0] xy_bank;
+ input [ OP_ADDR_W -1:0] xy_addr;
+ input [ WORD_EXT_W -1:0] x_din;
+ input [ WORD_EXT_W -1:0] y_din;
begin
_update_narrow(1'b1, xy_bank, xy_addr, x_din, y_din);
end
@@ -153,13 +152,13 @@ module modexpng_storage_manager
task disable_wide;
begin
- _update_wide(1'b0, 2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}});
+ _update_wide(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
end
endtask
task disable_narrow;
begin
- _update_narrow(1'b0, 2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}});
+ _update_narrow(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
end
endtask
diff --git a/rtl/dev/temp.txt b/rtl/dev/temp.txt
deleted file mode 100644
index 987bd86..0000000
--- a/rtl/dev/temp.txt
+++ /dev/null
@@ -1,384 +0,0 @@
- //
- // Helper Functions
- //
- /*
- function [INDEX_WIDTH-1:0] calc_preset_a_index;
- input [INDEX_WIDTH-4:0] col_in;
- input integer x_in;
- integer index_out;
- begin
- index_out = col_in * NUM_MULTS + x_in;
- calc_preset_a_index = index_out[INDEX_WIDTH-1:0];
- end
- endfunction
-
- function [INDEX_WIDTH-1:0] calc_rotate_a_index;
- input [INDEX_WIDTH-1:0] current_index_in;
- input [INDEX_WIDTH-1:0] last_index_in;
- begin
- if (current_index_in > {INDEX_WIDTH{1'b0}})
- calc_rotate_a_index = current_index_in - 1'b1;
- else
- calc_rotate_a_index = last_index_in;
- end
- endfunction
- */
-
- /*
- //
- // Narrow Counters
- //
- reg [INDEX_WIDTH-1:0] din_addr_narrow_reg;
- reg [INDEX_WIDTH-1:0] din_addr_narrow_dly;
- localparam [INDEX_WIDTH-1:0] din_addr_narrow_zero = {INDEX_WIDTH{1'b0}};
- wire [INDEX_WIDTH-1:0] din_addr_narrow_next = (din_addr_narrow_reg < index_last) ?
- din_addr_narrow_reg + 1'b1 : din_addr_narrow_zero;
- wire din_addr_narrow_done = din_addr_narrow_reg == index_last;
-
- assign din_addr_narrow = din_addr_narrow_reg;
-
- always @(posedge clk)
- //
- din_addr_narrow_dly <= din_addr_narrow_reg;
-
- always @(posedge clk)
- //
- case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero;
- FSM_STATE_MULT_SQUARE_COL_0_BUSY: din_addr_narrow_reg <= din_addr_narrow_next;
- FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_narrow_reg <= din_addr_narrow_zero;
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_addr_narrow_reg <= din_addr_narrow_next;
- endcase
-
-
- //
- // Helper Functions
- //
- function [NUM_MULTS-1:0] calc_mac_clear_bitmask;
- input [2:0] t;
- begin
- case (t)
- 3'd0: calc_mac_clear_bitmask = 8'b00000001;
- 3'd1: calc_mac_clear_bitmask = 8'b00000010;
- 3'd2: calc_mac_clear_bitmask = 8'b00000100;
- 3'd3: calc_mac_clear_bitmask = 8'b00001000;
- 3'd4: calc_mac_clear_bitmask = 8'b00010000;
- 3'd5: calc_mac_clear_bitmask = 8'b00100000;
- 3'd6: calc_mac_clear_bitmask = 8'b01000000;
- 3'd7: calc_mac_clear_bitmask = 8'b10000000;
- endcase
- end
- endfunction
-
- function [NUM_MULTS:0] calc_mac_clear_square;
- input [INDEX_WIDTH-4:0] current_col_index;
- input [INDEX_WIDTH-1:0] b_addr_prev;
- begin
- if (b_addr_prev[INDEX_WIDTH-1:3] == current_col_index)
- calc_mac_clear_square = {1'b0, calc_mac_clear_bitmask(b_addr_prev[2:0])};
- else
- calc_mac_clear_square = {1'b0, {NUM_MULTS{1'b0}}};
- end
- endfunction
-
-
- //
- // Wide Counters
- //
- reg [INDEX_WIDTH-1:0] din_addr_wide_reg[0:NUM_MULTS-1];
-
- integer xi;
- always @(posedge clk)
- //
- for (xi=0; xi<NUM_MULTS; xi=xi+1)
- //
- case (fsm_state_next)
- //
- FSM_STATE_MULT_SQUARE_COL_0_TRIG: din_addr_wide_reg[xi] <= calc_preset_a_index(0, xi);
- FSM_STATE_MULT_SQUARE_COL_N_TRIG: din_addr_wide_reg[xi] <= calc_preset_a_index(col_index + 1'b1, xi);
- //
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_addr_wide_reg[xi] <= calc_rotate_a_index(din_addr_wide_reg[xi], index_last);
- //
- endcase
-
-
- //
- // Enables
- //
- reg din_ena_narrow_reg = 1'b0;
- reg [NUM_MULTS-1:0] din_ena_wide_reg = {NUM_MULTS{1'b0}};
-
- assign din_ena_narrow = din_ena_narrow_reg;
- assign din_ena_wide = din_ena_wide_reg;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) din_ena_narrow_reg <= 1'b0;
- else case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_ena_narrow_reg <= 1'b1;
- default: din_ena_narrow_reg <= 1'b0;
- endcase
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) din_ena_wide_reg <= {NUM_MULTS{1'b0}};
- else case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_ena_wide_reg <= {NUM_MULTS{1'b1}};
- default: din_ena_wide_reg <= {NUM_MULTS{1'b0}};
- endcase
-
-
- //
- // Modes
- //
- reg [2-1:0] din_mode_wide_reg;
- reg [2-1:0] din_mode_narrow_reg;
- reg [2-1:0] dout_mode_wide_reg;
- reg [2-1:0] dout_mode_narrow_reg;
-
- assign din_mode_wide = din_mode_wide_reg;
- assign din_mode_narrow = din_mode_narrow_reg;
-
- always @(posedge clk)
- //
- case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_mode_wide_reg <= MODEXPNG_MODE_A;
- default: din_mode_wide_reg <= 2'bXX;
- endcase
-
- always @(posedge clk)
- //
- case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: din_mode_narrow_reg <= MODEXPNG_MODE_B;
- default: din_mode_narrow_reg <= 2'bXX;
- endcase
-
-
- //
- // MAC Array
- //
- wire [MODEXPNG_WORD_WIDTH-1:0] mac_din_a[0:NUM_MULTS];
- wire [MODEXPNG_WORD_WIDTH-1:0] mac_din_b;
- reg [ NUM_MULTS :0] mac_ce;
- reg [ NUM_MULTS :0] mac_clr;
- wire [ MODEXPNG_MAC_WIDTH-1:0] mac_p[0:NUM_MULTS];
- reg [ NUM_MULTS :0] mac_rdy_lsb;
- reg [ NUM_MULTS :0] mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1:0];
-
- //reg [ NUM_MULTS :0] mac_ce_dly[MODEXPNG_MAC_LATENCY-1:0];
- //wire [ NUM_MULTS :0] mac_rdy;
-
-
-
-
-
- assign mac_din_b = din_narrow;
-
-
- genvar x;
- generate for (x=0; x<=NUM_MULTS; x=x+1)
- begin : gen_macs
- //
- //assign mac_rdy[x] = mac_ce_dly[MODEXPNG_MAC_LATENCY-1][x];
- //
- modexpng_mac mac_inst
- (
- .clk (clk),
- .ce (mac_ce[x]),
- .clr (mac_clr[x]),
- .a (mac_din_a[x]),
- .b (mac_din_b),
- .p (mac_p[x])
- );
- //
- end
- //
- endgenerate
-
- generate for (x=0; x<NUM_MULTS; x=x+1)
- begin : gen_mac_din_a
- //
- assign mac_din_a[x] = din_wide[x*MODEXPNG_WORD_WIDTH+:MODEXPNG_WORD_WIDTH];
- //
- end
- endgenerate
-
- generate for (x=0; x<NUM_MULTS; x=x+1)
- begin : gen_din_addr_wide
- //
- assign din_addr_wide[x*INDEX_WIDTH+:INDEX_WIDTH] = din_addr_wide_reg[x];
- //
- end
- endgenerate
-
-
- //
- // MAC Clock Enable Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) mac_ce <= {1'b0, {NUM_MULTS{1'b0}}};
- else case (fsm_state)
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_ce <= {1'b0, {NUM_MULTS{1'b1}}};
- default: mac_ce <= {1'b0, {NUM_MULTS{1'b0}}};
- endcase
-
-
- //
- // MAC Valid Logic
- //
- integer y;
-
- always @(posedge clk)
- //
- for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin
- mac_rdy_lsb_dly[0][xi] <= mac_rdy_lsb[xi];
- for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1)
- mac_rdy_lsb_dly[y][xi] <= mac_rdy_lsb_dly[y-1][xi];
- end
-
- always @(posedge clk) begin
- //
- fsm_state_dly[0] <= fsm_state;
- for (y=1; y<=MODEXPNG_MAC_LATENCY; y=y+1)
- fsm_state_dly[y] <= fsm_state_dly[y-1];
- end
-
- */
-
- /*
- always @(posedge clk)
- //
- for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin
- mac_ce_dly[0][xi] <= mac_ce[xi];
- for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1)
- mac_ce_dly[y][xi] <= mac_ce_dly[y-1][xi];
- end
- */
- /*
- always @(posedge clk)
- //
- for (xi=0; xi<=NUM_MULTS; xi=xi+1) begin
- mac_clr_dly[0][xi] <= mac_clr[xi];
- for (y=1; y<MODEXPNG_MAC_LATENCY; y=y+1)
- mac_clr_dly[y][xi] <= mac_clr_dly[y-1][xi];
- end
- */
-
- /*
- //
- // MAC Clear Logic
- //
- always @(posedge clk)
- //
- case (fsm_state)
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_clr <= {1'b0, {NUM_MULTS{1'b1}}};
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_clr <= calc_mac_clear_square(col_index, din_addr_narrow_dly);
- default: mac_clr <= {1'bX, {NUM_MULTS{1'bX}}};
- endcase
-
-
- //
- // MAC Ready Logic
- //
- always @(posedge clk)
- //
- case (fsm_state)
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_rdy_lsb <= calc_mac_clear_square(col_index, din_addr_narrow);
- default: mac_rdy_lsb <= {1'bX, {NUM_MULTS{1'bX}}};
- endcase
-
-
-
- //
- // Recombinators
- //
- reg rcmb_lsb_ce;
- reg rcmb_lsb_clr;
- reg [MODEXPNG_MAC_WIDTH-1: 0] rcmb_lsb_din;
- wire [15: 0] rcmb_lsb_dout;
-
- modexpng_part_recombinator recomb_lsb
- (
- .clk (clk),
- .ce (rcmb_lsb_ce),
- .clr (rcmb_lsb_clr),
- .din (rcmb_lsb_din),
- .dout (rcmb_lsb_dout)
- );
-
-
- reg calc_rcmb_lsb_ce;
- always @*
- //
- calc_rcmb_lsb_ce = | mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1][NUM_MULTS-1:0];
-
- reg [MODEXPNG_MAC_WIDTH-1:0] calc_rcmb_lsb_din;
-
- always @*
- //
- casez (mac_rdy_lsb_dly[MODEXPNG_MAC_LATENCY-1][NUM_MULTS-1:0])
- 8'b00000001: calc_rcmb_lsb_din = mac_p[0];
- 8'b00000010: calc_rcmb_lsb_din = mac_p[1];
- 8'b00000100: calc_rcmb_lsb_din = mac_p[2];
- 8'b00001000: calc_rcmb_lsb_din = mac_p[3];
- 8'b00010000: calc_rcmb_lsb_din = mac_p[4];
- 8'b00100000: calc_rcmb_lsb_din = mac_p[5];
- 8'b01000000: calc_rcmb_lsb_din = mac_p[6];
- 8'b10000000: calc_rcmb_lsb_din = mac_p[7];
- default: calc_rcmb_lsb_din = {MODEXPNG_MAC_WIDTH{1'bX}};
- endcase
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- rcmb_lsb_ce <= 1'b0;
- else case (fsm_state_dly[MODEXPNG_MAC_LATENCY])
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: rcmb_lsb_ce <= calc_rcmb_lsb_ce;
- default: rcmb_lsb_ce <= 1'b0;
- endcase
-
- always @(posedge clk)
- //
- case (fsm_state_dly[MODEXPNG_MAC_LATENCY])
- FSM_STATE_MULT_SQUARE_COL_0_TRIG: rcmb_lsb_clr <= 1'b1;
- default: rcmb_lsb_clr <= 1'b0;
- endcase
-
- always @(posedge clk)
- //
- case (fsm_state_dly[MODEXPNG_MAC_LATENCY])
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: rcmb_lsb_din <= calc_rcmb_lsb_din;
- default: rcmb_lsb_din <= {MODEXPNG_MAC_WIDTH{1'bX}};
- endcase
-
-
-
-*/
diff --git a/rtl/dsp/dsp_array.v b/rtl/dsp/dsp_array.v
deleted file mode 100644
index 2a050d4..0000000
--- a/rtl/dsp/dsp_array.v
+++ /dev/null
@@ -1,143 +0,0 @@
-module dsp_array
-(
- input clk,
-
- input ce_a,
- input ce_b,
- input ce_m,
- input ce_p,
- input ce_mode,
-
- input [9 -1:0] mode_z,
-
- input [5*18-1:0] a,
- input [1*17-1:0] b,
- output [9*47-1:0] p
-);
-
- `include "../modexpng_parameters_x8.vh"
-
- wire [17:0] casc_a[0:3];
- wire [16:0] casc_b[0:3];
-
- wire ce_a0 = ce_a;
- reg ce_a1 = 1'b0;
- reg ce_a2 = 1'b0;
-
- wire ce_b0 = ce_b;
- reg ce_b1 = 1'b0;
-
- always @(posedge clk) begin
- ce_a1 <= ce_a0;
- ce_a2 <= ce_a1;
- ce_b1 <= ce_b0;
- end
-
-
- genvar z;
- generate for (z=0; z<(NUM_MULTS/2); z=z+1)
- //
- begin : gen_DSP48E1
- //
- dsp_slice #
- (
- .AB_INPUT("DIRECT"),
- .B_REG(2)
- )
- dsp_direct
- (
- .clk (clk),
-
- .ce_a1 (ce_a0),
- .ce_b1 (ce_b0),
- .ce_a2 (ce_a1),
- .ce_b2 (ce_b1),
- .ce_m (ce_m),
- .ce_p (ce_p),
- .ce_mode (ce_mode),
-
- .a (a[z*18+:18]),
- .b (b),
- .p (p[47*2*z+:47]),
-
- .inmode (5'b00000),
- .opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}),
- .alumode (4'b0000),
-
- .casc_a_in ({18{1'b0}}),
- .casc_b_in ({17{1'b0}}),
-
- .casc_a_out (casc_a[z]),
- .casc_b_out (casc_b[z])
- );
- //
- dsp_slice #
- (
- .AB_INPUT("CASCADE"),
- .B_REG(1)
- )
- dsp_cascade
- (
- .clk (clk),
-
- .ce_a1 (ce_a1),
- .ce_b1 (1'b0),
- .ce_a2 (ce_a2),
- .ce_b2 (ce_b1),
- .ce_m (ce_m),
- .ce_p (ce_p),
- .ce_mode (ce_mode),
-
- .a (a[z*18+:18]),
- .b (b),
- .p (p[47*(2*z+1)+:47]),
-
- .inmode (5'b00000),
- .opmode ({1'b0, mode_z[2*z+1], 1'b0, 2'b01, 2'b01}),
- .alumode (4'b0000),
-
- .casc_a_in (casc_a[z]),
- .casc_b_in (casc_b[z]),
-
- .casc_a_out (),
- .casc_b_out ()
- );
- //
- end
- //
- endgenerate
-
- dsp_slice #
- (
- .AB_INPUT("DIRECT"),
- .B_REG(2)
- )
- dsp_aux
- (
- .clk (clk),
-
- .ce_a1 (ce_a0),
- .ce_b1 (ce_b0),
- .ce_a2 (ce_a1),
- .ce_b2 (ce_b1),
- .ce_m (ce_m),
- .ce_p (ce_p),
- .ce_mode (ce_mode),
-
- .a (a[4*18+:18]),
- .b (b),
- .p (p[47*2*4+:47]),
-
- .inmode (5'b00000),
- .opmode ({1'b0, mode_z[2*4], 1'b0, 2'b01, 2'b01}),
- .alumode (4'b0000),
-
- .casc_a_in ({18{1'b0}}),
- .casc_b_in ({17{1'b0}}),
-
- .casc_a_out (),
- .casc_b_out ()
- );
-
-
-endmodule
diff --git a/rtl/dsp/dsp_slice.v b/rtl/dsp/dsp_slice.v
deleted file mode 100644
index 9f1298b..0000000
--- a/rtl/dsp/dsp_slice.v
+++ /dev/null
@@ -1,125 +0,0 @@
-module dsp_slice #
-(
- AB_INPUT = "DIRECT",
- B_REG = 2
-)
-(
- input clk,
- input ce_a1,
- input ce_b1,
- input ce_a2,
- input ce_b2,
- input ce_m,
- input ce_p,
- input ce_mode,
- input [17:0] a,
- input [16:0] b,
- output [46:0] p,
- input [ 4:0] inmode,
- input [ 6:0] opmode,
- input [ 3:0] alumode,
- input [17:0] casc_a_in,
- input [16:0] casc_b_in,
- output [17:0] casc_a_out,
- output [16:0] casc_b_out
-);
-
- wire [30-18-1:0] casc_a_dummy;
- wire [18-17-1:0] casc_b_dummy;
- wire [48-47-1:0] p_dummy;
-
- DSP48E1 #
- (
- .AREG (2),
- .BREG (B_REG),
- .CREG (0),
- .DREG (0),
- .ADREG (0),
- .MREG (1),
- .PREG (1),
- .ACASCREG (1),
- .BCASCREG (1),
- .INMODEREG (0),
- .OPMODEREG (1),
- .ALUMODEREG (0),
- .CARRYINREG (0),
- .CARRYINSELREG (0),
-
- .A_INPUT (AB_INPUT),
- .B_INPUT (AB_INPUT),
-
- .USE_DPORT ("FALSE"),
- .USE_MULT ("DYNAMIC"),
- .USE_SIMD ("ONE48"),
-
- .MASK (48'h3fffffffffff),
- .PATTERN (48'h000000000000),
- .SEL_MASK ("MASK"),
- .SEL_PATTERN ("PATTERN"),
-
- .USE_PATTERN_DETECT ("NO_PATDET"),
- .AUTORESET_PATDET ("NO_RESET")
- )
- DSP48E1_inst
- (
- .CLK (clk),
-
- .CEA1 (ce_a1),
- .CEB1 (ce_b1),
- .CEA2 (ce_a2),
- .CEB2 (ce_b2),
- .CEAD (1'b0),
- .CEC (1'b0),
- .CED (1'b0),
- .CEM (ce_m),
- .CEP (ce_p),
- .CEINMODE (1'b0),
- .CECTRL (ce_mode),
- .CEALUMODE (1'b0),
- .CECARRYIN (1'b0),
-
- .A ({{(30-18){1'b0}}, a}),
- .B ({{(18-17){1'b0}}, b}),
- .C ({48{1'b0}}),
- .D ({25{1'b0}}),
- .P ({p_dummy, p}),
-
- .INMODE (inmode),
- .OPMODE (opmode),
- .ALUMODE (alumode),
-
- .ACIN ({{(30-18){1'b0}}, casc_a_in}),
- .BCIN ({{(18-17){1'b0}}, casc_b_in}),
- .ACOUT ({casc_a_dummy, casc_a_out}),
- .BCOUT ({casc_b_dummy, casc_b_out}),
- .PCIN ({48{1'b0}}),
- .PCOUT (),
- .CARRYCASCIN (1'b0),
- .CARRYCASCOUT (),
-
- .RSTA (1'b0),
- .RSTB (1'b0),
- .RSTC (1'b0),
- .RSTD (1'b0),
- .RSTM (1'b0),
- .RSTP (1'b0),
- .RSTINMODE (1'b0),
- .RSTCTRL (1'b0),
- .RSTALUMODE (1'b0),
- .RSTALLCARRYIN (1'b0),
-
- .UNDERFLOW (),
- .OVERFLOW (),
- .PATTERNDETECT (),
- .PATTERNBDETECT (),
-
- .CARRYIN (1'b0),
- .CARRYOUT (),
- .CARRYINSEL (3'b000),
-
- .MULTSIGNIN (1'b0),
- .MULTSIGNOUT ()
- );
-
-
-endmodule
diff --git a/rtl/modexpng_dsp48e1.vh b/rtl/modexpng_dsp48e1.vh
new file mode 100644
index 0000000..bc3d55c
--- /dev/null
+++ b/rtl/modexpng_dsp48e1.vh
@@ -0,0 +1,8 @@
+localparam DSP48E1_A_W = 30;
+localparam DSP48E1_B_W = 18;
+localparam DSP48E1_C_W = 48;
+localparam DSP48E1_D_W = 25;
+localparam DSP48E1_P_W = 48;
+localparam DSP48E1_INMODE_W = 5;
+localparam DSP48E1_OPMODE_W = 7;
+localparam DSP48E1_ALUMODE_W = 4;
diff --git a/rtl/modexpng_dsp_array_block.v b/rtl/modexpng_dsp_array_block.v
index 9c4ee93..8ab64f0 100644
--- a/rtl/modexpng_dsp_array_block.v
+++ b/rtl/modexpng_dsp_array_block.v
@@ -1,24 +1,30 @@
module modexpng_dsp_array_block
(
- input clk,
-
- input ce_a,
- input ce_b,
- input ce_m,
- input ce_p,
- input ce_mode,
+ clk,
+ ce_a, ce_b, ce_m, ce_p, ce_mode,
+ mode_z,
+ a, b, p
+);
+
+ `include "modexpng_dsp48e1.vh"
+ `include "modexpng_parameters.vh"
- input [9 -1:0] mode_z,
+ input clk;
- input [5*18-1:0] a,
- input [1*16-1:0] b,
- output [9*47-1:0] p
-);
+ input ce_a;
+ input ce_b;
+ input ce_m;
+ input ce_p;
+ input ce_mode;
- `include "modexpng_parameters_x8.vh"
+ input [ NUM_MULTS_AUX -1:0] mode_z;
+
+ input [NUM_MULTS_HALF_AUX * WORD_EXT_W -1:0] a;
+ input [ WORD_W -1:0] b;
+ output [NUM_MULTS_AUX * MAC_W -1:0] p;
- wire [17:0] casc_a[0:3];
- wire [15:0] casc_b[0:3];
+ wire [WORD_EXT_W -1:0] casc_a[0:NUM_MULTS_HALF-1];
+ wire [ WORD_W -1:0] casc_b[0:NUM_MULTS_HALF-1];
wire ce_a0 = ce_a;
reg ce_a1 = 1'b0;
@@ -35,7 +41,7 @@ module modexpng_dsp_array_block
genvar z;
- generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+ generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
//
begin : gen_DSP48E1
//
@@ -56,16 +62,16 @@ module modexpng_dsp_array_block
.ce_p (ce_p),
.ce_mode (ce_mode),
- .a (a[z*18+:18]),
+ .a (a[z*WORD_EXT_W +: WORD_EXT_W]),
.b (b),
- .p (p[47*2*z+:47]),
+ .p (p[(2*z)*MAC_W +: MAC_W]),
- .inmode (5'b00000),
+ .inmode ({DSP48E1_INMODE_W{1'b0}}),
.opmode ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}),
- .alumode (4'b0000),
+ .alumode ({DSP48E1_ALUMODE_W{1'b0}}),
- .casc_a_in ({18{1'b0}}),
- .casc_b_in ({16{1'b0}}),
+ .casc_a_in (WORD_EXT_NULL),
+ .casc_b_in (WORD_NULL),
.casc_a_out (casc_a[z]),
.casc_b_out (casc_b[z])
@@ -88,13 +94,13 @@ module modexpng_dsp_array_block
.ce_p (ce_p),
.ce_mode (ce_mode),
- .a (a[z*18+:18]),
+ .a (a[z*WORD_EXT_W +: WORD_EXT_W]),
.b (b),
- .p (p[47*(2*z+1)+:47]),
+ .p (p[(2*z+1)*MAC_W +: MAC_W]),
- .inmode (5'b00000),
+ .inmode ({DSP48E1_INMODE_W{1'b0}}),
.opmode ({1'b0, mode_z[2*z+1], 1'b0, 2'b01, 2'b01}),
- .alumode (4'b0000),
+ .alumode ({DSP48E1_ALUMODE_W{1'b0}}),
.casc_a_in (casc_a[z]),
.casc_b_in (casc_b[z]),
@@ -124,16 +130,16 @@ module modexpng_dsp_array_block
.ce_p (ce_p),
.ce_mode (ce_mode),
- .a (a[4*18+:18]),
+ .a (a[NUM_MULTS_HALF*WORD_EXT_W +: WORD_EXT_W]),
.b (b),
- .p (p[47*2*4+:47]),
+ .p (p[(2*NUM_MULTS_HALF)*MAC_W +: MAC_W]),
- .inmode (5'b00000),
- .opmode ({1'b0, mode_z[2*4], 1'b0, 2'b01, 2'b01}),
- .alumode (4'b0000),
+ .inmode ({DSP48E1_INMODE_W{1'b0}}),
+ .opmode ({1'b0, mode_z[2*NUM_MULTS_HALF], 1'b0, 2'b01, 2'b01}),
+ .alumode ({DSP48E1_ALUMODE_W{1'b0}}),
- .casc_a_in ({18{1'b0}}),
- .casc_b_in ({16{1'b0}}),
+ .casc_a_in (WORD_EXT_NULL),
+ .casc_b_in (WORD_NULL),
.casc_a_out (),
.casc_b_out ()
diff --git a/rtl/modexpng_dsp_slice_wrapper.v b/rtl/modexpng_dsp_slice_wrapper.v
index f565eec..3d13570 100644
--- a/rtl/modexpng_dsp_slice_wrapper.v
+++ b/rtl/modexpng_dsp_slice_wrapper.v
@@ -4,30 +4,41 @@ module modexpng_dsp_slice_wrapper #
B_REG = 2
)
(
- input clk,
- input ce_a1,
- input ce_b1,
- input ce_a2,
- input ce_b2,
- input ce_m,
- input ce_p,
- input ce_mode,
- input [17:0] a,
- input [15:0] b,
- output [46:0] p,
- input [ 4:0] inmode,
- input [ 6:0] opmode,
- input [ 3:0] alumode,
- input [17:0] casc_a_in,
- input [15:0] casc_b_in,
- output [17:0] casc_a_out,
- output [15:0] casc_b_out
+ clk,
+ ce_a1, ce_b1, ce_a2, ce_b2,
+ ce_m, ce_p, ce_mode,
+ a, b, p,
+ inmode, opmode, alumode,
+ casc_a_in, casc_b_in,
+ casc_a_out, casc_b_out
);
- wire [30-18-1:0] casc_a_dummy;
- wire [18-16-1:0] casc_b_dummy;
- wire [48-47-1:0] p_dummy;
+ `include "modexpng_parameters.vh"
+ `include "modexpng_dsp48e1.vh"
+
+ input clk;
+ input ce_a1;
+ input ce_b1;
+ input ce_a2;
+ input ce_b2;
+ input ce_m;
+ input ce_p;
+ input ce_mode;
+ input [ WORD_EXT_W -1:0] a;
+ input [ WORD_W -1:0] b;
+ output [ MAC_W -1:0] p;
+ input [ DSP48E1_INMODE_W -1:0] inmode;
+ input [ DSP48E1_OPMODE_W -1:0] opmode;
+ input [DSP48E1_ALUMODE_W -1:0] alumode;
+ input [ WORD_EXT_W -1:0] casc_a_in;
+ input [ WORD_W -1:0] casc_b_in;
+ output [ WORD_EXT_W -1:0] casc_a_out;
+ output [ WORD_W -1:0] casc_b_out;
+ wire [DSP48E1_A_W - WORD_EXT_W -1:0] casc_a_dummy;
+ wire [DSP48E1_B_W - WORD_W -1:0] casc_b_dummy;
+ wire [DSP48E1_P_W - MAC_W -1:0] p_dummy;
+
DSP48E1 #
(
.AREG (2),
@@ -52,8 +63,8 @@ module modexpng_dsp_slice_wrapper #
.USE_MULT ("DYNAMIC"),
.USE_SIMD ("ONE48"),
- .MASK (48'h3fffffffffff),
- .PATTERN (48'h000000000000),
+ .MASK ({DSP48E1_P_W{1'b1}}),
+ .PATTERN ({DSP48E1_P_W{1'b0}}),
.SEL_MASK ("MASK"),
.SEL_PATTERN ("PATTERN"),
@@ -78,21 +89,21 @@ module modexpng_dsp_slice_wrapper #
.CEALUMODE (1'b0),
.CECARRYIN (1'b0),
- .A ({{(30-18){1'b0}}, a}),
- .B ({{(18-16){1'b0}}, b}),
- .C ({48{1'b0}}),
- .D ({25{1'b0}}),
+ .A ({{(DSP48E1_A_W-WORD_EXT_W){1'b0}}, a}),
+ .B ({{(DSP48E1_B_W-WORD_W){1'b0}}, b}),
+ .C ({DSP48E1_C_W{1'b0}}),
+ .D ({DSP48E1_D_W{1'b0}}),
.P ({p_dummy, p}),
.INMODE (inmode),
.OPMODE (opmode),
.ALUMODE (alumode),
- .ACIN ({{(30-18){1'b0}}, casc_a_in}),
- .BCIN ({{(18-16){1'b0}}, casc_b_in}),
+ .ACIN ({{(DSP48E1_A_W-WORD_EXT_W){1'b0}}, casc_a_in}),
+ .BCIN ({{(DSP48E1_B_W-WORD_W){1'b0}}, casc_b_in}),
.ACOUT ({casc_a_dummy, casc_a_out}),
.BCOUT ({casc_b_dummy, casc_b_out}),
- .PCIN ({48{1'b0}}),
+ .PCIN ({DSP48E1_P_W{1'b0}}),
.PCOUT (),
.CARRYCASCIN (1'b0),
.CARRYCASCOUT (),
@@ -121,5 +132,4 @@ module modexpng_dsp_slice_wrapper #
.MULTSIGNOUT ()
);
-
endmodule
diff --git a/rtl/modexpng_mmm_dual_x8.v b/rtl/modexpng_mmm_dual.v
similarity index 97%
rename from rtl/modexpng_mmm_dual_x8.v
rename to rtl/modexpng_mmm_dual.v
index 2e4f4e0..df0f823 100644
--- a/rtl/modexpng_mmm_dual_x8.v
+++ b/rtl/modexpng_mmm_dual.v
@@ -1,14 +1,15 @@
-module modexpng_mmm_dual_x8
+module modexpng_mmm_dual
(
clk, rst,
ena, rdy,
-
ladder_mode,
word_index_last,
word_index_last_minus1,
+ sel_wide_in, sel_narrow_in,
+
rd_wide_xy_ena,
rd_wide_xy_ena_aux,
rd_wide_xy_bank,
@@ -51,9 +52,10 @@ module modexpng_mmm_dual_x8
//
// Headers
//
+ `include "modexpng_parameters.vh"
`include "../rtl_1/modexpng_mmm_fsm_old.vh"
- `include "../rtl_1/modexpng_parameters_old.vh"
- `include "../rtl_1/modexpng_parameters_x8_old.vh"
+ //`include "../rtl_1/modexpng_parameters_old.vh"
+ //`include "../rtl_1/modexpng_parameters_x8_old.vh"
//
@@ -69,10 +71,13 @@ module modexpng_mmm_dual_x8
input [7:0] word_index_last;
input [7:0] word_index_last_minus1;
+ input [BANK_ADDR_W-1:0] sel_wide_in;
+ input [BANK_ADDR_W-1:0] sel_narrow_in;
+
output rd_wide_xy_ena;
output rd_wide_xy_ena_aux;
- output [ 1:0] rd_wide_xy_bank;
- output [ 1:0] rd_wide_xy_bank_aux;
+ output [ BANK_ADDR_W -1:0] rd_wide_xy_bank;
+ output [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
output [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr;
output [ 8-1:0] rd_wide_xy_addr_aux;
input [18*NUM_MULTS/2-1:0] rd_wide_x_dout;
@@ -81,24 +86,24 @@ module modexpng_mmm_dual_x8
input [ 18-1:0] rd_wide_y_dout_aux;
output rd_narrow_xy_ena;
- output [ 1:0] rd_narrow_xy_bank;
+ output [ BANK_ADDR_W -1:0] rd_narrow_xy_bank;
output [ 7:0] rd_narrow_xy_addr;
input [18-1:0] rd_narrow_x_dout;
input [18-1:0] rd_narrow_y_dout;
- output [ 1:0] rcmb_wide_xy_bank;
+ output [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
output [ 7:0] rcmb_wide_xy_addr;
output [17:0] rcmb_wide_x_dout;
output [17:0] rcmb_wide_y_dout;
output rcmb_wide_xy_valid;
- output [ 1:0] rcmb_narrow_xy_bank;
+ output [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
output [ 7:0] rcmb_narrow_xy_addr;
output [17:0] rcmb_narrow_x_dout;
output [17:0] rcmb_narrow_y_dout;
output rcmb_narrow_xy_valid;
- output [ 1:0] rcmb_xy_bank;
+ output [BANK_ADDR_W -1:0] rcmb_xy_bank;
output [ 7:0] rcmb_xy_addr;
output [17:0] rcmb_x_dout;
output [17:0] rcmb_y_dout;
@@ -132,13 +137,13 @@ module modexpng_mmm_dual_x8
//
reg wide_xy_ena = 1'b0;
reg wide_xy_ena_aux = 1'b0;
- reg [ 1:0] wide_xy_bank;
- reg [ 1:0] wide_xy_bank_aux;
+ reg [ BANK_ADDR_W -1:0] wide_xy_bank;
+ reg [ BANK_ADDR_W -1:0] wide_xy_bank_aux;
reg [ 8-1:0] wide_xy_addr[0:3];
reg [ 8-1:0] wide_xy_addr_aux;
reg narrow_xy_ena = 1'b0;
- reg [ 1:0] narrow_xy_bank;
+ reg [ BANK_ADDR_W -1:0] narrow_xy_bank;
reg [ 7:0] narrow_xy_addr;
reg [ 7:0] narrow_xy_addr_dly;
@@ -332,7 +337,7 @@ module modexpng_mmm_dual_x8
FSM_STATE_MULT_SQUARE_COL_0_TRIG,
FSM_STATE_MULT_SQUARE_COL_N_TRIG,
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= BANK_NARROW_T1T2;
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= sel_narrow_in;
//
FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
@@ -340,7 +345,7 @@ module modexpng_mmm_dual_x8
FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ?
- BANK_NARROW_EXT : BANK_NARROW_N_COEFF;
+ BANK_NARROW_EXT : BANK_NARROW_COEFF;
//
FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
@@ -486,13 +491,13 @@ module modexpng_mmm_dual_x8
FSM_STATE_MULT_SQUARE_COL_0_TRIG,
FSM_STATE_MULT_SQUARE_COL_N_TRIG,
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_T1T2;
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= sel_wide_in;
FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_ABL;
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_L;
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_ABL;
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_L;
FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
@@ -510,13 +515,13 @@ module modexpng_mmm_dual_x8
FSM_STATE_MULT_SQUARE_COL_0_TRIG,
FSM_STATE_MULT_SQUARE_COL_N_TRIG,
FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_T1T2;
+ FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= sel_wide_in;
FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_ABH;
+ FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H;
FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_ABL;
+ FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_L;
FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
@@ -525,8 +530,8 @@ module modexpng_mmm_dual_x8
FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's)
case (rcmb_xy_bank)
- BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_ABL;
- BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_ABH;
+ BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L;
+ BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H;
//BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX
default: wide_xy_bank_aux <= 3'bXXX;
endcase
diff --git a/rtl/modexpng_mmm_fsm_old.vh b/rtl/modexpng_mmm_fsm_old.vh
deleted file mode 100644
index 3bdae66..0000000
--- a/rtl/modexpng_mmm_fsm_old.vh
+++ /dev/null
@@ -1,43 +0,0 @@
-localparam FSM_STATE_WIDTH = 32;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_IDLE = 0;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_1 = 1;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_2 = 2;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_T1T2_3 = 3;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_1 = 4;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_2 = 5;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_LOAD_NN_COEFF_3 = 6;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_INIT = 11;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_TRIG = 12;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_0_BUSY = 13;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_INIT = 14;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_HOLDOFF = 17;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35;
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37;
-
-localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999;
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
new file mode 100644
index 0000000..514fc21
--- /dev/null
+++ b/rtl/modexpng_parameters.vh
@@ -0,0 +1,71 @@
+`include "modexpng_parameters_x8.vh"
+
+function integer cryptech_clog2; // constant function: ceiling of log2(value), used to size address/index localparams
+ input integer value; // item count to be indexed; any value <= 1 yields 0
+ integer temp_value; // remaining bits of (value - 1) not yet shifted out
+ integer result; // number of right shifts performed so far
+ // the loop counts the bit length of (value - 1), which equals ceil(log2(value)) for value >= 1
+ begin
+ temp_value = value - 1;
+ for (result = 0; temp_value > 0; result = result + 1)
+ temp_value = temp_value >> 1;
+ cryptech_clog2 = result;
+ end
+ //
+endfunction
+
+localparam WORD_W = 16; // bits per operand word (MAX_OP_W is divided by this to get the word count)
+localparam WORD_EXT_W = 18; // bits per extended word: WORD_W plus RDCT_CARRY_W extra bits
+localparam MAC_W = 47;
+
+localparam MAX_OP_W = 4096; // presumably the largest supported operand width in bits -- confirm
+
+localparam BANK_ADDR_W = 3; // 3 bits -> bank codes 0..7 below
+localparam OP_ADDR_W = cryptech_clog2(MAX_OP_W / WORD_W); // clog2(4096 / 16) = clog2(256) = 8
+localparam COL_INDEX_W = OP_ADDR_W - cryptech_clog2(NUM_MULTS); // 8 - 3 = 5 when NUM_MULTS = 8 (from modexpng_parameters_x8.vh)
+
+localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS); // 3 when NUM_MULTS = 8
+
+localparam RDCT_CARRY_W = WORD_EXT_W - WORD_W; // 18 - 16 = 2
+
+localparam [RDCT_CARRY_W-1:0] RDCT_CARRY_ZEROES = {RDCT_CARRY_W{1'b0}};
+
+localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0; // bank codes for the "wide" storage
+localparam [BANK_ADDR_W-1:0] BANK_WIDE_B = 3'd1;
+localparam [BANK_ADDR_W-1:0] BANK_WIDE_C = 3'd2;
+localparam [BANK_ADDR_W-1:0] BANK_WIDE_D = 3'd3;
+localparam [BANK_ADDR_W-1:0] BANK_WIDE_E = 3'd4;
+localparam [BANK_ADDR_W-1:0] BANK_WIDE_N = 3'd5;
+localparam [BANK_ADDR_W-1:0] BANK_WIDE_L = 3'd6; // takes the place of the former BANK_WIDE_ABL
+localparam [BANK_ADDR_W-1:0] BANK_WIDE_H = 3'd7; // takes the place of the former BANK_WIDE_ABH
+
+localparam [BANK_ADDR_W-1:0] BANK_NARROW_A = 3'd0; // bank codes for the "narrow" storage
+localparam [BANK_ADDR_W-1:0] BANK_NARROW_B = 3'd1;
+localparam [BANK_ADDR_W-1:0] BANK_NARROW_C = 3'd2;
+localparam [BANK_ADDR_W-1:0] BANK_NARROW_D = 3'd3;
+localparam [BANK_ADDR_W-1:0] BANK_NARROW_E = 3'd4;
+localparam [BANK_ADDR_W-1:0] BANK_NARROW_COEFF = 3'd5; // takes the place of the former BANK_NARROW_N_COEFF
+localparam [BANK_ADDR_W-1:0] BANK_NARROW_Q = 3'd6;
+localparam [BANK_ADDR_W-1:0] BANK_NARROW_EXT = 3'd7; // [0] -> COEFF', [1] -> Q'
+
+localparam [BANK_ADDR_W-1:0] BANK_RCMB_ML = 3'd0; // bank codes carried on the recombinator output
+localparam [BANK_ADDR_W-1:0] BANK_RCMB_MH = 3'd1;
+localparam [BANK_ADDR_W-1:0] BANK_RCMB_EXT = 3'd2; // [0] -> MH'
+
+localparam [BANK_ADDR_W-1:0] BANK_DONT_CARE = {BANK_ADDR_W{1'bX}}; // all-X placeholder for "no valid bank"
+
+localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0; // presumably the [0] slot noted on BANK_NARROW_EXT -- confirm
+localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q = 1; // presumably the [1] slot noted on BANK_NARROW_EXT -- confirm
+
+localparam [OP_ADDR_W-1:0] OP_ADDR_ZERO = {OP_ADDR_W{1'b0}};
+localparam [OP_ADDR_W-1:0] OP_ADDR_ONE = {{(OP_ADDR_W-1){1'b0}}, 1'b1};
+localparam [OP_ADDR_W-1:0] OP_ADDR_DONT_CARE = {OP_ADDR_W{1'bX}}; // all-X placeholder address
+
+localparam [WORD_W-1:0] WORD_NULL = {WORD_W{1'b0}};
+localparam [WORD_EXT_W-1:0] WORD_EXT_NULL = {WORD_EXT_W{1'b0}};
+
+localparam [WORD_EXT_W-1:0] WORD_EXT_DONT_CARE = {WORD_EXT_W{1'bX}}; // all-X placeholder data word
+
+localparam [MAC_INDEX_W-1:0] MAC_INDEX_DONT_CARE = {MAC_INDEX_W{1'bX}};
+
+localparam [NUM_MULTS-1:0] MULT_BITMAP_ZEROES = {NUM_MULTS{1'b0}}; // one bit per multiplier, all cleared
\ No newline at end of file
diff --git a/rtl/modexpng_parameters_old.vh b/rtl/modexpng_parameters_old.vh
deleted file mode 100644
index d30b751..0000000
--- a/rtl/modexpng_parameters_old.vh
+++ /dev/null
@@ -1,40 +0,0 @@
-
-//localparam WORD_WIDTH = 17;
-//localparam MAC_WIDTH = 47;
-
-localparam BANK_ADDR_WIDTH = 2; // TODO: Replace everywhere!
-
-localparam [1:0] BANK_WIDE_T1T2 = 2'd0;
-localparam [1:0] BANK_WIDE_ABL = 2'd1;
-localparam [1:0] BANK_WIDE_ABH = 2'd2;
-localparam [1:0] BANK_WIDE_N = 2'd3;
-
-localparam [1:0] BANK_RCMB_ML = 2'd0;
-localparam [1:0] BANK_RCMB_MH = 2'd1;
-localparam [1:0] BANK_RCMB_EXT = 2'd2; // 0 -> MH'
-
-localparam [1:0] BANK_NARROW_T1T2 = 2'd0;
-localparam [1:0] BANK_NARROW_N_COEFF = 2'd1;
-localparam [1:0] BANK_NARROW_Q = 2'd2;
-localparam [1:0] BANK_NARROW_EXT = 2'd3; // 0 -> N_COEFF', 1 -> Q'
-
-
-//localparam BANK_Y_T2 = 3'd0;
-//localparam BANK_XY_T1T2 = 3'd0;
-
-//localparam BANK_XY_AB_LSB = 3'd1;
-//localparam BANK_XY_AB_MSB = 3'd2;
-
-//localparam BANK_X_N = 3'd3;
-//localparam BANK_Y_N_COEFF = 3'd3;
-
-//localparam BANK_XY_M = 3'd4;
-
-//localparam BANK_XY_Q_LSB = 3'd5;
-//localparam BANK_XY_Q_MSB = 3'd6;
-
-//localparam BANK_XY_AUX = 3'd7;
-
-//localparam BANK_XY_ANY = 3'bXXX;
-
-//localparam BANK_XY_AUX_ADDR_N_COEFF = 0;
diff --git a/rtl/modexpng_parameters_x8.vh b/rtl/modexpng_parameters_x8.vh
new file mode 100644
index 0000000..0dcc3d6
--- /dev/null
+++ b/rtl/modexpng_parameters_x8.vh
@@ -0,0 +1,4 @@
+localparam NUM_MULTS = 8; // multiplier count for the "x8" configuration (per the file name)
+localparam NUM_MULTS_AUX = NUM_MULTS + 1; // 9
+localparam NUM_MULTS_HALF = NUM_MULTS / 2; // 4; the storage block generates this many wide BRAM pairs
+localparam NUM_MULTS_HALF_AUX = NUM_MULTS_HALF + 1; // 5
diff --git a/rtl/modexpng_parameters_x8_old.vh b/rtl/modexpng_parameters_x8_old.vh
deleted file mode 100644
index 8734354..0000000
--- a/rtl/modexpng_parameters_x8_old.vh
+++ /dev/null
@@ -1 +0,0 @@
-localparam NUM_MULTS = 8;
diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
index d6b1ad1..de60d1f 100644
--- a/rtl/modexpng_recombinator_block.v
+++ b/rtl/modexpng_recombinator_block.v
@@ -17,9 +17,9 @@ module modexpng_recombinator_block
//
// Headers
//
+ `include "modexpng_parameters.vh"
`include "../rtl_1/modexpng_mmm_fsm_old.vh"
- `include "../rtl_1/modexpng_parameters_old.vh"
- `include "../rtl_1/modexpng_parameters_x8_old.vh"
+ //`include "../rtl_1/modexpng_parameters_x8_old.vh"
input clk;
@@ -34,22 +34,22 @@ module modexpng_recombinator_block
input [ 4:0] col_index;
input [ 4:0] col_index_last;
+ input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank;
input [ 7:0] rd_narrow_xy_addr;
- input [ 1:0] rd_narrow_xy_bank;
- output [ 1:0] rcmb_wide_xy_bank;
+ output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
output [ 7:0] rcmb_wide_xy_addr;
output [ 17:0] rcmb_wide_x_dout;
output [ 17:0] rcmb_wide_y_dout;
output rcmb_wide_xy_valid;
- output [ 1:0] rcmb_narrow_xy_bank;
+ output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
output [ 7:0] rcmb_narrow_xy_addr;
output [ 17:0] rcmb_narrow_x_dout;
output [ 17:0] rcmb_narrow_y_dout;
output rcmb_narrow_xy_valid;
- output [ 1:0] rdct_narrow_xy_bank;
+ output [ BANK_ADDR_W -1:0] rdct_narrow_xy_bank;
output [ 7:0] rdct_narrow_xy_addr;
output [ 17:0] rdct_narrow_x_dout;
output [ 17:0] rdct_narrow_y_dout;
@@ -167,7 +167,7 @@ module modexpng_recombinator_block
function calc_square_triangle_valid_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -182,7 +182,7 @@ module modexpng_recombinator_block
function calc_square_valid_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -192,7 +192,7 @@ module modexpng_recombinator_block
function calc_triangle_valid_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -202,7 +202,7 @@ module modexpng_recombinator_block
function calc_rectangle_valid_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -217,7 +217,7 @@ module modexpng_recombinator_block
function calc_triangle_aux_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -232,7 +232,7 @@ module modexpng_recombinator_block
function [7:0] calc_square_triangle_bitmap_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -258,7 +258,7 @@ module modexpng_recombinator_block
function [7:0] calc_square_bitmap_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -268,7 +268,7 @@ module modexpng_recombinator_block
function [7:0] calc_triangle_bitmap_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -278,7 +278,7 @@ module modexpng_recombinator_block
function [7:0] calc_rectangle_bitmap_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -313,7 +313,7 @@ module modexpng_recombinator_block
function [2:0] calc_square_triangle_index_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -339,7 +339,7 @@ module modexpng_recombinator_block
function [2:0] calc_square_index_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -349,7 +349,7 @@ module modexpng_recombinator_block
function [2:0] calc_triangle_index_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -359,7 +359,7 @@ module modexpng_recombinator_block
function [2:0] calc_rectangle_index_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] slim_bram_xy_bank_value;
+ input [BANK_ADDR_W -1:0] slim_bram_xy_bank_value;
input [7:0] slim_bram_xy_addr_value;
begin
//
@@ -385,7 +385,7 @@ module modexpng_recombinator_block
function calc_square_rectangle_purge_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
//
@@ -400,7 +400,7 @@ module modexpng_recombinator_block
function calc_square_purge_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -410,7 +410,7 @@ module modexpng_recombinator_block
function calc_rectangle_purge_lsb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
begin
calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
@@ -420,7 +420,7 @@ module modexpng_recombinator_block
function calc_square_valid_msb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
input [7:0] index_last_value;
begin
@@ -436,7 +436,7 @@ module modexpng_recombinator_block
function calc_rectangle_valid_msb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
input [7:0] index_last_value;
begin
@@ -452,7 +452,7 @@ module modexpng_recombinator_block
function [7:0] calc_square_bitmap_msb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
input [7:0] index_last_value;
begin
@@ -469,7 +469,7 @@ module modexpng_recombinator_block
function [7:0] calc_rectangle_bitmap_msb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
input [7:0] index_last_value;
begin
@@ -485,7 +485,7 @@ module modexpng_recombinator_block
function calc_square_purge_msb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
input [7:0] index_last_value;
begin
@@ -501,7 +501,7 @@ module modexpng_recombinator_block
function calc_rectangle_purge_msb;
input [4:0] col_index_value;
input [4:0] col_index_last_value;
- input [1:0] narrow_xy_bank_value;
+ input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
input [7:0] narrow_xy_addr_value;
input [7:0] index_last_value;
begin
@@ -783,19 +783,19 @@ module modexpng_recombinator_block
end
- reg [ 1:0] wide_xy_bank;
+ reg [ BANK_ADDR_W -1:0] wide_xy_bank;
reg [ 7:0] wide_xy_addr;
reg [17:0] wide_x_dout;
reg [17:0] wide_y_dout;
reg wide_xy_valid = 1'b0;
- reg [ 1:0] narrow_xy_bank;
+ reg [ BANK_ADDR_W -1:0] narrow_xy_bank;
reg [ 7:0] narrow_xy_addr;
reg [17:0] narrow_x_dout;
reg [17:0] narrow_y_dout;
reg narrow_xy_valid = 1'b0;
- reg [ 1:0] rdct_xy_bank;
+ reg [ BANK_ADDR_W -1:0] rdct_xy_bank;
reg [ 7:0] rdct_xy_addr;
reg [17:0] rdct_x_dout;
reg [17:0] rdct_y_dout;
@@ -883,7 +883,7 @@ module modexpng_recombinator_block
endtask
task _update_wide;
- input [ 1:0] bank;
+ input [ BANK_ADDR_W -1:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
@@ -898,7 +898,7 @@ module modexpng_recombinator_block
endtask
task _update_narrow;
- input [ 1:0] bank;
+ input [ BANK_ADDR_W -1:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
@@ -913,7 +913,7 @@ module modexpng_recombinator_block
endtask
task _update_rdct;
- input [ 1:0] bank;
+ input [ BANK_ADDR_W -1:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
@@ -928,7 +928,7 @@ module modexpng_recombinator_block
endtask
task set_wide;
- input [ 1:0] bank;
+ input [ BANK_ADDR_W -1:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
@@ -938,7 +938,7 @@ module modexpng_recombinator_block
endtask
task set_narrow;
- input [ 1:0] bank;
+ input [ BANK_ADDR_W -1:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
@@ -948,7 +948,7 @@ module modexpng_recombinator_block
endtask
task set_rdct;
- input [ 1:0] bank;
+ input [ BANK_ADDR_W -1:0] bank;
input [ 7:0] addr;
input [17:0] dout_x;
input [17:0] dout_y;
@@ -959,19 +959,19 @@ module modexpng_recombinator_block
task clear_wide;
begin
- _update_wide(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ _update_wide(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0);
end
endtask
task clear_narrow;
begin
- _update_narrow(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ _update_narrow(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0);
end
endtask
task clear_rdct;
begin
- _update_rdct(2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+ _update_rdct(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0);
end
endtask
@@ -1074,16 +1074,16 @@ module modexpng_recombinator_block
//
case (rcmb_xy_valid)
//
- 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_ABH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
+ 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
else clear_wide;
//
- 2'b01: set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+ 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
//
2'b10: if (cnt_msb < 8'd2) clear_wide;
- else set_wide(BANK_WIDE_ABH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
+ else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
//
- 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_ABH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad);
- else set_wide(BANK_WIDE_ABL, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+ 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad);
+ else set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
//
endcase
//
diff --git a/rtl/modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v
deleted file mode 100644
index 1ecf56a..0000000
--- a/rtl/modexpng_recombinator_cell.v
+++ /dev/null
@@ -1,35 +0,0 @@
-module modexpng_recombinator_cell
-(
- clk,
- ce, clr,
- din, dout
-);
-
- input clk;
- input ce;
- input clr;
- input [46:0] din;
- output [15:0] dout;
-
- reg [14:0] z;
- reg [16:0] y;
- reg [17:0] x;
- //reg [15:0] w;
-
- //assign dout = w;
- assign dout = x[15:0];
-
- wire [14:0] din_z = din[46:32]; // TODO: maybe determine more precise bound here
- wire [15:0] din_y = din[31:16];
- wire [15:0] din_x = din[15: 0];
-
- always @(posedge clk)
- //
- if (ce) begin
- z <= din_z;
- y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z};
- x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {{16{1'b0}}, x[17:16]};
- //w <= clr ? {16{1'bX}} : x[15:0];
- end
-
-endmodule
diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v
index 0f5e461..aafb38c 100644
--- a/rtl/modexpng_reductor.v
+++ b/rtl/modexpng_reductor.v
@@ -17,8 +17,9 @@ module modexpng_reductor
//
// Headers
//
+ `include "modexpng_parameters.vh"
//`include "../rtl_1/modexpng_mmm_fsm.vh"
- `include "../rtl_1/modexpng_parameters_old.vh"
+
//`include "../rtl_1/modexpng_parameters_x8.vh"
@@ -39,12 +40,12 @@ module modexpng_reductor
input [ 7:0] rd_narrow_xy_addr;
input [ 1:0] rd_narrow_xy_bank;
*/
- input [ 1:0] rd_wide_xy_bank_aux;
+ input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
input [ 7:0] rd_wide_xy_addr_aux;
input [ 17:0] rd_wide_x_dout_aux;
input [ 17:0] rd_wide_y_dout_aux;
//
- input [ 1:0] rcmb_final_xy_bank;
+ input [ BANK_ADDR_W -1:0] rcmb_final_xy_bank;
input [ 7:0] rcmb_final_xy_addr;
input [ 17:0] rcmb_final_x_dout;
input [ 17:0] rcmb_final_y_dout;
@@ -60,7 +61,7 @@ module modexpng_reductor
// Ready
//
reg rdy_reg = 1'b1;
- reg busy_now = 1'b0;
+ wire busy_now;
assign rdy = rdy_reg;
@@ -81,9 +82,9 @@ module modexpng_reductor
reg rcmb_xy_valid_dly2 = 1'b0;
reg rcmb_xy_valid_dly3 = 1'b0;
- reg [2:0] rcmb_xy_bank_dly1;
- reg [2:0] rcmb_xy_bank_dly2;
- reg [2:0] rcmb_xy_bank_dly3;
+ reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1;
+ reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2;
+ reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3;
reg [7:0] rcmb_xy_addr_dly1;
reg [7:0] rcmb_xy_addr_dly2;
@@ -236,7 +237,7 @@ module modexpng_reductor
//
clear_rdct;
//
- if (busy_now && rcmb_xy_valid_dly3)
+ if (rcmb_xy_valid_dly3)
//
case (rcmb_xy_bank_dly3)
@@ -258,12 +259,25 @@ module modexpng_reductor
//
// Busy
//
+ reg busy_next = 1'b0;
+ reg [2:0] busy_now_shreg = {3{1'b0}};
+
+ assign busy_now = busy_now_shreg[2];
+
+ always @(posedge clk)
+ //
+ if (rst) busy_now_shreg <= {3{1'b0}};
+ else begin
+ if (rdy && ena) busy_now_shreg <= {3{1'b1}};
+ else busy_now_shreg <= {busy_now_shreg[1:0], busy_next};
+ end
+
always @(posedge clk)
//
- if (rst) busy_now <= 1'b0;
+ if (rst) busy_next <= 1'b0;
else begin
- if (rdy && ena) busy_now <= 1'b1;
- //if (!rdy && !busy_now) rdy <= 1'b1;
+ if (rdy && ena) busy_next <= 1'b1;
+ if (!rdy && rcmb_xy_valid_dly3 && (rcmb_xy_bank_dly3 == BANK_RCMB_EXT)) busy_next <= 1'b0;
end
diff --git a/rtl/modexpng_sdp_36k_wrapper.v b/rtl/modexpng_sdp_36k_wrapper.v
new file mode 100644
index 0000000..0295697
--- /dev/null
+++ b/rtl/modexpng_sdp_36k_wrapper.v
@@ -0,0 +1,72 @@
+module modexpng_sdp_36k_wrapper // thin wrapper around one Xilinx BRAM_SDP_MACRO configured as a 36Kb block
+(
+ clk,
+
+ ena, wea,
+ addra, dina,
+
+ enb, regceb,
+ addrb, doutb
+);
+
+
+ //
+ // Headers
+ // NOTE(review): other modules in this commit include "modexpng_parameters.vh" without the "../rtl/" prefix -- confirm include search paths
+ `include "../rtl/modexpng_parameters.vh"
+
+
+ //
+ // Ports
+ //
+ input clk; // single clock, drives both WRCLK and RDCLK of the macro
+
+ input ena; // write-port enable (WREN)
+ input wea; // write strobe, replicated onto both WE bits
+ input [BANK_ADDR_W + OP_ADDR_W -1:0] addra; // write address: bank and word address concatenated by the caller
+ input [ WORD_EXT_W -1:0] dina; // write data
+
+ input enb; // read-port enable (RDEN)
+ input regceb; // clock enable of the output register (REGCE)
+ input [BANK_ADDR_W + OP_ADDR_W -1:0] addrb; // read address, same layout as addra
+ output [ WORD_EXT_W -1:0] doutb; // read data, taken from the registered macro output (DO_REG = 1)
+
+ //
+ // BRAM_SDP_MACRO
+ //
+ BRAM_SDP_MACRO #
+ (
+ .DEVICE ("7SERIES"),
+
+ .BRAM_SIZE ("36Kb"),
+
+ .WRITE_WIDTH (WORD_EXT_W),
+ .READ_WIDTH (WORD_EXT_W),
+
+ .DO_REG (1), // output register enabled; it is gated by regceb below
+ .WRITE_MODE ("READ_FIRST"),
+
+ .SRVAL (72'h000000000000000000),
+ .INIT (72'h000000000000000000),
+
+ .INIT_FILE ("NONE"),
+ .SIM_COLLISION_CHECK ("NONE")
+ )
+ BRAM_SDP_MACRO_inst
+ (
+ .RST (1'b0), // reset input tied off
+
+ .WRCLK (clk),
+ .WREN (ena),
+ .WE ({2{wea}}),
+ .WRADDR (addra),
+ .DI (dina),
+
+ .RDCLK (clk),
+ .RDEN (enb),
+ .REGCE (regceb),
+ .RDADDR (addrb),
+ .DO (doutb)
+ );
+
+endmodule
diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v
index d6f9fb1..be04c7c 100644
--- a/rtl/modexpng_storage_block.v
+++ b/rtl/modexpng_storage_block.v
@@ -32,49 +32,46 @@ module modexpng_storage_block
rd_narrow_y_dout
);
-
//
// Headers
//
- `include "../rtl_1/modexpng_parameters_x8_old.vh"
-
+ `include "modexpng_parameters.vh"
//
// Ports
//
- input clk;
- input rst;
-
- input wr_wide_xy_ena;
- input [ 1:0] wr_wide_xy_bank;
- input [ 7:0] wr_wide_xy_addr;
- input [17:0] wr_wide_x_din;
- input [17:0] wr_wide_y_din;
+ input clk;
+ input rst;
+
+ input wr_wide_xy_ena;
+ input [ BANK_ADDR_W -1:0] wr_wide_xy_bank;
+ input [ OP_ADDR_W -1:0] wr_wide_xy_addr;
+ input [ WORD_EXT_W -1:0] wr_wide_x_din;
+ input [ WORD_EXT_W -1:0] wr_wide_y_din;
- input wr_narrow_xy_ena;
- input [ 1:0] wr_narrow_xy_bank;
- input [ 7:0] wr_narrow_xy_addr;
- input [17:0] wr_narrow_x_din;
- input [17:0] wr_narrow_y_din;
-
- input rd_wide_xy_ena;
- input rd_wide_xy_ena_aux;
- input [ 1:0] rd_wide_xy_bank;
- input [ 1:0] rd_wide_xy_bank_aux;
- input [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr;
- input [ 8-1:0] rd_wide_xy_addr_aux;
- output [18*NUM_MULTS/2-1:0] rd_wide_x_dout;
- output [18*NUM_MULTS/2-1:0] rd_wide_y_dout;
- output [ 18-1:0] rd_wide_x_dout_aux;
- output [ 18-1:0] rd_wide_y_dout_aux;
+ input wr_narrow_xy_ena;
+ input [ BANK_ADDR_W -1:0] wr_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] wr_narrow_xy_addr;
+ input [ WORD_EXT_W -1:0] wr_narrow_x_din;
+ input [ WORD_EXT_W -1:0] wr_narrow_y_din;
+
+ input rd_wide_xy_ena;
+ input rd_wide_xy_ena_aux;
+ input [ BANK_ADDR_W -1:0] rd_wide_xy_bank;
+ input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
+ input [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr;
+ input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux;
+ output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout;
+ output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout;
+ output [ WORD_EXT_W -1:0] rd_wide_x_dout_aux;
+ output [ WORD_EXT_W -1:0] rd_wide_y_dout_aux;
- input rd_narrow_xy_ena;
- input [ 1:0] rd_narrow_xy_bank;
- input [ 7:0] rd_narrow_xy_addr;
- output [18-1:0] rd_narrow_x_dout;
- output [18-1:0] rd_narrow_y_dout;
+ input rd_narrow_xy_ena;
+ input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] rd_narrow_xy_addr;
+ output [ WORD_EXT_W -1:0] rd_narrow_x_dout;
+ output [ WORD_EXT_W -1:0] rd_narrow_y_dout;
-
//
// Internal Registers
//
@@ -82,43 +79,44 @@ module modexpng_storage_block
reg rd_wide_xy_reg_ena_aux = 1'b0;
reg rd_narrow_xy_reg_ena = 1'b0;
- always @(posedge clk) begin
- //
- rd_wide_xy_reg_ena <= rst ? 1'b0 : rd_wide_xy_ena;
- rd_wide_xy_reg_ena_aux <= rst ? 1'b0 : rd_wide_xy_ena_aux;
- rd_narrow_xy_reg_ena <= rst ? 1'b0 : rd_narrow_xy_ena;
+ always @(posedge clk)
//
- end
-
+ if (rst) begin
+ rd_wide_xy_reg_ena <= 1'b0;
+ rd_wide_xy_reg_ena_aux <= 1'b0;
+ rd_narrow_xy_reg_ena <= 1'b0;
+ end else begin
+ rd_wide_xy_reg_ena <= rd_wide_xy_ena;
+ rd_wide_xy_reg_ena_aux <= rd_wide_xy_ena_aux;
+ rd_narrow_xy_reg_ena <= rd_narrow_xy_ena;
+ end
//
// Helper Signals
//
- wire [2+8-1:0] wr_wide_xy_offset;
- wire [2+8-1:0] rd_wide_xy_offset[0:NUM_MULTS/2-1];
- wire [2+8-1:0] rd_wide_xy_offset_aux;
- wire [2+8-1:0] wr_narrow_xy_offset;
- wire [2+8-1:0] rd_narrow_xy_offset;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset[0:NUM_MULTS_HALF-1];
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset_aux;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset;
+ wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset;
- assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr};
assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux};
- assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr};
assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr};
-
+ assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr};
+ assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr};
//
// "Wide" Storage
//
genvar z;
- generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+ generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
begin : gen_wide_bram
//
- assign rd_wide_xy_offset[z] = {rd_wide_xy_bank, rd_wide_xy_addr[8*z+:8]};
+ assign rd_wide_xy_offset[z] = {rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]}; // read offset of wide BRAM z: {bank, word address} is exactly BANK_ADDR_W + OP_ADDR_W bits, so no zero pad (a leading 1'b0 would only be truncated)
//
- ip_bram_18k wide_bram_x
+ modexpng_sdp_36k_wrapper wide_bram_x
(
- .clka (clk),
- .clkb (clk),
+ .clk (clk),
.ena (wr_wide_xy_ena),
.wea (wr_wide_xy_ena),
@@ -128,13 +126,12 @@ module modexpng_storage_block
.enb (rd_wide_xy_ena),
.regceb (rd_wide_xy_reg_ena),
.addrb (rd_wide_xy_offset[z]),
- .doutb (rd_wide_x_dout[18*z+:18])
+ .doutb (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W])
);
//
- ip_bram_18k wide_bram_y
+ modexpng_sdp_36k_wrapper wide_bram_y
(
- .clka (clk),
- .clkb (clk),
+ .clk (clk),
.ena (wr_wide_xy_ena),
.wea (wr_wide_xy_ena),
@@ -144,20 +141,18 @@ module modexpng_storage_block
.enb (rd_wide_xy_ena),
.regceb (rd_wide_xy_reg_ena),
.addrb (rd_wide_xy_offset[z]),
- .doutb (rd_wide_y_dout[18*z+:18])
+ .doutb (rd_wide_y_dout[z*WORD_EXT_W +: WORD_EXT_W])
);
//
end
endgenerate
-
//
// Auxilary Storage
//
- ip_bram_18k wide_bram_x_aux
+ modexpng_sdp_36k_wrapper wide_bram_x_aux
(
- .clka (clk),
- .clkb (clk),
+ .clk (clk),
.ena (wr_wide_xy_ena),
.wea (wr_wide_xy_ena),
@@ -170,10 +165,9 @@ module modexpng_storage_block
.doutb (rd_wide_x_dout_aux)
);
//
- ip_bram_18k wide_bram_y_aux
+ modexpng_sdp_36k_wrapper wide_bram_y_aux
(
- .clka (clk),
- .clkb (clk),
+ .clk (clk),
.ena (wr_wide_xy_ena),
.wea (wr_wide_xy_ena),
@@ -186,14 +180,12 @@ module modexpng_storage_block
.doutb (rd_wide_y_dout_aux)
);
-
//
// "Narrow" Storage
//
- ip_bram_18k narrow_bram_x
+ modexpng_sdp_36k_wrapper narrow_bram_x
(
- .clka (clk),
- .clkb (clk),
+ .clk (clk),
.ena (wr_narrow_xy_ena),
.wea (wr_narrow_xy_ena),
@@ -206,10 +198,9 @@ module modexpng_storage_block
.doutb (rd_narrow_x_dout)
);
- ip_bram_18k narrow_bram_y
+ modexpng_sdp_36k_wrapper narrow_bram_y
(
- .clka (clk),
- .clkb (clk),
+ .clk (clk),
.ena (wr_narrow_xy_ena),
.wea (wr_narrow_xy_ena),
@@ -222,5 +213,4 @@ module modexpng_storage_block
.doutb (rd_narrow_y_dout)
);
-
endmodule
diff --git a/rtl/modexpng_storage_manager.v b/rtl/modexpng_storage_manager.v
index fa1e4a1..e5ac83f 100644
--- a/rtl/modexpng_storage_manager.v
+++ b/rtl/modexpng_storage_manager.v
@@ -43,70 +43,69 @@ module modexpng_storage_manager
//
// Headers
//
- `include "../rtl_1/modexpng_parameters_x8_old.vh"
+ `include "modexpng_parameters.vh"
//
// Ports
//
- input clk;
- input rst;
+ input clk;
+ input rst;
- output wr_wide_xy_ena;
- output [ 1:0] wr_wide_xy_bank;
- output [ 7:0] wr_wide_xy_addr;
- output [17:0] wr_wide_x_din;
- output [17:0] wr_wide_y_din;
+ output wr_wide_xy_ena;
+ output [BANK_ADDR_W -1:0] wr_wide_xy_bank;
+ output [ OP_ADDR_W -1:0] wr_wide_xy_addr;
+ output [ WORD_EXT_W -1:0] wr_wide_x_din;
+ output [ WORD_EXT_W -1:0] wr_wide_y_din;
- output wr_narrow_xy_ena;
- output [ 1:0] wr_narrow_xy_bank;
- output [ 7:0] wr_narrow_xy_addr;
- output [17:0] wr_narrow_x_din;
- output [17:0] wr_narrow_y_din;
+ output wr_narrow_xy_ena;
+ output [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
+ output [ OP_ADDR_W -1:0] wr_narrow_xy_addr;
+ output [ WORD_EXT_W -1:0] wr_narrow_x_din;
+ output [ WORD_EXT_W -1:0] wr_narrow_y_din;
- input ext_wide_xy_ena;
- input [ 1:0] ext_wide_xy_bank;
- input [ 7:0] ext_wide_xy_addr;
- input [17:0] ext_wide_x_din;
- input [17:0] ext_wide_y_din;
+ input ext_wide_xy_ena;
+ input [BANK_ADDR_W -1:0] ext_wide_xy_bank;
+ input [ OP_ADDR_W -1:0] ext_wide_xy_addr;
+ input [ WORD_EXT_W -1:0] ext_wide_x_din;
+ input [ WORD_EXT_W -1:0] ext_wide_y_din;
- input ext_narrow_xy_ena;
- input [ 1:0] ext_narrow_xy_bank;
- input [ 7:0] ext_narrow_xy_addr;
- input [17:0] ext_narrow_x_din;
- input [17:0] ext_narrow_y_din;
+ input ext_narrow_xy_ena;
+ input [BANK_ADDR_W -1:0] ext_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] ext_narrow_xy_addr;
+ input [ WORD_EXT_W -1:0] ext_narrow_x_din;
+ input [ WORD_EXT_W -1:0] ext_narrow_y_din;
input rcmb_wide_xy_ena;
- input [ 1:0] rcmb_wide_xy_bank;
+ input [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
input [ 7:0] rcmb_wide_xy_addr;
input [17:0] rcmb_wide_x_din;
input [17:0] rcmb_wide_y_din;
input rcmb_narrow_xy_ena;
- input [ 1:0] rcmb_narrow_xy_bank;
+ input [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
input [ 7:0] rcmb_narrow_xy_addr;
input [17:0] rcmb_narrow_x_din;
input [17:0] rcmb_narrow_y_din;
-
- reg wr_wide_xy_ena_reg = 1'b0;
- reg [ 1:0] wr_wide_xy_bank_reg;
- reg [ 7:0] wr_wide_xy_addr_reg;
- reg [17:0] wr_wide_x_din_reg;
- reg [17:0] wr_wide_y_din_reg;
+ reg wr_wide_xy_ena_reg = 1'b0;
+ reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_reg;
+ reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_reg;
+ reg [ WORD_EXT_W -1:0] wr_wide_x_din_reg;
+ reg [ WORD_EXT_W -1:0] wr_wide_y_din_reg;
- reg wr_narrow_xy_ena_reg = 1'b0;
- reg [ 1:0] wr_narrow_xy_bank_reg;
- reg [ 7:0] wr_narrow_xy_addr_reg;
- reg [17:0] wr_narrow_x_din_reg;
- reg [17:0] wr_narrow_y_din_reg;
+ reg wr_narrow_xy_ena_reg = 1'b0;
+ reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_reg;
+ reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_reg;
+ reg [ WORD_EXT_W -1:0] wr_narrow_x_din_reg;
+ reg [ WORD_EXT_W -1:0] wr_narrow_y_din_reg;
task _update_wide;
- input xy_ena;
- input [ 1:0] xy_bank;
- input [ 7:0] xy_addr;
- input [17:0] x_din;
- input [17:0] y_din;
+ input xy_ena;
+ input [BANK_ADDR_W -1:0] xy_bank;
+ input [ OP_ADDR_W -1:0] xy_addr;
+ input [ WORD_EXT_W -1:0] x_din;
+ input [ WORD_EXT_W -1:0] y_din;
begin
wr_wide_xy_ena_reg <= xy_ena;
wr_wide_xy_bank_reg <= xy_bank;
@@ -118,10 +117,10 @@ module modexpng_storage_manager
task _update_narrow;
input xy_ena;
- input [ 1:0] xy_bank;
- input [ 7:0] xy_addr;
- input [17:0] x_din;
- input [17:0] y_din;
+ input [BANK_ADDR_W -1:0] xy_bank;
+ input [ OP_ADDR_W -1:0] xy_addr;
+ input [ WORD_EXT_W -1:0] x_din;
+ input [ WORD_EXT_W -1:0] y_din;
begin
wr_narrow_xy_ena_reg <= xy_ena;
wr_narrow_xy_bank_reg <= xy_bank;
@@ -132,20 +131,20 @@ module modexpng_storage_manager
endtask
task enable_wide;
- input [ 1:0] xy_bank;
- input [ 7:0] xy_addr;
- input [17:0] x_din;
- input [17:0] y_din;
+ input [BANK_ADDR_W -1:0] xy_bank;
+ input [ OP_ADDR_W -1:0] xy_addr;
+ input [ WORD_EXT_W -1:0] x_din;
+ input [ WORD_EXT_W -1:0] y_din;
begin
_update_wide(1'b1, xy_bank, xy_addr, x_din, y_din);
end
endtask
task enable_narrow;
- input [ 1:0] xy_bank;
- input [ 7:0] xy_addr;
- input [17:0] x_din;
- input [17:0] y_din;
+ input [BANK_ADDR_W -1:0] xy_bank;
+ input [ OP_ADDR_W -1:0] xy_addr;
+ input [ WORD_EXT_W -1:0] x_din;
+ input [ WORD_EXT_W -1:0] y_din;
begin
_update_narrow(1'b1, xy_bank, xy_addr, x_din, y_din);
end
@@ -153,13 +152,13 @@ module modexpng_storage_manager
task disable_wide;
begin
- _update_wide(1'b0, 2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}});
+ _update_wide(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
end
endtask
task disable_narrow;
begin
- _update_narrow(1'b0, 2'bXX, 8'hXX, {18{1'bX}}, {18{1'bX}});
+ _update_narrow(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
end
endtask
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
More information about the Commits mailing list