[Cryptech-Commits] [user/shatov/modexpng] 01/05: Redesigned storage modules, added top-level module, added I/O storage space.

git at cryptech.is git at cryptech.is
Thu Oct 3 13:51:33 UTC 2019


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexpng.

commit 0b4b42da734c1164b65a334351274f946b2d4dcb
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Thu Oct 3 16:38:18 2019 +0300

    Redesigned storage modules, added top-level module, added I/O storage space.
---
 rtl/_modexpng_mmm_dual_x8.v                        |  961 ---------------
 rtl/_modexpng_recombinator_block.v                 | 1225 --------------------
 rtl/_modexpng_storage_block.v                      |  219 ----
 rtl/_modexpng_storage_manager.v                    |  199 ----
 rtl/modexpng_core_top.v                            |   41 +
 rtl/modexpng_mmm_dual.v                            |   28 +-
 rtl/modexpng_parameters.vh                         |    3 +
 rtl/modexpng_recombinator_block.v                  |    5 +-
 rtl/modexpng_sdp_36k_wrapper.v                     |    2 +-
 ...rapper.v => modexpng_sdp_36k_x16_x32_wrapper.v} |   37 +-
 ...rapper.v => modexpng_sdp_36k_x32_x16_wrapper.v} |   37 +-
 rtl/modexpng_storage_block.v                       |  196 +++-
 rtl/modexpng_tdp_36k_x16_x32_wrapper.v             |   88 ++
 13 files changed, 360 insertions(+), 2681 deletions(-)

diff --git a/rtl/_modexpng_mmm_dual_x8.v b/rtl/_modexpng_mmm_dual_x8.v
deleted file mode 100644
index ffd5ccf..0000000
--- a/rtl/_modexpng_mmm_dual_x8.v
+++ /dev/null
@@ -1,961 +0,0 @@
-module modexpng_mmm_dual_x8
-(
-    clk, rst,
-    
-    ena, rdy,
-
-    ladder_mode,
-    word_index_last,
-    word_index_last_minus1,
-    
-    sel_wide_in,
-    sel_narrow_in,
-    sel_wide_out,
-    sel_narrow_out,
-    
-    rd_wide_xy_ena,
-    rd_wide_xy_ena_aux,
-    rd_wide_xy_bank,
-    rd_wide_xy_bank_aux,
-    rd_wide_xy_addr,
-    rd_wide_xy_addr_aux,
-    rd_wide_x_dout,
-    rd_wide_y_dout,
-    rd_wide_x_dout_aux,
-    rd_wide_y_dout_aux,
-    
-    rd_narrow_xy_ena,
-    rd_narrow_xy_bank,
-    rd_narrow_xy_addr,
-    rd_narrow_x_dout,
-    rd_narrow_y_dout,
-    
-    rcmb_wide_xy_bank,
-    rcmb_wide_xy_addr,
-    rcmb_wide_x_dout,
-    rcmb_wide_y_dout,
-    rcmb_wide_xy_valid,
-    
-    rcmb_narrow_xy_bank,
-    rcmb_narrow_xy_addr,
-    rcmb_narrow_x_dout,
-    rcmb_narrow_y_dout,
-    rcmb_narrow_xy_valid,
-    
-    rcmb_xy_bank,
-    rcmb_xy_addr,
-    rcmb_x_dout,
-    rcmb_y_dout,
-    rcmb_xy_valid,
-    
-    rdct_ena, rdct_rdy
-);
-
-
-    //
-    // Headers
-    //
-    `include "../rtl/modexpng_mmm_fsm.vh"
-    `include "../rtl/modexpng_parameters.vh"
-
-
-    //
-    // Ports
-    //
-    input                                             clk;
-    input                                             rst;
-    
-    input                                             ena;
-    output                                            rdy;
-    
-    input                                             ladder_mode;
-    input  [                 OP_ADDR_W -1:0] word_index_last;
-    input  [                 OP_ADDR_W -1:0] word_index_last_minus1;
-    
-    input  [                 BANK_ADDR_W    -1:0] sel_wide_in;
-    input  [                 BANK_ADDR_W    -1:0] sel_narrow_in;
-    input  [                 BANK_ADDR_W    -1:0] sel_wide_out;
-    input  [                 BANK_ADDR_W    -1:0] sel_narrow_out;
-    
-    output                                            rd_wide_xy_ena;
-    output                                            rd_wide_xy_ena_aux;
-    output [                 BANK_ADDR_W    -1:0] rd_wide_xy_bank;
-    output [                 BANK_ADDR_W    -1:0] rd_wide_xy_bank_aux;
-    output [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr;
-    output [                 OP_ADDR_W -1:0] rd_wide_xy_addr_aux;
-    input  [NUM_MULTS_HALF * WORD_EXT_W     -1:0] rd_wide_x_dout;
-    input  [NUM_MULTS_HALF * WORD_EXT_W     -1:0] rd_wide_y_dout;
-    input  [                 WORD_EXT_W     -1:0] rd_wide_x_dout_aux;
-    input  [                 WORD_EXT_W     -1:0] rd_wide_y_dout_aux;
-
-    output                                            rd_narrow_xy_ena;
-    output [                 BANK_ADDR_W    -1:0] rd_narrow_xy_bank;
-    output [                 OP_ADDR_W -1:0] rd_narrow_xy_addr;
-    input  [                 WORD_EXT_W     -1:0] rd_narrow_x_dout;
-    input  [                 WORD_EXT_W     -1:0] rd_narrow_y_dout;
-
-    output [                 BANK_ADDR_W    -1:0] rcmb_wide_xy_bank;
-    output [                 OP_ADDR_W -1:0] rcmb_wide_xy_addr;
-    output [                 WORD_EXT_W     -1:0] rcmb_wide_x_dout;
-    output [                 WORD_EXT_W     -1:0] rcmb_wide_y_dout;
-    output                                            rcmb_wide_xy_valid;
-
-    output [                 BANK_ADDR_W    -1:0] rcmb_narrow_xy_bank;
-    output [                 OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
-    output [                 WORD_EXT_W     -1:0] rcmb_narrow_x_dout;
-    output [                 WORD_EXT_W     -1:0] rcmb_narrow_y_dout;
-    output                                            rcmb_narrow_xy_valid;
-
-    output [                 BANK_ADDR_W    -1:0] rcmb_xy_bank;
-    output [                 OP_ADDR_W -1:0] rcmb_xy_addr;
-    output [                 WORD_EXT_W     -1:0] rcmb_x_dout;
-    output [                 WORD_EXT_W     -1:0] rcmb_y_dout;
-    output                                            rcmb_xy_valid;
-    
-    output                                            rdct_ena;
-    input                                             rdct_rdy;
-
-    
-    //
-    // FSM Declaration
-    //
-    reg [MMM_FSM_STATE_W-1:0] fsm_state = MMM_FSM_STATE_IDLE;
-    reg [MMM_FSM_STATE_W-1:0] fsm_state_next;
-    
-    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_square;
-    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_triangle;
-    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_rectangle;
-
-    
-    //
-    // FSM Process
-    //
-    always @(posedge clk)
-        // state register: synchronous reset to IDLE, otherwise load the precomputed fsm_state_next
-        if (rst) fsm_state <= MMM_FSM_STATE_IDLE;
-        else     fsm_state <= fsm_state_next;
-
-        
-    //
-    // Storage Control Interface
-    //
-    reg                            wide_xy_ena = 1'b0;
-    reg                            wide_xy_ena_aux = 1'b0;
-    reg  [   BANK_ADDR_W -1:0] wide_xy_bank;
-    reg  [   BANK_ADDR_W -1:0] wide_xy_bank_aux;
-    reg  [OP_ADDR_W -1:0] wide_xy_addr[0:3];
-    reg  [OP_ADDR_W -1:0] wide_xy_addr_aux;
-    
-    reg                            narrow_xy_ena = 1'b0;
-    reg  [   BANK_ADDR_W -1:0] narrow_xy_bank;
-    reg  [OP_ADDR_W -1:0] narrow_xy_addr;
-    reg  [OP_ADDR_W -1:0] narrow_xy_addr_dly;
-    
-    assign rd_wide_xy_ena      = wide_xy_ena;
-    assign rd_wide_xy_ena_aux  = wide_xy_ena_aux;
-    assign rd_wide_xy_bank     = wide_xy_bank;
-    assign rd_wide_xy_bank_aux = wide_xy_bank_aux;
-    assign rd_wide_xy_addr_aux = wide_xy_addr_aux;
-
-    assign rd_narrow_xy_ena  = narrow_xy_ena;
-    assign rd_narrow_xy_bank = narrow_xy_bank;
-    assign rd_narrow_xy_addr = narrow_xy_addr;
-
-    genvar z;
-    generate for (z=0; z<NUM_MULTS_HALF; z=z+1)    // flatten the wide_xy_addr[] array into the packed output port
-        begin : gen_rd_wide_xy_addr
-            assign rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W] = wide_xy_addr[z];    // slice z carries array entry z
-        end    // NOTE(review): wide_xy_addr is declared [0:3], so this presumably requires NUM_MULTS_HALF == 4 -- confirm
-    endgenerate
-        
-    //
-    // Column Counter
-    //
-    reg  [4:0] col_index;       // current column index
-    reg  [4:0] col_index_prev;  // delayed column index value
-    reg  [4:0] col_index_last;  // index of the very last column
-    reg  [4:0] col_index_next;  // precomputed next column index
-    reg        col_is_last;     // flag set during the very last column
-
-    always @(posedge clk)
-        // one-cycle delayed copy of col_index (no reset, free-running)
-        col_index_prev <= col_index;
-
-    //
-    // Column Counter Increment Logic
-    //
-    always @(posedge clk)
-        // keyed on fsm_state_next, so the counters are updated on the same edge that enters the INIT state
-        case (fsm_state_next)
-            // first column of any multiplication pass: restart the column counters
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
-                col_index       <= 5'd0;
-                col_index_last  <= word_index_last[7:3];    // upper five bits of the last word index
-                col_index_next  <= 5'd1;
-                col_is_last     <= 1'b0;    // NOTE(review): cleared unconditionally, so column 0 is never flagged last even if col_index_last is 0 -- confirm operands always span more than one column
-                
-            end
-            // subsequent columns: advance to the precomputed index, wrapping the lookahead to 0 after the last column
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
-                col_index <= col_index_next;
-                col_is_last <= col_index_next == col_index_last;
-                col_index_next <= col_index_next == col_index_last ? 5'd0 : col_index_next + 5'd1;   
-            end
-            // no default: all four registers hold their value in every other state
-        endcase
-
-
-    //
-    // Completion Flags
-    //
-    wire square_almost_done_comb;
-    reg  square_almost_done_flop = 1'b0;
-    reg  square_surely_done_flop = 1'b0;
-
-    wire triangle_almost_done_comb;
-    reg  triangle_almost_done_flop = 1'b0;
-    reg  triangle_surely_done_flop = 1'b0;
-    reg  triangle_tardy_done_flop = 1'b0;
-
-    wire rectangle_almost_done_comb;
-    reg  rectangle_almost_done_flop = 1'b0;        
-    reg  rectangle_surely_done_flop = 1'b0;
-    reg  rectangle_tardy_done_flop = 1'b0;
-
-    assign square_almost_done_comb = narrow_xy_addr == word_index_last_minus1;
-    assign triangle_almost_done_comb = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index);
-    assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1;
-
-    //
-    // Square Completion Flags
-    //
-    always @(posedge clk) begin
-        // "almost done" samples the comparator only while a square pass is in a BUSY state
-        case (fsm_state)
-            //
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:
-                square_almost_done_flop <= square_almost_done_comb;
-            // any other state forces the flag low
-            default:
-               square_almost_done_flop <= 1'b0;
-           //
-        endcase
-        // "surely done" is "almost done" delayed by one more clock
-        square_surely_done_flop <= square_almost_done_flop;
-        //
-    end
-
-    //
-    // Triangle Completion Flags
-    //
-    always @(posedge clk) begin
-        // "almost done" samples the comparator only while a triangle pass is in a BUSY state
-        case (fsm_state)
-            //
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
-                triangle_almost_done_flop <= triangle_almost_done_comb;
-            // any other state forces the flag low
-            default:
-                triangle_almost_done_flop <= 1'b0;
-            //
-        endcase
-        // two-stage shift chain: almost -> surely -> tardy, one clock apart each
-        triangle_surely_done_flop <= triangle_almost_done_flop;
-        triangle_tardy_done_flop  <= triangle_surely_done_flop;
-        //
-    end
-      
-    //
-    // Rectangle Completion Flags
-    //
-    always @(posedge clk) begin
-        // "almost done" samples the comparator only while a rectangle pass is in a BUSY state
-        case (fsm_state)
-            //
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
-                rectangle_almost_done_flop <= rectangle_almost_done_comb;
-            // any other state forces the flag low
-            default:
-                rectangle_almost_done_flop <= 1'b0;
-            //
-        endcase
-        // two-stage shift chain: almost -> surely -> tardy, one clock apart each
-        rectangle_surely_done_flop <= rectangle_almost_done_flop;
-        rectangle_tardy_done_flop  <= rectangle_surely_done_flop;
-        //
-    end
-
-
-    //
-    // Narrow Storage Control Logic
-    //
-    always @(posedge clk)
-        // reset clears only the enable; narrow_xy_addr and narrow_xy_bank are not assigned while rst is high
-        if (rst) narrow_xy_ena <= 1'b0;
-        else begin
-            //
-            // Narrow Address
-            // (INIT states restart at word 0; TRIG/BUSY states count up until a completion flag wraps the address)
-            case (fsm_state_next)
-                //
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:   narrow_xy_addr <= 8'd0;
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0;
-                //
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ?
-                    8'd0 :  narrow_xy_addr + 1'b1;
-                // rectangle pass wraps to word 1 (not 0) on either completion flag
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
-                    8'd1 :  narrow_xy_addr + 1'b1;            
-                // address is don't-care outside the multiplication states
-                default:                            narrow_xy_addr <= 8'dX;
-                //
-            endcase
-            //
-            // Narrow Bank
-            // (square reads the caller-selected bank; triangle and rectangle switch to BANK_NARROW_EXT at the end of a pass)
-            case (fsm_state_next)
-                //
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   narrow_xy_bank <= sel_narrow_in;
-                //
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ?
-                    BANK_NARROW_EXT : BANK_NARROW_COEFF;
-                //
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
-                    BANK_NARROW_EXT : BANK_NARROW_Q;            
-                // NOTE(review): 2-bit X literal here, while the wide banks (same BANK_ADDR_W declaration) use 3'bXXX -- confirm intended width
-                default:                            narrow_xy_bank <= 2'bXX;
-                //
-            endcase        
-            // Narrow Enable (always on in INIT/TRIG; in BUSY it drops once the relevant completion flag is set)
-            case (fsm_state_next)
-                //
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:   narrow_xy_ena <= 1'b1;
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   narrow_xy_ena <= ~square_almost_done_flop;
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:   narrow_xy_ena <= 1'b1;
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop; 
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:   narrow_xy_ena <= 1'b1;
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   narrow_xy_ena <= ~rectangle_surely_done_flop;
-                // enable is low in every non-multiplication state
-                default:                              narrow_xy_ena <= 1'b0;
-                //
-            endcase
-            //
-        end
-
-
-    //
-    // Wide Storage Control Logic
-    //
-
-    wire [2:0] wide_offset_rom[0:3];
-    
-    generate for (z=1; z<NUM_MULTS; z=z+2)    // z walks the odd values 1, 3, 5, ... below NUM_MULTS
-        begin : gen_wide_offset_rom
-            assign wide_offset_rom[(z-1)/2] = z[2:0];    // entry k holds the constant 2k+1 (low three bits of z)
-        end    // NOTE(review): the ROM is declared [0:3], so this presumably requires NUM_MULTS == 8 -- confirm against modexpng_parameters.vh
-    endgenerate    
-
-    function  [7:0] wide_xy_addr_next;    // wrapping down-counter step for a wide-storage read address
-        input [7:0] wide_xy_addr_current;    // address used this cycle
-        input [7:0] wide_xy_addr_last;       // value to reload when the counter reaches zero
-        begin
-            if (wide_xy_addr_current > 8'd0)
-                wide_xy_addr_next = wide_xy_addr_current - 1'b1;    // normal case: step down by one
-            else
-                wide_xy_addr_next = wide_xy_addr_last;              // at zero: wrap around to the last index
-        end
-    endfunction
-    
-    integer j;
-    always @(posedge clk)
-        // reset clears only the two enables; addresses and banks are not assigned while rst is high
-        if (rst) begin
-            wide_xy_ena <= 1'b0;
-            wide_xy_ena_aux <= 1'b0;
-        end else begin
-            //
-            // Wide Address
-            // (one address register per j; all of them follow the same schedule, offset by wide_offset_rom[j])
-            for (j=0; j<(NUM_MULTS/2); j=j+1)
-                //
-                case (fsm_state_next)
-                    //
-                    // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
-                    //
-                    MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:   wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
-                    MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:   wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
-                    MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                    MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                    MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                    MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
-                    //
-                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:   wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
-                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:   wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
-                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
-                    //
-                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:   wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
-                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT:   wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
-                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
-                    // address is don't-care outside the multiplication states
-                    default:                            wide_xy_addr[j] <= 8'dX;
-                endcase
-            //
-            // Wide Aux Address
-            // (square/triangle: always restarts at {0, 1} and counts down; rectangle: follows rcmb_xy_addr while rcmb_xy_valid)
-            case (fsm_state_next)
-                //
-                // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
-                //
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
-                //
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
-                //
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:   wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0};
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:    wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX;
-                //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ?
-                    //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4];
-                //
-                default:                            wide_xy_addr_aux <= 8'dX;
-            endcase
-            //
-            // Wide Bank
-            // (square: caller-selected bank; triangle: BANK_WIDE_L; rectangle: BANK_WIDE_N)
-            case (fsm_state_next)
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:    wide_xy_bank <= sel_wide_in;
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:  wide_xy_bank <= BANK_WIDE_L;   // ? combine ?
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_bank <= BANK_WIDE_L;
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,    
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   wide_xy_bank <= BANK_WIDE_N;            
-                default:                             wide_xy_bank <= 3'bXXX;
-            endcase
-            //
-            // Wide Aux Bank
-            // (triangle: BANK_WIDE_H during INIT/TRIG, BANK_WIDE_L during BUSY; rectangle: mapped from rcmb_xy_bank while rcmb_xy_valid)
-            case (fsm_state_next)
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   wide_xy_bank_aux <= sel_wide_in;
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H;
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_bank_aux <= BANK_WIDE_L;
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,    
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:   if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's)
-                    case (rcmb_xy_bank)
-                        BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L;
-                        BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H;
-                        //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX
-                        default: wide_xy_bank_aux <= 3'bXXX; 
-                     endcase
-                     else wide_xy_bank_aux <= 3'bXXX;
-                default:                            wide_xy_bank_aux <= 3'bXXX;
-            endcase
-            //
-            // Wide Enable
-            // (high in every INIT/TRIG/BUSY multiplication state, low otherwise)
-            case (fsm_state_next)
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   wide_xy_ena <= 1'b1;
-                default:                               wide_xy_ena <= 1'b0;
-            endcase
-            //
-            // Wide Aux Enable
-            // (no square states listed, so the aux port stays disabled during square passes; rectangle tracks rcmb_xy_valid)
-            case (fsm_state_next)
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  wide_xy_ena_aux <= 1'b1;
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1;
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:   wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML);
-                default:                            wide_xy_ena_aux <= 1'b0;
-            endcase
-            //
-        end
-        
-        
-    //
-    // Delay Lines
-    //
-    always @(posedge clk)
-        // one-cycle delayed copy of the narrow read address (no reset)
-        narrow_xy_addr_dly <= narrow_xy_addr;
-
-    
-    //
-    // DSP Array Logic
-    //
-    reg             dsp_xy_ce_a = 1'b0;
-    reg             dsp_xy_ce_b = 1'b0;
-    reg             dsp_xy_ce_b_dly = 1'b0;
-    reg             dsp_xy_ce_m = 1'b0;
-    reg             dsp_xy_ce_p = 1'b0;
-    reg             dsp_xy_ce_mode = 1'b0;
-    
-    reg  [9   -1:0] dsp_xy_mode_z = {9{1'b1}};
-    
-    wire [5*18-1:0] dsp_x_a;
-    wire [5*18-1:0] dsp_y_a;
-
-    reg  [1*16-1:0] dsp_x_b;
-    reg  [1*16-1:0] dsp_y_b;
-    
-    reg  [ 1:0] dsp_xy_b_carry;
-
-    wire [9*47-1:0] dsp_x_p;            
-    wire [9*47-1:0] dsp_y_p;
-        
-    //generate for (z=0; z<(NUM_MULTS/2); z=z+1)
-        //begin : gen_dsp_xy_a_split
-            //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z];
-            //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z];
-        //end
-    //endgenerate
-    
-    assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout};
-    assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout};
-    
-    //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux;
-    //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux;
-            
-    always @(posedge clk)
-        // one-cycle delayed copy of the B clock enable; it feeds dsp_xy_ce_m and dsp_xy_ce_mode in the DSP control block
-        dsp_xy_ce_b_dly <= dsp_xy_ce_b;
-    
-
-    modexpng_dsp_array_block dsp_array_block_x    // X-side DSP array; shares every control input with the Y-side instance
-    (
-        .clk            (clk),
-        
-        .ce_a           (dsp_xy_ce_a),
-        .ce_b           (dsp_xy_ce_b),
-        .ce_m           (dsp_xy_ce_m),
-        .ce_p           (dsp_xy_ce_p),
-        .ce_mode        (dsp_xy_ce_mode),
-
-        .mode_z         (dsp_xy_mode_z),
-        
-        .a              (dsp_x_a),    // {aux, main} wide-storage X read data
-        .b              (dsp_x_b),    // registered 16-bit narrow X operand
-        .p              (dsp_x_p)
-    );
-
-    modexpng_dsp_array_block dsp_array_block_y    // Y-side DSP array; shares every control input with the X-side instance
-    (
-        .clk            (clk),
-        
-        .ce_a           (dsp_xy_ce_a),
-        .ce_b           (dsp_xy_ce_b),
-        .ce_m           (dsp_xy_ce_m),
-        .ce_p           (dsp_xy_ce_p),
-        .ce_mode        (dsp_xy_ce_mode),
-
-        .mode_z         (dsp_xy_mode_z),
-        
-        .a              (dsp_y_a),    // {aux, main} wide-storage Y read data
-        .b              (dsp_y_b),    // registered 16-bit narrow Y operand
-        .p              (dsp_y_p)
-    );
-
-
-   
-
-    //
-    // DSP Control Logic
-    //
-    reg narrow_xy_ena_dly1 = 1'b0;
-    reg narrow_xy_ena_dly2 = 1'b0;
-    
-    always @(posedge clk)
-        // clock-enable pipeline: every stage is derived from delayed copies of narrow_xy_ena
-        if (rst) begin
-            // synchronous reset clears the delay line and all five clock enables
-            narrow_xy_ena_dly1 <= 1'b0;
-            narrow_xy_ena_dly2 <= 1'b0;
-            //
-            dsp_xy_ce_a    <= 1'b0;
-            dsp_xy_ce_b    <= 1'b0;
-            dsp_xy_ce_m    <= 1'b0;
-            dsp_xy_ce_p    <= 1'b0;
-            dsp_xy_ce_mode <= 1'b0;
-            //
-        end else begin
-            // two-stage delay of the narrow read enable
-            narrow_xy_ena_dly1 <= narrow_xy_ena;
-            narrow_xy_ena_dly2 <= narrow_xy_ena_dly1; 
-            // ce_a covers both delay taps; ce_b only the second; ce_m and ce_mode trail ce_b by two clocks, ce_p by three
-            dsp_xy_ce_a    <= narrow_xy_ena_dly1 | narrow_xy_ena_dly2;
-            dsp_xy_ce_b    <= narrow_xy_ena_dly2;
-            dsp_xy_ce_m    <= dsp_xy_ce_b_dly;
-            dsp_xy_ce_p    <= dsp_xy_ce_m;
-            dsp_xy_ce_mode <= dsp_xy_ce_b_dly;
-            //
-        end    
-        
-    //
-    // DSP Feed Logic
-    //
-    reg dsp_merge_xy_b;
-    
-    always @(posedge clk)
-        // sticky selector: set at the first square TRIG, cleared at the first triangle TRIG, held in every other state
-        case (fsm_state)
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG:   dsp_merge_xy_b <= 1'b1;
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0;
-        endcase    // NOTE(review): no reset or initial value, so the flag is X until the first of these two states is visited
-
-    //
-    // On-the-fly Carry Recombination
-    //
-    wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry};
-    wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry};
-    wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry;
-  
-    always @(posedge clk)
-        // operand B register: loads only while the twice-delayed narrow enable is high
-        if (narrow_xy_ena_dly2) begin // rewrite
-            // unmerged: X and Y each take the low 16 bits of their own narrow word, no carry is kept
-            if (!dsp_merge_xy_b) begin
-                dsp_x_b <= rd_narrow_x_dout[15:0];
-                dsp_y_b <= rd_narrow_y_dout[15:0];
-                dsp_xy_b_carry <= 2'b00;
-            end else begin    // merged: both sides take the ladder_mode-selected word plus the previous carry; bits [17:16] become the next carry
-                dsp_x_b <= rd_narrow_xy_dout_carry_mux[15:0];
-                dsp_y_b <= rd_narrow_xy_dout_carry_mux[15:0];
-                dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16];
-            end                 
-            //
-        end else begin
-            // idle: operands are don't-care and the carry is cleared
-            dsp_x_b <= {16{1'bX}};
-            dsp_y_b <= {16{1'bX}};
-            //
-            dsp_xy_b_carry <= 2'b00;
-            //
-        end
-
-        
-    reg  [9   -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}}; // last advance stage, copied into dsp_xy_mode_z on the next clock
-    reg  [9   -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}}; // NOTE(review): width is the literal 9 while the calc_mac_mode_z_* functions return [NUM_MULTS:0]; these agree only if NUM_MULTS == 8
-    reg  [9   -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}};
-    reg  [9   -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}}; // first advance stage, loaded from the fsm_state_next decoder below
-        
-         function  [NUM_MULTS:0] calc_mac_mode_z_square; // returns the mode_z word for the square phase: all ones, with at most one low bit
-        input [        4:0] col_index_value;      // column index compared against address bits [7:3]
-        input [        7:0] narrow_xy_addr_value; // narrow operand address: [7:3] = column, [2:0] = position in the column
-        begin
-            if (narrow_xy_addr_value[7:3] == col_index_value) // address lies in the given column
-                case (narrow_xy_addr_value[2:0]) // clear the bit whose number equals addr[2:0]; the top bit stays 1
-                    3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110};
-                    3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
-                    3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
-                    3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
-                    3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
-                    3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
-                    3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
-                    3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
-                endcase // NOTE(review): the 8-bit literals match the [NUM_MULTS:0] return width only if NUM_MULTS == 8
-            else
-                calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; // address outside the column: every bit high
-        end
-    endfunction
-    
-    function  [NUM_MULTS:0] calc_mac_mode_z_rectangle; // rectangle-phase mode_z word; the body is the same table as calc_mac_mode_z_square
-        input [        4:0] col_index_value;      // column index compared against address bits [7:3]
-        input [        7:0] narrow_xy_addr_value; // narrow operand address: [7:3] = column, [2:0] = position in the column
-        begin
-            if (narrow_xy_addr_value[7:3] == col_index_value) // address lies in the given column
-                case (narrow_xy_addr_value[2:0]) // clear the bit whose number equals addr[2:0]; the top bit stays 1
-                    3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110};
-                    3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101};
-                    3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011};
-                    3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111};
-                    3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111};
-                    3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111};
-                    3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111};
-                    3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111};
-                endcase // NOTE(review): the 8-bit literals match the [NUM_MULTS:0] return width only if NUM_MULTS == 8
-            else
-                calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}}; // address outside the column: every bit high
-        end
-    endfunction
-        
-    always @(posedge clk)
-        // loads the first mode_z advance stage from the state the FSM is about to enter (fsm_state_next, not fsm_state)
-        case (fsm_state_next)
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:       dsp_xy_mode_z_adv4 <= {9{1'b0}}; // all bits low on every column trigger
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly);
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}};    // so easy
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     dsp_xy_mode_z_adv4 <= {9{1'b1}}; // triangle never clears a bit while busy
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}};    // so easy
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:     dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly);
-            default:                                dsp_xy_mode_z_adv4 <= {9{1'b1}}; // every other state: all bits high
-        endcase
-
-    always @(posedge clk) begin
-        dsp_xy_mode_z <= dsp_xy_mode_z_adv1; // output stage
-        // four-stage shift adv4 -> adv3 -> adv2 -> adv1: a value loaded into adv4 reaches dsp_xy_mode_z four clocks later
-        dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2;
-        dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3;
-        dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4;
-    end
-        
-
-
-    
-    
-    //
-    // Recombinator
-    //
-    reg  rcmb_ena = 1'b0; // start request, driven by the enable logic that follows this instance
-    wire rcmb_rdy;        // polled by the *_HOLDOFF states of the FSM before they advance
-
-    modexpng_recombinator_block recombinator_block
-    (
-        .clk                            (clk),
-        .rst                            (rst),
-        
-        .ena                            (rcmb_ena),
-        .rdy                            (rcmb_rdy),
-        
-        .mmm_fsm_state_next             (fsm_state_next), // the instance is given the upcoming state, not the current one
-        
-        .word_index_last                (word_index_last),
-        
-        .dsp_xy_ce_p                    (dsp_xy_ce_p),
-        .dsp_x_p                        (dsp_x_p),
-        .dsp_y_p                        (dsp_y_p),
-        
-        .col_index                      (col_index),
-        .col_index_last                 (col_index_last),
-        
-        .rd_narrow_xy_addr                 (narrow_xy_addr),
-        .rd_narrow_xy_bank                 (narrow_xy_bank),
-        
-        .rcmb_wide_xy_bank          (rcmb_wide_xy_bank),
-        .rcmb_wide_xy_addr          (rcmb_wide_xy_addr),
-        .rcmb_wide_x_dout           (rcmb_wide_x_dout),
-        .rcmb_wide_y_dout           (rcmb_wide_y_dout),
-        .rcmb_wide_xy_valid         (rcmb_wide_xy_valid),
-        
-        .rcmb_narrow_xy_bank        (rcmb_narrow_xy_bank),
-        .rcmb_narrow_xy_addr        (rcmb_narrow_xy_addr),
-        .rcmb_narrow_x_dout         (rcmb_narrow_x_dout),
-        .rcmb_narrow_y_dout         (rcmb_narrow_y_dout),
-        .rcmb_narrow_xy_valid       (rcmb_narrow_xy_valid),
-        
-        .rdct_narrow_xy_bank        (rcmb_xy_bank), // NOTE(review): the rdct_narrow_* ports land on nets named rcmb_xy_*; confirm the naming is intended
-        .rdct_narrow_xy_addr        (rcmb_xy_addr),
-        .rdct_narrow_x_dout         (rcmb_x_dout),
-        .rdct_narrow_y_dout         (rcmb_y_dout),
-        .rdct_narrow_xy_valid       (rcmb_xy_valid)
-
-    );
-    
-    
-    //
-    // Recombinator Enable Logic
-    //    
-    always @(posedge clk)
-        // rcmb_ena goes high when the A clock enable is up while the B, M and P enables are all still down
-        if (rst) rcmb_ena <= 1'b0;
-        else     rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p;
-
-        
-    //
-    // Handy Completion Flags
-    //    
-    wire square_done    = square_surely_done_flop; // the *_done_flop registers are defined outside this excerpt
-    wire triangle_done  = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop; // the last column uses the "tardy" flag, earlier columns the "surely" one
-    wire rectangle_done = rectangle_tardy_done_flop;
-    
-
-    //
-    // FSM Transition Logic
-    //
-    assign fsm_state_after_mult_square    = col_is_last ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF   : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT; // last column -> HOLDOFF, otherwise start the next column
-    assign fsm_state_after_mult_triangle  = col_is_last ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
-    assign fsm_state_after_mult_rectangle = col_is_last ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
-
-    always @* begin
-        // combinational next state; pre-assigned to IDLE so no path can infer a latch
-        fsm_state_next = MMM_FSM_STATE_IDLE;
-        // order: square -> triangle -> rectangle; each column runs INIT -> TRIG -> BUSY, each phase ends in a HOLDOFF that waits for rcmb_rdy
-        case (fsm_state)
-            MMM_FSM_STATE_IDLE:                   fsm_state_next = ena                   ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT : MMM_FSM_STATE_IDLE;
-                        
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY; // NOTE(review): column 0 always continues to COL_N without testing col_is_last; presumably there are always at least two columns, confirm
-            
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square    : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY;
-            
-            MMM_FSM_STATE_MULT_SQUARE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF;
-
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;     
-            
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ;
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
-            
-            MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF;
-
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;     
-            
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
-            
-            MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF;
-            
-            default:                          fsm_state_next =                         MMM_FSM_STATE_IDLE                   ; // also taken from MMM_FSM_STATE_STOP, which has no item of its own
-
-        endcase
-        //
-    end
-
-
-    //
-    // Reductor Control Logic
-    //
-    // rdct_ena is a registered strobe: it is high on the clock after fsm_state was
-    // MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT and low otherwise.
-    //
-    reg rdct_ena_reg = 1'b0;
-
-    assign rdct_ena = rdct_ena_reg; 
-    
-    always @(posedge clk)
-        // synchronous reset, same style as rcmb_ena: the strobe cannot stay asserted
-        // (or be re-issued) while the block is held in reset
-        if (rst) rdct_ena_reg <= 1'b0;
-        else
-            case (fsm_state)
-               MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1;
-               default:                             rdct_ena_reg <= 1'b0;
-            endcase
-    
-    
-    
-endmodule
diff --git a/rtl/_modexpng_recombinator_block.v b/rtl/_modexpng_recombinator_block.v
deleted file mode 100644
index 61bf734..0000000
--- a/rtl/_modexpng_recombinator_block.v
+++ /dev/null
@@ -1,1225 +0,0 @@
-module modexpng_recombinator_block
-(
-    clk, rst,
-    ena, rdy,
-    mmm_fsm_state_next,
-    word_index_last,
-    dsp_xy_ce_p,
-    dsp_x_p, dsp_y_p,
-    col_index, col_index_last,
-    rd_narrow_xy_addr, rd_narrow_xy_bank,
-    rcmb_wide_xy_bank,   rcmb_wide_xy_addr,   rcmb_wide_x_dout,   rcmb_wide_y_dout,   rcmb_wide_xy_valid,
-    rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_dout, rcmb_narrow_y_dout, rcmb_narrow_xy_valid,
-    rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid
-);
-
-
-    //
-    // Headers (supply the widths, bank codes and MMM_FSM_STATE_* encodings used below)
-    //
-    `include "../rtl/modexpng_parameters.vh"
-    `include "../rtl/modexpng_mmm_fsm.vh"
-
-
-    input                                         clk;
-    input                                         rst;                  // synchronous reset (sampled inside posedge-clk blocks)
-    input                                         ena;                  // start strobe: latches rcmb_mode and arms the cell clear flags
-    output                                        rdy;                  // driven outside this excerpt
-    input  [                MMM_FSM_STATE_W -1:0] mmm_fsm_state_next;   // upcoming multiplier FSM state, decoded into rcmb_mode
-    input  [                OP_ADDR_W       -1:0] word_index_last;
-    input                                         dsp_xy_ce_p;          // delayed by one clock into dsp_xy_ce_p_dly1
-    input  [(NUM_MULTS+1) * MAC_W           -1:0] dsp_x_p;              // packed X products, one MAC_W-bit slice per multiplier
-    input  [(NUM_MULTS+1) * MAC_W           -1:0] dsp_y_p;              // packed Y products, same layout
-    input  [                COL_INDEX_W     -1:0] col_index;
-    input  [                COL_INDEX_W     -1:0] col_index_last;
-    
-    input  [                BANK_ADDR_W     -1:0] rd_narrow_xy_bank;
-    input  [                OP_ADDR_W       -1:0] rd_narrow_xy_addr;
-
-    output [                BANK_ADDR_W     -1:0] rcmb_wide_xy_bank;    // the three output groups below are driven outside this excerpt
-    output [                OP_ADDR_W       -1:0] rcmb_wide_xy_addr;
-    output [                WORD_EXT_W      -1:0] rcmb_wide_x_dout;
-    output [                WORD_EXT_W      -1:0] rcmb_wide_y_dout;
-    output                                        rcmb_wide_xy_valid;
-
-    output [                BANK_ADDR_W     -1:0] rcmb_narrow_xy_bank;
-    output [                OP_ADDR_W       -1:0] rcmb_narrow_xy_addr;
-    output [                WORD_EXT_W      -1:0] rcmb_narrow_x_dout;
-    output [                WORD_EXT_W      -1:0] rcmb_narrow_y_dout;
-    output                                        rcmb_narrow_xy_valid;
-
-    output [                BANK_ADDR_W     -1:0] rdct_narrow_xy_bank;
-    output [                OP_ADDR_W       -1:0] rdct_narrow_xy_addr;
-    output [                WORD_EXT_W      -1:0] rdct_narrow_x_dout;
-    output [                WORD_EXT_W      -1:0] rdct_narrow_y_dout;
-    output                                        rdct_narrow_xy_valid;
-
-
-    //
-    // Latches (registered copies of the products; they are written outside this excerpt)
-    //
-    reg [MAC_W-1:0] dsp_x_p_latch[0:NUM_MULTS];
-    reg [MAC_W-1:0] dsp_y_p_latch[0:NUM_MULTS];
-
-
-    //
-    // Mapping (unpacks the flat dsp_x_p / dsp_y_p buses into per-multiplier words)
-    //
-    wire [MAC_W-1:0] dsp_x_p_split[0:NUM_MULTS];
-    wire [MAC_W-1:0] dsp_y_p_split[0:NUM_MULTS];
-    
-    genvar z;
-    generate for (z=0; z<NUM_MULTS_AUX; z=z+1) // NOTE(review): bound is NUM_MULTS_AUX while the arrays are [0:NUM_MULTS]; presumably NUM_MULTS_AUX == NUM_MULTS+1, confirm in modexpng_parameters.vh
-        begin : gen_dsp_xy_p_split
-            assign dsp_x_p_split[z] = dsp_x_p[z*MAC_W +: MAC_W]; // slice z occupies bits [z*MAC_W +: MAC_W]
-            assign dsp_y_p_split[z] = dsp_y_p[z*MAC_W +: MAC_W];
-        end
-    endgenerate
-
-
-    //
-    // Delays
-    //
-    reg dsp_xy_ce_p_dly1 = 1'b0; // dsp_xy_ce_p delayed by one clock
-
-    always @(posedge clk)
-        // cleared by the synchronous reset, otherwise follows dsp_xy_ce_p
-        if (rst) dsp_xy_ce_p_dly1 <= 1'b0;
-        else     dsp_xy_ce_p_dly1 <= dsp_xy_ce_p;
-
-
-    //
-    // Registers
-    //
-    
-    // valid
-    reg       xy_valid_lsb = 1'b0;
-    reg       xy_aux_lsb   = 1'b0;
-    reg       xy_valid_msb = 1'b0;
-    
-    // bitmap
-    reg [NUM_MULTS-1:0] xy_bitmap_lsb = {NUM_MULTS{1'b0}};
-    reg [NUM_MULTS-1:0] xy_bitmap_msb = {NUM_MULTS{1'b0}};
-    
-    // index
-    reg [2:0] xy_index_lsb = 3'dX; // NOTE(review): literal 3-bit width here, MAC_INDEX_W for the latch below; presumably MAC_INDEX_W == 3
-    
-    // purge
-    reg       xy_purge_lsb = 1'b0;
-    reg       xy_purge_msb = 1'b0;
-    
-    // valid - latch
-    reg       xy_valid_latch_lsb = 1'b0;
-    
-    // aux - latch
-    reg       xy_aux_latch_lsb = 1'b0;
-    
-    // bitmap - latch
-    reg [NUM_MULTS-1:0] xy_bitmap_latch_lsb = MULT_BITMAP_ZEROES;
-    reg [NUM_MULTS-1:0] xy_bitmap_latch_msb = MULT_BITMAP_ZEROES;
-
-    // index - latch
-    reg [MAC_INDEX_W-1:0] xy_index_latch_lsb = MAC_INDEX_DONT_CARE;
-    
-    // purge - latch
-    reg       xy_purge_latch_lsb = 1'b0;
-    reg       xy_purge_latch_msb = 1'b0;
-
-    // advance pipelines: six stages (indices 1..6) for each of the control signals above
-    reg       xy_valid_lsb_adv[1:6];
-    reg       xy_valid_msb_adv[1:6];
-    reg       xy_aux_lsb_adv[1:6];
-    reg [NUM_MULTS-1:0] xy_bitmap_lsb_adv[1:6];
-    reg [NUM_MULTS-1:0] xy_bitmap_msb_adv[1:6];
-    reg [MAC_INDEX_W-1:0] xy_index_lsb_adv[1:6];
-    reg [MAC_INDEX_W-1:0] xy_index_msb_adv[1:6];
-    reg       xy_purge_lsb_adv[1:6];
-    reg       xy_purge_msb_adv[1:6];
-    
-    reg [1:0] rcmb_mode; // 1 = square, 2 = triangle, 3 = rectangle, 0 = none; NOTE(review): no reset or initial value
-       
-    always @(posedge clk)
-       // the mode is sampled only while ena is high and holds its value otherwise
-       if (ena)
-           // decoded from the state the multiplier FSM is about to enter; only the COL_0_BUSY states select a mode
-           case (mmm_fsm_state_next)
-               MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY:        rcmb_mode <= 2'd1;
-               MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:      rcmb_mode <= 2'd2;
-               MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY:     rcmb_mode <= 2'd3;
-               default:                                 rcmb_mode <= 2'd0;
-           endcase
-
-               
-    // Power-up values for the *_adv advance pipelines. The arrays are declared [1:6],
-    // so the loop must reach index 6; stopping at 5 leaves the last stage at X when
-    // simulation starts.
-    integer i;
-    initial for (i=1; i<=6; i=i+1) begin
-        xy_valid_lsb_adv[i] = 1'b0;
-        xy_valid_msb_adv[i] = 1'b0;
-        xy_aux_lsb_adv[i] = 1'b0;
-        xy_bitmap_lsb_adv[i] = {8{1'b0}};
-        xy_bitmap_msb_adv[i] = {8{1'b0}};
-        xy_index_lsb_adv[i] = 3'dX;   // the index is a don't-care until a valid flag is set
-        xy_index_msb_adv[i] = 3'dX;
-        xy_purge_lsb_adv[i] = 1'b0;
-        xy_purge_msb_adv[i] = 1'b0;
-    end
-    
-    function        calc_square_triangle_valid_lsb;
-        // returns 1 when address bits [7:3] equal col_index_value, else 0; the col_index_last and bank inputs are not used
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [  OP_ADDR_W-1:0] narrow_xy_addr_value; // NOTE(review): the [7:3] slice assumes OP_ADDR_W >= 8
-        begin
-            //
-            if (narrow_xy_addr_value[7:3] == col_index_value)
-                calc_square_triangle_valid_lsb = 1'b1;
-            else
-                calc_square_triangle_valid_lsb = 1'b0;
-            //
-        end
-    endfunction
-
-    function                    calc_square_valid_lsb; // square-phase alias: forwards all arguments to calc_square_triangle_valid_lsb
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [  OP_ADDR_W-1:0] narrow_xy_addr_value;
-        begin
-            calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);   
-        end
-    endfunction
-
-    function        calc_triangle_valid_lsb; // triangle-phase alias: forwards all arguments to calc_square_triangle_valid_lsb
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);   
-        end
-    endfunction
-    
-    function        calc_rectangle_valid_lsb; // 1 when the address is in the given column and the bank is not BANK_NARROW_EXT
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value; // not used
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            // column match on address bits [7:3]; the result is then the bank test
-            if (narrow_xy_addr_value[7:3] == col_index_value) 
-                calc_rectangle_valid_lsb = narrow_xy_bank_value != BANK_NARROW_EXT;
-            else
-                calc_rectangle_valid_lsb = 1'b0;
-            //
-        end
-    endfunction
-
-    function        calc_triangle_aux_lsb; // 1 when the bank is BANK_NARROW_EXT, else 0
-        input [COL_INDEX_W-1:0] col_index_value;      // not used
-        input [COL_INDEX_W-1:0] col_index_last_value; // not used
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;             // not used
-        begin
-            // only the bank decides the result
-            if (narrow_xy_bank_value == BANK_NARROW_EXT)
-                calc_triangle_aux_lsb = 1'b1;
-            else
-                calc_triangle_aux_lsb = 1'b0;
-            //
-        end
-    endfunction
-    
-    function  [7:0] calc_square_triangle_bitmap_lsb; // one-hot bitmap of address bits [2:0] when the address is in the given column, else all zeroes
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value; // not used
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value; // not used
-        input [7:0] narrow_xy_addr_value;
-        begin
-            // column match on address bits [7:3]
-            if (narrow_xy_addr_value[7:3] == col_index_value)
-                // set the bit whose number equals addr[2:0]
-                case (narrow_xy_addr_value[2:0])
-                    3'b000: calc_square_triangle_bitmap_lsb = 8'b00000001;
-                    3'b001: calc_square_triangle_bitmap_lsb = 8'b00000010;
-                    3'b010: calc_square_triangle_bitmap_lsb = 8'b00000100;
-                    3'b011: calc_square_triangle_bitmap_lsb = 8'b00001000;
-                    3'b100: calc_square_triangle_bitmap_lsb = 8'b00010000;
-                    3'b101: calc_square_triangle_bitmap_lsb = 8'b00100000;
-                    3'b110: calc_square_triangle_bitmap_lsb = 8'b01000000;
-                    3'b111: calc_square_triangle_bitmap_lsb = 8'b10000000;
-                endcase
-                //
-            else
-                calc_square_triangle_bitmap_lsb = {8{1'b0}};
-            //
-        end
-    endfunction
-
-    function  [7:0] calc_square_bitmap_lsb; // square-phase alias: forwards all arguments to calc_square_triangle_bitmap_lsb
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
-        end
-    endfunction
-
-    function  [7:0] calc_triangle_bitmap_lsb; // triangle-phase alias: forwards all arguments to calc_square_triangle_bitmap_lsb
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
-        end
-    endfunction
-
-    function  [7:0] calc_rectangle_bitmap_lsb; // like the square/triangle bitmap, but also requires the bank to differ from BANK_NARROW_EXT
-       input [COL_INDEX_W-1:0] col_index_value;
-       input [COL_INDEX_W-1:0] col_index_last_value; // not used
-       input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-       input [7:0] narrow_xy_addr_value;
-       begin
-           // column match on address bits [7:3] and a bank other than BANK_NARROW_EXT
-           if ((narrow_xy_addr_value[7:3] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT))
-               // set the bit whose number equals addr[2:0]
-               case (narrow_xy_addr_value[2:0])
-                   3'b000: calc_rectangle_bitmap_lsb = 8'b00000001;
-                   3'b001: calc_rectangle_bitmap_lsb = 8'b00000010;
-                   3'b010: calc_rectangle_bitmap_lsb = 8'b00000100;
-                   3'b011: calc_rectangle_bitmap_lsb = 8'b00001000;
-                   3'b100: calc_rectangle_bitmap_lsb = 8'b00010000;
-                   3'b101: calc_rectangle_bitmap_lsb = 8'b00100000;
-                   3'b110: calc_rectangle_bitmap_lsb = 8'b01000000;
-                   3'b111: calc_rectangle_bitmap_lsb = 8'b10000000;
-               endcase
-               //
-           else
-               calc_rectangle_bitmap_lsb = {8{1'b0}};
-           //
-        end
-    endfunction
-       
-       /*
-        * These can be simplified (the difference between square/triangle and
-        * rectangle is that the bank is checked or not). A universal function would
-        * accept a parameter that tells it whether it should check the bank or not.
-        * Let's do it later, too early to optimize now, it seems.
-        *
-        *
-        */
-       
-    function  [2:0] calc_square_triangle_index_lsb; // returns address bits [2:0] when the address is in the given column, else X (don't care)
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value; // not used
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value; // not used
-        input [7:0] narrow_xy_addr_value;
-        begin
-            // column match on address bits [7:3]
-            if (narrow_xy_addr_value[7:3] == col_index_value)
-                // the table maps each addr[2:0] value to the same number
-                case (narrow_xy_addr_value[2:0])
-                    3'b000: calc_square_triangle_index_lsb = 3'd0;
-                    3'b001: calc_square_triangle_index_lsb = 3'd1;
-                    3'b010: calc_square_triangle_index_lsb = 3'd2;
-                    3'b011: calc_square_triangle_index_lsb = 3'd3;
-                    3'b100: calc_square_triangle_index_lsb = 3'd4;
-                    3'b101: calc_square_triangle_index_lsb = 3'd5;
-                    3'b110: calc_square_triangle_index_lsb = 3'd6;
-                    3'b111: calc_square_triangle_index_lsb = 3'd7;
-                endcase
-                //
-            else
-                calc_square_triangle_index_lsb = 3'dX;
-            //
-        end
-    endfunction
-
-    function  [2:0] calc_square_index_lsb; // square-phase alias: forwards all arguments to calc_square_triangle_index_lsb
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
-        end
-    endfunction
-
-    function  [2:0] calc_triangle_index_lsb; // triangle-phase alias: forwards all arguments to calc_square_triangle_index_lsb
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
-        end
-    endfunction
-
-    function  [2:0] calc_rectangle_index_lsb; // address bits [2:0] when in the given column and the bank is not BANK_NARROW_EXT, else X
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value; // not used
-        input [BANK_ADDR_W-1:0] slim_bram_xy_bank_value; // same role as narrow_xy_bank_value in the sibling functions
-        input [7:0] slim_bram_xy_addr_value;             // same role as narrow_xy_addr_value in the sibling functions
-        begin
-            // column match on address bits [7:3] and a bank other than BANK_NARROW_EXT
-            if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_NARROW_EXT))
-                // the table maps each addr[2:0] value to the same number
-                case (slim_bram_xy_addr_value[2:0])
-                    3'b000: calc_rectangle_index_lsb = 3'd0;
-                    3'b001: calc_rectangle_index_lsb = 3'd1;
-                    3'b010: calc_rectangle_index_lsb = 3'd2;
-                    3'b011: calc_rectangle_index_lsb = 3'd3;
-                    3'b100: calc_rectangle_index_lsb = 3'd4;
-                    3'b101: calc_rectangle_index_lsb = 3'd5;
-                    3'b110: calc_rectangle_index_lsb = 3'd6;
-                    3'b111: calc_rectangle_index_lsb = 3'd7;
-                endcase
-                //
-            else
-                calc_rectangle_index_lsb = 3'dX;
-            //
-        end
-    endfunction
-    
-    function        calc_square_rectangle_purge_lsb; // 1 only when address bits [7:3] equal both col_index_value and col_index_last_value
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value; // not used
-        input [7:0] narrow_xy_addr_value;
-        begin
-            // in the given column, the result is whether that column is also the last one
-            if (narrow_xy_addr_value[7:3] == col_index_value)
-                calc_square_rectangle_purge_lsb = narrow_xy_addr_value[7:3] == col_index_last_value;
-            else
-                calc_square_rectangle_purge_lsb = 1'b0;
-            //
-        end
-    endfunction
-
-    function        calc_square_purge_lsb; // square-phase alias: forwards all arguments to calc_square_rectangle_purge_lsb
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
-        end
-    endfunction
-
-    function        calc_rectangle_purge_lsb; // rectangle-phase alias: forwards all arguments to calc_square_rectangle_purge_lsb
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
-        end
-    endfunction
-
-    function        calc_square_valid_msb; // 1 when the full address equals index_last_value, else 0
-        input [COL_INDEX_W-1:0] col_index_value;      // not used
-        input [COL_INDEX_W-1:0] col_index_last_value; // not used
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value; // not used
-        input [7:0] narrow_xy_addr_value;
-        input [7:0] index_last_value;
-        begin
-            // all eight address bits are compared, not just the column field
-            if (narrow_xy_addr_value == index_last_value)
-                calc_square_valid_msb = 1'b1;
-            else
-                calc_square_valid_msb = 1'b0;
-            //
-        end
-    endfunction
-
-    function        calc_rectangle_valid_msb; // 1 when the address is 1 and the bank is BANK_NARROW_EXT, else 0
-        input [COL_INDEX_W-1:0] col_index_value;      // not used
-        input [COL_INDEX_W-1:0] col_index_last_value; // not used
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        input [7:0] index_last_value;                 // not used
-        begin
-            // fixed trigger point: word 1 of the BANK_NARROW_EXT bank
-            if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT))
-                calc_rectangle_valid_msb = 1'b1;
-            else
-                calc_rectangle_valid_msb = 1'b0;
-            //
-        end
-    endfunction
-    
-    function  [7:0] calc_square_bitmap_msb; // MSB bitmap for the square phase; non-zero only when the address equals index_last_value
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value; // not used
-        input [7:0] narrow_xy_addr_value;
-        input [7:0] index_last_value;
-        begin
-            // bits [6:0] are all set; bit 7 is set only when this is not the last column
-            if (narrow_xy_addr_value == index_last_value) begin
-                calc_square_bitmap_msb[7] = col_index_value != col_index_last_value;
-                calc_square_bitmap_msb[6:0] = 7'b1111111;
-            end else
-                calc_square_bitmap_msb[7:0] = 8'b00000000;
-            //
-        end
-    endfunction
-
-    function  [7:0] calc_rectangle_bitmap_msb; // all ones when the address is 1 and the bank is BANK_NARROW_EXT, else all zeroes
-        input [COL_INDEX_W-1:0] col_index_value;      // not used
-        input [COL_INDEX_W-1:0] col_index_last_value; // not used
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        input [7:0] index_last_value;                 // not used
-        begin
-            // same trigger condition as calc_rectangle_valid_msb
-            if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) begin
-                calc_rectangle_bitmap_msb[7:0] = 8'b11111111;
-            end else
-                calc_rectangle_bitmap_msb[7:0] = 8'b00000000;
-            //
-        end
-    endfunction
-
-    function        calc_square_purge_msb; // 1 when the address equals index_last_value and the column is the last one
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value; // not used
-        input [7:0] narrow_xy_addr_value;
-        input [7:0] index_last_value;
-        begin
-            // at the last word, the result is whether this is the last column
-            if (narrow_xy_addr_value == index_last_value)
-                calc_square_purge_msb = col_index_value == col_index_last_value;
-            else
-                calc_square_purge_msb = 1'b0;
-            //
-        end
-    endfunction
-
-    function        calc_rectangle_purge_msb; // 1 when the address is 1, the bank is BANK_NARROW_EXT and the column is the last one
-        input [COL_INDEX_W-1:0] col_index_value;
-        input [COL_INDEX_W-1:0] col_index_last_value;
-        input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        input [7:0] index_last_value;                 // not used
-        begin
-            // same trigger condition as calc_rectangle_valid_msb; the result is the last-column test
-            if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT))
-                calc_rectangle_purge_msb = col_index_value == col_index_last_value;
-            else
-                calc_rectangle_purge_msb = 1'b0;
-            //
-        end
-    endfunction
-
-    
-    reg         rcmb_xy_lsb_ce = 1'b0;          // registered copy of xy_valid_latch_lsb
-    reg         rcmb_xy_lsb_ce_aux;             // registered copy of xy_aux_latch_lsb; NOTE(review): no initial value
-    reg  [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000;  // loaded with 3'b111 on purge, then shifted right; bit 0 enables the cell
-    wire        rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0]; // clock enable of both LSB cells
-    reg         rcmb_xy_lsb_clr;                // set by ena, released on the first rcmb_xy_lsb_ce; NOTE(review): no initial value
-
-    reg  [46:0] rcmb_x_lsb_din; // NOTE(review): literal 47-bit width, loaded from MAC_W-wide latches; presumably MAC_W == 47
-    reg  [46:0] rcmb_y_lsb_din;
-    wire [15:0] rcmb_x_lsb_dout;
-    wire [15:0] rcmb_y_lsb_dout;
-
-    reg         rcmb_xy_msb_ce = 1'b0;          // registered copy of xy_bitmap_latch_msb[0]
-    reg  [ 1:0] rcmb_xy_msb_ce_purge = 2'b00;   // loaded with 2'b11 on purge, then shifted right; bit 0 enables the cell
-    wire        rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0]; // clock enable of both MSB cells
-    reg         rcmb_xy_msb_clr;                // set by ena, released on the first rcmb_xy_msb_ce; NOTE(review): no initial value
-    
-    reg  [46:0] rcmb_x_msb_din;
-    reg  [46:0] rcmb_y_msb_din;
-    wire [15:0] rcmb_x_msb_dout;
-    wire [15:0] rcmb_y_msb_dout;
-    
-    modexpng_recombinator_cell recomb_x_lsb // LSB cell for X; shares ce and clr with recomb_y_lsb
-    (
-        .clk    (clk),
-        .ce     (rcmb_xy_lsb_ce_combined),
-        .clr    (rcmb_xy_lsb_clr),
-        .din    (rcmb_x_lsb_din),
-        .dout   (rcmb_x_lsb_dout)
-    );
-    modexpng_recombinator_cell recomb_y_lsb // LSB cell for Y
-    (
-        .clk    (clk),
-        .ce     (rcmb_xy_lsb_ce_combined),
-        .clr    (rcmb_xy_lsb_clr),
-        .din    (rcmb_y_lsb_din),
-        .dout   (rcmb_y_lsb_dout)
-    );
-
-    modexpng_recombinator_cell recomb_x_msb // MSB cell for X; shares ce and clr with recomb_y_msb
-    (
-        .clk    (clk),
-        .ce     (rcmb_xy_msb_ce_combined),
-        .clr    (rcmb_xy_msb_clr),
-        .din    (rcmb_x_msb_din),
-        .dout   (rcmb_x_msb_dout)
-    );
-    
-    modexpng_recombinator_cell recomb_y_msb // MSB cell for Y
-    (
-        .clk    (clk),
-        .ce     (rcmb_xy_msb_ce_combined),
-        .clr    (rcmb_xy_msb_clr),
-        .din    (rcmb_y_msb_din),
-        .dout   (rcmb_y_msb_dout)
-    );
-
-    always @(posedge clk) begin
-        // clock enables are the latched valid/aux flags and bit 0 of the MSB bitmap latch, delayed by one clock
-        rcmb_xy_lsb_ce <= xy_valid_latch_lsb;
-        rcmb_xy_lsb_ce_aux <= xy_aux_latch_lsb;
-        rcmb_xy_msb_ce <= xy_bitmap_latch_msb[0];
-        // LSB purge: load three ones, then shift right by one bit per clock (bit 0 feeds the combined ce)
-        if (xy_purge_latch_lsb)
-            rcmb_xy_lsb_ce_purge <= 3'b111;
-        else
-            rcmb_xy_lsb_ce_purge <= {1'b0, rcmb_xy_lsb_ce_purge[2:1]};
-        // MSB purge: load two ones when only bit 0 of the MSB bitmap latch remains set; must be non-blocking like the rest
-        if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1])
-            rcmb_xy_msb_ce_purge <= 2'b11;
-        else
-            rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]};
-        //
-    end
-
-
-    always @(posedge clk)
-        // both clear flags are set while ena is high; afterwards each one is dropped once its own clock enable is seen
-        if (ena) begin
-            rcmb_xy_lsb_clr <= 1'b1;
-            rcmb_xy_msb_clr <= 1'b1;
-        end else begin
-            if (rcmb_xy_lsb_ce) rcmb_xy_lsb_clr <= 1'b0;
-            if (rcmb_xy_msb_ce) rcmb_xy_msb_clr <= 1'b0;
-        end
-
-    always @(posedge clk)
-        // LSB cell input: the indexed product latch when valid, product latch 8 when aux, zero otherwise
-        if (xy_valid_latch_lsb) begin
-            rcmb_x_lsb_din <= dsp_x_p_latch[xy_index_latch_lsb];
-            rcmb_y_lsb_din <= dsp_y_p_latch[xy_index_latch_lsb];
-        end else if (xy_aux_latch_lsb) begin
-            rcmb_x_lsb_din <= dsp_x_p_latch[8];
-            rcmb_y_lsb_din <= dsp_y_p_latch[8];
-        end else begin
-            rcmb_x_lsb_din <= {47{1'b0}};
-            rcmb_y_lsb_din <= {47{1'b0}};
-        end
-
-    always @(posedge clk)
-        // MSB cell input: product latch 0 while bit 0 of the MSB bitmap latch is set, zero otherwise
-        if (xy_bitmap_latch_msb[0]) begin
-            rcmb_x_msb_din <= dsp_x_p_latch[0];
-            rcmb_y_msb_din <= dsp_y_p_latch[0];
-        end else begin
-            rcmb_x_msb_din <= {47{1'b0}};
-            rcmb_y_msb_din <= {47{1'b0}};
-        end
-
-
-    always @(posedge clk)
-        // load stage 6 of every *_adv flag pipeline, choosing the calc_* function family from the next FSM state
-        case (mmm_fsm_state_next)
-            //
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
-                // square: aux flag is forced to zero, LSB and MSB flags are both computed
-                xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
-                xy_aux_lsb_adv   [6] <= 1'b0;
-                xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
-                xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
-                xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
-                //
-                xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
-                xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
-                xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
-                //
-            end
-            //
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
-                // triangle: aux flag is computed, LSB purge and all MSB flags are forced to zero
-                xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
-                xy_aux_lsb_adv   [6] <= calc_triangle_aux_lsb   (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
-                xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
-                xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
-                xy_purge_lsb_adv [6] <= 1'b0;
-                //
-                xy_valid_msb_adv [6] <= 1'b0;
-                xy_bitmap_msb_adv[6] <= {8{1'b0}};
-                xy_purge_msb_adv [6] <= 1'b0;
-                //
-            end
-            //
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
-                // rectangle: aux flag is forced to zero, LSB and MSB flags are both computed
-                xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
-                xy_aux_lsb_adv   [6] <= 1'b0;
-                xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
-                xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
-                xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
-                //
-                xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
-                xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
-                xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
-                //
-            end
-            //
-            default: begin
-                // any other state: every flag is cleared, the LSB index is a don't-care
-                xy_valid_lsb_adv [6] <= 1'b0;
-                xy_aux_lsb_adv   [6] <= 1'b0;
-                xy_bitmap_lsb_adv[6] <= {8{1'b0}};
-                xy_index_lsb_adv [6] <= 3'dX;
-                xy_purge_lsb_adv [6] <= 1'b0;
-                //
-                xy_valid_msb_adv [6] <= 1'b0;
-                xy_bitmap_msb_adv[6] <= {8{1'b0}};
-                xy_purge_msb_adv [6] <= 1'b0;
-                //
-            end
-            //
-        endcase
-
-
-    always @(posedge clk) begin
-        // LSB flags: taken from stage 1 of the *_adv pipelines
-        xy_valid_lsb  <= xy_valid_lsb_adv [1];
-        xy_aux_lsb    <= xy_aux_lsb_adv   [1];
-        xy_bitmap_lsb <= xy_bitmap_lsb_adv[1];
-        xy_index_lsb  <= xy_index_lsb_adv [1];
-        xy_purge_lsb  <= xy_purge_lsb_adv [1];
-        // latched LSB flags trail the xy_*_lsb registers by one clock
-        xy_valid_latch_lsb  <= xy_valid_lsb;
-        xy_aux_latch_lsb    <= xy_aux_lsb;
-        xy_bitmap_latch_lsb <= xy_bitmap_lsb;
-        xy_index_latch_lsb  <= xy_index_lsb;
-        xy_purge_latch_lsb  <= xy_purge_lsb;
-        // MSB flags: taken from stage 1 of the *_adv pipelines
-        xy_valid_msb  <= xy_valid_msb_adv[1];
-        xy_bitmap_msb <= xy_bitmap_msb_adv[1];
-        xy_purge_msb  <= xy_purge_msb_adv[1];
-        // MSB latches reload when xy_valid_msb is set; otherwise the bitmap shifts right by one bit and the purge latch holds
-        if (xy_valid_msb) begin
-            xy_bitmap_latch_msb <= xy_bitmap_msb;
-            xy_purge_latch_msb  <= xy_purge_msb;
-        end else begin
-            xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[7:1]};
-        end
-        //
-        // stages 1..5 each take the value of the next higher stage (stage 6 is loaded by a separate always block)
-        for (i=1; i<6; i=i+1) begin
-            xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
-            xy_aux_lsb_adv   [i] <= xy_aux_lsb_adv   [i+1];
-            xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1];
-            xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1];
-            xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1];
-            //
-            xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1];
-            xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1];
-            xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1];
-        end
-        //
-    end
-
-    always @(posedge clk)
-        // shifting has priority over capturing new products
-        if (xy_bitmap_latch_msb[1])   // only shift 7 times
-            // latches 0..6 take the value of their upper neighbour, latch 7 becomes don't-care
-            for (i=0; i<8; i=i+1)
-                //            
-                if (i < 7) begin
-                    dsp_x_p_latch[i] <= dsp_x_p_latch[i+1];
-                    dsp_y_p_latch[i] <= dsp_y_p_latch[i+1];
-                end else begin
-                    dsp_x_p_latch[i] <= {47{1'bX}};
-                    dsp_y_p_latch[i] <= {47{1'bX}};
-                end
-            // NOTE: the inner if already has its own else, so the else below pairs with the outer xy_bitmap_latch_msb[1] test
-        else if (dsp_xy_ce_p_dly1) begin
-            // capture products 0..7 whose bit is set in the LSB bitmap, or in the MSB bitmap while xy_valid_msb is set
-            for (i=0; i<8; i=i+1)
-                //
-                if (xy_bitmap_lsb[i]) begin
-                    dsp_x_p_latch[i] <= dsp_x_p_split[i];
-                    dsp_y_p_latch[i] <= dsp_y_p_split[i];
-                end else if (xy_valid_msb && xy_bitmap_msb[i]) begin
-                    dsp_x_p_latch[i] <= dsp_x_p_split[i];
-                    dsp_y_p_latch[i] <= dsp_y_p_split[i];
-                end
-            // product 8 is captured only when the aux flag is set
-            if (xy_aux_lsb) begin
-                dsp_x_p_latch[8] <= dsp_x_p_split[8];
-                dsp_y_p_latch[8] <= dsp_y_p_split[8];
-            end
-            //
-        end
-
-    reg rcmb_xy_lsb_valid = 1'b0;
-    reg rcmb_xy_msb_valid = 1'b0;
-
-    always @(posedge clk)
-        // valid flags are the combined clock enables delayed by one clock; rst clears them synchronously
-        if (rst) begin
-            rcmb_xy_lsb_valid <= 1'b0;
-            rcmb_xy_msb_valid <= 1'b0;        
-        end else begin
-            rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined;
-            rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined;
-        end        
-
-
-    reg [BANK_ADDR_W-1:0] wide_xy_bank;     // wide_* registers are written by the _update_wide task
-    reg [ 7:0] wide_xy_addr;
-    reg [17:0] wide_x_dout;
-    reg [17:0] wide_y_dout;
-    reg        wide_xy_valid = 1'b0;
-
-    reg [BANK_ADDR_W-1:0] narrow_xy_bank;   // narrow_* registers are written by the _update_narrow task
-    reg [ 7:0] narrow_xy_addr;
-    reg [17:0] narrow_x_dout;
-    reg [17:0] narrow_y_dout;
-    reg        narrow_xy_valid = 1'b0;
-
-    reg [BANK_ADDR_W-1:0] rdct_xy_bank;     // rdct_* registers are written by the _update_rdct task
-    reg [ 7:0] rdct_xy_addr;
-    reg [17:0] rdct_x_dout;
-    reg [17:0] rdct_y_dout;
-    reg        rdct_xy_valid = 1'b0;
-
-    reg [ 7:0] cnt_lsb;                     // word counters, compared against word_index_last by the inc_cnt_* tasks
-    reg [ 7:0] cnt_msb;
-    
-    reg        cnt_lsb_wrapped;             // set when the matching counter rolls over from word_index_last to zero
-    reg        cnt_msb_wrapped;
-
-    reg [31:0] rcmb_xy_msb_carry_0;         // {y, x} pairs, see shift_rcmb_msb_carry
-    reg [31:0] rcmb_xy_msb_carry_1;
-    
-    reg [31:0] rcmb_xy_msb_delay_0;         // {y, x} pairs, see advance_rcmb_msb_delay
-    reg [31:0] rcmb_xy_msb_delay_1;
-    reg [31:0] rcmb_xy_msb_delay_2;
-    
-    reg [ 7:0] rcmb_msb_cnt_delay_0 = 8'd0;
-    reg [ 7:0] rcmb_msb_cnt_delay_1 = 8'd0;
-    reg [ 7:0] rcmb_msb_cnt_delay_2 = 8'd0;
-
-    reg        rcmb_msb_flag_delay_0;
-    reg        rcmb_msb_flag_delay_1;
-    reg        rcmb_msb_flag_delay_2;
-    
-    assign rcmb_wide_xy_bank  = wide_xy_bank;       // rcmb_wide_* nets simply mirror the wide_* registers
-    assign rcmb_wide_xy_addr  = wide_xy_addr;
-    assign rcmb_wide_x_dout   = wide_x_dout;
-    assign rcmb_wide_y_dout   = wide_y_dout;
-    assign rcmb_wide_xy_valid = wide_xy_valid;
-
-    assign rcmb_narrow_xy_bank  = narrow_xy_bank;   // rcmb_narrow_* nets simply mirror the narrow_* registers
-    assign rcmb_narrow_xy_addr  = narrow_xy_addr;
-    assign rcmb_narrow_x_dout   = narrow_x_dout;
-    assign rcmb_narrow_y_dout   = narrow_y_dout;
-    assign rcmb_narrow_xy_valid = narrow_xy_valid;
-
-    assign rdct_narrow_xy_bank  = rdct_xy_bank;     // rdct_narrow_* nets simply mirror the rdct_* registers
-    assign rdct_narrow_xy_addr  = rdct_xy_addr;
-    assign rdct_narrow_x_dout   = rdct_x_dout;
-    assign rdct_narrow_y_dout   = rdct_y_dout;
-    assign rdct_narrow_xy_valid = rdct_xy_valid;
-    
-    reg rdy_reg = 1'b1;
-    reg rdy_adv = 1'b1;
-    
-    assign rdy = rdy_reg;
-    
-    
-    always @(posedge clk)
-        // rdy drops in the clock after ena and otherwise follows rdy_adv with one clock of delay
-        if (ena) rdy_reg <= 1'b0;
-        else     rdy_reg <= rdy_adv;
-            
-    task advance_rcmb_msb_delay;
-        input [15:0] dout_x;
-        input [15:0] dout_y;
-        input [ 7:0] cnt;
-        input        flag;
-        begin
-            // push {dout_y, dout_x} into the three-stage data delay line
-            rcmb_xy_msb_delay_0 <= {dout_y, dout_x};
-            rcmb_xy_msb_delay_1 <= rcmb_xy_msb_delay_0;
-            rcmb_xy_msb_delay_2 <= rcmb_xy_msb_delay_1;
-            // push cnt into the matching three-stage counter delay line
-            rcmb_msb_cnt_delay_0 <= cnt;
-            rcmb_msb_cnt_delay_1 <= rcmb_msb_cnt_delay_0;
-            rcmb_msb_cnt_delay_2 <= rcmb_msb_cnt_delay_1;
-            // push flag into the matching three-stage flag delay line
-            rcmb_msb_flag_delay_0 <= flag;
-            rcmb_msb_flag_delay_1 <= rcmb_msb_flag_delay_0;
-            rcmb_msb_flag_delay_2 <= rcmb_msb_flag_delay_1;
-            //
-        end
-    endtask
-         
-    task shift_rcmb_msb_carry;
-        input [15:0] dout_x;
-        input [15:0] dout_y;
-        begin   // push {dout_y, dout_x} into the two-stage carry shift register
-            rcmb_xy_msb_carry_0 <= {dout_y, dout_x};
-            rcmb_xy_msb_carry_1 <= rcmb_xy_msb_carry_0;
-        end
-    endtask
-    
-    task _update_wide;
-        input [BANK_ADDR_W-1:0] bank;
-        input [ 7:0] addr;
-        input [17:0] dout_x;
-        input [17:0] dout_y;
-        input        valid;
-        begin   // schedule all five wide_* registers from the arguments (non-blocking)
-            wide_xy_bank       <= bank;
-            wide_xy_addr       <= addr;
-            wide_x_dout        <= dout_x;
-            wide_y_dout        <= dout_y;
-            wide_xy_valid <= valid;
-        end
-    endtask
-    
-    task _update_narrow;
-        input [BANK_ADDR_W-1:0] bank;
-        input [ 7:0] addr;
-        input [17:0] dout_x;
-        input [17:0] dout_y;
-        input        valid;
-        begin   // schedule all five narrow_* registers from the arguments (non-blocking)
-            narrow_xy_bank       <= bank;
-            narrow_xy_addr       <= addr;
-            narrow_x_dout        <= dout_x;
-            narrow_y_dout        <= dout_y;
-            narrow_xy_valid <= valid;
-        end
-    endtask
-
-    task _update_rdct;
-        input [BANK_ADDR_W-1:0] bank;
-        input [ 7:0] addr;
-        input [17:0] dout_x;
-        input [17:0] dout_y;
-        input        valid;
-        begin   // schedule all five rdct_* registers from the arguments (non-blocking)
-            rdct_xy_bank       <= bank;
-            rdct_xy_addr       <= addr;
-            rdct_x_dout        <= dout_x;
-            rdct_y_dout        <= dout_y;
-            rdct_xy_valid <= valid;
-        end
-    endtask
-            
-    task set_wide;
-        input [BANK_ADDR_W-1:0] bank;
-        input [ 7:0] addr;
-        input [17:0] dout_x;
-        input [17:0] dout_y;
-        begin   // same as _update_wide with the valid flag set
-            _update_wide(bank, addr, dout_x, dout_y, 1'b1);
-        end
-    endtask
-    
-    task set_narrow;
-        input [BANK_ADDR_W-1:0] bank;
-        input [ 7:0] addr;
-        input [17:0] dout_x;
-        input [17:0] dout_y;
-        begin   // same as _update_narrow with the valid flag set
-            _update_narrow(bank, addr, dout_x, dout_y, 1'b1);
-        end
-    endtask
-    
-    task set_rdct;
-        input [BANK_ADDR_W-1:0] bank;
-        input [ 7:0] addr;
-        input [17:0] dout_x;
-        input [17:0] dout_y;
-        begin   // same as _update_rdct with the valid flag set
-            _update_rdct(bank, addr, dout_x, dout_y, 1'b1);
-        end
-    endtask
-    
-    task clear_wide;
-        begin   // drop the valid flag; bank, address and data become don't-care
-            _update_wide(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
-        end
-    endtask
-
-    task clear_narrow;
-        begin   // drop the valid flag; bank, address and data become don't-care
-            _update_narrow(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
-        end
-    endtask
-
-    task clear_rdct;
-        begin   // drop the valid flag; bank, address and data become don't-care
-            _update_rdct(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
-        end
-    endtask
-    
-    task _set_cnt_lsb;
-        input [7:0] cnt;
-        input       wrapped;
-        begin   // schedule the LSB counter and its wrapped flag together
-            cnt_lsb <= cnt;
-            cnt_lsb_wrapped <= wrapped;
-        end
-    endtask
-    
-    task _set_cnt_msb;
-        input [7:0] cnt;
-        input       wrapped;
-        begin   // schedule the MSB counter and its wrapped flag together
-            cnt_msb <= cnt;
-            cnt_msb_wrapped <= wrapped;
-        end
-    endtask    
-
-    task inc_cnt_lsb;
-        begin   // roll over to zero and set the wrapped flag at word_index_last, otherwise increment and keep the flag
-            if (cnt_lsb == word_index_last)
-                _set_cnt_lsb(8'd0, 1'b1);
-            else
-                _set_cnt_lsb(cnt_lsb + 1'b1, cnt_lsb_wrapped);
-        end
-    endtask
-    
-    task inc_cnt_both;
-        begin   // step the LSB and the MSB counter in the same clock
-            inc_cnt_lsb;
-            inc_cnt_msb;
-        end
-    endtask
-    
-    task inc_cnt_msb;
-        begin   // roll over to zero and set the wrapped flag at word_index_last, otherwise increment and keep the flag
-            if (cnt_msb == word_index_last)
-                _set_cnt_msb(8'd0, 1'b1);
-            else
-                _set_cnt_msb(cnt_msb + 1'b1, cnt_msb_wrapped);
-        end
-    endtask
-    
-    task clr_cnt_lsb;
-        begin   // zero the LSB counter and drop its wrapped flag
-            _set_cnt_lsb(8'd0, 1'b0);
-        end
-    endtask
-    
-    task clr_cnt_msb;
-        begin   // zero the MSB counter and drop its wrapped flag
-            _set_cnt_msb(8'd0, 1'b0);
-        end
-    endtask
-    
-   
-
-    wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid}; 
-    
-    always @(posedge clk)
-        // ena resets both word counters; while not ready, the task selected by rcmb_mode runs every clock
-        if (ena) begin
-            clr_cnt_lsb;
-            clr_cnt_msb;
-        end else if (!rdy)
-            // task enables are written without empty parentheses, which plain Verilog does not allow
-            case (rcmb_mode)
-                2'd1: recombine_square;
-                2'd2: recombine_triangle;
-                2'd3: recombine_rectangle;
-            endcase
-           
-    wire [17:0] rcmb_x_lsb_dout_pad = {2'b00, rcmb_x_lsb_dout};    // 16-bit cell outputs zero-extended to 18 bits
-    wire [17:0] rcmb_y_lsb_dout_pad = {2'b00, rcmb_y_lsb_dout};
-
-    wire [17:0] rcmb_x_msb_dout_pad = {2'b00, rcmb_x_msb_dout};
-    wire [17:0] rcmb_y_msb_dout_pad = {2'b00, rcmb_y_msb_dout};
-    
-    wire [17:0] rcmb_x_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[15: 0]};  // x is the lower half, y the upper half of the delayed pair
-    wire [17:0] rcmb_y_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[31:16]};
-
-    wire [17:0] rcmb_x_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_x_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[15: 0]}};  // 17-bit sum of LSB output and carry stage 1, zero-extended to 18 bits
-    wire [17:0] rcmb_y_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_y_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[31:16]}};
-           
-           
-    task recombine_square;
-        //
-        begin
-            // step 1: advance the word counter(s) whose recombinator output is valid
-            case (rcmb_xy_valid)
-                //
-                2'b01: inc_cnt_lsb; 
-                2'b10: inc_cnt_msb;
-                2'b11: inc_cnt_both;
-                //
-            endcase            
-            // step 2: choose what is presented on the wide_* registers
-            case (rcmb_xy_valid)
-                // nothing valid: emit the delayed MSB word if its flag is set, otherwise clear
-                2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
-                       else                       clear_wide;
-                //
-                2'b01:                            set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
-                // MSB only: nothing is emitted while cnt_msb is below 2 (those words go to the carry registers in step 3)
-                2'b10: if (cnt_msb < 8'd2)        clear_wide;                        
-                       else                       set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
-                // both valid: after the LSB counter has wrapped, LSB output plus carry stage 1 goes to the H bank
-                2'b11: if (cnt_lsb_wrapped)       set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); 
-                       else                       set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad,        rcmb_y_lsb_dout_pad);
-                //
-            endcase            
-            // step 3: maintain the MSB delay line and the carry registers
-            case (rcmb_xy_valid)
-                //
-                2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
-                2'b10: if (cnt_msb < 8'd2)        shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
-                //
-                2'b11: begin                      advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
-                       if (cnt_lsb_wrapped)       shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}});
-                       end
-                //
-            endcase
-            //        
-        end
-        //
-    endtask
-    
-    
-    task recombine_triangle;
-        //
-        begin
-            // only the LSB output is used in this mode; advance its word counter when it alone is valid
-            case (rcmb_xy_valid)
-                //
-                2'b01: inc_cnt_lsb;
-                // (task enable written without empty parentheses, matching the other recombine_* tasks)
-            endcase            
-            // LSB words go to BANK_NARROW_Q until the counter has wrapped, then to address 1 of BANK_NARROW_EXT
-            case (rcmb_xy_valid)
-                //
-                2'b00:                        clear_narrow;
-                2'b01:  if (!cnt_lsb_wrapped) set_narrow(BANK_NARROW_Q,   cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); 
-                        else                  set_narrow(BANK_NARROW_EXT, 8'd1,    rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
-                2'b10:                        clear_narrow;
-                2'b11:                        clear_narrow;
-                //
-            endcase
-            //        
-        end
-        //
-    endtask
-
-
-    task recombine_rectangle;
-        //
-        begin
-            // step 1: advance the word counter(s) whose recombinator output is valid
-            case (rcmb_xy_valid)
-                //
-                2'b01:  inc_cnt_lsb; 
-                2'b10:  inc_cnt_msb;
-                2'b11:  inc_cnt_both;
-                //
-            endcase
-//            // step 2: choose what is presented on the rdct_* registers
-            case (rcmb_xy_valid)
-//                //
-                2'b00:  if (rcmb_msb_flag_delay_2)  set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
-                        else                        clear_rdct;
-                2'b01:                              set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); 
-                2'b10:  if (!cnt_msb_wrapped) begin 
-                            if (cnt_msb < 8'd2)     clear_rdct;                        
-                            else                    set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
-                        end else                    set_rdct(BANK_RCMB_EXT, 8'd0, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
-                            
-                2'b11:  set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); 
-//                //
-            endcase            
-//            // step 3: maintain the MSB delay line and the carry registers
-            case (rcmb_xy_valid)
-//                //
-                2'b00:  if (rcmb_msb_flag_delay_2)  advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
-                2'b10:  begin 
-                            if ((cnt_msb < 8'd2) && !cnt_msb_wrapped) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
-                            if (cnt_msb_wrapped) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
-                        end
-//                //
-                2'b11:  begin  advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
-                                   shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}});
-                        end
-//                //
-            endcase
-            //
-        end
-        //
-    endtask
-    
-    
-    always @(posedge clk)
-        // rdy_adv is cleared by ena and then raised by a mode-specific completion condition while rdy_reg is low
-        if (ena) begin
-            rdy_adv <= 1'b0;
-        end else if (!rdy_reg) begin
-            //
-            case (rcmb_mode)
-                // mode 1: with no valid outputs and delay flag 2 set, rdy_adv becomes the inverse of delay flag 1
-                2'd1:   case (rcmb_xy_valid)
-                            //
-                            2'b00: begin
-                                //
-                                if (rcmb_msb_flag_delay_2) begin
-                                    //
-                                    rdy_adv <= ~rcmb_msb_flag_delay_1;
-                                    //
-                                end
-                                //
-                            end
-                            //
-                        endcase
-                // mode 2: rdy_adv follows cnt_lsb_wrapped whenever only the LSB output is valid
-                2'd2:   case (rcmb_xy_valid)
-                            //
-                            2'b01: rdy_adv <= cnt_lsb_wrapped;                                //
-                            //
-                        endcase
-                // mode 3: same condition as mode 1
-                2'd3: case (rcmb_xy_valid)
-                                            //
-                                            2'b00: begin
-                                                //
-                                                if (rcmb_msb_flag_delay_2) begin
-                                                    //
-                                                    rdy_adv <= ~rcmb_msb_flag_delay_1;
-                                                    //
-                                                end
-                                                //
-                                            end
-                                            //
-                                        endcase
-                //
-            endcase
-            //        
-        end
-
-
-    
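-        // TODO(review): "add ready for mode=3" -- the 2'd3 branch of the rdy_adv logic above appears to cover this; confirm and drop this note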
-endmodule
diff --git a/rtl/_modexpng_storage_block.v b/rtl/_modexpng_storage_block.v
deleted file mode 100644
index d6ef1ee..0000000
--- a/rtl/_modexpng_storage_block.v
+++ /dev/null
@@ -1,219 +0,0 @@
-module modexpng_storage_block
-(
-    clk, rst,
-    
-    wr_wide_xy_ena,
-    wr_wide_xy_bank,
-    wr_wide_xy_addr,
-    wr_wide_x_din,
-    wr_wide_y_din,
-
-    wr_narrow_xy_ena,
-    wr_narrow_xy_bank,
-    wr_narrow_xy_addr,
-    wr_narrow_x_din,
-    wr_narrow_y_din,
-
-    rd_wide_xy_ena,
-    rd_wide_xy_ena_aux,
-    rd_wide_xy_bank,
-    rd_wide_xy_bank_aux,
-    rd_wide_xy_addr,
-    rd_wide_xy_addr_aux,
-    rd_wide_x_dout,
-    rd_wide_y_dout,
-    rd_wide_x_dout_aux,
-    rd_wide_y_dout_aux,
-    
-    rd_narrow_xy_ena,
-    rd_narrow_xy_bank,
-    rd_narrow_xy_addr,
-    rd_narrow_x_dout,
-    rd_narrow_y_dout
-);
-
-
-    //
-    // Headers
-    //
-    `include "modexpng_parameters.vh"
-
-
-    //
-    // Ports
-    //
-    input                     clk;
-    input                     rst;
-
-    input                     wr_wide_xy_ena;
-    input  [BANK_ADDR_W -1:0] wr_wide_xy_bank;
-    input  [  OP_ADDR_W -1:0] wr_wide_xy_addr;
-    input  [ WORD_EXT_W -1:0] wr_wide_x_din;
-    input  [ WORD_EXT_W -1:0] wr_wide_y_din;
-    
-    input                     wr_narrow_xy_ena;
-    input  [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
-    input  [  OP_ADDR_W -1:0] wr_narrow_xy_addr;
-    input  [ WORD_EXT_W -1:0] wr_narrow_x_din;
-    input  [ WORD_EXT_W -1:0] wr_narrow_y_din;
-
-    input                                      rd_wide_xy_ena;
-    input                                      rd_wide_xy_ena_aux;
-    input  [                 BANK_ADDR_W -1:0] rd_wide_xy_bank;
-    input  [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
-    input  [NUM_MULTS_HALF * OP_ADDR_W   -1:0] rd_wide_xy_addr;      // one OP_ADDR_W-bit address slice per wide BRAM pair
-    input  [                 OP_ADDR_W   -1:0] rd_wide_xy_addr_aux;
-    output [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_x_dout;       // one WORD_EXT_W-bit data slice per wide BRAM pair
-    output [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_y_dout;
-    output [                 WORD_EXT_W  -1:0] rd_wide_x_dout_aux;
-    output [                 WORD_EXT_W  -1:0] rd_wide_y_dout_aux;
-    
-    input                     rd_narrow_xy_ena;
-    input  [BANK_ADDR_W -1:0] rd_narrow_xy_bank;
-    input  [  OP_ADDR_W -1:0] rd_narrow_xy_addr;
-    output [ WORD_EXT_W -1:0] rd_narrow_x_dout;
-    output [ WORD_EXT_W -1:0] rd_narrow_y_dout;
-
-    
-    //
-    // Internal Registers
-    //
-    reg rd_wide_xy_reg_ena     = 1'b0;
-    reg rd_wide_xy_reg_ena_aux = 1'b0;
-    reg rd_narrow_xy_reg_ena   = 1'b0;
-
-    always @(posedge clk) begin
-        // the regceb enables are the read enables delayed by one clock, forced low while rst is high
-        rd_wide_xy_reg_ena     <= rst ? 1'b0 : rd_wide_xy_ena;
-        rd_wide_xy_reg_ena_aux <= rst ? 1'b0 : rd_wide_xy_ena_aux;
-        rd_narrow_xy_reg_ena   <= rst ? 1'b0 : rd_narrow_xy_ena;
-        //
-    end
-
-    
-    //
-    // Helper Signals (full memory offset = {bank, address within bank})
-    //
-    wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset;
-    wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset[0:NUM_MULTS_HALF-1];
-    wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset_aux;
-    wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset;
-    wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset;
-
-    assign wr_wide_xy_offset     = {wr_wide_xy_bank,     wr_wide_xy_addr};
-    assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux};
-    assign wr_narrow_xy_offset   = {wr_narrow_xy_bank,   wr_narrow_xy_addr};
-    assign rd_narrow_xy_offset   = {rd_narrow_xy_bank,   rd_narrow_xy_addr};
-    
-    //
-    // "Wide" Storage (NUM_MULTS_HALF X/Y BRAM pairs, all written identically, each read at its own address)
-    //
-    genvar z;
-    generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
-        begin : gen_wide_bram
-            // read bank is common to all pairs, read address is the z-th slice of rd_wide_xy_addr
-            assign rd_wide_xy_offset[z] = {rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]};
-            //
-            modexpng_sdp_36k_wrapper wide_bram_x
-            (
-                .clk    (clk),
-                
-                .ena    (wr_wide_xy_ena),
-                .wea    (wr_wide_xy_ena),
-                .addra  (wr_wide_xy_offset),
-                .dina   (wr_wide_x_din),
-                
-                .enb    (rd_wide_xy_ena),
-                .regceb (rd_wide_xy_reg_ena),
-                .addrb  (rd_wide_xy_offset[z]),
-                .doutb  (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W])
-            );
-            //
-            modexpng_sdp_36k_wrapper wide_bram_y
-            (
-                .clk    (clk),
-
-                .ena    (wr_wide_xy_ena),
-                .wea    (wr_wide_xy_ena),
-                .addra  (wr_wide_xy_offset),
-                .dina   (wr_wide_y_din),
-            
-                .enb    (rd_wide_xy_ena),
-                .regceb (rd_wide_xy_reg_ena),
-                .addrb  (rd_wide_xy_offset[z]),
-                .doutb  (rd_wide_y_dout[z*WORD_EXT_W +: WORD_EXT_W])
-            );
-            //
-        end
-    endgenerate
-
-    
-    //
-    // Auxiliary Storage (same write port as the wide BRAMs, separate read enable/bank/address)
-    //
-    modexpng_sdp_36k_wrapper wide_bram_x_aux
-    (
-        .clk    (clk),
-
-        .ena    (wr_wide_xy_ena),
-        .wea    (wr_wide_xy_ena),
-        .addra  (wr_wide_xy_offset),
-        .dina   (wr_wide_x_din),
-
-        .enb    (rd_wide_xy_ena_aux),
-        .regceb (rd_wide_xy_reg_ena_aux),
-        .addrb  (rd_wide_xy_offset_aux),
-        .doutb  (rd_wide_x_dout_aux)
-    );
-    //
-    modexpng_sdp_36k_wrapper wide_bram_y_aux
-    (
-        .clk    (clk),
-
-        .ena    (wr_wide_xy_ena),
-        .wea    (wr_wide_xy_ena),
-        .addra  (wr_wide_xy_offset),
-        .dina   (wr_wide_y_din),
-
-        .enb    (rd_wide_xy_ena_aux),
-        .regceb (rd_wide_xy_reg_ena_aux),
-        .addrb  (rd_wide_xy_offset_aux),
-        .doutb  (rd_wide_y_dout_aux)
-    );
-
-            
-    //
-    // "Narrow" Storage (a single X/Y BRAM pair with its own write and read ports)
-    //
-    modexpng_sdp_36k_wrapper narrow_bram_x
-    (
-        .clk    (clk),
-
-        .ena    (wr_narrow_xy_ena),
-        .wea    (wr_narrow_xy_ena),
-        .addra  (wr_narrow_xy_offset),
-        .dina   (wr_narrow_x_din),
-    
-        .enb    (rd_narrow_xy_ena),
-        .regceb (rd_narrow_xy_reg_ena),
-        .addrb  (rd_narrow_xy_offset),
-        .doutb  (rd_narrow_x_dout)
-    );
-
-    modexpng_sdp_36k_wrapper narrow_bram_y
-    (
-        .clk    (clk),
-
-        .ena    (wr_narrow_xy_ena),
-        .wea    (wr_narrow_xy_ena),
-        .addra  (wr_narrow_xy_offset),
-        .dina   (wr_narrow_y_din),
-    
-        .enb    (rd_narrow_xy_ena),
-        .regceb (rd_narrow_xy_reg_ena),
-        .addrb  (rd_narrow_xy_offset),
-        .doutb  (rd_narrow_y_dout)
-    );
-
-
-endmodule
diff --git a/rtl/_modexpng_storage_manager.v b/rtl/_modexpng_storage_manager.v
deleted file mode 100644
index 958596a..0000000
--- a/rtl/_modexpng_storage_manager.v
+++ /dev/null
@@ -1,199 +0,0 @@
-module modexpng_storage_manager
-(
-    clk, rst,
-    
-    wr_wide_xy_ena,
-    wr_wide_xy_bank,
-    wr_wide_xy_addr,
-    wr_wide_x_din,
-    wr_wide_y_din,
-
-    wr_narrow_xy_ena,
-    wr_narrow_xy_bank,
-    wr_narrow_xy_addr,
-    wr_narrow_x_din,
-    wr_narrow_y_din,
-    
-    ext_wide_xy_ena,
-    ext_wide_xy_bank,
-    ext_wide_xy_addr,
-    ext_wide_x_din,
-    ext_wide_y_din,
-
-    ext_narrow_xy_ena,
-    ext_narrow_xy_bank,
-    ext_narrow_xy_addr,
-    ext_narrow_x_din,
-    ext_narrow_y_din,
-    
-    rcmb_wide_xy_ena,
-    rcmb_wide_xy_bank,
-    rcmb_wide_xy_addr,
-    rcmb_wide_x_din,
-    rcmb_wide_y_din,
-
-    rcmb_narrow_xy_ena,
-    rcmb_narrow_xy_bank,
-    rcmb_narrow_xy_addr,
-    rcmb_narrow_x_din,
-    rcmb_narrow_y_din
-);
-
-
-    //
-    // Headers
-    //
-    `include "../rtl/modexpng_parameters.vh"
-
-
-    //
-    // Ports
-    //
-    input                     clk;
-    input                     rst;
-
-    output                    wr_wide_xy_ena;
-    output [BANK_ADDR_W -1:0] wr_wide_xy_bank;
-    output [  OP_ADDR_W -1:0] wr_wide_xy_addr;
-    output [ WORD_EXT_W -1:0] wr_wide_x_din;
-    output [ WORD_EXT_W -1:0] wr_wide_y_din;
-
-    output                    wr_narrow_xy_ena;
-    output [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
-    output [  OP_ADDR_W -1:0] wr_narrow_xy_addr;
-    output [ WORD_EXT_W -1:0] wr_narrow_x_din;
-    output [ WORD_EXT_W -1:0] wr_narrow_y_din;
-   
-    input                     ext_wide_xy_ena;
-    input  [BANK_ADDR_W -1:0] ext_wide_xy_bank;
-    input  [  OP_ADDR_W -1:0] ext_wide_xy_addr;
-    input  [ WORD_EXT_W -1:0] ext_wide_x_din;
-    input  [ WORD_EXT_W -1:0] ext_wide_y_din;
-
-    input                     ext_narrow_xy_ena;
-    input  [BANK_ADDR_W -1:0] ext_narrow_xy_bank;
-    input  [  OP_ADDR_W -1:0] ext_narrow_xy_addr;
-    input  [ WORD_EXT_W -1:0] ext_narrow_x_din;
-    input  [ WORD_EXT_W -1:0] ext_narrow_y_din;
-    
-    input                     rcmb_wide_xy_ena;
-    input  [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
-    input  [  OP_ADDR_W -1:0] rcmb_wide_xy_addr;
-    input  [ WORD_EXT_W -1:0] rcmb_wide_x_din;
-    input  [ WORD_EXT_W -1:0] rcmb_wide_y_din;
-
-    input                     rcmb_narrow_xy_ena;
-    input  [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
-    input  [  OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
-    input  [ WORD_EXT_W -1:0] rcmb_narrow_x_din;
-    input  [ WORD_EXT_W -1:0] rcmb_narrow_y_din;
-
-    reg                    wr_wide_xy_ena_reg = 1'b0;
-    reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_reg;
-    reg [  OP_ADDR_W -1:0] wr_wide_xy_addr_reg;
-    reg [ WORD_EXT_W -1:0] wr_wide_x_din_reg;
-    reg [ WORD_EXT_W -1:0] wr_wide_y_din_reg;
-
-    reg                    wr_narrow_xy_ena_reg = 1'b0;
-    reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_reg;
-    reg [  OP_ADDR_W -1:0] wr_narrow_xy_addr_reg;
-    reg [ WORD_EXT_W -1:0] wr_narrow_x_din_reg;
-    reg [ WORD_EXT_W -1:0] wr_narrow_y_din_reg;
-    
-    task _update_wide;
-        input                    xy_ena;
-        input [BANK_ADDR_W -1:0] xy_bank;
-        input [  OP_ADDR_W -1:0] xy_addr;
-        input [ WORD_EXT_W -1:0] x_din;
-        input [ WORD_EXT_W -1:0] y_din;
-        begin
-            wr_wide_xy_ena_reg  <= xy_ena;
-            wr_wide_xy_bank_reg <= xy_bank;
-            wr_wide_xy_addr_reg <= xy_addr;
-            wr_wide_x_din_reg   <= x_din;
-            wr_wide_y_din_reg   <= y_din;
-        end
-    endtask
-    
-    task _update_narrow;
-        input                    xy_ena;
-        input [BANK_ADDR_W -1:0] xy_bank;
-        input [  OP_ADDR_W -1:0] xy_addr;
-        input [ WORD_EXT_W -1:0] x_din;
-        input [ WORD_EXT_W -1:0] y_din;
-        begin
-            wr_narrow_xy_ena_reg  <= xy_ena;
-            wr_narrow_xy_bank_reg <= xy_bank;
-            wr_narrow_xy_addr_reg <= xy_addr;
-            wr_narrow_x_din_reg   <= x_din;
-            wr_narrow_y_din_reg   <= y_din;
-        end
-    endtask
-    
-    task enable_wide;
-        input [BANK_ADDR_W -1:0] xy_bank;
-        input [  OP_ADDR_W -1:0] xy_addr;
-        input [ WORD_EXT_W -1:0] x_din;
-        input [ WORD_EXT_W -1:0] y_din;
-        begin
-            _update_wide(1'b1, xy_bank, xy_addr, x_din, y_din);
-        end
-    endtask
-    
-    task enable_narrow;
-        input [BANK_ADDR_W -1:0] xy_bank;
-        input [  OP_ADDR_W -1:0] xy_addr;
-        input [ WORD_EXT_W -1:0] x_din;
-        input [ WORD_EXT_W -1:0] y_din;
-        begin
-            _update_narrow(1'b1, xy_bank, xy_addr, x_din, y_din);
-        end
-    endtask
-    
-    task disable_wide;
-        begin
-            _update_wide(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
-        end
-    endtask
-    
-    task disable_narrow;
-        begin
-            _update_narrow(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
-        end
-    endtask
-    
-    always @(posedge clk)
-        //
-        if (rst)                       disable_wide;
-        else begin
-            //
-            if      (ext_wide_xy_ena)  enable_wide(ext_wide_xy_bank,  ext_wide_xy_addr,  ext_wide_x_din,  ext_wide_y_din);
-            else if (rcmb_wide_xy_ena) enable_wide(rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din);
-            else                       disable_wide;
-            //
-        end
-            
-    always @(posedge clk)
-        //
-        if (rst)                         disable_narrow;
-        else begin
-            //
-            if      (ext_narrow_xy_ena)  enable_narrow(ext_narrow_xy_bank,  ext_narrow_xy_addr,  ext_narrow_x_din,  ext_narrow_y_din);
-            else if (rcmb_narrow_xy_ena) enable_narrow(rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din);
-            else                         disable_narrow;
-            //
-        end
-
-    assign wr_wide_xy_ena  = wr_wide_xy_ena_reg;
-    assign wr_wide_xy_bank = wr_wide_xy_bank_reg;
-    assign wr_wide_xy_addr = wr_wide_xy_addr_reg;
-    assign wr_wide_x_din   = wr_wide_x_din_reg;
-    assign wr_wide_y_din   = wr_wide_y_din_reg;
-
-    assign wr_narrow_xy_ena  = wr_narrow_xy_ena_reg;
-    assign wr_narrow_xy_bank = wr_narrow_xy_bank_reg;
-    assign wr_narrow_xy_addr = wr_narrow_xy_addr_reg;
-    assign wr_narrow_x_din   = wr_narrow_x_din_reg;
-    assign wr_narrow_y_din   = wr_narrow_y_din_reg;
-    
-endmodule
diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
new file mode 100644
index 0000000..e834d37
--- /dev/null
+++ b/rtl/modexpng_core_top.v
@@ -0,0 +1,41 @@
+module modexpng_core_top    // top-level shell: only ports are declared so far, the body is empty
+(
+    clk, clk_bus,
+    rst,
+    next, valid,
+    word_index_last,        // NOTE(review): listed here but has no input/output declaration below - add one (width to be confirmed)
+    bus_cs,
+    bus_we,
+    bus_addr,
+    bus_data_wr,
+    bus_data_rd
+);
+
+    
+    //
+    // Headers
+    //
+    `include "modexpng_parameters.vh"
+
+    
+    //
+    // Ports
+    //
+    input                                           clk;
+    input                                           clk_bus;
+    
+    input                                           rst;
+    
+    input                                           next;
+    output                                          valid;          // NOTE(review): not driven anywhere in this stub
+
+    input                                           bus_cs;
+    input                                           bus_we;
+    input  [4 * (BANK_ADDR_W + BUS_OP_ADDR_W) -1:0] bus_addr;       // NOTE(review): modexpng_storage_block declares bus_addr as 2 + BANK_ADDR_W + BUS_OP_ADDR_W bits - confirm which width is intended
+    input  [                   BUS_DATA_W     -1:0] bus_data_wr;
+    output [                   BUS_DATA_W     -1:0] bus_data_rd;    // NOTE(review): not driven anywhere in this stub
+    
+    
+  
+
+endmodule
diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v
index df0f823..babd565 100644
--- a/rtl/modexpng_mmm_dual.v
+++ b/rtl/modexpng_mmm_dual.v
@@ -45,7 +45,7 @@ module modexpng_mmm_dual
     rcmb_y_dout,
     rcmb_xy_valid,
     
-    rdct_ena
+    rdct_ena, rdct_rdy
 );
 
 
@@ -110,6 +110,7 @@ module modexpng_mmm_dual
     output        rcmb_xy_valid;
     
     output        rdct_ena;
+    input         rdct_rdy;
 
     
     //
@@ -928,7 +929,11 @@ module modexpng_mmm_dual
             FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next =                         FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
             FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
             
-            FSM_STATE_MULT_RECTANGLE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF;
+            FSM_STATE_MULT_RECTANGLE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? FSM_STATE_WAIT_REDUCTOR : FSM_STATE_MULT_RECTANGLE_HOLDOFF;
+            
+            FSM_STATE_WAIT_REDUCTOR:             fsm_state_next =                         rdct_rdy ? FSM_STATE_STOP : FSM_STATE_WAIT_REDUCTOR;
+            
+            FSM_STATE_STOP:                 fsm_state_next =                         FSM_STATE_IDLE                   ;
             
             default:                          fsm_state_next =                         FSM_STATE_IDLE                   ;
 
@@ -944,13 +949,28 @@ module modexpng_mmm_dual
 
     assign rdct_ena = rdct_ena_reg; 
     
-    always @(posedge clk) // add reset!!!
+    always @(posedge clk)                                          // rdct_ena_reg: one-cycle strobe, now with synchronous reset
         //
-        case (fsm_state)
+        if (rst)                                rdct_ena_reg <= 1'b0;   // reset clears the strobe
+        else case (fsm_state)
            FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1;
            default:                             rdct_ena_reg <= 1'b0;
         endcase
     
     
+    //
+    // Ready Logic
+    //
+    reg rdy_reg = 1'b1;                                             // initial value: ready
+    
+    assign rdy = rdy_reg;
+    
+    always @(posedge clk)
+        //
+        if (rst) rdy_reg <= 1'b1;                                   // synchronous reset returns to ready
+        else begin
+            if (rdy && ena) rdy_reg <= 1'b0;                        // ena seen while ready: drop rdy
+            if (!rdy && (fsm_state == FSM_STATE_STOP)) rdy_reg <= 1'b1; // FSM in STOP while busy: raise rdy again
+        end
     
 endmodule
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index 514fc21..6d63735 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -17,6 +17,9 @@ endfunction
 localparam WORD_W     = 16;
 localparam WORD_EXT_W = 18;
 localparam MAC_W      = 47;
+localparam BUS_DATA_W = 32;
 
 localparam MAX_OP_W = 4096;
+
+localparam BUS_OP_ADDR_W = cryptech_clog2(MAX_OP_W / BUS_DATA_W);  // declared after MAX_OP_W, which it is computed from
 
diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
index de60d1f..fe4ffb9 100644
--- a/rtl/modexpng_recombinator_block.v
+++ b/rtl/modexpng_recombinator_block.v
@@ -1218,8 +1218,5 @@ module modexpng_recombinator_block
             endcase
             //        
         end
-
-
-    
-        // add ready for mode=3
+        
 endmodule
diff --git a/rtl/modexpng_sdp_36k_wrapper.v b/rtl/modexpng_sdp_36k_wrapper.v
index 0295697..433ee55 100644
--- a/rtl/modexpng_sdp_36k_wrapper.v
+++ b/rtl/modexpng_sdp_36k_wrapper.v
@@ -13,7 +13,7 @@ module modexpng_sdp_36k_wrapper
     //
     // Headers
     //
-    `include "../rtl/modexpng_parameters.vh"
+    `include "modexpng_parameters.vh"
 
 
     //
diff --git a/rtl/modexpng_sdp_36k_wrapper.v b/rtl/modexpng_sdp_36k_x16_x32_wrapper.v
similarity index 52%
copy from rtl/modexpng_sdp_36k_wrapper.v
copy to rtl/modexpng_sdp_36k_x16_x32_wrapper.v
index 0295697..69c5383 100644
--- a/rtl/modexpng_sdp_36k_wrapper.v
+++ b/rtl/modexpng_sdp_36k_x16_x32_wrapper.v
@@ -1,6 +1,6 @@
-module modexpng_sdp_36k_wrapper
+module modexpng_sdp_36k_x16_x32_wrapper  // simple dual-port BRAM: BUS_DATA_W-bit write side on clk_bus, WORD_W-bit read side
 (
-    clk,
+    clk, clk_bus,
     
     ena, wea,
     addra, dina,
@@ -13,24 +13,26 @@ module modexpng_sdp_36k_wrapper
     //
     // Headers
     //
-    `include "../rtl/modexpng_parameters.vh"
+    `include "modexpng_parameters.vh"
 
 
     //
     // Ports
     //
-    input                                 clk;
+    input                                     clk;
+    input                                     clk_bus;
     
-    input                                 ena;
-    input                                 wea;
-    input  [BANK_ADDR_W + OP_ADDR_W -1:0] addra;
-    input  [             WORD_EXT_W -1:0] dina;
+    input                                     ena;
+    input                                     wea;
+    input  [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addra;
+    input  [              BUS_DATA_W    -1:0] dina;      // write data is one full bus word
     
-    input                                 enb;
-    input                                 regceb;
-    input  [BANK_ADDR_W + OP_ADDR_W -1:0] addrb;
-    output [             WORD_EXT_W -1:0] doutb;
+    input                                     enb;
+    input                                     regceb;
+    input  [BANK_ADDR_W + OP_ADDR_W     -1:0] addrb;
+    output [              WORD_W        -1:0] doutb;
 
+    
     //
     // BRAM_SDP_MACRO
     //
@@ -40,10 +42,10 @@ module modexpng_sdp_36k_wrapper
         
         .BRAM_SIZE              ("36Kb"),
         
-        .WRITE_WIDTH            (WORD_EXT_W),
-        .READ_WIDTH             (WORD_EXT_W),
+        .WRITE_WIDTH            (BUS_DATA_W),
+        .READ_WIDTH             (WORD_W),
         
-        .DO_REG                 (1),
+        .DO_REG                 (0),            // NOTE(review): with the output register off, regceb cannot gate anything; the TDP variant uses DOB_REG = 1 for the same core-side read - confirm intended latency
         .WRITE_MODE             ("READ_FIRST"),
         
         .SRVAL                  (72'h000000000000000000),
@@ -56,9 +58,9 @@ module modexpng_sdp_36k_wrapper
     (
         .RST        (1'b0),
     
-        .WRCLK      (clk),
+        .WRCLK      (clk_bus),
         .WREN       (ena),
-        .WE         ({2{wea}}),
+        .WE         ({4{wea}}),
         .WRADDR     (addra),
         .DI         (dina),
         
@@ -69,4 +71,5 @@ module modexpng_sdp_36k_wrapper
         .DO         (doutb)
     );
     
+    
 endmodule
diff --git a/rtl/modexpng_sdp_36k_wrapper.v b/rtl/modexpng_sdp_36k_x32_x16_wrapper.v
similarity index 53%
copy from rtl/modexpng_sdp_36k_wrapper.v
copy to rtl/modexpng_sdp_36k_x32_x16_wrapper.v
index 0295697..ff86802 100644
--- a/rtl/modexpng_sdp_36k_wrapper.v
+++ b/rtl/modexpng_sdp_36k_x32_x16_wrapper.v
@@ -1,11 +1,11 @@
-module modexpng_sdp_36k_wrapper
+module modexpng_sdp_36k_x32_x16_wrapper  // simple dual-port BRAM: WORD_W-bit write side, BUS_DATA_W-bit read side on clk_bus
 (
-    clk,
+    clk, clk_bus,
     
     ena, wea,
     addra, dina,
     
-    enb, regceb,
+    enb,
     addrb, doutb
 );
 
@@ -13,24 +13,25 @@ module modexpng_sdp_36k_wrapper
     //
     // Headers
     //
-    `include "../rtl/modexpng_parameters.vh"
+    `include "modexpng_parameters.vh"
 
 
     //
     // Ports
     //
-    input                                 clk;
+    input                                     clk;
+    input                                     clk_bus;
     
-    input                                 ena;
-    input                                 wea;
-    input  [BANK_ADDR_W + OP_ADDR_W -1:0] addra;
-    input  [             WORD_EXT_W -1:0] dina;
+    input                                     ena;
+    input                                     wea;
+    input  [BANK_ADDR_W + OP_ADDR_W     -1:0] addra;
+    input  [              WORD_W        -1:0] dina;
     
-    input                                 enb;
-    input                                 regceb;
-    input  [BANK_ADDR_W + OP_ADDR_W -1:0] addrb;
-    output [             WORD_EXT_W -1:0] doutb;
+    input                                     enb;
+    input  [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addrb;     // read address counts BUS_DATA_W-bit words
+    output [              BUS_DATA_W    -1:0] doutb;
 
+    
     //
     // BRAM_SDP_MACRO
     //
@@ -40,10 +41,10 @@ module modexpng_sdp_36k_wrapper
         
         .BRAM_SIZE              ("36Kb"),
         
-        .WRITE_WIDTH            (WORD_EXT_W),
-        .READ_WIDTH             (WORD_EXT_W),
+        .WRITE_WIDTH            (WORD_W),
+        .READ_WIDTH             (BUS_DATA_W),
         
-        .DO_REG                 (1),
+        .DO_REG                 (0),            // output register disabled; REGCE is tied to 1'b0 below
         .WRITE_MODE             ("READ_FIRST"),
         
         .SRVAL                  (72'h000000000000000000),
@@ -62,9 +63,9 @@ module modexpng_sdp_36k_wrapper
         .WRADDR     (addra),
         .DI         (dina),
         
-        .RDCLK      (clk),
+        .RDCLK      (clk_bus),      // read side is clocked by the bus clock
         .RDEN       (enb),
-        .REGCE      (regceb),
+        .REGCE      (1'b0),         // regceb port was removed from this wrapper
         .RDADDR     (addrb),
         .DO         (doutb)
     );
diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v
index be04c7c..d5b9b24 100644
--- a/rtl/modexpng_storage_block.v
+++ b/rtl/modexpng_storage_block.v
@@ -1,6 +1,6 @@
 module modexpng_storage_block
 (
-    clk, rst,
+    clk, clk_bus, rst,
     
     wr_wide_xy_ena,
     wr_wide_xy_bank,
@@ -29,7 +29,26 @@ module modexpng_storage_block
     rd_narrow_xy_bank,
     rd_narrow_xy_addr,
     rd_narrow_x_dout,
-    rd_narrow_y_dout
+    rd_narrow_y_dout,
+    
+    bus_cs,
+    bus_we,
+    bus_addr,
+    bus_data_wr,
+    bus_data_rd,
+    
+    in_1_en,
+    in_1_addr,
+    in_1_dout,
+    
+    in_2_en,
+    in_2_addr,
+    in_2_dout,
+    
+    out_en,
+    out_we,
+    out_addr,
+    out_din
 );
 
     //
@@ -37,47 +56,71 @@ module modexpng_storage_block
     //
     `include "modexpng_parameters.vh"
 
+
     //
     // Ports
     //
-    input                                      clk;
-    input                                      rst;
-
-    input                                      wr_wide_xy_ena;
-    input  [                 BANK_ADDR_W -1:0] wr_wide_xy_bank;
-    input  [                 OP_ADDR_W   -1:0] wr_wide_xy_addr;
-    input  [                 WORD_EXT_W  -1:0] wr_wide_x_din;
-    input  [                 WORD_EXT_W  -1:0] wr_wide_y_din;
-    
-    input                                      wr_narrow_xy_ena;
-    input  [                 BANK_ADDR_W -1:0] wr_narrow_xy_bank;
-    input  [                 OP_ADDR_W   -1:0] wr_narrow_xy_addr;
-    input  [                 WORD_EXT_W  -1:0] wr_narrow_x_din;
-    input  [                 WORD_EXT_W  -1:0] wr_narrow_y_din;
-
-    input                                      rd_wide_xy_ena;
-    input                                      rd_wide_xy_ena_aux;
-    input  [                 BANK_ADDR_W -1:0] rd_wide_xy_bank;
-    input  [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
-    input  [NUM_MULTS_HALF * OP_ADDR_W   -1:0] rd_wide_xy_addr;
-    input  [                 OP_ADDR_W   -1:0] rd_wide_xy_addr_aux;
-    output [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_x_dout;
-    output [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_y_dout;
-    output [                 WORD_EXT_W  -1:0] rd_wide_x_dout_aux;
-    output [                 WORD_EXT_W  -1:0] rd_wide_y_dout_aux;
-    
-    input                                      rd_narrow_xy_ena;
-    input  [                 BANK_ADDR_W -1:0] rd_narrow_xy_bank;
-    input  [                 OP_ADDR_W   -1:0] rd_narrow_xy_addr;
-    output [                 WORD_EXT_W  -1:0] rd_narrow_x_dout;
-    output [                 WORD_EXT_W  -1:0] rd_narrow_y_dout;
+    input                                         clk;
+    input                                         clk_bus;
+    input                                         rst;
 
+    input                                         wr_wide_xy_ena;
+    input  [                  BANK_ADDR_W   -1:0] wr_wide_xy_bank;
+    input  [                  OP_ADDR_W     -1:0] wr_wide_xy_addr;
+    input  [                  WORD_EXT_W    -1:0] wr_wide_x_din;
+    input  [                  WORD_EXT_W    -1:0] wr_wide_y_din;
+    
+    input                                         wr_narrow_xy_ena;
+    input  [                  BANK_ADDR_W   -1:0] wr_narrow_xy_bank;
+    input  [                  OP_ADDR_W     -1:0] wr_narrow_xy_addr;
+    input  [                  WORD_EXT_W    -1:0] wr_narrow_x_din;
+    input  [                  WORD_EXT_W    -1:0] wr_narrow_y_din;
+
+    input                                         rd_wide_xy_ena;
+    input                                         rd_wide_xy_ena_aux;
+    input  [                  BANK_ADDR_W   -1:0] rd_wide_xy_bank;
+    input  [                  BANK_ADDR_W   -1:0] rd_wide_xy_bank_aux;
+    input  [ NUM_MULTS_HALF * OP_ADDR_W     -1:0] rd_wide_xy_addr;
+    input  [                  OP_ADDR_W     -1:0] rd_wide_xy_addr_aux;
+    output [ NUM_MULTS_HALF * WORD_EXT_W    -1:0] rd_wide_x_dout;
+    output [ NUM_MULTS_HALF * WORD_EXT_W    -1:0] rd_wide_y_dout;
+    output [                  WORD_EXT_W    -1:0] rd_wide_x_dout_aux;
+    output [                  WORD_EXT_W    -1:0] rd_wide_y_dout_aux;
+    
+    input                                         rd_narrow_xy_ena;
+    input  [                  BANK_ADDR_W   -1:0] rd_narrow_xy_bank;
+    input  [                  OP_ADDR_W     -1:0] rd_narrow_xy_addr;
+    output [                  WORD_EXT_W    -1:0] rd_narrow_x_dout;
+    output [                  WORD_EXT_W    -1:0] rd_narrow_y_dout;
+
+    input                                         bus_cs;
+    input                                         bus_we;
+    input  [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr;
+    input  [                  BUS_DATA_W    -1:0] bus_data_wr;
+    output [                  BUS_DATA_W    -1:0] bus_data_rd;
+    
+    input                                         in_1_en;
+    input  [    BANK_ADDR_W + OP_ADDR_W     -1:0] in_1_addr;
+    output [                  WORD_W        -1:0] in_1_dout;
+    
+    input                                         in_2_en;
+    input  [    BANK_ADDR_W + OP_ADDR_W     -1:0] in_2_addr;
+    output [                  WORD_W        -1:0] in_2_dout;
+    
+    input                                         out_en;
+    input                                         out_we;
+    input  [    BANK_ADDR_W + OP_ADDR_W     -1:0] out_addr;
+    input  [                  WORD_W        -1:0] out_din;
+
+    
     //
     // Internal Registers
     //
     reg rd_wide_xy_reg_ena     = 1'b0;
     reg rd_wide_xy_reg_ena_aux = 1'b0;
     reg rd_narrow_xy_reg_ena   = 1'b0;
+    reg in_1_reg_en            = 1'b0;
+    reg in_2_reg_en            = 1'b0;
 
     always @(posedge clk)
         //
@@ -85,10 +128,14 @@ module modexpng_storage_block
             rd_wide_xy_reg_ena     <= 1'b0;
             rd_wide_xy_reg_ena_aux <= 1'b0;
             rd_narrow_xy_reg_ena   <= 1'b0;
+            in_1_reg_en            <= 1'b0;
+            in_2_reg_en            <= 1'b0;
         end else begin
             rd_wide_xy_reg_ena     <= rd_wide_xy_ena;
             rd_wide_xy_reg_ena_aux <= rd_wide_xy_ena_aux;
             rd_narrow_xy_reg_ena   <= rd_narrow_xy_ena;
+            in_1_reg_en            <= in_1_en;
+            in_2_reg_en            <= in_2_en;
         end
     
     //
@@ -213,4 +260,87 @@ module modexpng_storage_block
         .doutb  (rd_narrow_y_dout)
     );
 
+    // INPUT, OUTPUT Storage Buffers: each buffer has one side on clk_bus and one side on clk.
+    // The two MSBs of bus_addr select the buffer: 0 = INPUT_1, 1 = INPUT_2, 2 = OUTPUT.
+    // Bus reads with selector 1 or 3 return the filler word 32'hDEADC0DE.
+    wire [                          2 -1:0] bus_addr_msb = bus_addr[BANK_ADDR_W + BUS_OP_ADDR_W +: 2];
+    wire [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr_lsb = bus_addr[BANK_ADDR_W + BUS_OP_ADDR_W -1:0];
+    reg  [                          2 -1:0] bus_addr_msb_dly;
+    wire [              BUS_DATA_W    -1:0] bus_data_rd_input_1;
+    wire [              BUS_DATA_W    -1:0] bus_data_rd_output;
+
+    wire                                    bus_data_wr_input_1 = bus_we && (bus_addr_msb == 2'd0);   // write strobe comes from bus_we, not from the data bus
+    wire                                    bus_data_wr_input_2 = bus_we && (bus_addr_msb == 2'd1);   // (the BRAM enable is already gated by bus_cs)
+
+    /* INPUT_1 */
+    modexpng_tdp_36k_x16_x32_wrapper bram_input_1   // true dual-port variant: the only wrapper that has a douta port
+    (
+        .clk        (clk),                  // core clock
+        .clk_bus    (clk_bus),              // bus clock
+    
+        .ena        (bus_cs),               // bus side read-write
+        .wea        (bus_data_wr_input_1),  //
+        .addra      (bus_addr_lsb),         //
+        .dina       (bus_data_wr),          //
+        .douta      (bus_data_rd_input_1),  //
+    
+        .enb        (in_1_en),              // core side read-only
+        .regceb     (in_1_reg_en),          //
+        .addrb      (in_1_addr),            //
+        .doutb      (in_1_dout)             //
+    );
+    
+    
+    /* INPUT_2 */
+    modexpng_sdp_36k_x16_x32_wrapper bram_input_2
+    (
+        .clk        (clk),                  // core clock
+        .clk_bus    (clk_bus),              // bus clock
+    
+        .ena        (bus_cs),               // bus side write-only
+        .wea        (bus_data_wr_input_2),  //
+        .addra      (bus_addr_lsb),         //
+        .dina       (bus_data_wr),          //
+    
+        .enb        (in_2_en),              // core side read-only
+        .regceb     (in_2_reg_en),          //
+        .addrb      (in_2_addr),            //
+        .doutb      (in_2_dout)             //
+    );
+
+
+    /* OUTPUT */
+    modexpng_sdp_36k_x32_x16_wrapper bram_output
+    (
+        .clk        (clk),                  // core clock 
+        .clk_bus    (clk_bus),              // bus clock
+    
+        .ena        (out_en),               // core side write-only
+        .wea        (out_we),               //
+        .addra      (out_addr),             //
+        .dina       (out_din),              //
+    
+        .enb        (bus_cs),               // bus side read-only
+        .addrb      (bus_addr_lsb),         //
+        .doutb      (bus_data_rd_output)    //
+    );
+
+    reg [BUS_DATA_W -1:0] bus_data_rd_mux;  // same width as bus_data_rd
+    assign bus_data_rd = bus_data_rd_mux;
+
+    always @(posedge clk_bus)               // delay the selector by one bus clock before it drives the read mux
+        bus_addr_msb_dly <= bus_addr_msb;
+
+    always @(*)
+        //
+        case (bus_addr_msb_dly)
+            //
+            2'd0: bus_data_rd_mux = bus_data_rd_input_1;
+            2'd1: bus_data_rd_mux = 32'hDEADC0DE;
+            2'd2: bus_data_rd_mux = bus_data_rd_output;
+            2'd3: bus_data_rd_mux = 32'hDEADC0DE;
+            //
+        endcase
+
 endmodule
+
diff --git a/rtl/modexpng_tdp_36k_x16_x32_wrapper.v b/rtl/modexpng_tdp_36k_x16_x32_wrapper.v
new file mode 100644
index 0000000..37a5cbc
--- /dev/null
+++ b/rtl/modexpng_tdp_36k_x16_x32_wrapper.v
@@ -0,0 +1,88 @@
+module modexpng_tdp_36k_x16_x32_wrapper  // true dual-port BRAM: port A is BUS_DATA_W bits on clk_bus, port B is WORD_W bits on clk
+(
+    clk, clk_bus,
+    
+    ena, wea,
+    addra, dina, douta,
+    
+    enb, regceb,
+    addrb, doutb
+);
+
+
+    //
+    // Headers
+    //
+    `include "modexpng_parameters.vh"
+
+
+    //
+    // Ports
+    //
+    input                                     clk;
+    input                                     clk_bus;
+    
+    input                                     ena;
+    input                                     wea;
+    input  [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addra;
+    input  [              BUS_DATA_W    -1:0] dina;
+    output [              BUS_DATA_W    -1:0] douta;
+    
+    input                                     enb;
+    input                                     regceb;
+    input  [BANK_ADDR_W + OP_ADDR_W     -1:0] addrb;
+    output [              WORD_W        -1:0] doutb;
+
+    
+    //
+    // BRAM_TDP_MACRO
+    //
+    BRAM_TDP_MACRO #
+    (
+        .DEVICE                 ("7SERIES"),
+        .BRAM_SIZE              ("36Kb"),
+
+        .WRITE_WIDTH_A          (BUS_DATA_W),
+        .READ_WIDTH_A           (BUS_DATA_W),
+
+        .WRITE_WIDTH_B          (WORD_W),
+        .READ_WIDTH_B           (WORD_W),
+
+        .DOA_REG                (0),        // port A: no output register (REGCEA tied off below)
+        .DOB_REG                (1),        // port B: output register, clock-enabled by regceb
+
+        .WRITE_MODE_A           ("READ_FIRST"),
+        .WRITE_MODE_B           ("READ_FIRST"),
+
+        .SRVAL_A                (36'h000000000),
+        .SRVAL_B                (36'h000000000),
+
+        .INIT_A                 (36'h000000000),
+        .INIT_B                 (36'h000000000),
+        
+        .INIT_FILE              ("NONE"),
+        .SIM_COLLISION_CHECK    ("NONE")
+   )
+   BRAM_TDP_MACRO_inst
+   (
+      .RSTA     (1'b0),
+      .RSTB     (1'b0),
+
+      .CLKA     (clk_bus),
+      .ENA      (ena),
+      .REGCEA   (1'b0),
+      .WEA      ({4{wea}}),
+      .ADDRA    (addra),
+      .DIA      (dina),
+      .DOA      (douta),
+
+      .CLKB     (clk),
+      .ENB      (enb),
+      .REGCEB   (regceb),
+      .WEB      ({2{1'b0}}),                // port B never writes
+      .ADDRB    (addrb),
+      .DIB      ({WORD_W{1'b0}}),
+      .DOB      (doutb)
+   );
+    
+endmodule



More information about the Commits mailing list