[Cryptech-Commits] [core/math/modexpng] 31/92: Reworked storage architecture (moved I/O memory to a separate module, since there's only one instance of input/output values, while storage manager has dual storage space for P and Q multipliers).

git at cryptech.is git at cryptech.is
Sat Mar 14 18:19:10 UTC 2020


This is an automated email from the git hooks/post-receive script.

paul at psgd.org pushed a commit to branch master
in repository core/math/modexpng.

commit affada8d5da7426d22035360c3674ab3b3311ab5
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Thu Oct 3 16:40:25 2019 +0300

    Reworked storage architecture (moved I/O memory to a separate module, since there's
    only one instance of input/output values, while storage manager has dual storage space
    for P and Q multipliers).
    
    Started working on microcoded layer, added input operation and modular multiplication.
---
 rtl/modexpng_core_top.v                            | 1009 +++++++++++++++++++-
 rtl/modexpng_io_block.v                            |  158 +++
 rtl/modexpng_io_manager.v                          |  527 ++++++++++
 rtl/modexpng_microcode.vh                          |   54 ++
 rtl/modexpng_parameters.vh                         |   37 +-
 rtl/modexpng_recombinator_block.v                  |    6 +-
 rtl/modexpng_reductor.v                            |  139 ++-
 rtl/modexpng_sdp_36k_x16_x32_wrapper.v             |    6 +-
 rtl/modexpng_sdp_36k_x18_wrapper.v                 |   67 ++
 ...per.v => modexpng_sdp_36k_x18_wrapper_xilinx.v} |    2 +-
 rtl/modexpng_storage_block.v                       |  139 +--
 rtl/modexpng_storage_manager.v                     |   56 +-
 rtl/modexpng_tdp_36k_x16_x32_wrapper.v             |   18 +-
 rtl/modexpng_uop_rom.v                             |   37 +
 14 files changed, 2035 insertions(+), 220 deletions(-)

diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
index e834d37..eb6826c 100644
--- a/rtl/modexpng_core_top.v
+++ b/rtl/modexpng_core_top.v
@@ -3,7 +3,9 @@ module modexpng_core_top
     clk, clk_bus,
     rst,
     next, valid,
-    word_index_last,
+    crt_mode,
+    word_index_last_n,
+    word_index_last_pq,
     bus_cs,
     bus_we,
     bus_addr,
@@ -16,26 +18,1011 @@ module modexpng_core_top
     // Headers
     //
     `include "modexpng_parameters.vh"
+    `include "modexpng_microcode.vh"
 
     
     //
     // Ports
     //
-    input                                           clk;
-    input                                           clk_bus;
+    input                                         clk;
+    input                                         clk_bus;
     
-    input                                           rst;
+    input                                         rst;
     
-    input                                           next;
-    output                                          valid;
+    input                                         next;
+    output                                        valid;
+    
+    input                                         crt_mode;
+
+    input  [                  OP_ADDR_W     -1:0] word_index_last_n;
+    input  [                  OP_ADDR_W     -1:0] word_index_last_pq;
+
+    input                                         bus_cs;
+    input                                         bus_we;
+    input  [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr;
+    input  [                  BUS_DATA_W    -1:0] bus_data_wr;
+    output [                  BUS_DATA_W    -1:0] bus_data_rd;
+    
+    
+    //
+    // UOP_FSM
+    // Two-bit state encoding (IDLE, FETCH, DECODE, BUSY) and the current/next state registers.
+    localparam [1:0] UOP_FSM_STATE_IDLE     = 2'b00;
+    localparam [1:0] UOP_FSM_STATE_FETCH    = 2'b01;
+    localparam [1:0] UOP_FSM_STATE_DECODE   = 2'b10;
+    localparam [1:0] UOP_FSM_STATE_BUSY     = 2'b11;
+
+    reg [1:0] uop_fsm_state = UOP_FSM_STATE_IDLE;   // current state, initialised to IDLE
+    reg [1:0] uop_fsm_state_next;                   // next state (its driver is not in this section)
+
+
+    //
+    // UOP ROM
+    // The word read from the ROM (uop_data) is sliced MSB-first into: opcode, crt, npq, aux, ladder, then four bank selectors.
+    reg  [UOP_ADDR_W   -1:0] uop_addr;   // ROM read address
+    wire [UOP_W        -1:0] uop_data;   // word returned by the ROM for uop_addr
+    wire [UOP_OPCODE_W -1:0] uop_data_opcode         = uop_data[UOP_W                                                                             -1-: UOP_OPCODE_W];
+    wire [UOP_CRT_W    -1:0] uop_data_crt            = uop_data[UOP_W -UOP_OPCODE_W                                                               -1-: UOP_CRT_W   ];
+    wire [UOP_NPQ_W    -1:0] uop_data_npq            = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W                                                    -1-: UOP_NPQ_W   ];
+    wire [UOP_AUX_W    -1:0] uop_data_aux            = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W                                         -1-: UOP_AUX_W   ];
+    wire [UOP_LADDER_W -1:0] uop_data_ladder         = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W                              -1-: UOP_LADDER_W];
+    wire [BANK_ADDR_W  -1:0] uop_data_sel_wide_in    = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W                -1-: BANK_ADDR_W ];
+    wire [BANK_ADDR_W  -1:0] uop_data_sel_narrow_in  = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -1*BANK_ADDR_W -1-: BANK_ADDR_W ];
+    wire [BANK_ADDR_W  -1:0] uop_data_sel_wide_out   = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ];
+    wire [BANK_ADDR_W  -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ];
+    
+    wire uop_opcode_is_stop =  uop_data_opcode == UOP_OPCODE_STOP;
+    wire uop_opcode_is_io   = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE     ) ||   // any of the three input/output opcodes
+                              (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW   ) ||
+                              (uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW) ;
+    wire uop_opcode_is_mmm  =  uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY;
+
+    wire [UOP_ADDR_W -1:0] uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT;   // start address selected by crt_mode
+    wire [UOP_ADDR_W -1:0] uop_addr_next = uop_addr + 1'b1;   // address following the current one
+    
+    modexpng_uop_rom uop_rom
+    (
+        .clk    (clk),
+        .addr   (uop_addr),
+        .data   (uop_data)
+    );
+
+
+    //
+    // UOP ROM Address Logic
+    // uop_addr is written only on clock edges where the next FSM state is FETCH; otherwise it holds its value.
+    
+    always @(posedge clk)
+        // coming from IDLE: load the crt_mode-dependent start offset; from any other state: step to uop_addr + 1
+        if (uop_fsm_state_next == UOP_FSM_STATE_FETCH)
+            uop_addr <= (uop_fsm_state == UOP_FSM_STATE_IDLE) ? uop_addr_offset : uop_addr_next;
+            
+
+    //
+    // Storage Interfaces (X, Y)
+    // wr_*: storage manager <-> storage block; rd_*: multiplier <-> storage block; ext_*: IO manager <-> storage manager.
+    wire                    wr_wide_xy_ena_x;   // nets ending in _x are wired to the *_x instances
+    wire [BANK_ADDR_W -1:0] wr_wide_xy_bank_x;
+    wire [  OP_ADDR_W -1:0] wr_wide_xy_addr_x;
+    wire [ WORD_EXT_W -1:0] wr_wide_x_din_x;
+    wire [ WORD_EXT_W -1:0] wr_wide_y_din_x;
+
+    wire                    wr_narrow_xy_ena_x;
+    wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank_x;
+    wire [  OP_ADDR_W -1:0] wr_narrow_xy_addr_x;
+    wire [ WORD_EXT_W -1:0] wr_narrow_x_din_x;
+    wire [ WORD_EXT_W -1:0] wr_narrow_y_din_x;
+
+    wire                                     rd_wide_xy_ena_x;
+    wire                                     rd_wide_xy_ena_aux_x;
+    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_x;
+    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_x;   // aux bank/addr/dout nets are also wired to reductor_x
+    wire [NUM_MULTS_HALF * OP_ADDR_W   -1:0] rd_wide_xy_addr_x;
+    wire [                 OP_ADDR_W   -1:0] rd_wide_xy_addr_aux_x;
+    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_x_dout_x;
+    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_y_dout_x;
+    wire [                 WORD_EXT_W  -1:0] rd_wide_x_dout_aux_x;
+    wire [                 WORD_EXT_W  -1:0] rd_wide_y_dout_aux_x;
+    
+    wire                                     rd_narrow_xy_ena_x;
+    wire [                 BANK_ADDR_W -1:0] rd_narrow_xy_bank_x;
+    wire [                 OP_ADDR_W   -1:0] rd_narrow_xy_addr_x;
+    wire [                 WORD_EXT_W  -1:0] rd_narrow_x_dout_x;
+    wire [                 WORD_EXT_W  -1:0] rd_narrow_y_dout_x;
+    
+    wire                    ext_wide_xy_ena_x;
+    wire [BANK_ADDR_W -1:0] ext_wide_xy_bank_x;
+    wire [  OP_ADDR_W -1:0] ext_wide_xy_addr_x;
+    wire [ WORD_EXT_W -1:0] ext_wide_x_din_x;
+    wire [ WORD_EXT_W -1:0] ext_wide_y_din_x;
+
+    wire                    ext_narrow_xy_ena_x;
+    wire [BANK_ADDR_W -1:0] ext_narrow_xy_bank_x;
+    wire [  OP_ADDR_W -1:0] ext_narrow_xy_addr_x;
+    wire [ WORD_EXT_W -1:0] ext_narrow_x_din_x;
+    wire [ WORD_EXT_W -1:0] ext_narrow_y_din_x;
+
+    wire                    wr_wide_xy_ena_y;   // nets ending in _y are wired to the *_y instances
+    wire [BANK_ADDR_W -1:0] wr_wide_xy_bank_y;
+    wire [  OP_ADDR_W -1:0] wr_wide_xy_addr_y;
+    wire [ WORD_EXT_W -1:0] wr_wide_x_din_y;
+    wire [ WORD_EXT_W -1:0] wr_wide_y_din_y;
+
+    wire                    wr_narrow_xy_ena_y;
+    wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank_y;
+    wire [  OP_ADDR_W -1:0] wr_narrow_xy_addr_y;
+    wire [ WORD_EXT_W -1:0] wr_narrow_x_din_y;
+    wire [ WORD_EXT_W -1:0] wr_narrow_y_din_y;
+
+    wire                                     rd_wide_xy_ena_y;
+    wire                                     rd_wide_xy_ena_aux_y;
+    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_y;
+    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_y;
+    wire [NUM_MULTS_HALF * OP_ADDR_W   -1:0] rd_wide_xy_addr_y;
+    wire [                 OP_ADDR_W   -1:0] rd_wide_xy_addr_aux_y;
+    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_x_dout_y;
+    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_y_dout_y;
+    wire [                 WORD_EXT_W  -1:0] rd_wide_x_dout_aux_y;
+    wire [                 WORD_EXT_W  -1:0] rd_wide_y_dout_aux_y;
+    
+    wire                                     rd_narrow_xy_ena_y;
+    wire [                 BANK_ADDR_W -1:0] rd_narrow_xy_bank_y;
+    wire [                 OP_ADDR_W   -1:0] rd_narrow_xy_addr_y;
+    wire [                 WORD_EXT_W  -1:0] rd_narrow_x_dout_y;
+    wire [                 WORD_EXT_W  -1:0] rd_narrow_y_dout_y;
+    
+    wire                    ext_wide_xy_ena_y;
+    wire [BANK_ADDR_W -1:0] ext_wide_xy_bank_y;
+    wire [  OP_ADDR_W -1:0] ext_wide_xy_addr_y;
+    wire [ WORD_EXT_W -1:0] ext_wide_x_din_y;
+    wire [ WORD_EXT_W -1:0] ext_wide_y_din_y;
+
+    wire                    ext_narrow_xy_ena_y;
+    wire [BANK_ADDR_W -1:0] ext_narrow_xy_bank_y;
+    wire [  OP_ADDR_W -1:0] ext_narrow_xy_addr_y;
+    wire [ WORD_EXT_W -1:0] ext_narrow_x_din_y;
+    wire [ WORD_EXT_W -1:0] ext_narrow_y_din_y;
+
+
+    //
+    // Recombinator Interfaces (X, Y)
+    // rcmb_wide_* / rcmb_narrow_*: multiplier <-> storage manager; rcmb_final_*: multiplier <-> reductor.
+    wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank_x;
+    wire [  OP_ADDR_W -1:0] rcmb_wide_xy_addr_x;
+    wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout_x;
+    wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout_x;
+    wire                    rcmb_wide_xy_valid_x;
+
+    wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank_x;
+    wire [  OP_ADDR_W -1:0] rcmb_narrow_xy_addr_x;
+    wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout_x;
+    wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout_x;
+    wire                    rcmb_narrow_xy_valid_x;
+
+    wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank_x;
+    wire [  OP_ADDR_W -1:0] rcmb_final_xy_addr_x;
+    wire [ WORD_EXT_W -1:0] rcmb_final_x_dout_x;
+    wire [ WORD_EXT_W -1:0] rcmb_final_y_dout_x;
+    wire                    rcmb_final_xy_valid_x;
 
-    input                                           bus_cs;
-    input                                           bus_we;
-    input  [4 * (BANK_ADDR_W + BUS_OP_ADDR_W) -1:0] bus_addr;
-    input  [                   BUS_DATA_W     -1:0] bus_data_wr;
-    output [                   BUS_DATA_W     -1:0] bus_data_rd;
+    wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank_y;   // Y-path recombinator nets: same three groups (wide, narrow, final) as the X path
+    wire [  OP_ADDR_W -1:0] rcmb_wide_xy_addr_y;
+    wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout_y;
+    wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout_y;
+    wire                    rcmb_wide_xy_valid_y;
+
+    wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank_y;
+    wire [  OP_ADDR_W -1:0] rcmb_narrow_xy_addr_y;
+    wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout_y;
+    wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout_y;
+    wire                    rcmb_narrow_xy_valid_y;
+
+    wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank_y;   // rcmb_final_*_y nets are wired to mmm_y (its rcmb_xy_* ports)
+    wire [  OP_ADDR_W -1:0] rcmb_final_xy_addr_y;
+    wire [ WORD_EXT_W -1:0] rcmb_final_x_dout_y;
+    wire [ WORD_EXT_W -1:0] rcmb_final_y_dout_y;
+    wire                    rcmb_final_xy_valid_y;
+    
+    
+    //
+    // Reductor Interfaces (X, Y)
+    // rdct_wide_* / rdct_narrow_*: reductor <-> storage manager, one set per path (_x, _y).
+    wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank_x;
+    wire [  OP_ADDR_W -1:0] rdct_wide_xy_addr_x;
+    wire [ WORD_EXT_W -1:0] rdct_wide_x_dout_x;
+    wire [ WORD_EXT_W -1:0] rdct_wide_y_dout_x;
+    wire                    rdct_wide_xy_valid_x;
+
+    wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank_x;
+    wire [  OP_ADDR_W -1:0] rdct_narrow_xy_addr_x;
+    wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout_x;
+    wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout_x;
+    wire                    rdct_narrow_xy_valid_x;
+
+    wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank_y;
+    wire [  OP_ADDR_W -1:0] rdct_wide_xy_addr_y;
+    wire [ WORD_EXT_W -1:0] rdct_wide_x_dout_y;
+    wire [ WORD_EXT_W -1:0] rdct_wide_y_dout_y;
+    wire                    rdct_wide_xy_valid_y;
+
+    wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank_y;
+    wire [  OP_ADDR_W -1:0] rdct_narrow_xy_addr_y;
+    wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout_y;
+    wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout_y;
+    wire                    rdct_narrow_xy_valid_y;
+
+
+    //
+    // Storage Blocks (X, Y)
+    // One storage block per path; its wr_* ports share nets with the matching storage manager, its rd_* ports with the matching multiplier.
+    modexpng_storage_block storage_block_x
+    (
+        .clk                    (clk),
+        .rst                    (rst),
+
+        .wr_wide_xy_ena         (wr_wide_xy_ena_x),
+        .wr_wide_xy_bank        (wr_wide_xy_bank_x),
+        .wr_wide_xy_addr        (wr_wide_xy_addr_x),
+        .wr_wide_x_din          (wr_wide_x_din_x),
+        .wr_wide_y_din          (wr_wide_y_din_x),
+
+        .wr_narrow_xy_ena       (wr_narrow_xy_ena_x),
+        .wr_narrow_xy_bank      (wr_narrow_xy_bank_x),
+        .wr_narrow_xy_addr      (wr_narrow_xy_addr_x),
+        .wr_narrow_x_din        (wr_narrow_x_din_x),
+        .wr_narrow_y_din        (wr_narrow_y_din_x),
+
+        .rd_wide_xy_ena         (rd_wide_xy_ena_x),
+        .rd_wide_xy_ena_aux     (rd_wide_xy_ena_aux_x),
+        .rd_wide_xy_bank        (rd_wide_xy_bank_x),
+        .rd_wide_xy_bank_aux    (rd_wide_xy_bank_aux_x),   // aux bank/addr/dout nets are also connected to reductor_x
+        .rd_wide_xy_addr        (rd_wide_xy_addr_x),
+        .rd_wide_xy_addr_aux    (rd_wide_xy_addr_aux_x),
+        .rd_wide_x_dout         (rd_wide_x_dout_x),
+        .rd_wide_y_dout         (rd_wide_y_dout_x),
+        .rd_wide_x_dout_aux     (rd_wide_x_dout_aux_x),
+        .rd_wide_y_dout_aux     (rd_wide_y_dout_aux_x),
+
+        .rd_narrow_xy_ena       (rd_narrow_xy_ena_x),
+        .rd_narrow_xy_bank      (rd_narrow_xy_bank_x),
+        .rd_narrow_xy_addr      (rd_narrow_xy_addr_x),
+        .rd_narrow_x_dout       (rd_narrow_x_dout_x),
+        .rd_narrow_y_dout       (rd_narrow_y_dout_x)
+    );
+
+    modexpng_storage_block storage_block_y   // Y-path instance: same port map as storage_block_x, wired to the *_y nets
+    (
+        .clk                    (clk),
+        .rst                    (rst),
+
+        .wr_wide_xy_ena         (wr_wide_xy_ena_y),
+        .wr_wide_xy_bank        (wr_wide_xy_bank_y),
+        .wr_wide_xy_addr        (wr_wide_xy_addr_y),
+        .wr_wide_x_din          (wr_wide_x_din_y),
+        .wr_wide_y_din          (wr_wide_y_din_y),
+
+        .wr_narrow_xy_ena       (wr_narrow_xy_ena_y),
+        .wr_narrow_xy_bank      (wr_narrow_xy_bank_y),
+        .wr_narrow_xy_addr      (wr_narrow_xy_addr_y),
+        .wr_narrow_x_din        (wr_narrow_x_din_y),
+        .wr_narrow_y_din        (wr_narrow_y_din_y),
+
+        .rd_wide_xy_ena         (rd_wide_xy_ena_y),
+        .rd_wide_xy_ena_aux     (rd_wide_xy_ena_aux_y),
+        .rd_wide_xy_bank        (rd_wide_xy_bank_y),
+        .rd_wide_xy_bank_aux    (rd_wide_xy_bank_aux_y),
+        .rd_wide_xy_addr        (rd_wide_xy_addr_y),
+        .rd_wide_xy_addr_aux    (rd_wide_xy_addr_aux_y),
+        .rd_wide_x_dout         (rd_wide_x_dout_y),
+        .rd_wide_y_dout         (rd_wide_y_dout_y),
+        .rd_wide_x_dout_aux     (rd_wide_x_dout_aux_y),
+        .rd_wide_y_dout_aux     (rd_wide_y_dout_aux_y),
+
+        .rd_narrow_xy_ena       (rd_narrow_xy_ena_y),
+        .rd_narrow_xy_bank      (rd_narrow_xy_bank_y),
+        .rd_narrow_xy_addr      (rd_narrow_xy_addr_y),
+        .rd_narrow_x_dout       (rd_narrow_x_dout_y),
+        .rd_narrow_y_dout       (rd_narrow_y_dout_y)
+    );
+
+    
+    //
+    // Storage Managers (X, Y)
+    // Each manager is wired to four net groups: wr_* (storage block), ext_* (IO manager), rcmb_* (multiplier) and rdct_* (reductor).
+    modexpng_storage_manager storage_manager_x
+    (
+        .clk                    (clk),
+        .rst                    (rst),
+        
+        .wr_wide_xy_ena         (wr_wide_xy_ena_x),
+        .wr_wide_xy_bank        (wr_wide_xy_bank_x),
+        .wr_wide_xy_addr        (wr_wide_xy_addr_x),
+        .wr_wide_x_din          (wr_wide_x_din_x),
+        .wr_wide_y_din          (wr_wide_y_din_x),
+    
+        .wr_narrow_xy_ena       (wr_narrow_xy_ena_x),
+        .wr_narrow_xy_bank      (wr_narrow_xy_bank_x),
+        .wr_narrow_xy_addr      (wr_narrow_xy_addr_x),
+        .wr_narrow_x_din        (wr_narrow_x_din_x),
+        .wr_narrow_y_din        (wr_narrow_y_din_x),
+        
+        .ext_wide_xy_ena        (ext_wide_xy_ena_x),
+        .ext_wide_xy_bank       (ext_wide_xy_bank_x),
+        .ext_wide_xy_addr       (ext_wide_xy_addr_x),
+        .ext_wide_x_din         (ext_wide_x_din_x),
+        .ext_wide_y_din         (ext_wide_y_din_x),
+    
+        .ext_narrow_xy_ena      (ext_narrow_xy_ena_x),
+        .ext_narrow_xy_bank     (ext_narrow_xy_bank_x),
+        .ext_narrow_xy_addr     (ext_narrow_xy_addr_x),
+        .ext_narrow_x_din       (ext_narrow_x_din_x),
+        .ext_narrow_y_din       (ext_narrow_y_din_x),
+        
+        .rcmb_wide_xy_bank      (rcmb_wide_xy_bank_x),
+        .rcmb_wide_xy_addr      (rcmb_wide_xy_addr_x),
+        .rcmb_wide_x_din        (rcmb_wide_x_dout_x),
+        .rcmb_wide_y_din        (rcmb_wide_y_dout_x),
+        .rcmb_wide_xy_ena       (rcmb_wide_xy_valid_x),   // NOTE(review): rcmb ports are named *_ena here while rdct ports below are *_valid
+
+        .rcmb_narrow_xy_bank    (rcmb_narrow_xy_bank_x),
+        .rcmb_narrow_xy_addr    (rcmb_narrow_xy_addr_x),
+        .rcmb_narrow_x_din      (rcmb_narrow_x_dout_x),
+        .rcmb_narrow_y_din      (rcmb_narrow_y_dout_x),
+        .rcmb_narrow_xy_ena     (rcmb_narrow_xy_valid_x),
+        
+        .rdct_wide_xy_bank      (rdct_wide_xy_bank_x),
+        .rdct_wide_xy_addr      (rdct_wide_xy_addr_x),
+        .rdct_wide_x_din        (rdct_wide_x_dout_x),   // TODO: maybe just rename to {x|y}_x, since that's an
+        .rdct_wide_y_din        (rdct_wide_y_dout_x),   // internal signal??
+        .rdct_wide_xy_valid     (rdct_wide_xy_valid_x),
+
+        .rdct_narrow_xy_bank    (rdct_narrow_xy_bank_x),
+        .rdct_narrow_xy_addr    (rdct_narrow_xy_addr_x),
+        .rdct_narrow_x_din      (rdct_narrow_x_dout_x),
+        .rdct_narrow_y_din      (rdct_narrow_y_dout_x),
+        .rdct_narrow_xy_valid   (rdct_narrow_xy_valid_x)
+    );
+
+    modexpng_storage_manager storage_manager_y   // Y-path instance: same port map as storage_manager_x, wired to the *_y nets
+    (
+        .clk                    (clk),
+        .rst                    (rst),
+        
+        .wr_wide_xy_ena         (wr_wide_xy_ena_y),
+        .wr_wide_xy_bank        (wr_wide_xy_bank_y),
+        .wr_wide_xy_addr        (wr_wide_xy_addr_y),
+        .wr_wide_x_din          (wr_wide_x_din_y),
+        .wr_wide_y_din          (wr_wide_y_din_y),
+    
+        .wr_narrow_xy_ena       (wr_narrow_xy_ena_y),
+        .wr_narrow_xy_bank      (wr_narrow_xy_bank_y),
+        .wr_narrow_xy_addr      (wr_narrow_xy_addr_y),
+        .wr_narrow_x_din        (wr_narrow_x_din_y),
+        .wr_narrow_y_din        (wr_narrow_y_din_y),
+        
+        .ext_wide_xy_ena        (ext_wide_xy_ena_y),
+        .ext_wide_xy_bank       (ext_wide_xy_bank_y),
+        .ext_wide_xy_addr       (ext_wide_xy_addr_y),
+        .ext_wide_x_din         (ext_wide_x_din_y),
+        .ext_wide_y_din         (ext_wide_y_din_y),
+    
+        .ext_narrow_xy_ena      (ext_narrow_xy_ena_y),
+        .ext_narrow_xy_bank     (ext_narrow_xy_bank_y),
+        .ext_narrow_xy_addr     (ext_narrow_xy_addr_y),
+        .ext_narrow_x_din       (ext_narrow_x_din_y),
+        .ext_narrow_y_din       (ext_narrow_y_din_y),
+        
+        .rcmb_wide_xy_bank      (rcmb_wide_xy_bank_y),
+        .rcmb_wide_xy_addr      (rcmb_wide_xy_addr_y),
+        .rcmb_wide_x_din        (rcmb_wide_x_dout_y),
+        .rcmb_wide_y_din        (rcmb_wide_y_dout_y),
+        .rcmb_wide_xy_ena       (rcmb_wide_xy_valid_y),
+
+        .rcmb_narrow_xy_bank    (rcmb_narrow_xy_bank_y),
+        .rcmb_narrow_xy_addr    (rcmb_narrow_xy_addr_y),
+        .rcmb_narrow_x_din      (rcmb_narrow_x_dout_y),
+        .rcmb_narrow_y_din      (rcmb_narrow_y_dout_y),
+        .rcmb_narrow_xy_ena     (rcmb_narrow_xy_valid_y),
+        
+        .rdct_wide_xy_bank      (rdct_wide_xy_bank_y),
+        .rdct_wide_xy_addr      (rdct_wide_xy_addr_y),
+        .rdct_wide_x_din        (rdct_wide_x_dout_y),
+        .rdct_wide_y_din        (rdct_wide_y_dout_y),
+        .rdct_wide_xy_valid     (rdct_wide_xy_valid_y),
+
+        .rdct_narrow_xy_bank    (rdct_narrow_xy_bank_y),
+        .rdct_narrow_xy_addr    (rdct_narrow_xy_addr_y),
+        .rdct_narrow_x_din      (rdct_narrow_x_dout_y),
+        .rdct_narrow_y_din      (rdct_narrow_y_dout_y),
+        .rdct_narrow_xy_valid   (rdct_narrow_xy_valid_y)
+
+    );
+
+
+    //
+    // IO Block
+    // io_in_1_* / io_in_2_* / io_out_* nets join io_block to io_manager; the bus_* ports map directly onto this module's bus pins.
+    wire                                io_in_1_en;
+    wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr;
+    wire [              WORD_W    -1:0] io_in_1_dout;
     
+    wire                                io_in_2_en;
+    wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr;
+    wire [              WORD_W    -1:0] io_in_2_dout;
     
+    wire                                io_out_en;
+    wire                                io_out_we;
+    wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr;
+    wire [              WORD_W    -1:0] io_out_din;
+    
+    // TODO: Separate reset for clock domains (core/bus)???
+    
+    modexpng_io_block io_block
+    (
+        .clk            (clk),       // io_block is the only instance in this section that receives both clk and clk_bus
+        .clk_bus        (clk_bus),
+        
+        .rst            (rst),       // a single rst is used for both clocks (see TODO above)
+        
+        .bus_cs         (bus_cs),
+        .bus_we         (bus_we),
+        .bus_addr       (bus_addr),
+        .bus_data_wr    (bus_data_wr),
+        .bus_data_rd    (bus_data_rd),
+    
+        .in_1_en        (io_in_1_en),
+        .in_1_addr      (io_in_1_addr),
+        .in_1_dout      (io_in_1_dout),
+    
+        .in_2_en        (io_in_2_en),
+        .in_2_addr      (io_in_2_addr),
+        .in_2_dout      (io_in_2_dout),
+    
+        .out_en         (io_out_en),
+        .out_we         (io_out_we),
+        .out_addr       (io_out_addr),
+        .out_din        (io_out_din)
+    );
+
+
+    //
+    // IO Manager
+    // The io_mgr_* registers feed io_manager, which is wired to io_block (io_* nets) and to both storage managers (ext_*_x / ext_*_y nets).
+    reg                      io_mgr_ena = 1'b0;   // initialised low
+    wire                     io_mgr_rdy;
+    reg  [UOP_CRT_W    -1:0] io_mgr_sel_crt;
+    reg  [UOP_AUX_W    -1:0] io_mgr_sel_aux;
+    reg  [BANK_ADDR_W  -1:0] io_mgr_sel_in;
+    reg  [BANK_ADDR_W  -1:0] io_mgr_sel_out;
+    reg  [OP_ADDR_W    -1:0] io_mgr_word_index_last;
+    reg  [UOP_OPCODE_W -1:0] io_mgr_opcode;
+    
+    modexpng_io_manager io_manager
+    (
+        .clk                    (clk),
+        .rst                    (rst),
+    
+        .ena                    (io_mgr_ena),
+        .rdy                    (io_mgr_rdy),
+    
+        .sel_crt                (io_mgr_sel_crt),
+        .sel_aux                (io_mgr_sel_aux),
+        .sel_in                 (io_mgr_sel_in),
+        .sel_out                (io_mgr_sel_out),
+        
+        .opcode                 (io_mgr_opcode),
+        
+        .word_index_last        (io_mgr_word_index_last),
+    
+        .ext_wide_xy_ena_x      (ext_wide_xy_ena_x),       // ext_*_x nets go to storage_manager_x
+        .ext_wide_xy_bank_x     (ext_wide_xy_bank_x),
+        .ext_wide_xy_addr_x     (ext_wide_xy_addr_x),
+        .ext_wide_x_din_x       (ext_wide_x_din_x),
+        .ext_wide_y_din_x       (ext_wide_y_din_x),
+
+        .ext_narrow_xy_ena_x    (ext_narrow_xy_ena_x),
+        .ext_narrow_xy_bank_x   (ext_narrow_xy_bank_x),
+        .ext_narrow_xy_addr_x   (ext_narrow_xy_addr_x),
+        .ext_narrow_x_din_x     (ext_narrow_x_din_x),
+        .ext_narrow_y_din_x     (ext_narrow_y_din_x),
+
+        .ext_wide_xy_ena_y      (ext_wide_xy_ena_y),       // ext_*_y nets go to storage_manager_y
+        .ext_wide_xy_bank_y     (ext_wide_xy_bank_y),
+        .ext_wide_xy_addr_y     (ext_wide_xy_addr_y),
+        .ext_wide_x_din_y       (ext_wide_x_din_y),
+        .ext_wide_y_din_y       (ext_wide_y_din_y),
+
+        .ext_narrow_xy_ena_y    (ext_narrow_xy_ena_y),
+        .ext_narrow_xy_bank_y   (ext_narrow_xy_bank_y),
+        .ext_narrow_xy_addr_y   (ext_narrow_xy_addr_y),
+        .ext_narrow_x_din_y     (ext_narrow_x_din_y),
+        .ext_narrow_y_din_y     (ext_narrow_y_din_y),
+    
+        .io_in_1_en             (io_in_1_en),              // io_* nets go to io_block
+        .io_in_1_addr           (io_in_1_addr),
+        .io_in_1_dout           (io_in_1_dout),
+    
+        .io_in_2_en             (io_in_2_en),
+        .io_in_2_addr           (io_in_2_addr),
+        .io_in_2_dout           (io_in_2_dout),
+    
+        .io_out_en              (io_out_en),
+        .io_out_we              (io_out_we),
+        .io_out_addr            (io_out_addr),
+        .io_out_din             (io_out_din)
+    );
+
+
+    //
+    // Multipliers (X, Y)
+    // Per-path control and status signals for the two modexpng_mmm_dual instances (mmm_x, mmm_y).
+    reg                     mmm_ena_x = 1'b0;
+    reg                     mmm_ena_y = 1'b0;
+    wire                    mmm_ena = mmm_ena_x & mmm_ena_y;  // high only when both per-path enables are high
+    
+    wire                    mmm_rdy_x;
+    wire                    mmm_rdy_y;
+    wire                    mmm_rdy = mmm_rdy_x & mmm_rdy_y; // high only when both multipliers report rdy
+    
+    reg  [OP_ADDR_W   -1:0] mmm_word_index_last_x;
+    reg  [OP_ADDR_W   -1:0] mmm_word_index_last_y;
+
+    reg  [OP_ADDR_W   -1:0] mmm_word_index_last_minus1_x;
+    reg  [OP_ADDR_W   -1:0] mmm_word_index_last_minus1_y;
+
+    reg                     mmm_ladder_mode_x;
+    reg                     mmm_ladder_mode_y;
+    
+    reg  [BANK_ADDR_W -1:0] mmm_sel_wide_in_x;
+    reg  [BANK_ADDR_W -1:0] mmm_sel_wide_in_y;
+    reg  [BANK_ADDR_W -1:0] mmm_sel_narrow_in_x;
+    reg  [BANK_ADDR_W -1:0] mmm_sel_narrow_in_y;
+    
+    wire                    rdct_ena_x;   // rdct_ena_* / rdct_rdy_* nets run between each multiplier and its reductor
+    wire                    rdct_ena_y;
+    wire                    rdct_rdy_x;
+    wire                    rdct_rdy_y;
+    
+    modexpng_mmm_dual mmm_x   // X-path multiplier: wired to storage_block_x (rd_*), storage_manager_x (rcmb_wide/narrow) and reductor_x
+    (
+        .clk                        (clk),
+        .rst                        (rst),
+        
+        .ena                        (mmm_ena_x),
+        .rdy                        (mmm_rdy_x),
+        
+        .ladder_mode                (mmm_ladder_mode_x),
+        .word_index_last            (mmm_word_index_last_x),
+        .word_index_last_minus1     (mmm_word_index_last_minus1_x),
+        
+        .sel_wide_in                (mmm_sel_wide_in_x),
+        .sel_narrow_in              (mmm_sel_narrow_in_x),
+        
+        .rd_wide_xy_ena             (rd_wide_xy_ena_x),
+        .rd_wide_xy_ena_aux         (rd_wide_xy_ena_aux_x),
+        .rd_wide_xy_bank            (rd_wide_xy_bank_x),
+        .rd_wide_xy_bank_aux        (rd_wide_xy_bank_aux_x),
+        .rd_wide_xy_addr            (rd_wide_xy_addr_x),
+        .rd_wide_xy_addr_aux        (rd_wide_xy_addr_aux_x),
+        .rd_wide_x_dout             (rd_wide_x_dout_x),
+        .rd_wide_y_dout             (rd_wide_y_dout_x),
+        .rd_wide_x_dout_aux         (rd_wide_x_dout_aux_x),
+        .rd_wide_y_dout_aux         (rd_wide_y_dout_aux_x),
+
+        .rd_narrow_xy_ena           (rd_narrow_xy_ena_x),
+        .rd_narrow_xy_bank          (rd_narrow_xy_bank_x),
+        .rd_narrow_xy_addr          (rd_narrow_xy_addr_x),
+        .rd_narrow_x_dout           (rd_narrow_x_dout_x),
+        .rd_narrow_y_dout           (rd_narrow_y_dout_x),
+        
+        .rcmb_wide_xy_bank          (rcmb_wide_xy_bank_x),
+        .rcmb_wide_xy_addr          (rcmb_wide_xy_addr_x),
+        .rcmb_wide_x_dout           (rcmb_wide_x_dout_x),
+        .rcmb_wide_y_dout           (rcmb_wide_y_dout_x),
+        .rcmb_wide_xy_valid         (rcmb_wide_xy_valid_x),
+
+        .rcmb_narrow_xy_bank        (rcmb_narrow_xy_bank_x),
+        .rcmb_narrow_xy_addr        (rcmb_narrow_xy_addr_x),
+        .rcmb_narrow_x_dout         (rcmb_narrow_x_dout_x),
+        .rcmb_narrow_y_dout         (rcmb_narrow_y_dout_x),
+        .rcmb_narrow_xy_valid       (rcmb_narrow_xy_valid_x),
+        
+        .rcmb_xy_bank               (rcmb_final_xy_bank_x),     // unprefixed rcmb_xy_* ports carry the rcmb_final_*_x nets shared with reductor_x
+        .rcmb_xy_addr               (rcmb_final_xy_addr_x),
+        .rcmb_x_dout                (rcmb_final_x_dout_x),
+        .rcmb_y_dout                (rcmb_final_y_dout_x),
+        .rcmb_xy_valid              (rcmb_final_xy_valid_x),
+        
+        .rdct_ena                   (rdct_ena_x),               // same nets as reductor_x's ena / rdy ports
+        .rdct_rdy                   (rdct_rdy_x)
+    );
+
+    modexpng_mmm_dual mmm_y   // Y-path multiplier: same port map as mmm_x, wired to the *_y nets
+    (
+        .clk                        (clk),
+        .rst                        (rst),
+        
+        .ena                        (mmm_ena_y),
+        .rdy                        (mmm_rdy_y),
+        
+        .ladder_mode                (mmm_ladder_mode_y),
+        .word_index_last            (mmm_word_index_last_y),
+        .word_index_last_minus1     (mmm_word_index_last_minus1_y),
+        
+        .sel_wide_in                (mmm_sel_wide_in_y),
+        .sel_narrow_in              (mmm_sel_narrow_in_y),
+        
+        .rd_wide_xy_ena             (rd_wide_xy_ena_y),
+        .rd_wide_xy_ena_aux         (rd_wide_xy_ena_aux_y),
+        .rd_wide_xy_bank            (rd_wide_xy_bank_y),
+        .rd_wide_xy_bank_aux        (rd_wide_xy_bank_aux_y),
+        .rd_wide_xy_addr            (rd_wide_xy_addr_y),
+        .rd_wide_xy_addr_aux        (rd_wide_xy_addr_aux_y),
+        .rd_wide_x_dout             (rd_wide_x_dout_y),
+        .rd_wide_y_dout             (rd_wide_y_dout_y),
+        .rd_wide_x_dout_aux         (rd_wide_x_dout_aux_y),
+        .rd_wide_y_dout_aux         (rd_wide_y_dout_aux_y),
+
+        .rd_narrow_xy_ena           (rd_narrow_xy_ena_y),
+        .rd_narrow_xy_bank          (rd_narrow_xy_bank_y),
+        .rd_narrow_xy_addr          (rd_narrow_xy_addr_y),
+        .rd_narrow_x_dout           (rd_narrow_x_dout_y),
+        .rd_narrow_y_dout           (rd_narrow_y_dout_y),
+        
+        .rcmb_wide_xy_bank          (rcmb_wide_xy_bank_y),
+        .rcmb_wide_xy_addr          (rcmb_wide_xy_addr_y),
+        .rcmb_wide_x_dout           (rcmb_wide_x_dout_y),
+        .rcmb_wide_y_dout           (rcmb_wide_y_dout_y),
+        .rcmb_wide_xy_valid         (rcmb_wide_xy_valid_y),
+
+        .rcmb_narrow_xy_bank        (rcmb_narrow_xy_bank_y),
+        .rcmb_narrow_xy_addr        (rcmb_narrow_xy_addr_y),
+        .rcmb_narrow_x_dout         (rcmb_narrow_x_dout_y),
+        .rcmb_narrow_y_dout         (rcmb_narrow_y_dout_y),
+        .rcmb_narrow_xy_valid       (rcmb_narrow_xy_valid_y),
+        
+        .rcmb_xy_bank               (rcmb_final_xy_bank_y),     // unprefixed rcmb_xy_* ports carry the rcmb_final_*_y nets
+        .rcmb_xy_addr               (rcmb_final_xy_addr_y),
+        .rcmb_x_dout                (rcmb_final_x_dout_y),
+        .rcmb_y_dout                (rcmb_final_y_dout_y),
+        .rcmb_xy_valid              (rcmb_final_xy_valid_y),
+        
+        .rdct_ena                   (rdct_ena_y),               // same nets as reductor_y's ena / rdy ports
+        .rdct_rdy                   (rdct_rdy_y)
+    );
+
+    //
+    // Reductors (X, Y)
+    // Per-path reductor settings: last word index plus the wide/narrow output bank selectors.
+    reg [  OP_ADDR_W -1:0] rdct_word_index_last_x;
+    reg [  OP_ADDR_W -1:0] rdct_word_index_last_y;
+    
+    reg [BANK_ADDR_W -1:0] rdct_sel_wide_out_x;
+    reg [BANK_ADDR_W -1:0] rdct_sel_narrow_out_x;
+
+    reg [BANK_ADDR_W -1:0] rdct_sel_wide_out_y;
+    reg [BANK_ADDR_W -1:0] rdct_sel_narrow_out_y;
+        
+    // X-side reductor: every connection below uses the *_x flavour of a net.
+    modexpng_reductor reductor_x
+    (
+        .clk                    (clk),
+        .rst                    (rst),
+        
+        // enable / ready handshake nets
+        .ena                    (rdct_ena_x),
+        .rdy                    (rdct_rdy_x),
+        
+        // operand length, registered in DECODE for modular multiplication
+        .word_index_last        (rdct_word_index_last_x),
+        
+        // bank selectors, registered in DECODE from the uOP's
+        // sel_wide_out / sel_narrow_out fields
+        .sel_wide_out           (rdct_sel_wide_out_x),
+        .sel_narrow_out         (rdct_sel_narrow_out_x),
+
+        // nets of the auxiliary wide read port (address/bank and X/Y data)
+        .rd_wide_xy_addr_aux    (rd_wide_xy_addr_aux_x),
+        .rd_wide_xy_bank_aux    (rd_wide_xy_bank_aux_x),
+        .rd_wide_x_dout_aux     (rd_wide_x_dout_aux_x),
+        .rd_wide_y_dout_aux     (rd_wide_y_dout_aux_x),
+        
+        // rcmb_final_* nets (presumably the recombinator's final output
+        // stream -- TODO confirm against modexpng_reductor)
+        .rcmb_final_xy_bank     (rcmb_final_xy_bank_x),
+        .rcmb_final_xy_addr     (rcmb_final_xy_addr_x),
+        .rcmb_final_x_dout      (rcmb_final_x_dout_x),
+        .rcmb_final_y_dout      (rcmb_final_y_dout_x),
+        .rcmb_final_xy_valid    (rcmb_final_xy_valid_x),
+        
+        // rdct_wide_* nets (presumably the reductor's result for the wide
+        // storage -- TODO confirm)
+        .rdct_wide_xy_bank      (rdct_wide_xy_bank_x),
+        .rdct_wide_xy_addr      (rdct_wide_xy_addr_x),
+        .rdct_wide_x_dout       (rdct_wide_x_dout_x),
+        .rdct_wide_y_dout       (rdct_wide_y_dout_x),
+        .rdct_wide_xy_valid     (rdct_wide_xy_valid_x),
+        
+        // rdct_narrow_* nets (presumably the reductor's result for the
+        // narrow storage -- TODO confirm)
+        .rdct_narrow_xy_bank    (rdct_narrow_xy_bank_x),
+        .rdct_narrow_xy_addr    (rdct_narrow_xy_addr_x),
+        .rdct_narrow_x_dout     (rdct_narrow_x_dout_x),
+        .rdct_narrow_y_dout     (rdct_narrow_y_dout_x),
+        .rdct_narrow_xy_valid   (rdct_narrow_xy_valid_x)
+    );
+
+    // Y-side reductor: same port map as reductor_x, using the *_y nets.
+    modexpng_reductor reductor_y
+    (
+        .clk                    (clk),
+        .rst                    (rst),
+        
+        // enable / ready handshake nets
+        .ena                    (rdct_ena_y),
+        .rdy                    (rdct_rdy_y),
+        
+        // operand length, registered in DECODE for modular multiplication
+        .word_index_last        (rdct_word_index_last_y),
+
+        // bank selectors, registered in DECODE from the uOP's
+        // sel_wide_out / sel_narrow_out fields
+        .sel_wide_out           (rdct_sel_wide_out_y),
+        .sel_narrow_out         (rdct_sel_narrow_out_y),
+        
+        // nets of the auxiliary wide read port (address/bank and X/Y data)
+        .rd_wide_xy_addr_aux    (rd_wide_xy_addr_aux_y),
+        .rd_wide_xy_bank_aux    (rd_wide_xy_bank_aux_y),
+        .rd_wide_x_dout_aux     (rd_wide_x_dout_aux_y),
+        .rd_wide_y_dout_aux     (rd_wide_y_dout_aux_y),
+        
+        // rcmb_final_* nets (presumably the recombinator's final output
+        // stream -- TODO confirm against modexpng_reductor)
+        .rcmb_final_xy_bank     (rcmb_final_xy_bank_y),
+        .rcmb_final_xy_addr     (rcmb_final_xy_addr_y),
+        .rcmb_final_x_dout      (rcmb_final_x_dout_y),
+        .rcmb_final_y_dout      (rcmb_final_y_dout_y),
+        .rcmb_final_xy_valid    (rcmb_final_xy_valid_y),
+        
+        // rdct_wide_* nets (presumably the reductor's result for the wide
+        // storage -- TODO confirm)
+        .rdct_wide_xy_bank      (rdct_wide_xy_bank_y),
+        .rdct_wide_xy_addr      (rdct_wide_xy_addr_y),
+        .rdct_wide_x_dout       (rdct_wide_x_dout_y),
+        .rdct_wide_y_dout       (rdct_wide_y_dout_y),
+        .rdct_wide_xy_valid     (rdct_wide_xy_valid_y),
+        
+        // rdct_narrow_* nets (presumably the reductor's result for the
+        // narrow storage -- TODO confirm)
+        .rdct_narrow_xy_bank    (rdct_narrow_xy_bank_y),
+        .rdct_narrow_xy_addr    (rdct_narrow_xy_addr_y),
+        .rdct_narrow_x_dout     (rdct_narrow_x_dout_y),
+        .rdct_narrow_y_dout     (rdct_narrow_y_dout_y),
+        .rdct_narrow_xy_valid   (rdct_narrow_xy_valid_y)
+    );
+
+
+    //
+    // uOP Completion Detector 
+    //
+    reg uop_exit_from_busy;
+
+    // Combinational "current uOP has finished" flag, consumed by the FSM in
+    // its BUSY state. A unit counts as finished when its ready flag is high
+    // while its enable is low. The modular multiplier is checked first, so it
+    // takes precedence should both opcode classes ever be flagged at once.
+    // Modular addition / subtraction are not hooked up yet (no mod_add_* /
+    // mod_sub_* terms).
+    always @* begin
+        if (uop_opcode_is_mmm)
+            uop_exit_from_busy = mmm_rdy    & ~mmm_ena;
+        else if (uop_opcode_is_io)
+            uop_exit_from_busy = io_mgr_rdy & ~io_mgr_ena;
+        else
+            uop_exit_from_busy = 1'b0;
+    end
+
+    
+    //
+    // uOP Trigger Logic
+    //
+    // Enable strobes for the I/O manager and both multipliers. An enable is
+    // high for the clock cycle that follows DECODE when the decoded opcode
+    // belongs to that unit, and low in every other cycle.
+    always @(posedge clk) begin
+        if (rst) begin
+            {io_mgr_ena, mmm_ena_x, mmm_ena_y} <= 3'b000;
+        end else begin
+            io_mgr_ena <= (uop_fsm_state == UOP_FSM_STATE_DECODE) & uop_opcode_is_io;
+            mmm_ena_x  <= (uop_fsm_state == UOP_FSM_STATE_DECODE) & uop_opcode_is_mmm;
+            mmm_ena_y  <= (uop_fsm_state == UOP_FSM_STATE_DECODE) & uop_opcode_is_mmm;
+        end
+    end
+
+    //
+    // Parameters
+    //    
+    always @(posedge clk)
+        //
+        // Capture the operand selectors of the uOP being decoded. Only the
+        // registers that belong to the decoded opcode are written, all others
+        // keep their previous contents.
+        //
+        if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin
+            //
+            case (uop_data_opcode)
+                //
+                // The two input opcodes differ only in which "out" field of the
+                // uOP selects the destination bank.
+                //
+                UOP_OPCODE_INPUT_TO_WIDE,
+                UOP_OPCODE_INPUT_TO_NARROW: begin
+                    io_mgr_sel_in  <= uop_data_sel_narrow_in;
+                    io_mgr_sel_aux <= uop_data_aux;
+                    io_mgr_sel_crt <= uop_data_crt;
+                    //
+                    if (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE)
+                        io_mgr_sel_out <= uop_data_sel_wide_out;
+                    else
+                        io_mgr_sel_out <= uop_data_sel_narrow_out;
+                end
+                //
+                // Modular multiplication: X and Y units get identical bank
+                // selectors, the ladder mode depends on the uOP's ladder field.
+                //
+                UOP_OPCODE_MODULAR_MULTIPLY: begin
+                    //
+                    {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out}};
+                    {rdct_sel_wide_out_x,   rdct_sel_wide_out_y  } <= {2{uop_data_sel_wide_out  }};
+                    {mmm_sel_narrow_in_x,   mmm_sel_narrow_in_y  } <= {2{uop_data_sel_narrow_in }};
+                    {mmm_sel_wide_in_x,     mmm_sel_wide_in_y    } <= {2{uop_data_sel_wide_in   }};
+                    //
+                    // D and PQ modes drive X for now (presumably placeholders
+                    // until those modes are implemented)
+                    //
+                    case (uop_data_ladder)
+                        UOP_LADDER_00: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b00;
+                        UOP_LADDER_11: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b11;
+                        UOP_LADDER_D:  {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX;
+                        UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX;
+                    endcase
+                    //
+                end
+                //
+            endcase
+            //
+            // the opcode itself is forwarded to the I/O manager on every decode
+            //
+            io_mgr_opcode <= uop_data_opcode;
+            //
+        end
+    
+    //
+    // Length
+    //    
+    wire [OP_ADDR_W -1:0] word_index_last_n_minus1;
+    wire [OP_ADDR_W -1:0] word_index_last_pq_minus1;
+    wire                  uop_npq_is_n;
+
+    // "last index minus one" variants needed by the multipliers
+    assign word_index_last_n_minus1  = word_index_last_n  - 1'b1;
+    assign word_index_last_pq_minus1 = word_index_last_pq - 1'b1;
+
+    // set when the uOP's length field selects the N-sized operand
+    assign uop_npq_is_n = (uop_data_npq == UOP_NPQ_N);
+
+    // Capture the operand length for whichever unit the decoded uOP targets:
+    // the N-sized index when uop_npq_is_n is set, the PQ-sized one otherwise.
+    always @(posedge clk) begin
+        if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin
+            if ((uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE) ||
+                (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW)) begin
+                //
+                io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+                //
+            end else if (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY) begin
+                //
+                {rdct_word_index_last_x,       rdct_word_index_last_y      } <= {2{uop_npq_is_n ? word_index_last_n        : word_index_last_pq       }};
+                {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{uop_npq_is_n ? word_index_last_n_minus1 : word_index_last_pq_minus1}};
+                {mmm_word_index_last_x,        mmm_word_index_last_y       } <= {2{uop_npq_is_n ? word_index_last_n        : word_index_last_pq       }};
+                //
+            end
+        end
+    end
+
+
+    
+    //
+    // FSM Process
+    //
+    // State register of the uOP sequencer: synchronous reset to IDLE,
+    // otherwise follows the combinational next-state value.
+    always @(posedge clk) begin
+        if (rst) begin
+            uop_fsm_state <= UOP_FSM_STATE_IDLE;
+        end else begin
+            uop_fsm_state <= uop_fsm_state_next;
+        end
+    end
+    
+        
+    //
+    // FSM Transition Logic
+    //
+    // Next-state function of the uOP sequencer:
+    //   IDLE   -> FETCH   when 'next' is asserted, otherwise stays in IDLE
+    //   FETCH  -> DECODE  unconditionally
+    //   DECODE -> IDLE    on a STOP opcode, otherwise -> BUSY
+    //   BUSY   -> FETCH   once the running uOP reports completion
+    // The default branch keeps this block purely combinational (no latch is
+    // inferred on uop_fsm_state_next) and sends any state value that is not
+    // listed above back to IDLE.
+    always @* begin
+        //
+        case (uop_fsm_state)
+            UOP_FSM_STATE_IDLE:   uop_fsm_state_next = next               ? UOP_FSM_STATE_FETCH  : UOP_FSM_STATE_IDLE;
+            UOP_FSM_STATE_FETCH:  uop_fsm_state_next =                      UOP_FSM_STATE_DECODE ;
+            UOP_FSM_STATE_DECODE: uop_fsm_state_next = uop_opcode_is_stop ? UOP_FSM_STATE_IDLE   : UOP_FSM_STATE_BUSY;
+            UOP_FSM_STATE_BUSY:   uop_fsm_state_next = uop_exit_from_busy ? UOP_FSM_STATE_FETCH  : UOP_FSM_STATE_BUSY;
+            default:              uop_fsm_state_next =                      UOP_FSM_STATE_IDLE   ;
+        endcase
+        //
+    end
+    
+    
+    //
+    // Ready Flag Logic
+    //
+    reg valid_reg = 1'b1;
+    assign valid = valid_reg;
+
+    // 'valid' is high after reset, drops when 'next' is seen in IDLE and is
+    // raised again when a STOP opcode is decoded. In all other states the
+    // flag holds its value.
+    always @(posedge clk) begin
+        if (rst)
+            valid_reg <= 1'b1;
+        else if (uop_fsm_state == UOP_FSM_STATE_IDLE)
+            valid_reg <= ~next;
+        else if (uop_fsm_state == UOP_FSM_STATE_DECODE)
+            valid_reg <= uop_opcode_is_stop;
+    end
+
+
+
+    //
+    // BEGIN DEBUG
+    //
+    // Simulation aid: when a STOP opcode is seen in the DECODE state, print
+    // the first 64 words of each of the 8 banks (bank k starts at word k*256)
+    // of the narrow memories and of wide memory #0 (gen_wide_bram[0]) in both
+    // storage blocks. Labels read <storage block>.<x|y memory>.<NARROW|WIDE>.<bank>.
+    // NOTE(review): the memories are reached through hierarchical references
+    // to the wrappers' internal 'mem' arrays, so this presumably has to be
+    // removed or guarded before synthesis -- confirm.
+    integer i;
+    always @(posedge clk)
+        //
+        if ((uop_fsm_state == UOP_FSM_STATE_DECODE) && uop_opcode_is_stop) begin
+            //
+            $display("STOP - BANKS DUMP FOLLOWS"); 
+            //
+            // X.X: storage_block_x, *_bram_x memories (first row prints the word indices)
+            //
+            $write("                  "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i);                                                        $write("\n");
+            $write("X.X.NARROW.A:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[0*256+i]);                $write("\n");
+            $write("X.X.NARROW.B:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[1*256+i]);                $write("\n");
+            $write("X.X.NARROW.C:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[2*256+i]);                $write("\n");
+            $write("X.X.NARROW.D:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[3*256+i]);                $write("\n");
+            $write("X.X.NARROW.E:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[4*256+i]);                $write("\n");
+            $write("X.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[5*256+i]);                $write("\n");
+            $write("X.X.NARROW.Q:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[6*256+i]);                $write("\n");
+            $write("X.X.NARROW.EXT:   "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[7*256+i]);                $write("\n");
+            $write("                  "); for (i=0; i<64; i=i+1) $write(" ------ ");                                                           $write("\n");
+            $write("X.X.WIDE.A:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[0*256+i]); $write("\n");
+            $write("X.X.WIDE.B:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[1*256+i]); $write("\n");
+            $write("X.X.WIDE.C:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[2*256+i]); $write("\n");
+            $write("X.X.WIDE.D:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[3*256+i]); $write("\n");
+            $write("X.X.WIDE.E:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[4*256+i]); $write("\n");
+            $write("X.X.WIDE.N:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[5*256+i]); $write("\n");
+            $write("X.X.WIDE.L:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[6*256+i]); $write("\n");
+            $write("X.X.WIDE.H:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[7*256+i]); $write("\n");
+            //
+            // X.Y: storage_block_x, *_bram_y memories
+            //
+            $write("                  "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i);                                                        $write("\n");
+            $write("X.Y.NARROW.A:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[0*256+i]);                $write("\n");
+            $write("X.Y.NARROW.B:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[1*256+i]);                $write("\n");
+            $write("X.Y.NARROW.C:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[2*256+i]);                $write("\n");
+            $write("X.Y.NARROW.D:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[3*256+i]);                $write("\n");
+            $write("X.Y.NARROW.E:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[4*256+i]);                $write("\n");
+            $write("X.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[5*256+i]);                $write("\n");
+            $write("X.Y.NARROW.Q:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[6*256+i]);                $write("\n");
+            $write("X.Y.NARROW.EXT:   "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[7*256+i]);                $write("\n");
+            $write("                  "); for (i=0; i<64; i=i+1) $write(" ------ ");                                                           $write("\n");
+            $write("X.Y.WIDE.A:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[0*256+i]); $write("\n");
+            $write("X.Y.WIDE.B:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[1*256+i]); $write("\n");
+            $write("X.Y.WIDE.C:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[2*256+i]); $write("\n");
+            $write("X.Y.WIDE.D:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[3*256+i]); $write("\n");
+            $write("X.Y.WIDE.E:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[4*256+i]); $write("\n");
+            $write("X.Y.WIDE.N:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[5*256+i]); $write("\n");
+            $write("X.Y.WIDE.L:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[6*256+i]); $write("\n");
+            $write("X.Y.WIDE.H:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[7*256+i]); $write("\n");
+            //
+            // Y.X: storage_block_y, *_bram_x memories
+            //
+            $write("                  "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i);                                                        $write("\n");
+            $write("Y.X.NARROW.A:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[0*256+i]);                $write("\n");
+            $write("Y.X.NARROW.B:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[1*256+i]);                $write("\n");
+            $write("Y.X.NARROW.C:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[2*256+i]);                $write("\n");
+            $write("Y.X.NARROW.D:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[3*256+i]);                $write("\n");
+            $write("Y.X.NARROW.E:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[4*256+i]);                $write("\n");
+            $write("Y.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[5*256+i]);                $write("\n");
+            $write("Y.X.NARROW.Q:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[6*256+i]);                $write("\n");
+            $write("Y.X.NARROW.EXT:   "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[7*256+i]);                $write("\n");
+            $write("                  "); for (i=0; i<64; i=i+1) $write(" ------ ");                                                           $write("\n");
+            $write("Y.X.WIDE.A:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[0*256+i]); $write("\n");
+            $write("Y.X.WIDE.B:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[1*256+i]); $write("\n");
+            $write("Y.X.WIDE.C:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[2*256+i]); $write("\n");
+            $write("Y.X.WIDE.D:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[3*256+i]); $write("\n");
+            $write("Y.X.WIDE.E:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[4*256+i]); $write("\n");
+            $write("Y.X.WIDE.N:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[5*256+i]); $write("\n");
+            $write("Y.X.WIDE.L:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[6*256+i]); $write("\n");
+            $write("Y.X.WIDE.H:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[7*256+i]); $write("\n");
+            //
+            // Y.Y: storage_block_y, *_bram_y memories
+            //
+            $write("                  "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i);                                                        $write("\n");
+            $write("Y.Y.NARROW.A:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[0*256+i]);                $write("\n");
+            $write("Y.Y.NARROW.B:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[1*256+i]);                $write("\n");
+            $write("Y.Y.NARROW.C:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[2*256+i]);                $write("\n");
+            $write("Y.Y.NARROW.D:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[3*256+i]);                $write("\n");
+            $write("Y.Y.NARROW.E:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[4*256+i]);                $write("\n");
+            $write("Y.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[5*256+i]);                $write("\n");
+            $write("Y.Y.NARROW.Q:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[6*256+i]);                $write("\n");
+            $write("Y.Y.NARROW.EXT:   "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[7*256+i]);                $write("\n");
+            $write("                  "); for (i=0; i<64; i=i+1) $write(" ------ ");                                                           $write("\n");
+            $write("Y.Y.WIDE.A:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[0*256+i]); $write("\n");
+            $write("Y.Y.WIDE.B:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[1*256+i]); $write("\n");
+            $write("Y.Y.WIDE.C:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[2*256+i]); $write("\n");
+            $write("Y.Y.WIDE.D:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[3*256+i]); $write("\n");
+            $write("Y.Y.WIDE.E:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[4*256+i]); $write("\n");
+            $write("Y.Y.WIDE.N:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[5*256+i]); $write("\n");
+            $write("Y.Y.WIDE.L:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[6*256+i]); $write("\n");
+            $write("Y.Y.WIDE.H:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[7*256+i]); $write("\n");            //
+        end
+
+    //
+    // END DEBUG
+    //
   
 
 endmodule
diff --git a/rtl/modexpng_io_block.v b/rtl/modexpng_io_block.v
new file mode 100644
index 0000000..68d13c4
--- /dev/null
+++ b/rtl/modexpng_io_block.v
@@ -0,0 +1,158 @@
+//------------------------------------------------------------------------------
+// modexpng_io_block
+//
+// Holds the three I/O buffers of the core, each one a block memory wrapper
+// with one port on the bus clock and one port on the core clock:
+//
+//   bram_input_1 - written and read back from the bus, read by the core
+//   bram_input_2 - written from the bus, read by the core
+//   bram_output  - written by the core, read from the bus
+//
+// The two upper bits of bus_addr select the buffer (0 = INPUT_1, 1 = INPUT_2,
+// 2 = OUTPUT), the remaining bits address a word inside the selected buffer.
+// Bus reads of INPUT_2 and of the unused region 3 return 32'hDEADC0DE.
+//
+// Ports:
+//   clk, clk_bus   core clock and bus clock
+//   rst            synchronous reset of the core-side output register enables
+//   bus_cs         bus chip select (enables the bus-side memory ports)
+//   bus_we         bus write strobe
+//   bus_addr       bus address: {2-bit buffer select, word address}
+//   bus_data_wr    bus write data
+//   bus_data_rd    bus read data, selected by the registered buffer select
+//   in_1_*, in_2_* core-side read ports of INPUT_1 / INPUT_2
+//   out_*          core-side write port of OUTPUT
+//------------------------------------------------------------------------------
+module modexpng_io_block
+(
+    clk, clk_bus, rst,
+        
+    bus_cs,
+    bus_we,
+    bus_addr,
+    bus_data_wr,
+    bus_data_rd,
+    
+    in_1_en,
+    in_1_addr,
+    in_1_dout,
+    
+    in_2_en,
+    in_2_addr,
+    in_2_dout,
+    
+    out_en,
+    out_we,
+    out_addr,
+    out_din
+);
+
+    //
+    // Headers
+    //
+    `include "modexpng_parameters.vh"
+
+
+    //
+    // Ports
+    //
+    input                                         clk;
+    input                                         clk_bus;
+    input                                         rst;
+
+    input                                         bus_cs;
+    input                                         bus_we;
+    input  [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr;
+    input  [                  BUS_DATA_W    -1:0] bus_data_wr;
+    output [                  BUS_DATA_W    -1:0] bus_data_rd;
+    
+    input                                         in_1_en;
+    input  [    BANK_ADDR_W + OP_ADDR_W     -1:0] in_1_addr;
+    output [                  WORD_W        -1:0] in_1_dout;
+    
+    input                                         in_2_en;
+    input  [    BANK_ADDR_W + OP_ADDR_W     -1:0] in_2_addr;
+    output [                  WORD_W        -1:0] in_2_dout;
+    
+    input                                         out_en;
+    input                                         out_we;
+    input  [    BANK_ADDR_W + OP_ADDR_W     -1:0] out_addr;
+    input  [                  WORD_W        -1:0] out_din;
+
+    
+    //
+    // Internal Registers
+    //
+    // The core-side read enables delayed by one clock; they drive the output
+    // register clock enables (regceb) of the two input memories.
+    //
+    reg in_1_reg_en            = 1'b0;
+    reg in_2_reg_en            = 1'b0;
+
+    always @(posedge clk)
+        //
+        if (rst) begin
+            in_1_reg_en            <= 1'b0;
+            in_2_reg_en            <= 1'b0;
+        end else begin
+            in_1_reg_en            <= in_1_en;
+            in_2_reg_en            <= in_2_en;
+        end
+
+
+    //
+    // INPUT, OUTPUT Storage Buffers
+    //
+    wire [                          2 -1:0] bus_addr_msb = bus_addr[BANK_ADDR_W + BUS_OP_ADDR_W +: 2];
+    wire [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr_lsb = bus_addr[BANK_ADDR_W + BUS_OP_ADDR_W -1:0];
+    reg  [                          2 -1:0] bus_addr_msb_dly;
+    wire [              BUS_DATA_W    -1:0] bus_data_rd_input_1;
+    wire [              BUS_DATA_W    -1:0] bus_data_rd_output;
+
+    //
+    // Per-buffer write enables. They must be derived from the bus write
+    // strobe: gating on the write data bus instead would silently drop
+    // writes of zero and would turn any access with non-zero data on the
+    // bus into a write.
+    //
+    wire                                    bus_we_input_1 = bus_we && (bus_addr_msb == 2'd0);
+    wire                                    bus_we_input_2 = bus_we && (bus_addr_msb == 2'd1);
+
+    /* INPUT_1 */
+    modexpng_tdp_36k_x16_x32_wrapper bram_input_1
+    (
+        .clk        (clk),                  // core clock
+        .clk_bus    (clk_bus),              // bus clock
+    
+        .ena        (bus_cs),               // bus side read-write
+        .wea        (bus_we_input_1),       //
+        .addra      (bus_addr_lsb),         //
+        .dina       (bus_data_wr),          //
+        .douta      (bus_data_rd_input_1),  //
+    
+        .enb        (in_1_en),              // core side read-only
+        .regceb     (in_1_reg_en),          //
+        .addrb      (in_1_addr),            //
+        .doutb      (in_1_dout)             //
+    );
+    
+    
+    /* INPUT_2 */
+    modexpng_sdp_36k_x16_x32_wrapper bram_input_2
+    (
+        .clk        (clk),                  // core clock
+        .clk_bus    (clk_bus),              // bus clock
+    
+        .ena        (bus_cs),               // bus side write-only
+        .wea        (bus_we_input_2),       //
+        .addra      (bus_addr_lsb),         //
+        .dina       (bus_data_wr),          //
+    
+        .enb        (in_2_en),              // core side read-only
+        .regceb     (in_2_reg_en),          //
+        .addrb      (in_2_addr),            //
+        .doutb      (in_2_dout)             //
+    );
+
+
+    /* OUTPUT */
+    modexpng_sdp_36k_x32_x16_wrapper bram_output
+    (
+        .clk        (clk),                  // core clock 
+        .clk_bus    (clk_bus),              // bus clock
+    
+        .ena        (out_en),               // core side write-only
+        .wea        (out_we),               //
+        .addra      (out_addr),             //
+        .dina       (out_din),              //
+    
+        .enb        (bus_cs),               // bus side read-only
+        .addrb      (bus_addr_lsb),         //
+        .doutb      (bus_data_rd_output)    //
+    );
+
+    //
+    // Bus read multiplexer. The buffer select bits are delayed by one bus
+    // clock before they steer the mux (presumably to line up with the read
+    // latency of the memories -- TODO confirm against the wrappers).
+    // NOTE(review): the delayed select is updated on every bus clock, also
+    // while bus_cs is low -- confirm that this matches the bus protocol.
+    //
+    reg [BUS_DATA_W -1:0] bus_data_rd_mux;
+    assign bus_data_rd = bus_data_rd_mux;
+
+    always @(posedge clk_bus)
+        bus_addr_msb_dly <= bus_addr_msb;
+
+    always @(*)
+        //
+        case (bus_addr_msb_dly)
+            //
+            2'd0: bus_data_rd_mux = bus_data_rd_input_1;
+            2'd1: bus_data_rd_mux = 32'hDEADC0DE;           // INPUT_2 is write-only from the bus
+            2'd2: bus_data_rd_mux = bus_data_rd_output;
+            2'd3: bus_data_rd_mux = 32'hDEADC0DE;           // no buffer mapped here
+            //
+        endcase
+
+endmodule
+
diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v
new file mode 100644
index 0000000..81f582f
--- /dev/null
+++ b/rtl/modexpng_io_manager.v
@@ -0,0 +1,527 @@
+module modexpng_io_manager
+(
+    clk,
+    rst,
+    
+    ena,
+    rdy,
+    
+    sel_crt,
+    sel_aux,
+    sel_in,
+    sel_out,
+    
+    opcode,
+    
+    word_index_last,
+    
+    ext_wide_xy_ena_x,
+    ext_wide_xy_bank_x,
+    ext_wide_xy_addr_x,
+    ext_wide_x_din_x,
+    ext_wide_y_din_x,
+
+    ext_narrow_xy_ena_x,
+    ext_narrow_xy_bank_x,
+    ext_narrow_xy_addr_x,
+    ext_narrow_x_din_x,
+    ext_narrow_y_din_x,
+
+    ext_wide_xy_ena_y,
+    ext_wide_xy_bank_y,
+    ext_wide_xy_addr_y,
+    ext_wide_x_din_y,
+    ext_wide_y_din_y,
+
+    ext_narrow_xy_ena_y,
+    ext_narrow_xy_bank_y,
+    ext_narrow_xy_addr_y,
+    ext_narrow_x_din_y,
+    ext_narrow_y_din_y,
+    
+    io_in_1_en,
+    io_in_1_addr,
+    io_in_1_dout,
+    
+    io_in_2_en,
+    io_in_2_addr,
+    io_in_2_dout,
+    
+    io_out_en,
+    io_out_we,
+    io_out_addr,
+    io_out_din
+);
+
+    //
+    // Headers
+    //
+    `include "modexpng_parameters.vh"
+    `include "modexpng_microcode.vh"
+
+    
+    //
+    // Ports
+    //
+    input                                    clk;
+    input                                    rst;
+
+    input                                    ena;
+    output                                   rdy;
+    
+    input  [              UOP_CRT_W    -1:0] sel_crt;
+    input  [              UOP_AUX_W    -1:0] sel_aux;
+    input  [              BANK_ADDR_W  -1:0] sel_in; 
+    input  [              BANK_ADDR_W  -1:0] sel_out;
+    
+    input  [              UOP_OPCODE_W -1:0] opcode;
+    
+    input  [              OP_ADDR_W    -1:0] word_index_last;
+    
+    output                                   ext_wide_xy_ena_x;
+    output [              BANK_ADDR_W  -1:0] ext_wide_xy_bank_x;
+    output [              OP_ADDR_W    -1:0] ext_wide_xy_addr_x;
+    output [              WORD_EXT_W   -1:0] ext_wide_x_din_x;
+    output [              WORD_EXT_W   -1:0] ext_wide_y_din_x;
+
+    output                                   ext_narrow_xy_ena_x;
+    output [              BANK_ADDR_W  -1:0] ext_narrow_xy_bank_x;
+    output [              OP_ADDR_W    -1:0] ext_narrow_xy_addr_x;
+    output [              WORD_EXT_W   -1:0] ext_narrow_x_din_x;
+    output [              WORD_EXT_W   -1:0] ext_narrow_y_din_x;
+    
+    output                                   ext_wide_xy_ena_y;
+    output [              BANK_ADDR_W  -1:0] ext_wide_xy_bank_y;
+    output [              OP_ADDR_W    -1:0] ext_wide_xy_addr_y;
+    output [              WORD_EXT_W   -1:0] ext_wide_x_din_y;
+    output [              WORD_EXT_W   -1:0] ext_wide_y_din_y;
+
+    output                                   ext_narrow_xy_ena_y;
+    output [              BANK_ADDR_W  -1:0] ext_narrow_xy_bank_y;
+    output [              OP_ADDR_W    -1:0] ext_narrow_xy_addr_y;
+    output [              WORD_EXT_W   -1:0] ext_narrow_x_din_y;
+    output [              WORD_EXT_W   -1:0] ext_narrow_y_din_y;
+
+    output                                   io_in_1_en;
+    output [BANK_ADDR_W + OP_ADDR_W    -1:0] io_in_1_addr;
+    input  [              WORD_W       -1:0] io_in_1_dout;
+    
+    output                                   io_in_2_en;
+    output [BANK_ADDR_W + OP_ADDR_W    -1:0] io_in_2_addr;
+    input  [              WORD_W       -1:0] io_in_2_dout;
+    
+    output                                   io_out_en;
+    output                                   io_out_we;
+    output [BANK_ADDR_W + OP_ADDR_W    -1:0] io_out_addr;
+    output [              WORD_W       -1:0] io_out_din;
+
+
+    //
+    // FSM Declaration
+    //
+    localparam [2:0] IO_FSM_STATE_IDLE          = 3'b000;
+    localparam [2:0] IO_FSM_STATE_LATENCY_PRE1  = 3'b001;
+    localparam [2:0] IO_FSM_STATE_LATENCY_PRE2  = 3'b010;
+    localparam [2:0] IO_FSM_STATE_BUSY          = 3'b011;
+    localparam [2:0] IO_FSM_STATE_EXTRA         = 3'b100;
+    localparam [2:0] IO_FSM_STATE_LATENCY_POST1 = 3'b101;
+    localparam [2:0] IO_FSM_STATE_LATENCY_POST2 = 3'b110;
+    
+    reg [2:0] io_fsm_state = IO_FSM_STATE_IDLE;
+    reg [2:0] io_fsm_state_next;
+
+
+    //
+    // Control Signals
+    //
+    reg                    in_1_en = 1'b0;
+    reg [BANK_ADDR_W -1:0] in_1_addr_bank;
+    reg [  OP_ADDR_W -1:0] in_1_addr_op;
+    
+    reg                    in_2_en = 1'b0;
+    reg [BANK_ADDR_W -1:0] in_2_addr_bank;
+    reg [  OP_ADDR_W -1:0] in_2_addr_op;
+    
+    reg                    out_en = 1'b0;
+    reg                    out_we;
+    reg [BANK_ADDR_W -1:0] out_addr_bank;
+    reg [  OP_ADDR_W -1:0] out_addr_op;
+    reg [     WORD_W -1:0] out_din;
+    
+    
+    //
+    // Control Signals
+    //
+    reg                    wide_xy_ena_x = 1'b0;
+    reg [BANK_ADDR_W -1:0] wide_xy_bank_x;
+    reg [  OP_ADDR_W -1:0] wide_xy_addr_x;
+    reg [ WORD_EXT_W -1:0] wide_x_din_x;
+    reg [ WORD_EXT_W -1:0] wide_y_din_x;
+
+    reg                    narrow_xy_ena_x = 1'b0;
+    reg [BANK_ADDR_W -1:0] narrow_xy_bank_x;
+    reg [  OP_ADDR_W -1:0] narrow_xy_addr_x;
+    reg [ WORD_EXT_W -1:0] narrow_x_din_x;
+    reg [ WORD_EXT_W -1:0] narrow_y_din_x;
+    
+    reg                    wide_xy_ena_y = 1'b0;
+    reg [BANK_ADDR_W -1:0] wide_xy_bank_y;
+    reg [  OP_ADDR_W -1:0] wide_xy_addr_y;
+    reg [ WORD_EXT_W -1:0] wide_x_din_y;
+    reg [ WORD_EXT_W -1:0] wide_y_din_y;
+
+    reg                    narrow_xy_ena_y = 1'b0;
+    reg [BANK_ADDR_W -1:0] narrow_xy_bank_y;
+    reg [  OP_ADDR_W -1:0] narrow_xy_addr_y;
+    reg [ WORD_EXT_W -1:0] narrow_x_din_y;
+    reg [ WORD_EXT_W -1:0] narrow_y_din_y;
+
+
+    //
+    // Mapping
+    //    
+    assign io_in_1_en   = in_1_en;
+    assign io_in_1_addr = {in_1_addr_bank, in_1_addr_op};
+    
+    assign io_in_2_en   = in_2_en;
+    assign io_in_2_addr = {in_2_addr_bank, in_2_addr_op};
+    
+    assign io_out_en    = out_en;
+    assign io_out_we    = out_we;
+    assign io_out_addr  = {out_addr_bank, out_addr_op};
+    assign io_out_din   = out_din;
+
+
+    //
+    // Mapping
+    //
+    // One-to-one mapping of the internal storage registers onto the ext_* nets;
+    // unlike the I/O ports, bank and address stay separate signals here.
+    assign ext_wide_xy_ena_x    = wide_xy_ena_x;
+    assign ext_wide_xy_bank_x   = wide_xy_bank_x;
+    assign ext_wide_xy_addr_x   = wide_xy_addr_x;
+    assign ext_wide_x_din_x     = wide_x_din_x;
+    assign ext_wide_y_din_x     = wide_y_din_x;
+
+    assign ext_narrow_xy_ena_x  = narrow_xy_ena_x; 
+    assign ext_narrow_xy_bank_x = narrow_xy_bank_x;
+    assign ext_narrow_xy_addr_x = narrow_xy_addr_x;
+    assign ext_narrow_x_din_x   = narrow_x_din_x;
+    assign ext_narrow_y_din_x   = narrow_y_din_x;
+
+    assign ext_wide_xy_ena_y    = wide_xy_ena_y;
+    assign ext_wide_xy_bank_y   = wide_xy_bank_y;
+    assign ext_wide_xy_addr_y   = wide_xy_addr_y;
+    assign ext_wide_x_din_y     = wide_x_din_y;
+    assign ext_wide_y_din_y     = wide_y_din_y;
+
+    assign ext_narrow_xy_ena_y  = narrow_xy_ena_y;
+    assign ext_narrow_xy_bank_y = narrow_xy_bank_y;
+    assign ext_narrow_xy_addr_y = narrow_xy_addr_y;
+    assign ext_narrow_x_din_y   = narrow_x_din_y;
+    assign ext_narrow_y_din_y   = narrow_y_din_y;
+
+
+    //
+    // Delays
+    //    
+    reg [  OP_ADDR_W -1:0] in_1_addr_op_dly1;
+    reg [  OP_ADDR_W -1:0] in_1_addr_op_dly2;
+    reg [  OP_ADDR_W -1:0] in_2_addr_op_dly1;
+    reg [  OP_ADDR_W -1:0] in_2_addr_op_dly2;
+    
+    // Two-stage shift register per input port: *_dly1 is the operand address
+    // from the previous clock cycle, *_dly2 the one from two cycles ago.
+    // Non-blocking assignments make every stage sample the old value of the
+    // stage before it.
+    always @(posedge clk) begin
+        //
+        in_1_addr_op_dly1 <= in_1_addr_op;
+        in_1_addr_op_dly2 <= in_1_addr_op_dly1;
+        //
+        in_2_addr_op_dly1 <= in_2_addr_op;
+        in_2_addr_op_dly2 <= in_2_addr_op_dly1;
+        //
+    end
+
+
+    //
+    // Handy Wires
+    //
+    // Decoded opcode: which storage the input word goes to ...
+    wire opcode_is_wide   = (opcode == UOP_OPCODE_INPUT_TO_WIDE  );
+    wire opcode_is_narrow = (opcode == UOP_OPCODE_INPUT_TO_NARROW);
+
+    // ... and whether the opcode is an input operation at all
+    wire opcode_is_input  = opcode_is_wide || opcode_is_narrow;
+
+    // Source bank BANK_IN_1_N_COEFF is the only one that triggers the extra
+    // read / extra narrow write handled in the EXTRA and LATENCY_POST2 states
+    wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF);
+
+    // Decoded CRT and AUX selectors
+    wire sel_crt_is_x = (sel_crt == UOP_CRT_X);
+    wire sel_crt_is_y = (sel_crt == UOP_CRT_Y);
+
+    wire sel_aux_is_1 = (sel_aux == UOP_AUX_1);
+    wire sel_aux_is_2 = (sel_aux == UOP_AUX_2);
+
+    // Forward declarations; these are assigned next to the address counters
+    wire in_1_addr_op_is_last;
+    wire in_2_addr_op_is_last;
+
+    wire in_1_addr_next_op_is_last;
+    wire in_2_addr_next_op_is_last;
+
+
+    //
+    // Enable Logic
+    //
+    // Read enables of the two input ports. The case is keyed on
+    // io_fsm_state_next, so each enable is registered for the state the FSM is
+    // about to enter rather than the one it is currently in.
+    // out_en is only written in the reset branch of this block.
+    always @(posedge clk)
+        //
+        if (rst) begin
+            in_1_en <= 1'b0;
+            in_2_en <= 1'b0;
+            out_en  <= 1'b0;
+        end else case (io_fsm_state_next)
+            //
+            // regular streaming: enable only the port picked by sel_aux
+            IO_FSM_STATE_LATENCY_PRE1,
+            IO_FSM_STATE_LATENCY_PRE2,
+            IO_FSM_STATE_BUSY: begin
+                in_1_en <= opcode_is_input && sel_aux_is_1;
+                in_2_en <= opcode_is_input && sel_aux_is_2;
+            end
+            //
+            // one more read on port 1 only, and only for BANK_IN_1_N_COEFF
+            IO_FSM_STATE_EXTRA: begin
+                in_1_en <= opcode_is_input && sel_aux_is_1 && sel_in_needs_extra;
+                in_2_en <= 1'b0;
+            end
+            //
+            default: begin
+                in_1_en <= 1'b0;
+                in_2_en <= 1'b0;
+            end
+            //
+        endcase
+
+    //
+    // Enable Logic
+    //    
+    // Enables of the wide / narrow storage ports. Unlike the input-port
+    // enables, this case is keyed on the current io_fsm_state. At most one of
+    // the four enables is raised: opcode picks wide vs. narrow, sel_crt picks
+    // the _x vs. _y group.
+    always @(posedge clk)
+        //
+        if (rst) begin
+            //
+            wide_xy_ena_x   <= 1'b0;
+            wide_xy_ena_y   <= 1'b0;
+            narrow_xy_ena_x <= 1'b0;
+            narrow_xy_ena_y <= 1'b0;
+            //
+        end else case (io_fsm_state)
+            //
+            IO_FSM_STATE_BUSY,
+            IO_FSM_STATE_EXTRA,
+            IO_FSM_STATE_LATENCY_POST1: begin
+                wide_xy_ena_x   <= opcode_is_wide   && sel_crt_is_x;
+                wide_xy_ena_y   <= opcode_is_wide   && sel_crt_is_y;
+                narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x;
+                narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y;
+            end
+            //
+            // final cycle: only the narrow port stays enabled, and only when
+            // the source bank requires the extra word
+            IO_FSM_STATE_LATENCY_POST2: begin
+                wide_xy_ena_x   <= 1'b0;
+                wide_xy_ena_y   <= 1'b0;
+                narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra;
+                narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra;
+            end
+            //
+            default: begin
+                wide_xy_ena_x   <= 1'b0;
+                wide_xy_ena_y   <= 1'b0;
+                narrow_xy_ena_x <= 1'b0;
+                narrow_xy_ena_y <= 1'b0;
+            end
+            //
+        endcase
+
+
+    //
+    // Data Logic
+    //
+    // Word read from the input port selected by sel_aux, zero-extended from
+    // WORD_W to WORD_EXT_W bits.
+    wire [WORD_EXT_W -1:0] io_in_dout_mux = {{(WORD_EXT_W-WORD_W){1'b0}}, sel_aux_is_1 ? io_in_1_dout : io_in_2_dout};
+
+    // Data registers of the storage ports. Every register first gets the
+    // don't-care value; the non-blocking assignments inside the case come later
+    // in the same block and therefore override that default for the one port
+    // group that is active. Both data inputs of a group receive the same word.
+    // The state/opcode/sel_crt conditions mirror the storage enable logic.
+    always @(posedge clk) begin
+        //
+        wide_x_din_x   <= WORD_EXT_DNC;
+        wide_y_din_x   <= WORD_EXT_DNC;
+        wide_x_din_y   <= WORD_EXT_DNC;
+        wide_y_din_y   <= WORD_EXT_DNC;
+        narrow_x_din_x <= WORD_EXT_DNC;
+        narrow_y_din_x <= WORD_EXT_DNC;
+        narrow_x_din_y <= WORD_EXT_DNC;
+        narrow_y_din_y <= WORD_EXT_DNC;
+        //
+        case (io_fsm_state)
+            //
+            IO_FSM_STATE_BUSY,
+            IO_FSM_STATE_EXTRA,
+            IO_FSM_STATE_LATENCY_POST1: begin
+                if (opcode_is_wide   && sel_crt_is_x) {wide_x_din_x,   wide_y_din_x}   <= {2{io_in_dout_mux}};
+                if (opcode_is_wide   && sel_crt_is_y) {wide_x_din_y,   wide_y_din_y}   <= {2{io_in_dout_mux}};
+                if (opcode_is_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
+                if (opcode_is_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
+            end
+            //
+            // extra word: narrow storage only, and only if the source bank needs it
+            IO_FSM_STATE_LATENCY_POST2: begin
+                if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
+                if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
+            end            
+            //
+        endcase
+        //
+    end
+
+
+    //
+    // Address Logic
+    //
+    // Operand address that the selected input port was given two clock cycles
+    // ago (taken from the delay line above).
+    wire [OP_ADDR_W -1:0] in_addr_op_dly2_mux =
+        sel_aux_is_1 ? in_1_addr_op_dly2 : in_2_addr_op_dly2;
+
+    // Bank / address registers of the storage ports. As in the data logic, the
+    // don't-care defaults are overridden by the later non-blocking assignments
+    // for the active port group. Regular words go to bank sel_out at the delayed
+    // read address.
+    always @(posedge clk) begin
+        //
+        {wide_xy_bank_x,   wide_xy_addr_x  } <= {BANK_DNC, OP_ADDR_DNC};
+        {wide_xy_bank_y,   wide_xy_addr_y  } <= {BANK_DNC, OP_ADDR_DNC};
+        {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
+        {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+        //
+        case (io_fsm_state)
+            //
+            IO_FSM_STATE_BUSY,
+            IO_FSM_STATE_EXTRA,
+            IO_FSM_STATE_LATENCY_POST1: begin
+                if (opcode_is_wide   && sel_crt_is_x) {wide_xy_bank_x,   wide_xy_addr_x  } <= {sel_out, in_addr_op_dly2_mux};
+                if (opcode_is_wide   && sel_crt_is_y) {wide_xy_bank_y,   wide_xy_addr_y  } <= {sel_out, in_addr_op_dly2_mux};
+                if (opcode_is_narrow && sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {sel_out, in_addr_op_dly2_mux};
+                if (opcode_is_narrow && sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {sel_out, in_addr_op_dly2_mux};
+            end
+            //
+            // the extra word has a fixed destination: bank BANK_NARROW_EXT,
+            // address OP_ADDR_EXT_COEFF, regardless of sel_out
+            IO_FSM_STATE_LATENCY_POST2: begin
+                if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
+                if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
+            end            
+            //
+        endcase
+        //
+    end
+        
+    
+    //
+    // Address Logic
+    //
+    // Look-ahead copies of the port addresses: *_addr_next always holds the
+    // {bank, op} value that will be loaded into the port address on the next
+    // advancing cycle.
+    reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr_next;
+    reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr_next;
+    reg [BANK_ADDR_W + OP_ADDR_W -1:0] out_addr_next;
+
+    // Operand (word index) part of the look-ahead addresses
+    wire [OP_ADDR_W -1:0] in_1_addr_next_op = in_1_addr_next[OP_ADDR_W -1:0];
+    wire [OP_ADDR_W -1:0] in_2_addr_next_op = in_2_addr_next[OP_ADDR_W -1:0];
+    wire [OP_ADDR_W -1:0] out_addr_next_op  = out_addr_next [OP_ADDR_W -1:0];
+    
+    // "Last word" flags for the current and the look-ahead word index
+    assign in_1_addr_op_is_last = in_1_addr_op == word_index_last;
+    assign in_2_addr_op_is_last = in_2_addr_op == word_index_last;
+
+    assign in_1_addr_next_op_is_last = in_1_addr_next_op == word_index_last;
+    assign in_2_addr_next_op_is_last = in_2_addr_next_op == word_index_last;
+    
+    // Address counters, keyed on io_fsm_state_next like the input-port enables.
+    // There is no reset branch and no default arm: in all other states the
+    // registers simply keep their value.
+    always @(posedge clk)
+        //
+        case (io_fsm_state_next)
+            //
+            // start of an operation: word 0 of the selected banks, look-ahead = word 1
+            IO_FSM_STATE_LATENCY_PRE1: begin
+                //
+                {in_1_addr_bank, in_1_addr_op} <= {sel_in,  OP_ADDR_ZERO};
+                {in_2_addr_bank, in_2_addr_op} <= {sel_in,  OP_ADDR_ZERO};
+                {out_addr_bank,  out_addr_op } <= {sel_out, OP_ADDR_ZERO};
+                //
+                in_1_addr_next <= {sel_in,  OP_ADDR_ONE};
+                in_2_addr_next <= {sel_in,  OP_ADDR_ONE};
+                out_addr_next  <= {sel_out, OP_ADDR_ONE};
+                //
+            end
+            //
+            // streaming: step to the look-ahead address. The increment is applied
+            // to the whole {bank, op} vector, so a wrap of the word index carries
+            // into the bank field.
+            IO_FSM_STATE_LATENCY_PRE2, 
+            IO_FSM_STATE_BUSY: begin
+                //
+                {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
+                {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next;
+                {out_addr_bank,  out_addr_op } <= out_addr_next;
+                //
+                in_1_addr_next <= in_1_addr_next + 1'b1;
+                in_2_addr_next <= in_2_addr_next + 1'b1;
+                out_addr_next  <= out_addr_next  + 1'b1;
+                //
+            end
+            //
+            // one additional step on port 1 only; the condition is the same one
+            // that keeps in_1_en asserted in EXTRA, expressed through the shared
+            // sel_in_needs_extra wire so that enable and address cannot diverge
+            IO_FSM_STATE_EXTRA:
+                //
+                if (opcode_is_input && sel_aux_is_1 && sel_in_needs_extra) begin
+                    //
+                    {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
+                    //
+                    in_1_addr_next <= in_1_addr_next + 1'b1;
+                    //
+                end
+            //
+        endcase
+    
+
+
+    //
+    // FSM Process
+    //
+    // State register: synchronous reset to IDLE, otherwise load the value
+    // computed by the transition logic.
+    always @(posedge clk)
+        //
+        if (rst) io_fsm_state <= IO_FSM_STATE_IDLE;
+        else     io_fsm_state <= io_fsm_state_next;
+    
+    
+    //
+    // Busy Exit Logic
+    //
+    // Registered "leave BUSY" flag, consumed by the transition logic.
+    reg io_fsm_done = 1'b0;
+    
+    // Defaults to 0 every cycle and is raised only while the FSM is in BUSY,
+    // the opcode is an input operation, and the look-ahead word index of the
+    // port selected by sel_aux equals word_index_last.
+    always @(posedge clk) begin
+        //
+        io_fsm_done <= 1'b0;
+        //
+        if (io_fsm_state == IO_FSM_STATE_BUSY) begin
+            //
+            if (opcode_is_input) begin
+                if (sel_aux_is_1 && in_1_addr_next_op_is_last) io_fsm_done <= 1'b1;
+                if (sel_aux_is_2 && in_2_addr_next_op_is_last) io_fsm_done <= 1'b1;
+            end
+             
+        end
+        //
+    end
+    
+        
+    //
+    // FSM Transition Logic
+    //
+    // Next-state function of the I/O FSM (purely combinational).
+    // IDLE waits for ena, BUSY is held until io_fsm_done is raised, all other
+    // states last exactly one clock cycle.
+    always @* begin
+        //
+        case (io_fsm_state)
+            IO_FSM_STATE_IDLE:          io_fsm_state_next = ena         ? IO_FSM_STATE_LATENCY_PRE1  : IO_FSM_STATE_IDLE ;
+            IO_FSM_STATE_LATENCY_PRE1:  io_fsm_state_next =               IO_FSM_STATE_LATENCY_PRE2  ;
+            IO_FSM_STATE_LATENCY_PRE2:  io_fsm_state_next =               IO_FSM_STATE_BUSY          ;
+            IO_FSM_STATE_BUSY:          io_fsm_state_next = io_fsm_done ? IO_FSM_STATE_EXTRA         : IO_FSM_STATE_BUSY ;
+            IO_FSM_STATE_EXTRA:         io_fsm_state_next =               IO_FSM_STATE_LATENCY_POST1 ;
+            IO_FSM_STATE_LATENCY_POST1: io_fsm_state_next =               IO_FSM_STATE_LATENCY_POST2 ;
+            IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next =               IO_FSM_STATE_IDLE          ;
+            // Any state value not listed above (an unused encoding, or X in
+            // simulation) returns to IDLE. Without this arm io_fsm_state_next
+            // would be left unassigned on that path, which infers a latch.
+            default:                    io_fsm_state_next =               IO_FSM_STATE_IDLE          ;
+        endcase
+        //
+    end
+
+
+    //
+    // Ready Logic
+    //
+    // Ready flag, high out of reset.
+    reg rdy_reg = 1'b1;
+    
+    assign rdy = rdy_reg;
+    
+    // In IDLE the flag follows the inverse of ena, so it drops on the clock edge
+    // that samples ena high. It is set again on the edge that ends
+    // LATENCY_POST2. In every other state it keeps its value (no default arm).
+    always @(posedge clk)
+        //
+        if (rst)                        rdy_reg <= 1'b1;
+        else case (io_fsm_state)
+            IO_FSM_STATE_IDLE:          rdy_reg <= ~ena;
+            IO_FSM_STATE_LATENCY_POST2: rdy_reg <= 1'b1;
+        endcase
+
+
+endmodule
diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh
new file mode 100644
index 0000000..af21391
--- /dev/null
+++ b/rtl/modexpng_microcode.vh
@@ -0,0 +1,54 @@
+localparam UOP_OPCODE_W = 4;                // UOP_OPCODE_* operation code
+localparam UOP_CRT_W    = 1;                // UOP_CRT_X / UOP_CRT_Y
+localparam UOP_NPQ_W    = 1;                // UOP_NPQ_N / UOP_NPQ_PQ
+localparam UOP_AUX_W    = 1;                // UOP_AUX_1 / UOP_AUX_2
+localparam UOP_LADDER_W = 2;                // four distinct 2-bit UOP_LADDER_* codes; a 1-bit field would truncate and alias them
+localparam UOP_SEL_W    = 4 * BANK_ADDR_W;  // four bank selectors of BANK_ADDR_W bits each
+
+localparam UOP_ADDR_W = 6;  // 64 instructions
+
+// NOTE(review): presumably the start addresses of the two microprograms
+// (with / without CRT) -- confirm against the microcode ROM contents.
+localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_USING_CRT = 6'd0;
+localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_WITHOUT_CRT = 6'd31;
+
+
+// Total width of one micro-operation word: the sum of all field widths.
+// The annotation line above the expression gives each field's width, the line
+// below it the field's bit range. Both describe a 21-bit word, i.e. they assume
+// that UOP_LADDER_W is 2.
+//                 4              1           1           1           2              4*3=12
+localparam UOP_W = UOP_OPCODE_W + UOP_CRT_W + UOP_NPQ_W + UOP_AUX_W + UOP_LADDER_W + UOP_SEL_W;
+//                 [20:17]        [16]        [15]        [14]        [13:12]        [11:9][8:6][5:3][2:0]
+
+// OPCODE
+// Operation codes; 0 is STOP, 1..3 are the I/O transfers, 8 is the modular
+// multiplication.
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP               = 4'd0;
+
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE      = 4'd1;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW    = 4'd2;
+
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3;
+
+// disabled placeholder; as written its value 4'd0 would collide with UOP_OPCODE_STOP
+//localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X  = 4'd0;
+
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY   = 4'd8;
+
+// Field encodings. Every *_DNC constant is all-X, i.e. a "don't care" value
+// for fields that an instruction does not use.
+
+// CRT
+localparam [UOP_CRT_W -1:0] UOP_CRT_X   = 1'b0;
+localparam [UOP_CRT_W -1:0] UOP_CRT_Y   = 1'b1;
+localparam [UOP_CRT_W -1:0] UOP_CRT_DNC = 1'bX;
+
+// NPQ
+localparam [UOP_NPQ_W -1:0] UOP_NPQ_N   = 1'b0;
+localparam [UOP_NPQ_W -1:0] UOP_NPQ_PQ  = 1'b1;
+localparam [UOP_NPQ_W -1:0] UOP_NPQ_DNC = 1'bX;
+
+// AUX
+localparam [UOP_AUX_W -1:0] UOP_AUX_1   = 1'b0;
+localparam [UOP_AUX_W -1:0] UOP_AUX_2   = 1'b1;
+localparam [UOP_AUX_W -1:0] UOP_AUX_DNC = 1'bX;
+
+// LADDER
+// NOTE(review): the values are 2-bit literals assigned to [UOP_LADDER_W-1:0]
+// parameters; they only stay distinct if UOP_LADDER_W is 2 (with a width of 1
+// the upper bit is truncated and 00/PQ as well as 11/D become equal).
+localparam [UOP_LADDER_W -1:0] UOP_LADDER_00  = 2'b00;
+localparam [UOP_LADDER_W -1:0] UOP_LADDER_11  = 2'b11;
+localparam [UOP_LADDER_W -1:0] UOP_LADDER_D   = 2'b01;
+localparam [UOP_LADDER_W -1:0] UOP_LADDER_PQ  = 2'b10;
+localparam [UOP_LADDER_W -1:0] UOP_LADDER_DNC = 2'bXX;
+
+
+// SEL
+// all four bank selectors set to "don't care"
+localparam [UOP_SEL_W -1:0] UOP_SEL_DNC_ALL = {4{BANK_DNC}};
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index 6d63735..25fafb4 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -14,14 +14,16 @@ function    integer cryptech_clog2;
     //
 endfunction
 
+localparam MAX_OP_W = 4096;
+
 localparam WORD_W     = 16;
 localparam WORD_EXT_W = 18;
 localparam MAC_W      = 47;
-localparam BUS_DATA_W = 32;
 
+localparam BUS_DATA_W = 32;
 localparam BUS_OP_ADDR_W = cryptech_clog2(MAX_OP_W / BUS_DATA_W);
 
-localparam MAX_OP_W = 4096;
+
 
 localparam BANK_ADDR_W = 3;
 localparam OP_ADDR_W   = cryptech_clog2(MAX_OP_W / WORD_W); 
@@ -55,20 +57,41 @@ localparam [BANK_ADDR_W-1:0] BANK_RCMB_ML  = 3'd0;
 localparam [BANK_ADDR_W-1:0] BANK_RCMB_MH  = 3'd1;
 localparam [BANK_ADDR_W-1:0] BANK_RCMB_EXT = 3'd2; // [0] -> MH'
 
-localparam [BANK_ADDR_W-1:0] BANK_DONT_CARE = {BANK_ADDR_W{1'bX}};
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_M        = 3'd0;
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_N        = 3'd1;
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_N_FACTOR = 3'd2;
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_N_COEFF  = 3'd3;/*
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_EXT      = 3'd4; // [0] -> N_COEFF'*/
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_X        = 3'd5;
+localparam [BANK_ADDR_W-1:0] BANK_IN_1_Y        = 3'd6;
+
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_D        = 3'd0;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_P        = 3'd1;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_P_FACTOR = 3'd2;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_P_COEFF  = 3'd3;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_Q        = 3'd4;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_Q_FACTOR = 3'd5;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_Q_COEFF  = 3'd6;
+localparam [BANK_ADDR_W-1:0] BANK_IN_2_QINV     = 3'd7;
+
+localparam [BANK_ADDR_W-1:0] BANK_OUT_S         = 3'd0;
+localparam [BANK_ADDR_W-1:0] BANK_OUT_XM        = 3'd1;
+localparam [BANK_ADDR_W-1:0] BANK_OUT_YM        = 3'd2;
+
+localparam [BANK_ADDR_W-1:0] BANK_DNC = {BANK_ADDR_W{1'bX}};
 
 localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0;
 localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q     = 1;
 
 localparam [OP_ADDR_W-1:0] OP_ADDR_ZERO = {OP_ADDR_W{1'b0}};
 localparam [OP_ADDR_W-1:0] OP_ADDR_ONE = {{(OP_ADDR_W-1){1'b0}}, 1'b1};
-localparam [OP_ADDR_W-1:0] OP_ADDR_DONT_CARE = {OP_ADDR_W{1'bX}};
+localparam [OP_ADDR_W-1:0] OP_ADDR_DNC = {OP_ADDR_W{1'bX}};
 
 localparam [WORD_W-1:0] WORD_NULL = {WORD_W{1'b0}};
 localparam [WORD_EXT_W-1:0] WORD_EXT_NULL = {WORD_EXT_W{1'b0}};
 
-localparam [WORD_EXT_W-1:0] WORD_EXT_DONT_CARE = {WORD_EXT_W{1'bX}};
+localparam [WORD_EXT_W-1:0] WORD_EXT_DNC = {WORD_EXT_W{1'bX}};
 
-localparam [MAC_INDEX_W-1:0] MAC_INDEX_DONT_CARE = {MAC_INDEX_W{1'bX}};
+localparam [MAC_INDEX_W-1:0] MAC_INDEX_DNC = {MAC_INDEX_W{1'bX}};
 
-localparam [NUM_MULTS-1:0] MULT_BITMAP_ZEROES = {NUM_MULTS{1'b0}};
\ No newline at end of file
+localparam [NUM_MULTS-1:0] MULT_BITMAP_ZEROES = {NUM_MULTS{1'b0}};
diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
index fe4ffb9..f698c18 100644
--- a/rtl/modexpng_recombinator_block.v
+++ b/rtl/modexpng_recombinator_block.v
@@ -959,19 +959,19 @@ module modexpng_recombinator_block
     
     task clear_wide;
         begin
-            _update_wide(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0);
+            _update_wide(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
         end
     endtask
 
     task clear_narrow;
         begin
-            _update_narrow(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0);
+            _update_narrow(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
         end
     endtask
 
     task clear_rdct;
         begin
-            _update_rdct(BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0);
+            _update_rdct(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
         end
     endtask
     
diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v
index aafb38c..a37333e 100644
--- a/rtl/modexpng_reductor.v
+++ b/rtl/modexpng_reductor.v
@@ -2,18 +2,14 @@ module modexpng_reductor
 (
     clk, rst,
     ena, rdy,
-    //fsm_state_next,
     word_index_last,
-    //dsp_xy_ce_p,
-    //dsp_x_p, dsp_y_p,
-    //col_index, col_index_last,
+    sel_wide_out, sel_narrow_out,
     rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_dout_aux, rd_wide_y_dout_aux,
-    //rcmb_wide_xy_bank,   rcmb_wide_xy_addr,   rcmb_wide_x_dout,   rcmb_wide_y_dout,   rcmb_wide_xy_valid,
-    rcmb_final_xy_bank, rcmb_final_xy_addr, rcmb_final_x_dout, rcmb_final_y_dout, rcmb_final_xy_valid,
-                        rdct_final_xy_addr, rdct_final_x_dout, rdct_final_y_dout, rdct_final_xy_valid
+    rcmb_final_xy_bank,  rcmb_final_xy_addr,  rcmb_final_x_dout,  rcmb_final_y_dout,  rcmb_final_xy_valid,
+    rdct_wide_xy_bank,   rdct_wide_xy_addr,   rdct_wide_x_dout,   rdct_wide_y_dout,   rdct_wide_xy_valid,
+    rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid
 );
 
-
     //
     // Headers
     //
@@ -31,7 +27,10 @@ module modexpng_reductor
     input  [FSM_STATE_WIDTH-1:0] fsm_state_next;*/
     input [7:0]                  word_index_last;/*
     input                        dsp_xy_ce_p;
-    *//*
+    */
+    input [2:0] sel_wide_out;
+    input [2:0] sel_narrow_out;
+    /*
     input  [9*47-1:0] dsp_x_p;
     input  [9*47-1:0] dsp_y_p;
     input  [     4:0] col_index;
@@ -51,11 +50,17 @@ module modexpng_reductor
     input  [    17:0] rcmb_final_y_dout;
     input             rcmb_final_xy_valid;
 
-    output [     7:0] rdct_final_xy_addr;
-    output [    17:0] rdct_final_x_dout;
-    output [    17:0] rdct_final_y_dout;
-    output            rdct_final_xy_valid;
+    output [     2:0] rdct_wide_xy_bank;
+    output [     7:0] rdct_wide_xy_addr;
+    output [    17:0] rdct_wide_x_dout;
+    output [    17:0] rdct_wide_y_dout;
+    output            rdct_wide_xy_valid;
 
+    output [     2:0] rdct_narrow_xy_bank;
+    output [     7:0] rdct_narrow_xy_addr;
+    output [    17:0] rdct_narrow_x_dout;
+    output [    17:0] rdct_narrow_y_dout;
+    output            rdct_narrow_xy_valid;
 
     //
     // Ready
@@ -178,41 +183,89 @@ module modexpng_reductor
     //
     // Reduction
     //
-    reg [     7:0] rdct_xy_addr;
-    reg [    17:0] rdct_x_dout;
-    reg [    17:0] rdct_y_dout;
-    reg            rdct_xy_valid = 1'b0;
+    reg [     2:0] wide_xy_bank;
+    reg [     7:0] wide_xy_addr;
+    reg [    17:0] wide_x_dout;
+    reg [    17:0] wide_y_dout;
+    reg            wide_xy_valid = 1'b0;
 
-    assign rdct_final_xy_addr  = rdct_xy_addr;
-    assign rdct_final_x_dout   = rdct_x_dout;
-    assign rdct_final_y_dout   = rdct_y_dout;
-    assign rdct_final_xy_valid = rdct_xy_valid;
+    reg [     2:0] narrow_xy_bank;
+    reg [     7:0] narrow_xy_addr;
+    reg [    17:0] narrow_x_dout;
+    reg [    17:0] narrow_y_dout;
+    reg            narrow_xy_valid = 1'b0;
 
-    task _update_rdct;
+    assign rdct_wide_xy_bank  = wide_xy_bank;
+    assign rdct_wide_xy_addr  = wide_xy_addr;
+    assign rdct_wide_x_dout   = wide_x_dout;
+    assign rdct_wide_y_dout   = wide_y_dout;
+    assign rdct_wide_xy_valid = wide_xy_valid;
+
+    assign rdct_narrow_xy_bank  = narrow_xy_bank;
+    assign rdct_narrow_xy_addr  = narrow_xy_addr;
+    assign rdct_narrow_x_dout   = narrow_x_dout;
+    assign rdct_narrow_y_dout   = narrow_y_dout;
+    assign rdct_narrow_xy_valid = narrow_xy_valid;
+
+    task _update_rdct_wide;
+        input [ 2:0] bank;
         input [ 7:0] addr;
         input [17:0] dout_x;
         input [17:0] dout_y;
         input        valid;
         begin
-            rdct_xy_addr  <= addr;
-            rdct_x_dout   <= dout_x;
-            rdct_y_dout   <= dout_y;
-            rdct_xy_valid <= valid;
+            wide_xy_bank  <= bank;
+            wide_xy_addr  <= addr;
+            wide_x_dout   <= dout_x;
+            wide_y_dout   <= dout_y;
+            wide_xy_valid <= valid;
         end
     endtask
     
-    task set_rdct;
+    task _update_rdct_narrow;
+        input [ 2:0] bank;
         input [ 7:0] addr;
         input [17:0] dout_x;
         input [17:0] dout_y;
+        input        valid;
         begin
-            _update_rdct(addr, dout_x, dout_y, 1'b1);
+            narrow_xy_bank  <= bank;
+            narrow_xy_addr  <= addr;
+            narrow_x_dout   <= dout_x;
+            narrow_y_dout   <= dout_y;
+            narrow_xy_valid <= valid;
         end
     endtask
     
-    task clear_rdct;
+    task set_rdct_wide;
+        input [ 2:0] bank;
+        input [ 7:0] addr;
+        input [17:0] dout_x;
+        input [17:0] dout_y;
         begin
-            _update_rdct(8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+            _update_rdct_wide(bank, addr, dout_x, dout_y, 1'b1);
+        end
+    endtask
+    
+    task set_rdct_narrow;
+        input [ 2:0] bank;
+        input [ 7:0] addr;
+        input [17:0] dout_x;
+        input [17:0] dout_y;
+        begin
+            _update_rdct_narrow(bank, addr, dout_x, dout_y, 1'b1);
+        end
+    endtask
+    
+    task clear_rdct_wide;
+        begin
+            _update_rdct_wide(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+        end
+    endtask
+
+    task clear_rdct_narrow;
+        begin
+            _update_rdct_narrow(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
         end
     endtask
     
@@ -232,23 +285,31 @@ module modexpng_reductor
     //
     always @(posedge clk)
         //
-        if (rst) clear_rdct;
-        else begin
+        if (rst) begin
+            clear_rdct_wide;
+            clear_rdct_narrow;
+        end else begin
             //
-            clear_rdct;
+            clear_rdct_wide;
+            clear_rdct_narrow;
             //
             if (rcmb_xy_valid_dly3)
                 //
                 case (rcmb_xy_bank_dly3)
                                     
                     BANK_RCMB_MH:
-                        if (rcmb_xy_addr_dly3 == 8'd1)
-                            set_rdct(8'd0, sum_rdct_x_carry, sum_rdct_y_carry);
-                        else if (rcmb_xy_addr_dly3 > 8'd1)
-                            set_rdct(rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);
+                        if (rcmb_xy_addr_dly3 == 8'd1) begin
+                            set_rdct_wide  (sel_wide_out,   8'd0, sum_rdct_x_carry, sum_rdct_y_carry);
+                            set_rdct_narrow(sel_narrow_out, 8'd0, sum_rdct_x_carry, sum_rdct_y_carry);
+                        end else if (rcmb_xy_addr_dly3 > 8'd1) begin
+                            set_rdct_wide  (sel_wide_out,   rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);
+                            set_rdct_narrow(sel_narrow_out, rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);
+                        end
                             
-                    BANK_RCMB_EXT:
-                        set_rdct(word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
+                    BANK_RCMB_EXT: begin
+                        set_rdct_wide  (sel_wide_out,   word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
+                        set_rdct_narrow(sel_narrow_out, word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
+                    end
 
                 endcase
             //
diff --git a/rtl/modexpng_sdp_36k_x16_x32_wrapper.v b/rtl/modexpng_sdp_36k_x16_x32_wrapper.v
index 69c5383..4c6fe46 100644
--- a/rtl/modexpng_sdp_36k_x16_x32_wrapper.v
+++ b/rtl/modexpng_sdp_36k_x16_x32_wrapper.v
@@ -25,7 +25,7 @@ module modexpng_sdp_36k_x16_x32_wrapper
     input                                     ena;
     input                                     wea;
     input  [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addra;
-    input  [              BUD_DATA_W    -1:0] dina;
+    input  [              BUS_DATA_W    -1:0] dina;
     
     input                                     enb;
     input                                     regceb;
@@ -42,10 +42,10 @@ module modexpng_sdp_36k_x16_x32_wrapper
         
         .BRAM_SIZE              ("36Kb"),
         
-        .WRITE_WIDTH            (BUD_DATA_W),
+        .WRITE_WIDTH            (BUS_DATA_W),
         .READ_WIDTH             (WORD_W),
         
-        .DO_REG                 (0),
+        .DO_REG                 (1),
         .WRITE_MODE             ("READ_FIRST"),
         
         .SRVAL                  (72'h000000000000000000),
diff --git a/rtl/modexpng_sdp_36k_x18_wrapper.v b/rtl/modexpng_sdp_36k_x18_wrapper.v
new file mode 100644
index 0000000..ded9425
--- /dev/null
+++ b/rtl/modexpng_sdp_36k_x18_wrapper.v
@@ -0,0 +1,67 @@
+module modexpng_sdp_36k_x18_wrapper
+(
+    clk,
+    
+    ena, wea,
+    addra, dina,
+    
+    enb, regceb,
+    addrb, doutb
+);
+
+
+    //
+    // Behavioural simple dual-port memory of WORD_EXT_W-bit words.
+    // Port A only writes, port B only reads; both use the same clock.
+    // Read data passes through two registers, so it appears on doutb two
+    // enabled clock edges after the address was presented.
+    //
+
+
+    //
+    // Headers
+    //
+    `include "modexpng_parameters.vh"
+
+
+    //
+    // Ports
+    //
+    input                                 clk;      // common clock
+    
+    input                                 ena;      // port A enable
+    input                                 wea;      // port A write strobe
+    input  [BANK_ADDR_W + OP_ADDR_W -1:0] addra;    // port A address {bank, word}
+    input  [             WORD_EXT_W -1:0] dina;     // port A write data
+    
+    input                                 enb;      // port B enable (first read stage)
+    input                                 regceb;   // port B output register enable (second read stage)
+    input  [BANK_ADDR_W + OP_ADDR_W -1:0] addrb;    // port B address {bank, word}
+    output [             WORD_EXT_W -1:0] doutb;    // port B read data
+
+
+    //
+    // Storage Array (one word per {bank, word} address)
+    //
+    reg [WORD_EXT_W -1:0] mem[0:2**(BANK_ADDR_W+OP_ADDR_W)-1];
+
+
+    //
+    // Port A: write when both the enable and the write strobe are high
+    //
+    always @(posedge clk) begin
+        //
+        if (wea && ena) mem[addra] <= dina;
+        //
+    end
+
+
+    //
+    // Port B: two-stage read pipeline
+    //
+    reg [WORD_EXT_W -1:0] rd_stage1;    // word fetched from the array
+    reg [WORD_EXT_W -1:0] rd_stage2;    // output register
+    
+    always @(posedge clk) begin
+        //
+        // the stages are enabled independently; non-blocking assignments make
+        // the second stage capture the previous content of the first one
+        //
+        if (enb)    rd_stage1 <= mem[addrb];
+        if (regceb) rd_stage2 <= rd_stage1;
+        //
+    end
+    
+    assign doutb = rd_stage2;
+
+
+endmodule
diff --git a/rtl/modexpng_sdp_36k_wrapper.v b/rtl/modexpng_sdp_36k_x18_wrapper_xilinx.v
similarity index 97%
rename from rtl/modexpng_sdp_36k_wrapper.v
rename to rtl/modexpng_sdp_36k_x18_wrapper_xilinx.v
index 433ee55..b9e40ae 100644
--- a/rtl/modexpng_sdp_36k_wrapper.v
+++ b/rtl/modexpng_sdp_36k_x18_wrapper_xilinx.v
@@ -1,4 +1,4 @@
-module modexpng_sdp_36k_wrapper
+module modexpng_sdp_36k_x18_wrapper
 (
     clk,
     
diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v
index d5b9b24..f1d5ae2 100644
--- a/rtl/modexpng_storage_block.v
+++ b/rtl/modexpng_storage_block.v
@@ -29,26 +29,7 @@ module modexpng_storage_block
     rd_narrow_xy_bank,
     rd_narrow_xy_addr,
     rd_narrow_x_dout,
-    rd_narrow_y_dout,
-    
-    bus_cs,
-    bus_we,
-    bus_addr,
-    bus_data_wr,
-    bus_data_rd,
-    
-    in_1_en,
-    in_1_addr,
-    in_1_dout,
-    
-    in_2_en,
-    in_2_addr,
-    in_2_dout,
-    
-    out_en,
-    out_we,
-    out_addr,
-    out_din
+    rd_narrow_y_dout
 );
 
     //
@@ -93,25 +74,6 @@ module modexpng_storage_block
     output [                  WORD_EXT_W    -1:0] rd_narrow_x_dout;
     output [                  WORD_EXT_W    -1:0] rd_narrow_y_dout;
 
-    input                                         bus_cs;
-    input                                         bus_we;
-    input  [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr;
-    input  [                  BUS_DATA_W    -1:0] bus_data_wr;
-    output [                  BUS_DATA_W    -1:0] bus_data_rd;
-    
-    input                                         in_1_en;
-    input  [    BANK_ADDR_W + OP_ADDR_W     -1:0] in_1_addr;
-    output [                  WORD_W        -1:0] in_1_dout;
-    
-    input                                         in_2_en;
-    input  [    BANK_ADDR_W + OP_ADDR_W     -1:0] in_2_addr;
-    output [                  WORD_W        -1:0] in_2_dout;
-    
-    input                                         out_en;
-    input                                         out_we;
-    input  [    BANK_ADDR_W + OP_ADDR_W     -1:0] out_addr;
-    input  [                  WORD_W        -1:0] out_din;
-
     
     //
     // Internal Registers
@@ -119,8 +81,6 @@ module modexpng_storage_block
     reg rd_wide_xy_reg_ena     = 1'b0;
     reg rd_wide_xy_reg_ena_aux = 1'b0;
     reg rd_narrow_xy_reg_ena   = 1'b0;
-    reg in_1_reg_en            = 1'b0;
-    reg in_2_reg_en            = 1'b0;
 
     always @(posedge clk)
         //
@@ -128,14 +88,10 @@ module modexpng_storage_block
             rd_wide_xy_reg_ena     <= 1'b0;
             rd_wide_xy_reg_ena_aux <= 1'b0;
             rd_narrow_xy_reg_ena   <= 1'b0;
-            in_1_reg_en            <= 1'b0;
-            in_2_reg_en            <= 1'b0;
         end else begin
             rd_wide_xy_reg_ena     <= rd_wide_xy_ena;
             rd_wide_xy_reg_ena_aux <= rd_wide_xy_ena_aux;
             rd_narrow_xy_reg_ena   <= rd_narrow_xy_ena;
-            in_1_reg_en            <= in_1_en;
-            in_2_reg_en            <= in_2_en;
         end
     
     //
@@ -161,7 +117,7 @@ module modexpng_storage_block
             //
             assign rd_wide_xy_offset[z] = {1'b0, rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]};
             //
-            modexpng_sdp_36k_wrapper wide_bram_x
+            modexpng_sdp_36k_x18_wrapper wide_bram_x
             (
                 .clk    (clk),
                 
@@ -176,7 +132,7 @@ module modexpng_storage_block
                 .doutb  (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W])
             );
             //
-            modexpng_sdp_36k_wrapper wide_bram_y
+            modexpng_sdp_36k_x18_wrapper wide_bram_y
             (
                 .clk    (clk),
 
@@ -197,7 +153,7 @@ module modexpng_storage_block
     //
     // Auxilary Storage
     //
-    modexpng_sdp_36k_wrapper wide_bram_x_aux
+    modexpng_sdp_36k_x18_wrapper wide_bram_x_aux
     (
         .clk    (clk),
 
@@ -212,7 +168,7 @@ module modexpng_storage_block
         .doutb  (rd_wide_x_dout_aux)
     );
     //
-    modexpng_sdp_36k_wrapper wide_bram_y_aux
+    modexpng_sdp_36k_x18_wrapper wide_bram_y_aux
     (
         .clk    (clk),
 
@@ -230,7 +186,7 @@ module modexpng_storage_block
     //
     // "Narrow" Storage
     //
-    modexpng_sdp_36k_wrapper narrow_bram_x
+    modexpng_sdp_36k_x18_wrapper narrow_bram_x
     (
         .clk    (clk),
 
@@ -245,7 +201,7 @@ module modexpng_storage_block
         .doutb  (rd_narrow_x_dout)
     );
 
-    modexpng_sdp_36k_wrapper narrow_bram_y
+    modexpng_sdp_36k_x18_wrapper narrow_bram_y
     (
         .clk    (clk),
 
@@ -260,87 +216,6 @@ module modexpng_storage_block
         .doutb  (rd_narrow_y_dout)
     );
 
-    //
-    // INPUT, OUTPUT Storage Buffers
-    //
-    wire [                          2 -1:0] bus_addr_msb = bus_addr[BANK_ADDR_W + BUS_OP_ADDR_W +: 2];
-    wire [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr_lsb = bus_addr[BANK_ADDR_W + BUS_OP_ADDR_W -1:0];
-    reg  [                          2 -1:0] bus_addr_msb_dly;
-    wire [              BUS_DATA_W    -1:0] bus_data_rd_input_1;
-    wire [              BUS_DATA_W    -1:0] bus_data_rd_output;
-
-    wire                                    bus_data_wr_input_1 = bus_data_wr && (bus_addr_msb == 2'd0);
-    wire                                    bus_data_wr_input_2 = bus_data_wr && (bus_addr_msb == 2'd1);
-
-    /* INPUT_1 */
-    modexpng_sdp_36k_x16_x32_wrapper bram_input_1
-    (
-        .clk        (clk),                  // core clock
-        .clk_bus    (clk_bus),              // bus clock
-    
-        .ena        (bus_cs),               // bus side read-write
-        .wea        (bus_data_wr_input_1),  //
-        .addra      (bus_addr_lsb),         //
-        .dina       (bus_data_wr),          //
-        .douta      (bus_data_rd_input_1),  //
-    
-        .enb        (in_1_en),              // core side read-only
-        .regceb     (in_1_reg_en),          //
-        .addrb      (in_1_addr),            //
-        .doutb      (in_1_dout)             //
-    );
-    
-    
-    /* INPUT_2 */
-    modexpng_sdp_36k_x16_x32_wrapper bram_input_2
-    (
-        .clk        (clk),                  // core clock
-        .clk_bus    (clk_bus),              // bus clock
-    
-        .ena        (bus_cs),               // bus side write-only
-        .wea        (bus_data_wr_input_2),  //
-        .addra      (bus_addr_lsb),         //
-        .dina       (bus_data_wr),          //
-    
-        .enb        (in_2_en),              // core side read-only
-        .regceb     (in_2_reg_en),          //
-        .addrb      (in_2_addr),            //
-        .doutb      (in_2_dout)             //
-    );
-
-
-    /* OUTPUT */
-    modexpng_sdp_36k_x32_x16_wrapper bram_output
-    (
-        .clk        (clk),                  // core clock 
-        .clk_bus    (clk_bus),              // bus clock
-    
-        .ena        (out_en),               // core side write-only
-        .wea        (out_we),               //
-        .addra      (out_addr),             //
-        .dina       (out_din),              //
-    
-        .enb        (bus_cs),               // bus side read-only
-        .addrb      (bus_addr_lsb),         //
-        .doutb      (bus_data_rd_output)    //
-    );
-
-    reg [31: 0] bus_data_rd_mux;
-    assign bus_data_rd = bus_data_rd_mux;
-
-    always @(posedge clk_bus)
-        bus_addr_msb_dly <= bus_addr_msb;
-
-    always @(*)
-        //
-        case (bus_addr_msb_dly)
-            //
-            2'd0: bus_data_rd_mux = bus_data_rd_input_1;
-            2'd1: bus_data_rd_mux = 32'hDEADC0DE;
-            2'd2: bus_data_rd_mux = bus_data_rd_output;
-            2'd3: bus_data_rd_mux = 32'hDEADC0DE;
-            //
-        endcase
 
 endmodule
 
diff --git a/rtl/modexpng_storage_manager.v b/rtl/modexpng_storage_manager.v
index e5ac83f..6b34bed 100644
--- a/rtl/modexpng_storage_manager.v
+++ b/rtl/modexpng_storage_manager.v
@@ -36,7 +36,19 @@ module modexpng_storage_manager
     rcmb_narrow_xy_bank,
     rcmb_narrow_xy_addr,
     rcmb_narrow_x_din,
-    rcmb_narrow_y_din
+    rcmb_narrow_y_din,
+    
+    rdct_wide_xy_bank,
+    rdct_wide_xy_addr,
+    rdct_wide_x_din,
+    rdct_wide_y_din,
+    rdct_wide_xy_valid,
+
+    rdct_narrow_xy_bank,
+    rdct_narrow_xy_addr,
+    rdct_narrow_x_din,
+    rdct_narrow_y_din,
+    rdct_narrow_xy_valid
 );
 
 
@@ -76,18 +88,30 @@ module modexpng_storage_manager
     input  [ WORD_EXT_W -1:0] ext_narrow_x_din;
     input  [ WORD_EXT_W -1:0] ext_narrow_y_din;
     
-    input         rcmb_wide_xy_ena;
-    input  [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
+    input                     rcmb_wide_xy_ena;
+    input  [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
     input  [ 7:0] rcmb_wide_xy_addr;
     input  [17:0] rcmb_wide_x_din;
     input  [17:0] rcmb_wide_y_din;
 
-    input         rcmb_narrow_xy_ena;
-    input  [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
+    input                     rcmb_narrow_xy_ena;
+    input  [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
     input  [ 7:0] rcmb_narrow_xy_addr;
     input  [17:0] rcmb_narrow_x_din;
     input  [17:0] rcmb_narrow_y_din;
     
+    input  [     2:0] rdct_wide_xy_bank;
+    input  [     7:0] rdct_wide_xy_addr;
+    input  [    17:0] rdct_wide_x_din;
+    input  [    17:0] rdct_wide_y_din;
+    input             rdct_wide_xy_valid;
+
+    input  [     2:0] rdct_narrow_xy_bank;
+    input  [     7:0] rdct_narrow_xy_addr;
+    input  [    17:0] rdct_narrow_x_din;
+    input  [    17:0] rdct_narrow_y_din;
+    input             rdct_narrow_xy_valid;
+    
     reg                    wr_wide_xy_ena_reg = 1'b0;
     reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_reg;
     reg [  OP_ADDR_W -1:0] wr_wide_xy_addr_reg;
@@ -152,35 +176,37 @@ module modexpng_storage_manager
     
     task disable_wide;
         begin
-            _update_wide(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
+            _update_wide(1'b0, BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
         end
     endtask
     
     task disable_narrow;
         begin
-            _update_narrow(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
+            _update_narrow(1'b0, BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
         end
     endtask
     
     always @(posedge clk)
         //
-        if (rst)                       disable_wide;
+        if (rst)                         disable_wide;
         else begin
             //
-            if      (ext_wide_xy_ena)  enable_wide(ext_wide_xy_bank,  ext_wide_xy_addr,  ext_wide_x_din,  ext_wide_y_din);
-            else if (rcmb_wide_xy_ena) enable_wide(rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din);
-            else                       disable_wide;
+            if      (ext_wide_xy_ena)    enable_wide(ext_wide_xy_bank,  ext_wide_xy_addr,  ext_wide_x_din,  ext_wide_y_din);
+            else if (rcmb_wide_xy_ena)   enable_wide(rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din);
+            else if (rdct_wide_xy_valid) enable_wide(rdct_wide_xy_bank, rdct_wide_xy_addr, rdct_wide_x_din, rdct_wide_y_din);
+            else                         disable_wide;
             //
         end
             
     always @(posedge clk)
         //
-        if (rst)                         disable_narrow;
+        if (rst)                           disable_narrow;
         else begin
             //
-            if      (ext_narrow_xy_ena)  enable_narrow(ext_narrow_xy_bank,  ext_narrow_xy_addr,  ext_narrow_x_din,  ext_narrow_y_din);
-            else if (rcmb_narrow_xy_ena) enable_narrow(rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din);
-            else                         disable_narrow;
+            if      (ext_narrow_xy_ena)    enable_narrow(ext_narrow_xy_bank,  ext_narrow_xy_addr,  ext_narrow_x_din,  ext_narrow_y_din);
+            else if (rcmb_narrow_xy_ena)   enable_narrow(rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din);
+            else if (rdct_narrow_xy_valid) enable_narrow(rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_din, rdct_narrow_y_din);
+            else                           disable_narrow;
             //
         end
 
diff --git a/rtl/modexpng_tdp_36k_x16_x32_wrapper.v b/rtl/modexpng_tdp_36k_x16_x32_wrapper.v
index 37a5cbc..40930f3 100644
--- a/rtl/modexpng_tdp_36k_x16_x32_wrapper.v
+++ b/rtl/modexpng_tdp_36k_x16_x32_wrapper.v
@@ -1,4 +1,4 @@
-module modexpng_sdp_36k_x16_x32_wrapper
+module modexpng_tdp_36k_x16_x32_wrapper
 (
     clk, clk_bus,
     
@@ -25,8 +25,8 @@ module modexpng_sdp_36k_x16_x32_wrapper
     input                                     ena;
     input                                     wea;
     input  [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addra;
-    input  [              BUD_DATA_W    -1:0] dina;
-    output [              BUD_DATA_W    -1:0] douta;
+    input  [              BUS_DATA_W    -1:0] dina;
+    output [              BUS_DATA_W    -1:0] douta;
     
     input                                     enb;
     input                                     regceb;
@@ -42,8 +42,8 @@ module modexpng_sdp_36k_x16_x32_wrapper
         .DEVICE                 ("7SERIES"),
         .BRAM_SIZE              ("36Kb"),
 
-        .WRITE_WIDTH_A          (BUD_DATA_W),
-        .READ_WIDTH_A           (BUD_DATA_W),
+        .WRITE_WIDTH_A          (BUS_DATA_W),
+        .READ_WIDTH_A           (BUS_DATA_W),
 
         .WRITE_WIDTH_B          (WORD_W),
         .READ_WIDTH_B           (WORD_W),
@@ -61,7 +61,7 @@ module modexpng_sdp_36k_x16_x32_wrapper
         .INIT_B                 (36'h000000000),
         
         .INIT_FILE              ("NONE"),
-        .SIM_COLLISION_CHECK    ("NONE"),
+        .SIM_COLLISION_CHECK    ("NONE")
    )
    BRAM_TDP_MACRO_inst
    (
@@ -72,9 +72,9 @@ module modexpng_sdp_36k_x16_x32_wrapper
       .ENA      (ena),
       .REGCEA   (1'b0),
       .WEA      ({4{wea}}),
-      .ADDRA    (),
-      .DIA      (),
-      .DOA      (),
+      .ADDRA    (addra),
+      .DIA      (dina),
+      .DOA      (douta),
 
       .CLKB     (clk),
       .ENB      (enb),
diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v
new file mode 100644
index 0000000..d0b6253
--- /dev/null
+++ b/rtl/modexpng_uop_rom.v
@@ -0,0 +1,37 @@
+module modexpng_uop_rom    // Micro-operation ROM: returns the micro-operation word stored at a given address.
+(
+    clk,     // clock; the output register is updated on its rising edge
+    addr,    // address of the micro-operation to fetch
+    data     // micro-operation word selected by addr (registered)
+);
+
+    `include "modexpng_parameters.vh"
+    `include "modexpng_microcode.vh"    // the UOP_* / BANK_* names used below are not declared in this module; presumably they come from these two headers - confirm
+
+    input  wire 	              clk;
+    input  wire [UOP_ADDR_W -1:0] addr;
+    output reg  [UOP_W      -1:0] data;    // written only by the clocked case statement below
+
+    always @(posedge clk)    // registered lookup: data is loaded from the entry selected by addr at each rising clock edge
+        // Word layout: {opcode, CRT, NPQ, AUX, LADDER, four bank selectors}. NOTE(review): selector order looks like src-wide, src-narrow, dst-wide, dst-narrow - confirm against the microcode header.
+        case (addr)    // 0..5: INPUT_TO_WIDE, 6..11: INPUT_TO_NARROW, 12: MODULAR_MULTIPLY, any other address: STOP
+            6'd00:      data <= {UOP_OPCODE_INPUT_TO_WIDE,    UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N,        BANK_WIDE_N,    BANK_DNC         };
+            6'd01:      data <= {UOP_OPCODE_INPUT_TO_WIDE,    UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N,        BANK_WIDE_N,    BANK_DNC         };
+            6'd02:      data <= {UOP_OPCODE_INPUT_TO_WIDE,    UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_X,        BANK_WIDE_A,    BANK_DNC         };
+            6'd03:      data <= {UOP_OPCODE_INPUT_TO_WIDE,    UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_Y,        BANK_WIDE_A,    BANK_DNC         };
+            6'd04:      data <= {UOP_OPCODE_INPUT_TO_WIDE,    UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_WIDE_E,    BANK_DNC         };
+            6'd05:      data <= {UOP_OPCODE_INPUT_TO_WIDE,    UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_WIDE_E,    BANK_DNC         };
+
+            6'd06:      data <= {UOP_OPCODE_INPUT_TO_NARROW,  UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_COEFF,  BANK_DNC,       BANK_NARROW_COEFF};
+            6'd07:      data <= {UOP_OPCODE_INPUT_TO_NARROW,  UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_COEFF,  BANK_DNC,       BANK_NARROW_COEFF};
+            6'd08:      data <= {UOP_OPCODE_INPUT_TO_NARROW,  UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_FACTOR, BANK_DNC,       BANK_NARROW_A    };
+            6'd09:      data <= {UOP_OPCODE_INPUT_TO_NARROW,  UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_FACTOR, BANK_DNC,       BANK_NARROW_A    };
+            6'd10:      data <= {UOP_OPCODE_INPUT_TO_NARROW,  UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_DNC,       BANK_NARROW_E    };
+            6'd11:      data <= {UOP_OPCODE_INPUT_TO_NARROW,  UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_DNC,       BANK_NARROW_E    };
+            // the only arithmetic entry so far: one MODULAR_MULTIPLY word
+            6'd12:      data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_11,  BANK_WIDE_A, BANK_NARROW_A,      BANK_WIDE_B,    BANK_NARROW_B    };
+            // every address not listed above yields a STOP word
+            default:    data <= {UOP_OPCODE_STOP,             UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL};
+        endcase
+
+endmodule



More information about the Commits mailing list