[Cryptech-Commits] [user/shatov/modexpng] 04/05: Added more micro-operations, also added "general worker" module. The worker is basically a block memory data mover, but it can also do some supporting operations required for the Garner's formula part of the exponentiation.

git at cryptech.is git at cryptech.is
Thu Oct 3 13:51:36 UTC 2019


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexpng.

commit e340b1489b08905e3d8acd17686e178028de7922
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Thu Oct 3 16:47:39 2019 +0300

    Added more micro-operations, also added "general worker" module. The worker is basically
    a block memory data mover, but it can also do some supporting operations required for the
    Garner's formula part of the exponentiation.
---
 rtl/modexpng_core_top.v        | 895 +++++++++++++++++++++++++----------------
 rtl/modexpng_dsp_array_block.v |   8 +-
 rtl/modexpng_general_worker.v  | 679 +++++++++++++++++++++++++++++++
 rtl/modexpng_io_block.v        |  10 +-
 rtl/modexpng_io_manager.v      | 347 +++++++++-------
 rtl/modexpng_microcode.vh      |  21 +-
 rtl/modexpng_mmm_dual.v        |  43 +-
 rtl/modexpng_parameters.vh     |   4 +-
 rtl/modexpng_reductor.v        |  28 +-
 rtl/modexpng_storage_block.v   | 126 +++++-
 rtl/modexpng_storage_manager.v | 167 ++++----
 rtl/modexpng_uop_rom.v         |  61 ++-
 12 files changed, 1734 insertions(+), 655 deletions(-)

diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
index e117e5d..6b194dc 100644
--- a/rtl/modexpng_core_top.v
+++ b/rtl/modexpng_core_top.v
@@ -71,11 +71,13 @@ module modexpng_core_top
     wire [BANK_ADDR_W  -1:0] uop_data_sel_wide_out   = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ];
     wire [BANK_ADDR_W  -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ];
     
-    wire uop_opcode_is_stop =  uop_data_opcode == UOP_OPCODE_STOP;
-    wire uop_opcode_is_io   = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE     ) ||
-                              (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW   ) ||
-                              (uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW) ;
-    wire uop_opcode_is_mmm  =  uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY;
+    wire uop_opcode_is_stop =  uop_data_opcode == UOP_OPCODE_STOP               ;
+    wire uop_opcode_is_in   = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE      ) ||
+                              (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW    ) ;
+    wire uop_opcode_is_out  =  uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ;
+    wire uop_opcode_is_mmm  =  uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY   ;
+    wire uop_opcode_is_wrk  = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES  ) ||
+                              (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X       ) ;
 
     wire [UOP_ADDR_W -1:0] uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT;
     wire [UOP_ADDR_W -1:0] uop_addr_next = uop_addr + 1'b1;
@@ -101,87 +103,135 @@ module modexpng_core_top
     //
     // Storage Interfaces (X, Y)
     //
-    wire                    wr_wide_xy_ena_x;
-    wire [BANK_ADDR_W -1:0] wr_wide_xy_bank_x;
-    wire [  OP_ADDR_W -1:0] wr_wide_xy_addr_x;
-    wire [ WORD_EXT_W -1:0] wr_wide_x_din_x;
-    wire [ WORD_EXT_W -1:0] wr_wide_y_din_x;
-
-    wire                    wr_narrow_xy_ena_x;
-    wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank_x;
-    wire [  OP_ADDR_W -1:0] wr_narrow_xy_addr_x;
-    wire [ WORD_EXT_W -1:0] wr_narrow_x_din_x;
-    wire [ WORD_EXT_W -1:0] wr_narrow_y_din_x;
-
-    wire                                     rd_wide_xy_ena_x;
-    wire                                     rd_wide_xy_ena_aux_x;
-    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_x;
-    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_x;
-    wire [NUM_MULTS_HALF * OP_ADDR_W   -1:0] rd_wide_xy_addr_x;
-    wire [                 OP_ADDR_W   -1:0] rd_wide_xy_addr_aux_x;
-    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_x_dout_x;
-    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_y_dout_x;
-    wire [                 WORD_EXT_W  -1:0] rd_wide_x_dout_aux_x;
-    wire [                 WORD_EXT_W  -1:0] rd_wide_y_dout_aux_x;
-    
-    wire                                     rd_narrow_xy_ena_x;
-    wire [                 BANK_ADDR_W -1:0] rd_narrow_xy_bank_x;
-    wire [                 OP_ADDR_W   -1:0] rd_narrow_xy_addr_x;
-    wire [                 WORD_EXT_W  -1:0] rd_narrow_x_dout_x;
-    wire [                 WORD_EXT_W  -1:0] rd_narrow_y_dout_x;
-    
-    wire                    ext_wide_xy_ena_x;
-    wire [BANK_ADDR_W -1:0] ext_wide_xy_bank_x;
-    wire [  OP_ADDR_W -1:0] ext_wide_xy_addr_x;
-    wire [ WORD_EXT_W -1:0] ext_wide_x_din_x;
-    wire [ WORD_EXT_W -1:0] ext_wide_y_din_x;
-
-    wire                    ext_narrow_xy_ena_x;
-    wire [BANK_ADDR_W -1:0] ext_narrow_xy_bank_x;
-    wire [  OP_ADDR_W -1:0] ext_narrow_xy_addr_x;
-    wire [ WORD_EXT_W -1:0] ext_narrow_x_din_x;
-    wire [ WORD_EXT_W -1:0] ext_narrow_y_din_x;
-
-    wire                    wr_wide_xy_ena_y;
-    wire [BANK_ADDR_W -1:0] wr_wide_xy_bank_y;
-    wire [  OP_ADDR_W -1:0] wr_wide_xy_addr_y;
-    wire [ WORD_EXT_W -1:0] wr_wide_x_din_y;
-    wire [ WORD_EXT_W -1:0] wr_wide_y_din_y;
-
-    wire                    wr_narrow_xy_ena_y;
-    wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank_y;
-    wire [  OP_ADDR_W -1:0] wr_narrow_xy_addr_y;
-    wire [ WORD_EXT_W -1:0] wr_narrow_x_din_y;
-    wire [ WORD_EXT_W -1:0] wr_narrow_y_din_y;
-
-    wire                                     rd_wide_xy_ena_y;
-    wire                                     rd_wide_xy_ena_aux_y;
-    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_y;
-    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_y;
-    wire [NUM_MULTS_HALF * OP_ADDR_W   -1:0] rd_wide_xy_addr_y;
-    wire [                 OP_ADDR_W   -1:0] rd_wide_xy_addr_aux_y;
-    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_x_dout_y;
-    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_y_dout_y;
-    wire [                 WORD_EXT_W  -1:0] rd_wide_x_dout_aux_y;
-    wire [                 WORD_EXT_W  -1:0] rd_wide_y_dout_aux_y;
-    
-    wire                                     rd_narrow_xy_ena_y;
-    wire [                 BANK_ADDR_W -1:0] rd_narrow_xy_bank_y;
-    wire [                 OP_ADDR_W   -1:0] rd_narrow_xy_addr_y;
-    wire [                 WORD_EXT_W  -1:0] rd_narrow_x_dout_y;
-    wire [                 WORD_EXT_W  -1:0] rd_narrow_y_dout_y;
-    
-    wire                    ext_wide_xy_ena_y;
-    wire [BANK_ADDR_W -1:0] ext_wide_xy_bank_y;
-    wire [  OP_ADDR_W -1:0] ext_wide_xy_addr_y;
-    wire [ WORD_EXT_W -1:0] ext_wide_x_din_y;
-    wire [ WORD_EXT_W -1:0] ext_wide_y_din_y;
-
-    wire                    ext_narrow_xy_ena_y;
-    wire [BANK_ADDR_W -1:0] ext_narrow_xy_bank_y;
-    wire [  OP_ADDR_W -1:0] ext_narrow_xy_addr_y;
-    wire [ WORD_EXT_W -1:0] ext_narrow_x_din_y;
-    wire [ WORD_EXT_W -1:0] ext_narrow_y_din_y;
+    wire                                     wr_wide_xy_ena_x;      // \        \
+    wire [                 BANK_ADDR_W -1:0] wr_wide_xy_bank_x;     //  | WIDE   | WR
+    wire [                 OP_ADDR_W   -1:0] wr_wide_xy_addr_x;     //  |        |
+    wire [                 WORD_EXT_W  -1:0] wr_wide_x_data_x;      //  |        |
+    wire [                 WORD_EXT_W  -1:0] wr_wide_y_data_x;      // /         |
+                                                                    //           |
+    wire                                     wr_narrow_xy_ena_x;    // \         |
+    wire [                 BANK_ADDR_W -1:0] wr_narrow_xy_bank_x;   //  | NARROW |
+    wire [                 OP_ADDR_W   -1:0] wr_narrow_xy_addr_x;   //  |        |
+    wire [                 WORD_EXT_W  -1:0] wr_narrow_x_data_x;    //  |        |
+    wire [                 WORD_EXT_W  -1:0] wr_narrow_y_data_x;    // /        /
+                                                                    //
+    wire                                     rd_wide_xy_ena_x;      // \        \
+    wire                                     rd_wide_xy_ena_aux_x;  //  | WIDE   | RD
+    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_x;     //  |        |
+    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_x; //  |        |
+    wire [NUM_MULTS_HALF * OP_ADDR_W   -1:0] rd_wide_xy_addr_x;     //  |        |
+    wire [                 OP_ADDR_W   -1:0] rd_wide_xy_addr_aux_x; //  |        |
+    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_x_data_x;      //  |        |
+    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_y_data_x;      //  |        |
+    wire [                 WORD_EXT_W  -1:0] rd_wide_x_data_aux_x;  //  |        |
+    wire [                 WORD_EXT_W  -1:0] rd_wide_y_data_aux_x;  // /         |
+                                                                    //           |
+    wire                                     rd_narrow_xy_ena_x;    // \         |
+    wire [                 BANK_ADDR_W -1:0] rd_narrow_xy_bank_x;   //  | NARROW |
+    wire [                 OP_ADDR_W   -1:0] rd_narrow_xy_addr_x;   //  |        |
+    wire [                 WORD_EXT_W  -1:0] rd_narrow_x_data_x;    //  |        |
+    wire [                 WORD_EXT_W  -1:0] rd_narrow_y_data_x;    // /        /
+                                                                    //
+    wire                                     wrk_rd_wide_xy_ena_x;     // \        \
+    wire [                 BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x;    //  | WIDE   | WRK
+    wire [                 OP_ADDR_W   -1:0] wrk_rd_wide_xy_addr_x;    //  |        |
+    wire [                 WORD_EXT_W  -1:0] wrk_rd_wide_x_data_x;     //  |        |
+    wire [                 WORD_EXT_W  -1:0] wrk_rd_wide_y_data_x;     // /         |
+                                                                    //           |
+    wire                                     wrk_rd_narrow_xy_ena_x;   // \         |
+    wire [                 BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_x;  //  | NARROW |
+    wire [                 OP_ADDR_W   -1:0] wrk_rd_narrow_xy_addr_x;  //  |        |
+    wire [                 WORD_EXT_W  -1:0] wrk_rd_narrow_x_data_x;   //  |        |
+    wire [                 WORD_EXT_W  -1:0] wrk_rd_narrow_y_data_x;   // /        /
+
+    wire                                     wrk_wr_wide_xy_ena_x;     // \        \
+    wire [                 BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_x;    //  | WIDE   | WRK
+    wire [                 OP_ADDR_W   -1:0] wrk_wr_wide_xy_addr_x;    //  |        |
+    wire [                 WORD_EXT_W  -1:0] wrk_wr_wide_x_data_x;     //  |        |
+    wire [                 WORD_EXT_W  -1:0] wrk_wr_wide_y_data_x;     // /         |
+                                                                    //           |
+    wire                                     wrk_wr_narrow_xy_ena_x;   // \         |
+    wire [                 BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_x;  //  | NARROW |
+    wire [                 OP_ADDR_W   -1:0] wrk_wr_narrow_xy_addr_x;  //  |        |
+    wire [                 WORD_EXT_W  -1:0] wrk_wr_narrow_x_data_x;   //  |        |
+    wire [                 WORD_EXT_W  -1:0] wrk_wr_narrow_y_data_x;   // /        /
+                                                                    //
+    wire                                     io_wide_xy_ena_x;      // \        \
+    wire [                 BANK_ADDR_W -1:0] io_wide_xy_bank_x;     //  | WIDE   | IO
+    wire [                   OP_ADDR_W -1:0] io_wide_xy_addr_x;     //  |        |
+    wire [                  WORD_EXT_W -1:0] io_wide_x_data_x;      //  |        |
+    wire [                  WORD_EXT_W -1:0] io_wide_y_data_x;      // /         |
+                                                                    //           |
+    wire                                     io_narrow_xy_ena_x;    // \         |
+    wire [                 BANK_ADDR_W -1:0] io_narrow_xy_bank_x;   //  | NARROW |
+    wire [                   OP_ADDR_W -1:0] io_narrow_xy_addr_x;   //  |        |
+    wire [                  WORD_EXT_W -1:0] io_narrow_x_data_x;    //  |        |
+    wire [                  WORD_EXT_W -1:0] io_narrow_y_data_x;    // /        /
+                                                                    //
+    wire                                     wr_wide_xy_ena_y;      // \
+    wire [                 BANK_ADDR_W -1:0] wr_wide_xy_bank_y;     //
+    wire [                   OP_ADDR_W -1:0] wr_wide_xy_addr_y;     //
+    wire [                  WORD_EXT_W -1:0] wr_wide_x_data_y;      //
+    wire [                  WORD_EXT_W -1:0] wr_wide_y_data_y;      //
+                                                                    //
+    wire                                     wr_narrow_xy_ena_y;    //
+    wire [                 BANK_ADDR_W -1:0] wr_narrow_xy_bank_y;   //
+    wire [                   OP_ADDR_W -1:0] wr_narrow_xy_addr_y;   //
+    wire [                  WORD_EXT_W -1:0] wr_narrow_x_data_y;    //
+    wire [                  WORD_EXT_W -1:0] wr_narrow_y_data_y;    //
+                                                                    //
+    wire                                     rd_wide_xy_ena_y;      //
+    wire                                     rd_wide_xy_ena_aux_y;  //
+    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_y;     //
+    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_aux_y; //
+    wire [NUM_MULTS_HALF * OP_ADDR_W   -1:0] rd_wide_xy_addr_y;     //
+    wire [                 OP_ADDR_W   -1:0] rd_wide_xy_addr_aux_y; //
+    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_x_data_y;      //
+    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_y_data_y;      //
+    wire [                 WORD_EXT_W  -1:0] rd_wide_x_data_aux_y;  //
+    wire [                 WORD_EXT_W  -1:0] rd_wide_y_data_aux_y;  //
+                                                                    //
+    wire                                     rd_narrow_xy_ena_y;    //
+    wire [                 BANK_ADDR_W -1:0] rd_narrow_xy_bank_y;   //
+    wire [                 OP_ADDR_W   -1:0] rd_narrow_xy_addr_y;   //
+    wire [                 WORD_EXT_W  -1:0] rd_narrow_x_data_y;    //
+    wire [                 WORD_EXT_W  -1:0] rd_narrow_y_data_y;    //
+                                                                    //
+    wire                                     wrk_rd_wide_xy_ena_y;     //
+    wire [                 BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_y;    //
+    wire [                   OP_ADDR_W -1:0] wrk_rd_wide_xy_addr_y;    //
+    wire [                  WORD_EXT_W -1:0] wrk_rd_wide_x_data_y;     //
+    wire [                  WORD_EXT_W -1:0] wrk_rd_wide_y_data_y;     //
+                                                                    //
+    wire                                     wrk_rd_narrow_xy_ena_y;   //
+    wire [                 BANK_ADDR_W -1:0] wrk_rd_narrow_xy_bank_y;  //
+    wire [                   OP_ADDR_W -1:0] wrk_rd_narrow_xy_addr_y;  //
+    wire [                  WORD_EXT_W -1:0] wrk_rd_narrow_x_data_y;   //
+    wire [                  WORD_EXT_W -1:0] wrk_rd_narrow_y_data_y;   //
+
+    wire                                     wrk_wr_wide_xy_ena_y;     //
+    wire [                 BANK_ADDR_W -1:0] wrk_wr_wide_xy_bank_y;    //
+    wire [                   OP_ADDR_W -1:0] wrk_wr_wide_xy_addr_y;    //
+    wire [                  WORD_EXT_W -1:0] wrk_wr_wide_x_data_y;     //
+    wire [                  WORD_EXT_W -1:0] wrk_wr_wide_y_data_y;     //
+                                                                    //
+    wire                                     wrk_wr_narrow_xy_ena_y;   //
+    wire [                 BANK_ADDR_W -1:0] wrk_wr_narrow_xy_bank_y;  //
+    wire [                   OP_ADDR_W -1:0] wrk_wr_narrow_xy_addr_y;  //
+    wire [                  WORD_EXT_W -1:0] wrk_wr_narrow_x_data_y;   //
+    wire [                  WORD_EXT_W -1:0] wrk_wr_narrow_y_data_y;   //
+                                                                    //
+    wire                                     io_wide_xy_ena_y;      //
+    wire [                 BANK_ADDR_W -1:0] io_wide_xy_bank_y;     //
+    wire [                   OP_ADDR_W -1:0] io_wide_xy_addr_y;     //
+    wire [                  WORD_EXT_W -1:0] io_wide_x_data_y;      //
+    wire [                  WORD_EXT_W -1:0] io_wide_y_data_y;      //
+                                                                    //
+    wire                                     io_narrow_xy_ena_y;    //
+    wire [                 BANK_ADDR_W -1:0] io_narrow_xy_bank_y;   //
+    wire [                   OP_ADDR_W -1:0] io_narrow_xy_addr_y;   //
+    wire [                  WORD_EXT_W -1:0] io_narrow_x_data_y;    //
+    wire [                  WORD_EXT_W -1:0] io_narrow_y_data_y;    //
 
 
     //
@@ -189,38 +239,38 @@ module modexpng_core_top
     //
     wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank_x;
     wire [  OP_ADDR_W -1:0] rcmb_wide_xy_addr_x;
-    wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout_x;
-    wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout_x;
+    wire [ WORD_EXT_W -1:0] rcmb_wide_x_data_x;
+    wire [ WORD_EXT_W -1:0] rcmb_wide_y_data_x;
     wire                    rcmb_wide_xy_valid_x;
 
     wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank_x;
     wire [  OP_ADDR_W -1:0] rcmb_narrow_xy_addr_x;
-    wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout_x;
-    wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout_x;
+    wire [ WORD_EXT_W -1:0] rcmb_narrow_x_data_x;
+    wire [ WORD_EXT_W -1:0] rcmb_narrow_y_data_x;
     wire                    rcmb_narrow_xy_valid_x;
 
     wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank_x;
     wire [  OP_ADDR_W -1:0] rcmb_final_xy_addr_x;
-    wire [ WORD_EXT_W -1:0] rcmb_final_x_dout_x;
-    wire [ WORD_EXT_W -1:0] rcmb_final_y_dout_x;
+    wire [ WORD_EXT_W -1:0] rcmb_final_x_data_x;
+    wire [ WORD_EXT_W -1:0] rcmb_final_y_data_x;
     wire                    rcmb_final_xy_valid_x;
 
     wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank_y;
     wire [  OP_ADDR_W -1:0] rcmb_wide_xy_addr_y;
-    wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout_y;
-    wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout_y;
+    wire [ WORD_EXT_W -1:0] rcmb_wide_x_data_y;
+    wire [ WORD_EXT_W -1:0] rcmb_wide_y_data_y;
     wire                    rcmb_wide_xy_valid_y;
 
     wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank_y;
     wire [  OP_ADDR_W -1:0] rcmb_narrow_xy_addr_y;
-    wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout_y;
-    wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout_y;
+    wire [ WORD_EXT_W -1:0] rcmb_narrow_x_data_y;
+    wire [ WORD_EXT_W -1:0] rcmb_narrow_y_data_y;
     wire                    rcmb_narrow_xy_valid_y;
 
     wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank_y;
     wire [  OP_ADDR_W -1:0] rcmb_final_xy_addr_y;
-    wire [ WORD_EXT_W -1:0] rcmb_final_x_dout_y;
-    wire [ WORD_EXT_W -1:0] rcmb_final_y_dout_y;
+    wire [ WORD_EXT_W -1:0] rcmb_final_x_data_y;
+    wire [ WORD_EXT_W -1:0] rcmb_final_y_data_y;
     wire                    rcmb_final_xy_valid_y;
     
     
@@ -229,26 +279,26 @@ module modexpng_core_top
     //
     wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank_x;
     wire [  OP_ADDR_W -1:0] rdct_wide_xy_addr_x;
-    wire [ WORD_EXT_W -1:0] rdct_wide_x_dout_x;
-    wire [ WORD_EXT_W -1:0] rdct_wide_y_dout_x;
+    wire [ WORD_EXT_W -1:0] rdct_wide_x_data_x;
+    wire [ WORD_EXT_W -1:0] rdct_wide_y_data_x;
     wire                    rdct_wide_xy_valid_x;
 
     wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank_x;
     wire [  OP_ADDR_W -1:0] rdct_narrow_xy_addr_x;
-    wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout_x;
-    wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout_x;
+    wire [ WORD_EXT_W -1:0] rdct_narrow_x_data_x;
+    wire [ WORD_EXT_W -1:0] rdct_narrow_y_data_x;
     wire                    rdct_narrow_xy_valid_x;
 
     wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank_y;
     wire [  OP_ADDR_W -1:0] rdct_wide_xy_addr_y;
-    wire [ WORD_EXT_W -1:0] rdct_wide_x_dout_y;
-    wire [ WORD_EXT_W -1:0] rdct_wide_y_dout_y;
+    wire [ WORD_EXT_W -1:0] rdct_wide_x_data_y;
+    wire [ WORD_EXT_W -1:0] rdct_wide_y_data_y;
     wire                    rdct_wide_xy_valid_y;
 
     wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank_y;
     wire [  OP_ADDR_W -1:0] rdct_narrow_xy_addr_y;
-    wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout_y;
-    wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout_y;
+    wire [ WORD_EXT_W -1:0] rdct_narrow_x_data_y;
+    wire [ WORD_EXT_W -1:0] rdct_narrow_y_data_y;
     wire                    rdct_narrow_xy_valid_y;
 
 
@@ -263,14 +313,14 @@ module modexpng_core_top
         .wr_wide_xy_ena         (wr_wide_xy_ena_x),
         .wr_wide_xy_bank        (wr_wide_xy_bank_x),
         .wr_wide_xy_addr        (wr_wide_xy_addr_x),
-        .wr_wide_x_din          (wr_wide_x_din_x),
-        .wr_wide_y_din          (wr_wide_y_din_x),
+        .wr_wide_x_din          (wr_wide_x_data_x),
+        .wr_wide_y_din          (wr_wide_y_data_x),
 
         .wr_narrow_xy_ena       (wr_narrow_xy_ena_x),
         .wr_narrow_xy_bank      (wr_narrow_xy_bank_x),
         .wr_narrow_xy_addr      (wr_narrow_xy_addr_x),
-        .wr_narrow_x_din        (wr_narrow_x_din_x),
-        .wr_narrow_y_din        (wr_narrow_y_din_x),
+        .wr_narrow_x_din        (wr_narrow_x_data_x),
+        .wr_narrow_y_din        (wr_narrow_y_data_x),
 
         .rd_wide_xy_ena         (rd_wide_xy_ena_x),
         .rd_wide_xy_ena_aux     (rd_wide_xy_ena_aux_x),
@@ -278,16 +328,28 @@ module modexpng_core_top
         .rd_wide_xy_bank_aux    (rd_wide_xy_bank_aux_x),
         .rd_wide_xy_addr        (rd_wide_xy_addr_x),
         .rd_wide_xy_addr_aux    (rd_wide_xy_addr_aux_x),
-        .rd_wide_x_dout         (rd_wide_x_dout_x),
-        .rd_wide_y_dout         (rd_wide_y_dout_x),
-        .rd_wide_x_dout_aux     (rd_wide_x_dout_aux_x),
-        .rd_wide_y_dout_aux     (rd_wide_y_dout_aux_x),
+        .rd_wide_x_dout         (rd_wide_x_data_x),
+        .rd_wide_y_dout         (rd_wide_y_data_x),
+        .rd_wide_x_dout_aux     (rd_wide_x_data_aux_x),
+        .rd_wide_y_dout_aux     (rd_wide_y_data_aux_x),
 
         .rd_narrow_xy_ena       (rd_narrow_xy_ena_x),
         .rd_narrow_xy_bank      (rd_narrow_xy_bank_x),
         .rd_narrow_xy_addr      (rd_narrow_xy_addr_x),
-        .rd_narrow_x_dout       (rd_narrow_x_dout_x),
-        .rd_narrow_y_dout       (rd_narrow_y_dout_x)
+        .rd_narrow_x_dout       (rd_narrow_x_data_x),
+        .rd_narrow_y_dout       (rd_narrow_y_data_x),
+        
+        .wrk_wide_xy_ena        (wrk_rd_wide_xy_ena_x),
+        .wrk_wide_xy_bank       (wrk_rd_wide_xy_bank_x),
+        .wrk_wide_xy_addr       (wrk_rd_wide_xy_addr_x),
+        .wrk_wide_x_dout        (wrk_rd_wide_x_data_x),
+        .wrk_wide_y_dout        (wrk_rd_wide_y_data_x),
+    
+        .wrk_narrow_xy_ena      (wrk_rd_narrow_xy_ena_x),
+        .wrk_narrow_xy_bank     (wrk_rd_narrow_xy_bank_x),
+        .wrk_narrow_xy_addr     (wrk_rd_narrow_xy_addr_x),
+        .wrk_narrow_x_dout      (wrk_rd_narrow_x_data_x),
+        .wrk_narrow_y_dout      (wrk_rd_narrow_y_data_x)
     );
 
     modexpng_storage_block storage_block_y
@@ -298,14 +360,14 @@ module modexpng_core_top
         .wr_wide_xy_ena         (wr_wide_xy_ena_y),
         .wr_wide_xy_bank        (wr_wide_xy_bank_y),
         .wr_wide_xy_addr        (wr_wide_xy_addr_y),
-        .wr_wide_x_din          (wr_wide_x_din_y),
-        .wr_wide_y_din          (wr_wide_y_din_y),
+        .wr_wide_x_din          (wr_wide_x_data_y),
+        .wr_wide_y_din          (wr_wide_y_data_y),
 
         .wr_narrow_xy_ena       (wr_narrow_xy_ena_y),
         .wr_narrow_xy_bank      (wr_narrow_xy_bank_y),
         .wr_narrow_xy_addr      (wr_narrow_xy_addr_y),
-        .wr_narrow_x_din        (wr_narrow_x_din_y),
-        .wr_narrow_y_din        (wr_narrow_y_din_y),
+        .wr_narrow_x_din        (wr_narrow_x_data_y),
+        .wr_narrow_y_din        (wr_narrow_y_data_y),
 
         .rd_wide_xy_ena         (rd_wide_xy_ena_y),
         .rd_wide_xy_ena_aux     (rd_wide_xy_ena_aux_y),
@@ -313,16 +375,29 @@ module modexpng_core_top
         .rd_wide_xy_bank_aux    (rd_wide_xy_bank_aux_y),
         .rd_wide_xy_addr        (rd_wide_xy_addr_y),
         .rd_wide_xy_addr_aux    (rd_wide_xy_addr_aux_y),
-        .rd_wide_x_dout         (rd_wide_x_dout_y),
-        .rd_wide_y_dout         (rd_wide_y_dout_y),
-        .rd_wide_x_dout_aux     (rd_wide_x_dout_aux_y),
-        .rd_wide_y_dout_aux     (rd_wide_y_dout_aux_y),
+        .rd_wide_x_dout         (rd_wide_x_data_y),
+        .rd_wide_y_dout         (rd_wide_y_data_y),
+        .rd_wide_x_dout_aux     (rd_wide_x_data_aux_y),
+        .rd_wide_y_dout_aux     (rd_wide_y_data_aux_y),
 
         .rd_narrow_xy_ena       (rd_narrow_xy_ena_y),
         .rd_narrow_xy_bank      (rd_narrow_xy_bank_y),
         .rd_narrow_xy_addr      (rd_narrow_xy_addr_y),
-        .rd_narrow_x_dout       (rd_narrow_x_dout_y),
-        .rd_narrow_y_dout       (rd_narrow_y_dout_y)
+        .rd_narrow_x_dout       (rd_narrow_x_data_y),
+        .rd_narrow_y_dout       (rd_narrow_y_data_y),
+        
+        .wrk_wide_xy_ena        (wrk_rd_wide_xy_ena_y),
+        .wrk_wide_xy_bank       (wrk_rd_wide_xy_bank_y),
+        .wrk_wide_xy_addr       (wrk_rd_wide_xy_addr_y),
+        .wrk_wide_x_dout        (wrk_rd_wide_x_data_y),
+        .wrk_wide_y_dout        (wrk_rd_wide_y_data_y),
+    
+        .wrk_narrow_xy_ena      (wrk_rd_narrow_xy_ena_y),
+        .wrk_narrow_xy_bank     (wrk_rd_narrow_xy_bank_y),
+        .wrk_narrow_xy_addr     (wrk_rd_narrow_xy_addr_y),
+        .wrk_narrow_x_dout      (wrk_rd_narrow_x_data_y),
+        .wrk_narrow_y_dout      (wrk_rd_narrow_y_data_y)
+
     );
 
     
@@ -337,50 +412,62 @@ module modexpng_core_top
         .wr_wide_xy_ena         (wr_wide_xy_ena_x),
         .wr_wide_xy_bank        (wr_wide_xy_bank_x),
         .wr_wide_xy_addr        (wr_wide_xy_addr_x),
-        .wr_wide_x_din          (wr_wide_x_din_x),
-        .wr_wide_y_din          (wr_wide_y_din_x),
+        .wr_wide_x_dout         (wr_wide_x_data_x),
+        .wr_wide_y_dout         (wr_wide_y_data_x),
     
         .wr_narrow_xy_ena       (wr_narrow_xy_ena_x),
         .wr_narrow_xy_bank      (wr_narrow_xy_bank_x),
         .wr_narrow_xy_addr      (wr_narrow_xy_addr_x),
-        .wr_narrow_x_din        (wr_narrow_x_din_x),
-        .wr_narrow_y_din        (wr_narrow_y_din_x),
+        .wr_narrow_x_dout       (wr_narrow_x_data_x),
+        .wr_narrow_y_dout       (wr_narrow_y_data_x),
         
-        .ext_wide_xy_ena        (ext_wide_xy_ena_x),
-        .ext_wide_xy_bank       (ext_wide_xy_bank_x),
-        .ext_wide_xy_addr       (ext_wide_xy_addr_x),
-        .ext_wide_x_din         (ext_wide_x_din_x),
-        .ext_wide_y_din         (ext_wide_y_din_x),
+        .io_wide_xy_ena         (io_wide_xy_ena_x),
+        .io_wide_xy_bank        (io_wide_xy_bank_x),
+        .io_wide_xy_addr        (io_wide_xy_addr_x),
+        .io_wide_x_din          (io_wide_x_data_x),
+        .io_wide_y_din          (io_wide_y_data_x),
     
-        .ext_narrow_xy_ena      (ext_narrow_xy_ena_x),
-        .ext_narrow_xy_bank     (ext_narrow_xy_bank_x),
-        .ext_narrow_xy_addr     (ext_narrow_xy_addr_x),
-        .ext_narrow_x_din       (ext_narrow_x_din_x),
-        .ext_narrow_y_din       (ext_narrow_y_din_x),
+        .io_narrow_xy_ena       (io_narrow_xy_ena_x),
+        .io_narrow_xy_bank      (io_narrow_xy_bank_x),
+        .io_narrow_xy_addr      (io_narrow_xy_addr_x),
+        .io_narrow_x_din        (io_narrow_x_data_x),
+        .io_narrow_y_din        (io_narrow_y_data_x),
         
         .rcmb_wide_xy_bank      (rcmb_wide_xy_bank_x),
         .rcmb_wide_xy_addr      (rcmb_wide_xy_addr_x),
-        .rcmb_wide_x_din        (rcmb_wide_x_dout_x),
-        .rcmb_wide_y_din        (rcmb_wide_y_dout_x),
+        .rcmb_wide_x_din        (rcmb_wide_x_data_x),
+        .rcmb_wide_y_din        (rcmb_wide_y_data_x),
         .rcmb_wide_xy_ena       (rcmb_wide_xy_valid_x),
 
         .rcmb_narrow_xy_bank    (rcmb_narrow_xy_bank_x),
         .rcmb_narrow_xy_addr    (rcmb_narrow_xy_addr_x),
-        .rcmb_narrow_x_din      (rcmb_narrow_x_dout_x),
-        .rcmb_narrow_y_din      (rcmb_narrow_y_dout_x),
+        .rcmb_narrow_x_din      (rcmb_narrow_x_data_x),
+        .rcmb_narrow_y_din      (rcmb_narrow_y_data_x),
         .rcmb_narrow_xy_ena     (rcmb_narrow_xy_valid_x),
         
         .rdct_wide_xy_bank      (rdct_wide_xy_bank_x),
         .rdct_wide_xy_addr      (rdct_wide_xy_addr_x),
-        .rdct_wide_x_din        (rdct_wide_x_dout_x),   // TODO: maybe just rename to {x|y}_x, since that's an
-        .rdct_wide_y_din        (rdct_wide_y_dout_x),   // internal signal??
+        .rdct_wide_x_din        (rdct_wide_x_data_x),
+        .rdct_wide_y_din        (rdct_wide_y_data_x),
         .rdct_wide_xy_valid     (rdct_wide_xy_valid_x),
 
         .rdct_narrow_xy_bank    (rdct_narrow_xy_bank_x),
         .rdct_narrow_xy_addr    (rdct_narrow_xy_addr_x),
-        .rdct_narrow_x_din      (rdct_narrow_x_dout_x),
-        .rdct_narrow_y_din      (rdct_narrow_y_dout_x),
-        .rdct_narrow_xy_valid   (rdct_narrow_xy_valid_x)
+        .rdct_narrow_x_din      (rdct_narrow_x_data_x),
+        .rdct_narrow_y_din      (rdct_narrow_y_data_x),
+        .rdct_narrow_xy_valid   (rdct_narrow_xy_valid_x),
+        
+        .wrk_wide_xy_ena        (wrk_wr_wide_xy_ena_x),
+        .wrk_wide_xy_bank       (wrk_wr_wide_xy_bank_x),
+        .wrk_wide_xy_addr       (wrk_wr_wide_xy_addr_x),
+        .wrk_wide_x_din         (wrk_wr_wide_x_data_x),
+        .wrk_wide_y_din         (wrk_wr_wide_y_data_x),
+
+        .wrk_narrow_xy_ena      (wrk_wr_narrow_xy_ena_x),
+        .wrk_narrow_xy_bank     (wrk_wr_narrow_xy_bank_x),
+        .wrk_narrow_xy_addr     (wrk_wr_narrow_xy_addr_x),
+        .wrk_narrow_x_din       (wrk_wr_narrow_x_data_x),
+        .wrk_narrow_y_din       (wrk_wr_narrow_y_data_x)
     );
 
     modexpng_storage_manager storage_manager_y
@@ -391,51 +478,62 @@ module modexpng_core_top
         .wr_wide_xy_ena         (wr_wide_xy_ena_y),
         .wr_wide_xy_bank        (wr_wide_xy_bank_y),
         .wr_wide_xy_addr        (wr_wide_xy_addr_y),
-        .wr_wide_x_din          (wr_wide_x_din_y),
-        .wr_wide_y_din          (wr_wide_y_din_y),
+        .wr_wide_x_dout         (wr_wide_x_data_y),
+        .wr_wide_y_dout         (wr_wide_y_data_y),
     
         .wr_narrow_xy_ena       (wr_narrow_xy_ena_y),
         .wr_narrow_xy_bank      (wr_narrow_xy_bank_y),
         .wr_narrow_xy_addr      (wr_narrow_xy_addr_y),
-        .wr_narrow_x_din        (wr_narrow_x_din_y),
-        .wr_narrow_y_din        (wr_narrow_y_din_y),
+        .wr_narrow_x_dout       (wr_narrow_x_data_y),
+        .wr_narrow_y_dout       (wr_narrow_y_data_y),
         
-        .ext_wide_xy_ena        (ext_wide_xy_ena_y),
-        .ext_wide_xy_bank       (ext_wide_xy_bank_y),
-        .ext_wide_xy_addr       (ext_wide_xy_addr_y),
-        .ext_wide_x_din         (ext_wide_x_din_y),
-        .ext_wide_y_din         (ext_wide_y_din_y),
+        .io_wide_xy_ena         (io_wide_xy_ena_y),
+        .io_wide_xy_bank        (io_wide_xy_bank_y),
+        .io_wide_xy_addr        (io_wide_xy_addr_y),
+        .io_wide_x_din          (io_wide_x_data_y),
+        .io_wide_y_din          (io_wide_y_data_y),
     
-        .ext_narrow_xy_ena      (ext_narrow_xy_ena_y),
-        .ext_narrow_xy_bank     (ext_narrow_xy_bank_y),
-        .ext_narrow_xy_addr     (ext_narrow_xy_addr_y),
-        .ext_narrow_x_din       (ext_narrow_x_din_y),
-        .ext_narrow_y_din       (ext_narrow_y_din_y),
+        .io_narrow_xy_ena       (io_narrow_xy_ena_y),
+        .io_narrow_xy_bank      (io_narrow_xy_bank_y),
+        .io_narrow_xy_addr      (io_narrow_xy_addr_y),
+        .io_narrow_x_din        (io_narrow_x_data_y),
+        .io_narrow_y_din        (io_narrow_y_data_y),
         
         .rcmb_wide_xy_bank      (rcmb_wide_xy_bank_y),
         .rcmb_wide_xy_addr      (rcmb_wide_xy_addr_y),
-        .rcmb_wide_x_din        (rcmb_wide_x_dout_y),
-        .rcmb_wide_y_din        (rcmb_wide_y_dout_y),
+        .rcmb_wide_x_din        (rcmb_wide_x_data_y),
+        .rcmb_wide_y_din        (rcmb_wide_y_data_y),
         .rcmb_wide_xy_ena       (rcmb_wide_xy_valid_y),
 
         .rcmb_narrow_xy_bank    (rcmb_narrow_xy_bank_y),
         .rcmb_narrow_xy_addr    (rcmb_narrow_xy_addr_y),
-        .rcmb_narrow_x_din      (rcmb_narrow_x_dout_y),
-        .rcmb_narrow_y_din      (rcmb_narrow_y_dout_y),
+        .rcmb_narrow_x_din      (rcmb_narrow_x_data_y),
+        .rcmb_narrow_y_din      (rcmb_narrow_y_data_y),
         .rcmb_narrow_xy_ena     (rcmb_narrow_xy_valid_y),
         
         .rdct_wide_xy_bank      (rdct_wide_xy_bank_y),
         .rdct_wide_xy_addr      (rdct_wide_xy_addr_y),
-        .rdct_wide_x_din        (rdct_wide_x_dout_y),
-        .rdct_wide_y_din        (rdct_wide_y_dout_y),
+        .rdct_wide_x_din        (rdct_wide_x_data_y),
+        .rdct_wide_y_din        (rdct_wide_y_data_y),
         .rdct_wide_xy_valid     (rdct_wide_xy_valid_y),
 
         .rdct_narrow_xy_bank    (rdct_narrow_xy_bank_y),
         .rdct_narrow_xy_addr    (rdct_narrow_xy_addr_y),
-        .rdct_narrow_x_din      (rdct_narrow_x_dout_y),
-        .rdct_narrow_y_din      (rdct_narrow_y_dout_y),
-        .rdct_narrow_xy_valid   (rdct_narrow_xy_valid_y)
-
+        .rdct_narrow_x_din      (rdct_narrow_x_data_y),
+        .rdct_narrow_y_din      (rdct_narrow_y_data_y),
+        .rdct_narrow_xy_valid   (rdct_narrow_xy_valid_y),
+        
+        .wrk_wide_xy_ena        (wrk_wr_wide_xy_ena_y),
+        .wrk_wide_xy_bank       (wrk_wr_wide_xy_bank_y),
+        .wrk_wide_xy_addr       (wrk_wr_wide_xy_addr_y),
+        .wrk_wide_x_din         (wrk_wr_wide_x_data_y),
+        .wrk_wide_y_din         (wrk_wr_wide_y_data_y),
+
+        .wrk_narrow_xy_ena      (wrk_wr_narrow_xy_ena_y),
+        .wrk_narrow_xy_bank     (wrk_wr_narrow_xy_bank_y),
+        .wrk_narrow_xy_addr     (wrk_wr_narrow_xy_addr_y),
+        .wrk_narrow_x_din       (wrk_wr_narrow_x_data_y),
+        .wrk_narrow_y_din       (wrk_wr_narrow_y_data_y)
     );
 
 
@@ -444,16 +542,16 @@ module modexpng_core_top
     //
     wire                                io_in_1_en;
     wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_1_addr;
-    wire [              WORD_W    -1:0] io_in_1_dout;
+    wire [              WORD_W    -1:0] io_in_1_data;
     
     wire                                io_in_2_en;
     wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_in_2_addr;
-    wire [              WORD_W    -1:0] io_in_2_dout;
+    wire [              WORD_W    -1:0] io_in_2_data;
     
     wire                                io_out_en;
     wire                                io_out_we;
     wire [BANK_ADDR_W + OP_ADDR_W -1:0] io_out_addr;
-    wire [              WORD_W    -1:0] io_out_din;
+    wire [              WORD_W    -1:0] io_out_data;
     
     // TODO: Separate reset for clock domains (core/bus)???
     
@@ -472,16 +570,16 @@ module modexpng_core_top
     
         .in_1_en        (io_in_1_en),
         .in_1_addr      (io_in_1_addr),
-        .in_1_dout      (io_in_1_dout),
+        .in_1_dout      (io_in_1_data),
     
         .in_2_en        (io_in_2_en),
         .in_2_addr      (io_in_2_addr),
-        .in_2_dout      (io_in_2_dout),
+        .in_2_dout      (io_in_2_data),
     
         .out_en         (io_out_en),
         .out_we         (io_out_we),
         .out_addr       (io_out_addr),
-        .out_din        (io_out_din)
+        .out_din        (io_out_data)
     );
 
 
@@ -497,59 +595,65 @@ module modexpng_core_top
     reg  [OP_ADDR_W    -1:0] io_mgr_word_index_last;
     reg  [UOP_OPCODE_W -1:0] io_mgr_opcode;
     
+    wire [WORD_W -1:0] wrk_rd_narrow_x_data_x_trunc = wrk_rd_narrow_x_data_x[WORD_W-1:0]; // low WORD_W bits of the worker's narrow X read data (*_x nets), handed to io_manager below
+    wire [WORD_W -1:0] wrk_rd_narrow_x_data_y_trunc = wrk_rd_narrow_x_data_y[WORD_W-1:0]; // same slice for the *_y nets
+    
     modexpng_io_manager io_manager
     (
-        .clk                    (clk),
-        .rst                    (rst),
+        .clk                        (clk),
+        .rst                        (rst),
     
-        .ena                    (io_mgr_ena),
-        .rdy                    (io_mgr_rdy),
+        .ena                        (io_mgr_ena),
+        .rdy                        (io_mgr_rdy),
     
-        .sel_crt                (io_mgr_sel_crt),
-        .sel_aux                (io_mgr_sel_aux),
-        .sel_in                 (io_mgr_sel_in),
-        .sel_out                (io_mgr_sel_out),
+        .sel_crt                    (io_mgr_sel_crt),
+        .sel_aux                    (io_mgr_sel_aux),
+        .sel_in                     (io_mgr_sel_in),
+        .sel_out                    (io_mgr_sel_out),
         
-        .opcode                 (io_mgr_opcode),
+        .opcode                     (io_mgr_opcode),
         
-        .word_index_last        (io_mgr_word_index_last),
+        .word_index_last            (io_mgr_word_index_last),
     
-        .ext_wide_xy_ena_x      (ext_wide_xy_ena_x),
-        .ext_wide_xy_bank_x     (ext_wide_xy_bank_x),
-        .ext_wide_xy_addr_x     (ext_wide_xy_addr_x),
-        .ext_wide_x_din_x       (ext_wide_x_din_x),
-        .ext_wide_y_din_x       (ext_wide_y_din_x),
-
-        .ext_narrow_xy_ena_x    (ext_narrow_xy_ena_x),
-        .ext_narrow_xy_bank_x   (ext_narrow_xy_bank_x),
-        .ext_narrow_xy_addr_x   (ext_narrow_xy_addr_x),
-        .ext_narrow_x_din_x     (ext_narrow_x_din_x),
-        .ext_narrow_y_din_x     (ext_narrow_y_din_x),
-
-        .ext_wide_xy_ena_y      (ext_wide_xy_ena_y),
-        .ext_wide_xy_bank_y     (ext_wide_xy_bank_y),
-        .ext_wide_xy_addr_y     (ext_wide_xy_addr_y),
-        .ext_wide_x_din_y       (ext_wide_x_din_y),
-        .ext_wide_y_din_y       (ext_wide_y_din_y),
-
-        .ext_narrow_xy_ena_y    (ext_narrow_xy_ena_y),
-        .ext_narrow_xy_bank_y   (ext_narrow_xy_bank_y),
-        .ext_narrow_xy_addr_y   (ext_narrow_xy_addr_y),
-        .ext_narrow_x_din_y     (ext_narrow_x_din_y),
-        .ext_narrow_y_din_y     (ext_narrow_y_din_y),
+        .io_wide_xy_ena_x           (io_wide_xy_ena_x),
+        .io_wide_xy_bank_x          (io_wide_xy_bank_x),
+        .io_wide_xy_addr_x          (io_wide_xy_addr_x),
+        .io_wide_x_din_x            (io_wide_x_data_x),
+        .io_wide_y_din_x            (io_wide_y_data_x),
+
+        .io_narrow_xy_ena_x         (io_narrow_xy_ena_x),
+        .io_narrow_xy_bank_x        (io_narrow_xy_bank_x),
+        .io_narrow_xy_addr_x        (io_narrow_xy_addr_x),
+        .io_narrow_x_din_x          (io_narrow_x_data_x),
+        .io_narrow_y_din_x          (io_narrow_y_data_x),
+
+        .io_wide_xy_ena_y           (io_wide_xy_ena_y),
+        .io_wide_xy_bank_y          (io_wide_xy_bank_y),
+        .io_wide_xy_addr_y          (io_wide_xy_addr_y),
+        .io_wide_x_din_y            (io_wide_x_data_y),
+        .io_wide_y_din_y            (io_wide_y_data_y),
+
+        .io_narrow_xy_ena_y         (io_narrow_xy_ena_y),
+        .io_narrow_xy_bank_y        (io_narrow_xy_bank_y),
+        .io_narrow_xy_addr_y        (io_narrow_xy_addr_y),
+        .io_narrow_x_din_y          (io_narrow_x_data_y),
+        .io_narrow_y_din_y          (io_narrow_y_data_y),
     
-        .io_in_1_en             (io_in_1_en),
-        .io_in_1_addr           (io_in_1_addr),
-        .io_in_1_dout           (io_in_1_dout),
+        .io_in_1_en                 (io_in_1_en),
+        .io_in_1_addr               (io_in_1_addr),
+        .io_in_1_din                (io_in_1_data),
     
-        .io_in_2_en             (io_in_2_en),
-        .io_in_2_addr           (io_in_2_addr),
-        .io_in_2_dout           (io_in_2_dout),
+        .io_in_2_en                 (io_in_2_en),
+        .io_in_2_addr               (io_in_2_addr),
+        .io_in_2_din                (io_in_2_data),
     
-        .io_out_en              (io_out_en),
-        .io_out_we              (io_out_we),
-        .io_out_addr            (io_out_addr),
-        .io_out_din             (io_out_din)
+        .io_out_en                  (io_out_en),
+        .io_out_we                  (io_out_we),
+        .io_out_addr                (io_out_addr),
+        .io_out_dout                (io_out_data),
+        
+        .wrk_narrow_x_din_x_trunc   (wrk_rd_narrow_x_data_x_trunc),
+        .wrk_narrow_x_din_y_trunc   (wrk_rd_narrow_x_data_y_trunc)
     );
 
 
@@ -608,33 +712,33 @@ module modexpng_core_top
         .rd_wide_xy_bank_aux        (rd_wide_xy_bank_aux_x),
         .rd_wide_xy_addr            (rd_wide_xy_addr_x),
         .rd_wide_xy_addr_aux        (rd_wide_xy_addr_aux_x),
-        .rd_wide_x_dout             (rd_wide_x_dout_x),
-        .rd_wide_y_dout             (rd_wide_y_dout_x),
-        .rd_wide_x_dout_aux         (rd_wide_x_dout_aux_x),
-        .rd_wide_y_dout_aux         (rd_wide_y_dout_aux_x),
+        .rd_wide_x_din              (rd_wide_x_data_x),
+        .rd_wide_y_din              (rd_wide_y_data_x),
+        .rd_wide_x_din_aux          (rd_wide_x_data_aux_x),
+        .rd_wide_y_din_aux          (rd_wide_y_data_aux_x),
 
         .rd_narrow_xy_ena           (rd_narrow_xy_ena_x),
         .rd_narrow_xy_bank          (rd_narrow_xy_bank_x),
         .rd_narrow_xy_addr          (rd_narrow_xy_addr_x),
-        .rd_narrow_x_dout           (rd_narrow_x_dout_x),
-        .rd_narrow_y_dout           (rd_narrow_y_dout_x),
+        .rd_narrow_x_din            (rd_narrow_x_data_x),
+        .rd_narrow_y_din            (rd_narrow_y_data_x),
         
         .rcmb_wide_xy_bank          (rcmb_wide_xy_bank_x),
         .rcmb_wide_xy_addr          (rcmb_wide_xy_addr_x),
-        .rcmb_wide_x_dout           (rcmb_wide_x_dout_x),
-        .rcmb_wide_y_dout           (rcmb_wide_y_dout_x),
+        .rcmb_wide_x_dout           (rcmb_wide_x_data_x),
+        .rcmb_wide_y_dout           (rcmb_wide_y_data_x),
         .rcmb_wide_xy_valid         (rcmb_wide_xy_valid_x),
 
         .rcmb_narrow_xy_bank        (rcmb_narrow_xy_bank_x),
         .rcmb_narrow_xy_addr        (rcmb_narrow_xy_addr_x),
-        .rcmb_narrow_x_dout         (rcmb_narrow_x_dout_x),
-        .rcmb_narrow_y_dout         (rcmb_narrow_y_dout_x),
+        .rcmb_narrow_x_dout         (rcmb_narrow_x_data_x),
+        .rcmb_narrow_y_dout         (rcmb_narrow_y_data_x),
         .rcmb_narrow_xy_valid       (rcmb_narrow_xy_valid_x),
         
         .rcmb_xy_bank               (rcmb_final_xy_bank_x),
         .rcmb_xy_addr               (rcmb_final_xy_addr_x),
-        .rcmb_x_dout                (rcmb_final_x_dout_x),
-        .rcmb_y_dout                (rcmb_final_y_dout_x),
+        .rcmb_x_dout                (rcmb_final_x_data_x),
+        .rcmb_y_dout                (rcmb_final_y_data_x),
         .rcmb_xy_valid              (rcmb_final_xy_valid_x),
         
         .rdct_ena                   (rdct_ena_x),
@@ -663,33 +767,33 @@ module modexpng_core_top
         .rd_wide_xy_bank_aux        (rd_wide_xy_bank_aux_y),
         .rd_wide_xy_addr            (rd_wide_xy_addr_y),
         .rd_wide_xy_addr_aux        (rd_wide_xy_addr_aux_y),
-        .rd_wide_x_dout             (rd_wide_x_dout_y),
-        .rd_wide_y_dout             (rd_wide_y_dout_y),
-        .rd_wide_x_dout_aux         (rd_wide_x_dout_aux_y),
-        .rd_wide_y_dout_aux         (rd_wide_y_dout_aux_y),
+        .rd_wide_x_din              (rd_wide_x_data_y),
+        .rd_wide_y_din              (rd_wide_y_data_y),
+        .rd_wide_x_din_aux          (rd_wide_x_data_aux_y),
+        .rd_wide_y_din_aux          (rd_wide_y_data_aux_y),
 
         .rd_narrow_xy_ena           (rd_narrow_xy_ena_y),
         .rd_narrow_xy_bank          (rd_narrow_xy_bank_y),
         .rd_narrow_xy_addr          (rd_narrow_xy_addr_y),
-        .rd_narrow_x_dout           (rd_narrow_x_dout_y),
-        .rd_narrow_y_dout           (rd_narrow_y_dout_y),
+        .rd_narrow_x_din            (rd_narrow_x_data_y),
+        .rd_narrow_y_din            (rd_narrow_y_data_y),
         
         .rcmb_wide_xy_bank          (rcmb_wide_xy_bank_y),
         .rcmb_wide_xy_addr          (rcmb_wide_xy_addr_y),
-        .rcmb_wide_x_dout           (rcmb_wide_x_dout_y),
-        .rcmb_wide_y_dout           (rcmb_wide_y_dout_y),
+        .rcmb_wide_x_dout           (rcmb_wide_x_data_y),
+        .rcmb_wide_y_dout           (rcmb_wide_y_data_y),
         .rcmb_wide_xy_valid         (rcmb_wide_xy_valid_y),
 
         .rcmb_narrow_xy_bank        (rcmb_narrow_xy_bank_y),
         .rcmb_narrow_xy_addr        (rcmb_narrow_xy_addr_y),
-        .rcmb_narrow_x_dout         (rcmb_narrow_x_dout_y),
-        .rcmb_narrow_y_dout         (rcmb_narrow_y_dout_y),
+        .rcmb_narrow_x_dout         (rcmb_narrow_x_data_y),
+        .rcmb_narrow_y_dout         (rcmb_narrow_y_data_y),
         .rcmb_narrow_xy_valid       (rcmb_narrow_xy_valid_y),
         
         .rcmb_xy_bank               (rcmb_final_xy_bank_y),
         .rcmb_xy_addr               (rcmb_final_xy_addr_y),
-        .rcmb_x_dout                (rcmb_final_x_dout_y),
-        .rcmb_y_dout                (rcmb_final_y_dout_y),
+        .rcmb_x_dout                (rcmb_final_x_data_y),
+        .rcmb_y_dout                (rcmb_final_y_data_y),
         .rcmb_xy_valid              (rcmb_final_xy_valid_y),
         
         .rdct_ena                   (rdct_ena_y),
@@ -723,25 +827,25 @@ module modexpng_core_top
 
         .rd_wide_xy_addr_aux    (rd_wide_xy_addr_aux_x),
         .rd_wide_xy_bank_aux    (rd_wide_xy_bank_aux_x),
-        .rd_wide_x_dout_aux     (rd_wide_x_dout_aux_x),
-        .rd_wide_y_dout_aux     (rd_wide_y_dout_aux_x),
+        .rd_wide_x_din_aux      (rd_wide_x_data_aux_x),
+        .rd_wide_y_din_aux      (rd_wide_y_data_aux_x),
         
         .rcmb_final_xy_bank     (rcmb_final_xy_bank_x),
         .rcmb_final_xy_addr     (rcmb_final_xy_addr_x),
-        .rcmb_final_x_dout      (rcmb_final_x_dout_x),
-        .rcmb_final_y_dout      (rcmb_final_y_dout_x),
+        .rcmb_final_x_din       (rcmb_final_x_data_x),
+        .rcmb_final_y_din       (rcmb_final_y_data_x),
         .rcmb_final_xy_valid    (rcmb_final_xy_valid_x),
         
         .rdct_wide_xy_bank      (rdct_wide_xy_bank_x),
         .rdct_wide_xy_addr      (rdct_wide_xy_addr_x),
-        .rdct_wide_x_dout       (rdct_wide_x_dout_x),
-        .rdct_wide_y_dout       (rdct_wide_y_dout_x),
+        .rdct_wide_x_dout       (rdct_wide_x_data_x),
+        .rdct_wide_y_dout       (rdct_wide_y_data_x),
         .rdct_wide_xy_valid     (rdct_wide_xy_valid_x),
         
         .rdct_narrow_xy_bank    (rdct_narrow_xy_bank_x),
         .rdct_narrow_xy_addr    (rdct_narrow_xy_addr_x),
-        .rdct_narrow_x_dout     (rdct_narrow_x_dout_x),
-        .rdct_narrow_y_dout     (rdct_narrow_y_dout_x),
+        .rdct_narrow_x_dout     (rdct_narrow_x_data_x),
+        .rdct_narrow_y_dout     (rdct_narrow_y_data_x),
         .rdct_narrow_xy_valid   (rdct_narrow_xy_valid_x)
     );
 
@@ -760,29 +864,109 @@ module modexpng_core_top
         
         .rd_wide_xy_addr_aux    (rd_wide_xy_addr_aux_y),
         .rd_wide_xy_bank_aux    (rd_wide_xy_bank_aux_y),
-        .rd_wide_x_dout_aux     (rd_wide_x_dout_aux_y),
-        .rd_wide_y_dout_aux     (rd_wide_y_dout_aux_y),
+        .rd_wide_x_din_aux      (rd_wide_x_data_aux_y),
+        .rd_wide_y_din_aux      (rd_wide_y_data_aux_y),
         
         .rcmb_final_xy_bank     (rcmb_final_xy_bank_y),
         .rcmb_final_xy_addr     (rcmb_final_xy_addr_y),
-        .rcmb_final_x_dout      (rcmb_final_x_dout_y),
-        .rcmb_final_y_dout      (rcmb_final_y_dout_y),
+        .rcmb_final_x_din       (rcmb_final_x_data_y),
+        .rcmb_final_y_din       (rcmb_final_y_data_y),
         .rcmb_final_xy_valid    (rcmb_final_xy_valid_y),
         
         .rdct_wide_xy_bank      (rdct_wide_xy_bank_y),
         .rdct_wide_xy_addr      (rdct_wide_xy_addr_y),
-        .rdct_wide_x_dout       (rdct_wide_x_dout_y),
-        .rdct_wide_y_dout       (rdct_wide_y_dout_y),
+        .rdct_wide_x_dout       (rdct_wide_x_data_y),
+        .rdct_wide_y_dout       (rdct_wide_y_data_y),
         .rdct_wide_xy_valid     (rdct_wide_xy_valid_y),
         
         .rdct_narrow_xy_bank    (rdct_narrow_xy_bank_y),
         .rdct_narrow_xy_addr    (rdct_narrow_xy_addr_y),
-        .rdct_narrow_x_dout     (rdct_narrow_x_dout_y),
-        .rdct_narrow_y_dout     (rdct_narrow_y_dout_y),
+        .rdct_narrow_x_dout     (rdct_narrow_x_data_y),
+        .rdct_narrow_y_dout     (rdct_narrow_y_data_y),
         .rdct_narrow_xy_valid   (rdct_narrow_xy_valid_y)
     );
 
 
+    //
+    // General Worker (per the commit message: block memory data mover, plus helper operations for Garner's formula)
+    //
+    reg                     wrk_ena = 1'b0;     // set while the uOP FSM is in DECODE for worker and output opcodes, cleared otherwise
+    wire                    wrk_rdy;            // combined with ~wrk_ena to detect completion of worker uOPs
+
+    reg  [ BANK_ADDR_W -1:0] wrk_sel_wide_in;       // bank selectors: loaded from the matching uop_data_sel_* fields in DECODE
+    reg  [ BANK_ADDR_W -1:0] wrk_sel_wide_out;
+    reg  [ BANK_ADDR_W -1:0] wrk_sel_narrow_in;
+    reg  [ BANK_ADDR_W -1:0] wrk_sel_narrow_out;
+    reg  [   OP_ADDR_W -1:0] wrk_word_index_last;   // word_index_last_n or word_index_last_pq, chosen by uop_npq_is_n
+    reg  [UOP_OPCODE_W -1:0] wrk_opcode;            // copy of uop_data_opcode captured in DECODE
+    
+    modexpng_general_worker general_worker
+    (
+        .clk                        (clk),
+        .rst                        (rst),
+        
+        .ena                        (wrk_ena),
+        .rdy                        (wrk_rdy),
+        
+        .sel_narrow_in              (wrk_sel_narrow_in),
+        .sel_narrow_out             (wrk_sel_narrow_out),
+        .sel_wide_in                (wrk_sel_wide_in),
+        .sel_wide_out               (wrk_sel_wide_out),
+        
+        .opcode                     (wrk_opcode),
+        
+        .word_index_last            (wrk_word_index_last),
+        
+        .wrk_rd_wide_xy_ena_x       (wrk_rd_wide_xy_ena_x),
+        .wrk_rd_wide_xy_bank_x      (wrk_rd_wide_xy_bank_x),
+        .wrk_rd_wide_xy_addr_x      (wrk_rd_wide_xy_addr_x),
+        .wrk_rd_wide_x_din_x        (wrk_rd_wide_x_data_x),
+        .wrk_rd_wide_y_din_x        (wrk_rd_wide_y_data_x),
+    
+        .wrk_rd_narrow_xy_ena_x     (wrk_rd_narrow_xy_ena_x),
+        .wrk_rd_narrow_xy_bank_x    (wrk_rd_narrow_xy_bank_x),
+        .wrk_rd_narrow_xy_addr_x    (wrk_rd_narrow_xy_addr_x),
+        .wrk_rd_narrow_x_din_x      (wrk_rd_narrow_x_data_x),      // low WORD_W bits of this net also go to io_manager (wrk_rd_narrow_x_data_x_trunc)
+        .wrk_rd_narrow_y_din_x      (wrk_rd_narrow_y_data_x),
+    
+        .wrk_rd_wide_xy_ena_y       (wrk_rd_wide_xy_ena_y),
+        .wrk_rd_wide_xy_bank_y      (wrk_rd_wide_xy_bank_y),
+        .wrk_rd_wide_xy_addr_y      (wrk_rd_wide_xy_addr_y),
+        .wrk_rd_wide_x_din_y        (wrk_rd_wide_x_data_y),
+        .wrk_rd_wide_y_din_y        (wrk_rd_wide_y_data_y),
+    
+        .wrk_rd_narrow_xy_ena_y     (wrk_rd_narrow_xy_ena_y),
+        .wrk_rd_narrow_xy_bank_y    (wrk_rd_narrow_xy_bank_y),
+        .wrk_rd_narrow_xy_addr_y    (wrk_rd_narrow_xy_addr_y),
+        .wrk_rd_narrow_x_din_y      (wrk_rd_narrow_x_data_y),      // low WORD_W bits of this net also go to io_manager (wrk_rd_narrow_x_data_y_trunc)
+        .wrk_rd_narrow_y_din_y      (wrk_rd_narrow_y_data_y),
+        
+        .wrk_wr_wide_xy_ena_x       (wrk_wr_wide_xy_ena_x),        // wrk_wr_*_x nets also connect to the wrk_* ports of the storage manager wired with *_x nets
+        .wrk_wr_wide_xy_bank_x      (wrk_wr_wide_xy_bank_x),
+        .wrk_wr_wide_xy_addr_x      (wrk_wr_wide_xy_addr_x),
+        .wrk_wr_wide_x_dout_x       (wrk_wr_wide_x_data_x),
+        .wrk_wr_wide_y_dout_x       (wrk_wr_wide_y_data_x),
+    
+        .wrk_wr_narrow_xy_ena_x     (wrk_wr_narrow_xy_ena_x),
+        .wrk_wr_narrow_xy_bank_x    (wrk_wr_narrow_xy_bank_x),
+        .wrk_wr_narrow_xy_addr_x    (wrk_wr_narrow_xy_addr_x),
+        .wrk_wr_narrow_x_dout_x     (wrk_wr_narrow_x_data_x),
+        .wrk_wr_narrow_y_dout_x     (wrk_wr_narrow_y_data_x),
+    
+        .wrk_wr_wide_xy_ena_y       (wrk_wr_wide_xy_ena_y),        // wrk_wr_*_y nets also connect to the wrk_* ports of storage_manager_y
+        .wrk_wr_wide_xy_bank_y      (wrk_wr_wide_xy_bank_y),
+        .wrk_wr_wide_xy_addr_y      (wrk_wr_wide_xy_addr_y),
+        .wrk_wr_wide_x_dout_y       (wrk_wr_wide_x_data_y),
+        .wrk_wr_wide_y_dout_y       (wrk_wr_wide_y_data_y),
+    
+        .wrk_wr_narrow_xy_ena_y     (wrk_wr_narrow_xy_ena_y),
+        .wrk_wr_narrow_xy_bank_y    (wrk_wr_narrow_xy_bank_y),
+        .wrk_wr_narrow_xy_addr_y    (wrk_wr_narrow_xy_addr_y),
+        .wrk_wr_narrow_x_dout_y     (wrk_wr_narrow_x_data_y),
+        .wrk_wr_narrow_y_dout_y     (wrk_wr_narrow_y_data_y)
+    );
+
+
     //
     // uOP Completion Detector 
     //
@@ -792,10 +976,10 @@ module modexpng_core_top
         //
         uop_exit_from_busy = 0;
         //
-        if (uop_opcode_is_io)  uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy;
-        if (uop_opcode_is_mmm) uop_exit_from_busy = ~mmm_ena    & mmm_rdy;
-        //if (uop_data_opcode_is_add)     uop_exit_from_busy = ~mod_add_ena  & mod_add_rdy;
-        //if (uop_data_opcode_is_sub)     uop_exit_from_busy = ~mod_sub_ena  & mod_sub_rdy;
+        if (uop_opcode_is_in)   uop_exit_from_busy =  ~io_mgr_ena & io_mgr_rdy;
+        if (uop_opcode_is_out)  uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~wrk_ena & wrk_rdy);   // OUT starts io_mgr and the worker (not mmm), so wait for both of those
+        if (uop_opcode_is_mmm)  uop_exit_from_busy =  ~mmm_ena    & mmm_rdy   ;
+        if (uop_opcode_is_wrk)  uop_exit_from_busy =  ~wrk_ena    & wrk_rdy   ;
         //
     end
 
@@ -809,10 +993,12 @@ module modexpng_core_top
             io_mgr_ena <= 1'b0;
             mmm_ena_x  <= 1'b0;
             mmm_ena_y  <= 1'b0;
+            wrk_ena    <= 1'b0;
         end else begin
-            io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_io  : 1'b0;
-            mmm_ena_x  <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
-            mmm_ena_y  <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
+            io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in  || uop_opcode_is_out) : 1'b0;
+            mmm_ena_x  <= uop_fsm_state == UOP_FSM_STATE_DECODE ?  uop_opcode_is_mmm                       : 1'b0;
+            mmm_ena_y  <= uop_fsm_state == UOP_FSM_STATE_DECODE ?  uop_opcode_is_mmm                       : 1'b0;
+            wrk_ena    <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk || uop_opcode_is_out) : 1'b0;
         end
 
     //
@@ -825,6 +1011,7 @@ module modexpng_core_top
         if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin
             //
             io_mgr_opcode <= uop_data_opcode;
+            wrk_opcode    <= uop_data_opcode;
             //
             case (uop_data_opcode)
                 //
@@ -842,6 +1029,15 @@ module modexpng_core_top
                     io_mgr_sel_out <= uop_data_sel_narrow_out;
                 end
                 //
+                UOP_OPCODE_OUTPUT_FROM_NARROW: begin
+                    io_mgr_sel_crt <= uop_data_crt;
+                    io_mgr_sel_aux <= UOP_AUX_DNC;
+                    io_mgr_sel_in  <= BANK_DNC;
+                    io_mgr_sel_out <= uop_data_sel_narrow_out;
+                    //
+                    wrk_sel_narrow_in <= uop_data_sel_narrow_in;                
+                end
+                //
                 UOP_OPCODE_MODULAR_MULTIPLY: begin
                     //
                     case (uop_data_ladder)
@@ -856,10 +1052,21 @@ module modexpng_core_top
                     {mmm_sel_narrow_in_x,   mmm_sel_narrow_in_y  } <= {2{uop_data_sel_narrow_in    }};
                     {rdct_sel_wide_out_x,   rdct_sel_wide_out_y  } <= {2{uop_data_sel_wide_out     }}; 
                     {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out   }};
-                    
                     //
                 end
                 //
+                UOP_OPCODE_PROPAGATE_CARRIES: begin
+                    wrk_sel_narrow_in   <= uop_data_sel_narrow_in;
+                    wrk_sel_narrow_out  <= uop_data_sel_narrow_out;
+                end
+                //
+                UOP_OPCODE_COPY_CRT_Y2X: begin
+                    wrk_sel_wide_in    <= uop_data_sel_wide_in;
+                    wrk_sel_wide_out   <= uop_data_sel_wide_out;
+                    wrk_sel_narrow_in  <= uop_data_sel_narrow_in;
+                    wrk_sel_narrow_out <= uop_data_sel_narrow_out;                    
+                end
+                //
             endcase
             //
         end
@@ -887,6 +1094,9 @@ module modexpng_core_top
                     {rdct_word_index_last_x,       rdct_word_index_last_y      } <= {2{uop_npq_is_n ? word_index_last_n        : word_index_last_pq       }};
                 end
                 //
+                UOP_OPCODE_PROPAGATE_CARRIES:   // non-blocking, consistent with the other assignments in this case statement
+                    wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+                //
             endcase
             //
         end
@@ -945,87 +1155,88 @@ module modexpng_core_top
             //
             // X.X
             //
-            $write("                  "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i);                                                        $write("\n");
-            $write("X.X.NARROW.A:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[0*256+i]);                $write("\n");
-            $write("X.X.NARROW.B:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[1*256+i]);                $write("\n");
-            $write("X.X.NARROW.C:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[2*256+i]);                $write("\n");
-            $write("X.X.NARROW.D:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[3*256+i]);                $write("\n");
-            $write("X.X.NARROW.E:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[4*256+i]);                $write("\n");
-            $write("X.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[5*256+i]);                $write("\n");
-            $write("X.X.NARROW.Q:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[6*256+i]);                $write("\n");
-            $write("X.X.NARROW.EXT:   "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_x.mem[7*256+i]);                $write("\n");
-            $write("                  "); for (i=0; i<64; i=i+1) $write(" ------ ");                                                           $write("\n");
-            $write("X.X.WIDE.A:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[0*256+i]); $write("\n");
-            $write("X.X.WIDE.B:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[1*256+i]); $write("\n");
-            $write("X.X.WIDE.C:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[2*256+i]); $write("\n");
-            $write("X.X.WIDE.D:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[3*256+i]); $write("\n");
-            $write("X.X.WIDE.E:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[4*256+i]); $write("\n");
-            $write("X.X.WIDE.N:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[5*256+i]); $write("\n");
-            $write("X.X.WIDE.L:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[6*256+i]); $write("\n");
-            $write("X.X.WIDE.H:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_x.mem[7*256+i]); $write("\n");
+            $write("                  "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i);                                              $write("\n");
+            $write("X.X.NARROW.A:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[0*256+i]);           $write("\n");
+            $write("X.X.NARROW.B:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[1*256+i]);           $write("\n");
+            $write("X.X.NARROW.C:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[2*256+i]);           $write("\n");
+            $write("X.X.NARROW.D:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[3*256+i]);           $write("\n");
+            $write("X.X.NARROW.E:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[4*256+i]);           $write("\n");
+            $write("X.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[5*256+i]);           $write("\n");
+            $write("X.X.NARROW.Q:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[6*256+i]);           $write("\n");
+            $write("X.X.NARROW.EXT:   "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_x.mem[7*256+i]);           $write("\n");
+            $write("                  "); for (i=0; i<64; i=i+1) $write(" ------ ");                                                 $write("\n");
+            $write("X.X.WIDE.A:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[0*256+i]); $write("\n");
+            $write("X.X.WIDE.B:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[1*256+i]); $write("\n");
+            $write("X.X.WIDE.C:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[2*256+i]); $write("\n");
+            $write("X.X.WIDE.D:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[3*256+i]); $write("\n");
+            $write("X.X.WIDE.E:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[4*256+i]); $write("\n");
+            $write("X.X.WIDE.N:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[5*256+i]); $write("\n");
+            $write("X.X.WIDE.L:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[6*256+i]); $write("\n");
+            $write("X.X.WIDE.H:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_x.mem[7*256+i]); $write("\n");
             //
             // X.Y
             //
-            $write("                  "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i);                                                        $write("\n");
-            $write("X.Y.NARROW.A:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[0*256+i]);                $write("\n");
-            $write("X.Y.NARROW.B:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[1*256+i]);                $write("\n");
-            $write("X.Y.NARROW.C:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[2*256+i]);                $write("\n");
-            $write("X.Y.NARROW.D:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[3*256+i]);                $write("\n");
-            $write("X.Y.NARROW.E:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[4*256+i]);                $write("\n");
-            $write("X.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[5*256+i]);                $write("\n");
-            $write("X.Y.NARROW.Q:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[6*256+i]);                $write("\n");
-            $write("X.Y.NARROW.EXT:   "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_bram_y.mem[7*256+i]);                $write("\n");
-            $write("                  "); for (i=0; i<64; i=i+1) $write(" ------ ");                                                           $write("\n");
-            $write("X.Y.WIDE.A:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[0*256+i]); $write("\n");
-            $write("X.Y.WIDE.B:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[1*256+i]); $write("\n");
-            $write("X.Y.WIDE.C:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[2*256+i]); $write("\n");
-            $write("X.Y.WIDE.D:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[3*256+i]); $write("\n");
-            $write("X.Y.WIDE.E:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[4*256+i]); $write("\n");
-            $write("X.Y.WIDE.N:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[5*256+i]); $write("\n");
-            $write("X.Y.WIDE.L:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[6*256+i]); $write("\n");
-            $write("X.Y.WIDE.H:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide_bram[0].wide_bram_y.mem[7*256+i]); $write("\n");
+            $write("                  "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i);                                              $write("\n");
+            $write("X.Y.NARROW.A:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[0*256+i]);           $write("\n");
+            $write("X.Y.NARROW.B:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[1*256+i]);           $write("\n");
+            $write("X.Y.NARROW.C:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[2*256+i]);           $write("\n");
+            $write("X.Y.NARROW.D:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[3*256+i]);           $write("\n");
+            $write("X.Y.NARROW.E:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[4*256+i]);           $write("\n");
+            $write("X.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[5*256+i]);           $write("\n");
+            $write("X.Y.NARROW.Q:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[6*256+i]);           $write("\n");
+            $write("X.Y.NARROW.EXT:   "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.narrow_y.mem[7*256+i]);           $write("\n");
+            $write("                  "); for (i=0; i<64; i=i+1) $write(" ------ ");                                                 $write("\n");
+            $write("X.Y.WIDE.A:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[0*256+i]); $write("\n");
+            $write("X.Y.WIDE.B:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[1*256+i]); $write("\n");
+            $write("X.Y.WIDE.C:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[2*256+i]); $write("\n");
+            $write("X.Y.WIDE.D:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[3*256+i]); $write("\n");
+            $write("X.Y.WIDE.E:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[4*256+i]); $write("\n");
+            $write("X.Y.WIDE.N:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[5*256+i]); $write("\n");
+            $write("X.Y.WIDE.L:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[6*256+i]); $write("\n");
+            $write("X.Y.WIDE.H:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_x.gen_wide[0].wide_y.mem[7*256+i]); $write("\n");
             //
             // Y.X
             //
-            $write("                  "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i);                                                        $write("\n");
-            $write("Y.X.NARROW.A:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[0*256+i]);                $write("\n");
-            $write("Y.X.NARROW.B:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[1*256+i]);                $write("\n");
-            $write("Y.X.NARROW.C:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[2*256+i]);                $write("\n");
-            $write("Y.X.NARROW.D:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[3*256+i]);                $write("\n");
-            $write("Y.X.NARROW.E:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[4*256+i]);                $write("\n");
-            $write("Y.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[5*256+i]);                $write("\n");
-            $write("Y.X.NARROW.Q:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[6*256+i]);                $write("\n");
-            $write("Y.X.NARROW.EXT:   "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_x.mem[7*256+i]);                $write("\n");
-            $write("                  "); for (i=0; i<64; i=i+1) $write(" ------ ");                                                           $write("\n");
-            $write("Y.X.WIDE.A:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[0*256+i]); $write("\n");
-            $write("Y.X.WIDE.B:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[1*256+i]); $write("\n");
-            $write("Y.X.WIDE.C:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[2*256+i]); $write("\n");
-            $write("Y.X.WIDE.D:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[3*256+i]); $write("\n");
-            $write("Y.X.WIDE.E:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[4*256+i]); $write("\n");
-            $write("Y.X.WIDE.N:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[5*256+i]); $write("\n");
-            $write("Y.X.WIDE.L:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[6*256+i]); $write("\n");
-            $write("Y.X.WIDE.H:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_x.mem[7*256+i]); $write("\n");
+            $write("                  "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i);                                              $write("\n");
+            $write("Y.X.NARROW.A:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[0*256+i]);           $write("\n");
+            $write("Y.X.NARROW.B:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[1*256+i]);           $write("\n");
+            $write("Y.X.NARROW.C:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[2*256+i]);           $write("\n");
+            $write("Y.X.NARROW.D:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[3*256+i]);           $write("\n");
+            $write("Y.X.NARROW.E:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[4*256+i]);           $write("\n");
+            $write("Y.X.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[5*256+i]);           $write("\n");
+            $write("Y.X.NARROW.Q:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[6*256+i]);           $write("\n");
+            $write("Y.X.NARROW.EXT:   "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_x.mem[7*256+i]);           $write("\n");
+            $write("                  "); for (i=0; i<64; i=i+1) $write(" ------ ");                                                 $write("\n");
+            $write("Y.X.WIDE.A:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[0*256+i]); $write("\n");
+            $write("Y.X.WIDE.B:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[1*256+i]); $write("\n");
+            $write("Y.X.WIDE.C:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[2*256+i]); $write("\n");
+            $write("Y.X.WIDE.D:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[3*256+i]); $write("\n");
+            $write("Y.X.WIDE.E:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[4*256+i]); $write("\n");
+            $write("Y.X.WIDE.N:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[5*256+i]); $write("\n");
+            $write("Y.X.WIDE.L:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[6*256+i]); $write("\n");
+            $write("Y.X.WIDE.H:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_x.mem[7*256+i]); $write("\n");
             //
             // Y.Y
             //
-            $write("                  "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i);                                                        $write("\n");
-            $write("Y.Y.NARROW.A:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[0*256+i]);                $write("\n");
-            $write("Y.Y.NARROW.B:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[1*256+i]);                $write("\n");
-            $write("Y.Y.NARROW.C:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[2*256+i]);                $write("\n");
-            $write("Y.Y.NARROW.D:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[3*256+i]);                $write("\n");
-            $write("Y.Y.NARROW.E:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[4*256+i]);                $write("\n");
-            $write("Y.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[5*256+i]);                $write("\n");
-            $write("Y.Y.NARROW.Q:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[6*256+i]);                $write("\n");
-            $write("Y.Y.NARROW.EXT:   "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_bram_y.mem[7*256+i]);                $write("\n");
-            $write("                  "); for (i=0; i<64; i=i+1) $write(" ------ ");                                                           $write("\n");
-            $write("Y.Y.WIDE.A:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[0*256+i]); $write("\n");
-            $write("Y.Y.WIDE.B:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[1*256+i]); $write("\n");
-            $write("Y.Y.WIDE.C:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[2*256+i]); $write("\n");
-            $write("Y.Y.WIDE.D:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[3*256+i]); $write("\n");
-            $write("Y.Y.WIDE.E:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[4*256+i]); $write("\n");
-            $write("Y.Y.WIDE.N:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[5*256+i]); $write("\n");
-            $write("Y.Y.WIDE.L:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[6*256+i]); $write("\n");
-            $write("Y.Y.WIDE.H:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide_bram[0].wide_bram_y.mem[7*256+i]); $write("\n");            //
+            $write("                  "); for (i=0; i<64; i=i+1) $write("[ %3d ] ", i);                                              $write("\n");
+            $write("Y.Y.NARROW.A:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[0*256+i]);           $write("\n");
+            $write("Y.Y.NARROW.B:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[1*256+i]);           $write("\n");
+            $write("Y.Y.NARROW.C:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[2*256+i]);           $write("\n");
+            $write("Y.Y.NARROW.D:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[3*256+i]);           $write("\n");
+            $write("Y.Y.NARROW.E:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[4*256+i]);           $write("\n");
+            $write("Y.Y.NARROW.COEFF: "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[5*256+i]);           $write("\n");
+            $write("Y.Y.NARROW.Q:     "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[6*256+i]);           $write("\n");
+            $write("Y.Y.NARROW.EXT:   "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.narrow_y.mem[7*256+i]);           $write("\n");
+            $write("                  "); for (i=0; i<64; i=i+1) $write(" ------ ");                                                 $write("\n");
+            $write("Y.Y.WIDE.A:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[0*256+i]); $write("\n");
+            $write("Y.Y.WIDE.B:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[1*256+i]); $write("\n");
+            $write("Y.Y.WIDE.C:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[2*256+i]); $write("\n");
+            $write("Y.Y.WIDE.D:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[3*256+i]); $write("\n");
+            $write("Y.Y.WIDE.E:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[4*256+i]); $write("\n");
+            $write("Y.Y.WIDE.N:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[5*256+i]); $write("\n");
+            $write("Y.Y.WIDE.L:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[6*256+i]); $write("\n");
+            $write("Y.Y.WIDE.H:       "); for (i=0; i<64; i=i+1) $write("0x%05x ", storage_block_y.gen_wide[0].wide_y.mem[7*256+i]); $write("\n");
+            //
         end
 
     //
diff --git a/rtl/modexpng_dsp_array_block.v b/rtl/modexpng_dsp_array_block.v
index 8ab64f0..8c4e844 100644
--- a/rtl/modexpng_dsp_array_block.v
+++ b/rtl/modexpng_dsp_array_block.v
@@ -70,8 +70,8 @@ module modexpng_dsp_array_block
                 .opmode         ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}),
                 .alumode        ({DSP48E1_ALUMODE_W{1'b0}}),
                 
-                .casc_a_in      (WORD_EXT_NULL),
-                .casc_b_in      (WORD_NULL),
+                .casc_a_in      (WORD_EXT_ZERO),
+                .casc_b_in      (WORD_ZERO),
                 
                 .casc_a_out     (casc_a[z]),
                 .casc_b_out     (casc_b[z])
@@ -138,8 +138,8 @@ module modexpng_dsp_array_block
         .opmode         ({1'b0, mode_z[2*NUM_MULTS_HALF], 1'b0, 2'b01, 2'b01}),
         .alumode        ({DSP48E1_ALUMODE_W{1'b0}}),
         
-        .casc_a_in      (WORD_EXT_NULL),
-        .casc_b_in      (WORD_NULL),
+        .casc_a_in      (WORD_EXT_ZERO),
+        .casc_b_in      (WORD_ZERO),
         
         .casc_a_out     (),
         .casc_b_out     ()
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
new file mode 100644
index 0000000..c35f0b3
--- /dev/null
+++ b/rtl/modexpng_general_worker.v
@@ -0,0 +1,679 @@
+module modexpng_general_worker
+(
+    clk,
+    rst,
+    
+    ena,
+    rdy,
+    
+    sel_narrow_in,
+    sel_narrow_out,
+    sel_wide_in,
+    sel_wide_out,
+    
+    opcode,
+    
+    word_index_last,
+    
+    wrk_rd_wide_xy_ena_x,
+    wrk_rd_wide_xy_bank_x,
+    wrk_rd_wide_xy_addr_x,
+    wrk_rd_wide_x_din_x,
+    wrk_rd_wide_y_din_x,
+
+    wrk_rd_narrow_xy_ena_x,
+    wrk_rd_narrow_xy_bank_x,
+    wrk_rd_narrow_xy_addr_x,
+    wrk_rd_narrow_x_din_x,
+    wrk_rd_narrow_y_din_x,
+
+    wrk_rd_wide_xy_ena_y,
+    wrk_rd_wide_xy_bank_y,
+    wrk_rd_wide_xy_addr_y,
+    wrk_rd_wide_x_din_y,
+    wrk_rd_wide_y_din_y,
+
+    wrk_rd_narrow_xy_ena_y,
+    wrk_rd_narrow_xy_bank_y,
+    wrk_rd_narrow_xy_addr_y,
+    wrk_rd_narrow_x_din_y,
+    wrk_rd_narrow_y_din_y,
+    
+    wrk_wr_wide_xy_ena_x,
+    wrk_wr_wide_xy_bank_x,
+    wrk_wr_wide_xy_addr_x,
+    wrk_wr_wide_x_dout_x,
+    wrk_wr_wide_y_dout_x,
+
+    wrk_wr_narrow_xy_ena_x,
+    wrk_wr_narrow_xy_bank_x,
+    wrk_wr_narrow_xy_addr_x,
+    wrk_wr_narrow_x_dout_x,
+    wrk_wr_narrow_y_dout_x,
+
+    wrk_wr_wide_xy_ena_y,
+    wrk_wr_wide_xy_bank_y,
+    wrk_wr_wide_xy_addr_y,
+    wrk_wr_wide_x_dout_y,
+    wrk_wr_wide_y_dout_y,
+
+    wrk_wr_narrow_xy_ena_y,
+    wrk_wr_narrow_xy_bank_y,
+    wrk_wr_narrow_xy_addr_y,
+    wrk_wr_narrow_x_dout_y,
+    wrk_wr_narrow_y_dout_y
+);
+
+    //
+    // Headers
+    //
+    `include "modexpng_parameters.vh"
+    `include "modexpng_microcode.vh"
+
+    
+    //
+    // Ports
+    //
+    input                                    clk;
+    input                                    rst;
+
+    input                                    ena;
+    output                                   rdy;
+    
+    input  [              BANK_ADDR_W  -1:0] sel_narrow_in; 
+    input  [              BANK_ADDR_W  -1:0] sel_narrow_out; 
+    input  [              BANK_ADDR_W  -1:0] sel_wide_in; 
+    input  [              BANK_ADDR_W  -1:0] sel_wide_out; 
+    
+    input  [              UOP_OPCODE_W -1:0] opcode;
+    
+    input  [              OP_ADDR_W    -1:0] word_index_last;
+    
+    output                                   wrk_rd_wide_xy_ena_x;
+    output [              BANK_ADDR_W  -1:0] wrk_rd_wide_xy_bank_x;
+    output [              OP_ADDR_W    -1:0] wrk_rd_wide_xy_addr_x;
+    input  [              WORD_EXT_W   -1:0] wrk_rd_wide_x_din_x;
+    input  [              WORD_EXT_W   -1:0] wrk_rd_wide_y_din_x;
+
+    output                                   wrk_rd_narrow_xy_ena_x;
+    output [              BANK_ADDR_W  -1:0] wrk_rd_narrow_xy_bank_x;
+    output [              OP_ADDR_W    -1:0] wrk_rd_narrow_xy_addr_x;
+    input  [              WORD_EXT_W   -1:0] wrk_rd_narrow_x_din_x;
+    input  [              WORD_EXT_W   -1:0] wrk_rd_narrow_y_din_x;
+    
+    output                                   wrk_rd_wide_xy_ena_y;
+    output [              BANK_ADDR_W  -1:0] wrk_rd_wide_xy_bank_y;
+    output [              OP_ADDR_W    -1:0] wrk_rd_wide_xy_addr_y;
+    input  [              WORD_EXT_W   -1:0] wrk_rd_wide_x_din_y;
+    input  [              WORD_EXT_W   -1:0] wrk_rd_wide_y_din_y;
+
+    output                                   wrk_rd_narrow_xy_ena_y;
+    output [              BANK_ADDR_W  -1:0] wrk_rd_narrow_xy_bank_y;
+    output [              OP_ADDR_W    -1:0] wrk_rd_narrow_xy_addr_y;
+    input  [              WORD_EXT_W   -1:0] wrk_rd_narrow_x_din_y;
+    input  [              WORD_EXT_W   -1:0] wrk_rd_narrow_y_din_y;
+
+    output                                   wrk_wr_wide_xy_ena_x;
+    output [              BANK_ADDR_W  -1:0] wrk_wr_wide_xy_bank_x;
+    output [              OP_ADDR_W    -1:0] wrk_wr_wide_xy_addr_x;
+    output [              WORD_EXT_W   -1:0] wrk_wr_wide_x_dout_x;
+    output [              WORD_EXT_W   -1:0] wrk_wr_wide_y_dout_x;
+
+    output                                   wrk_wr_narrow_xy_ena_x;
+    output [              BANK_ADDR_W  -1:0] wrk_wr_narrow_xy_bank_x;
+    output [              OP_ADDR_W    -1:0] wrk_wr_narrow_xy_addr_x;
+    output [              WORD_EXT_W   -1:0] wrk_wr_narrow_x_dout_x;
+    output [              WORD_EXT_W   -1:0] wrk_wr_narrow_y_dout_x;
+    
+    output                                   wrk_wr_wide_xy_ena_y;
+    output [              BANK_ADDR_W  -1:0] wrk_wr_wide_xy_bank_y;
+    output [              OP_ADDR_W    -1:0] wrk_wr_wide_xy_addr_y;
+    output [              WORD_EXT_W   -1:0] wrk_wr_wide_x_dout_y;
+    output [              WORD_EXT_W   -1:0] wrk_wr_wide_y_dout_y;
+
+    output                                   wrk_wr_narrow_xy_ena_y;
+    output [              BANK_ADDR_W  -1:0] wrk_wr_narrow_xy_bank_y;
+    output [              OP_ADDR_W    -1:0] wrk_wr_narrow_xy_addr_y;
+    output [              WORD_EXT_W   -1:0] wrk_wr_narrow_x_dout_y;
+    output [              WORD_EXT_W   -1:0] wrk_wr_narrow_y_dout_y;
+
+
+    //
+    // FSM Declaration
+    //
+    localparam [3:0] WRK_FSM_STATE_IDLE          = 4'h0;
+    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1  = 4'h1;
+    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2  = 4'h2;
+    localparam [3:0] WRK_FSM_STATE_BUSY          = 4'h3;
+    localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5;    // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
+    localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6;
+    localparam [3:0] WRK_FSM_STATE_STOP          = 4'h7;
+    
+    reg  [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
+    reg  [3:0] wrk_fsm_state_next_one_pass;     // single address space sweep
+
+
+    //
+    // Control Signals
+    //
+    reg                    rd_wide_xy_ena_x = 1'b0;
+    reg [BANK_ADDR_W -1:0] rd_wide_xy_bank_x;
+    reg [  OP_ADDR_W -1:0] rd_wide_xy_addr_x; 
+
+    reg                    rd_narrow_xy_ena_x = 1'b0;
+    reg [BANK_ADDR_W -1:0] rd_narrow_xy_bank_x;
+    reg [  OP_ADDR_W -1:0] rd_narrow_xy_addr_x; 
+
+    reg                    rd_wide_xy_ena_y = 1'b0;
+    reg [BANK_ADDR_W -1:0] rd_wide_xy_bank_y;
+    reg [  OP_ADDR_W -1:0] rd_wide_xy_addr_y; 
+
+    reg                    rd_narrow_xy_ena_y = 1'b0;
+    reg [BANK_ADDR_W -1:0] rd_narrow_xy_bank_y;
+    reg [  OP_ADDR_W -1:0] rd_narrow_xy_addr_y; 
+    
+    reg                    wr_wide_xy_ena_x = 1'b0;
+    reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_x;
+    reg [  OP_ADDR_W -1:0] wr_wide_xy_addr_x;
+    reg [ WORD_EXT_W -1:0] wr_wide_x_dout_x;
+    reg [ WORD_EXT_W -1:0] wr_wide_y_dout_x;
+
+    reg                    wr_narrow_xy_ena_x = 1'b0;
+    reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_x;
+    reg [  OP_ADDR_W -1:0] wr_narrow_xy_addr_x;
+    reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_x;
+    reg [ WORD_EXT_W -1:0] wr_narrow_y_dout_x;
+
+    reg                    wr_wide_xy_ena_y = 1'b0;
+    reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_y;
+    reg [  OP_ADDR_W -1:0] wr_wide_xy_addr_y;
+    reg [ WORD_EXT_W -1:0] wr_wide_x_dout_y;
+    reg [ WORD_EXT_W -1:0] wr_wide_y_dout_y;
+
+    reg                    wr_narrow_xy_ena_y = 1'b0;
+    reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_y;
+    reg [  OP_ADDR_W -1:0] wr_narrow_xy_addr_y;
+    reg [ WORD_EXT_W -1:0] wr_narrow_x_dout_y;
+    reg [ WORD_EXT_W -1:0] wr_narrow_y_dout_y;
+
+
+    //
+    // Mapping
+    // Each internal register drives the wrk_* net of the same name; one continuous assignment list per port group.
+    assign wrk_rd_wide_xy_ena_x     = rd_wide_xy_ena_x,
+           wrk_rd_wide_xy_bank_x    = rd_wide_xy_bank_x,
+           wrk_rd_wide_xy_addr_x    = rd_wide_xy_addr_x;
+
+    assign wrk_rd_narrow_xy_ena_x   = rd_narrow_xy_ena_x,
+           wrk_rd_narrow_xy_bank_x  = rd_narrow_xy_bank_x,
+           wrk_rd_narrow_xy_addr_x  = rd_narrow_xy_addr_x;
+
+    assign wrk_rd_wide_xy_ena_y     = rd_wide_xy_ena_y,
+           wrk_rd_wide_xy_bank_y    = rd_wide_xy_bank_y,
+           wrk_rd_wide_xy_addr_y    = rd_wide_xy_addr_y;
+
+    assign wrk_rd_narrow_xy_ena_y   = rd_narrow_xy_ena_y,
+           wrk_rd_narrow_xy_bank_y  = rd_narrow_xy_bank_y,
+           wrk_rd_narrow_xy_addr_y  = rd_narrow_xy_addr_y;
+
+    assign wrk_wr_wide_xy_ena_x     = wr_wide_xy_ena_x,
+           wrk_wr_wide_xy_bank_x    = wr_wide_xy_bank_x,
+           wrk_wr_wide_xy_addr_x    = wr_wide_xy_addr_x,
+           wrk_wr_wide_x_dout_x     = wr_wide_x_dout_x,
+           wrk_wr_wide_y_dout_x     = wr_wide_y_dout_x;
+
+    assign wrk_wr_narrow_xy_ena_x   = wr_narrow_xy_ena_x,
+           wrk_wr_narrow_xy_bank_x  = wr_narrow_xy_bank_x,
+           wrk_wr_narrow_xy_addr_x  = wr_narrow_xy_addr_x,
+           wrk_wr_narrow_x_dout_x   = wr_narrow_x_dout_x,
+           wrk_wr_narrow_y_dout_x   = wr_narrow_y_dout_x;
+
+    assign wrk_wr_wide_xy_ena_y     = wr_wide_xy_ena_y,
+           wrk_wr_wide_xy_bank_y    = wr_wide_xy_bank_y,
+           wrk_wr_wide_xy_addr_y    = wr_wide_xy_addr_y,
+           wrk_wr_wide_x_dout_y     = wr_wide_x_dout_y,
+           wrk_wr_wide_y_dout_y     = wr_wide_y_dout_y;
+
+    assign wrk_wr_narrow_xy_ena_y   = wr_narrow_xy_ena_y,
+           wrk_wr_narrow_xy_bank_y  = wr_narrow_xy_bank_y,
+           wrk_wr_narrow_xy_addr_y  = wr_narrow_xy_addr_y,
+           wrk_wr_narrow_x_dout_y   = wr_narrow_x_dout_y,
+           wrk_wr_narrow_y_dout_y   = wr_narrow_y_dout_y;
+   
+   
+    //
+    // Delays
+    //
+    // Two-stage delay line on each narrow read address: *_dly1 holds the
+    // address from one clock ago, *_dly2 the address from two clocks ago.
+    // No delay line exists for the wide read addresses.
+    //
+
+    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x_dly2;
+    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y_dly2;
+
+    always @(posedge clk) begin
+        //
+        // stage 2 picks up the previous stage 1 value (non-blocking, so statement order is irrelevant)
+        rd_narrow_xy_addr_x_dly2 <= rd_narrow_xy_addr_x_dly1;
+        rd_narrow_xy_addr_y_dly2 <= rd_narrow_xy_addr_y_dly1;
+        //
+        // stage 1 samples the current read address
+        rd_narrow_xy_addr_x_dly1 <= rd_narrow_xy_addr_x;
+        rd_narrow_xy_addr_y_dly1 <= rd_narrow_xy_addr_y;
+        //
+    end
+
+
+    //
+    // Handy Wires
+    // Declared here, driven further down next to the read address counters.
+    wire rd_narrow_xy_addr_x_next_is_last;  // high when rd_narrow_xy_addr_x_next == word_index_last
+    wire rd_narrow_xy_addr_y_next_is_last;  // high when rd_narrow_xy_addr_y_next == word_index_last
+        
+
+    //
+    // Read Enable Logic
+    // Registered read strobes: cleared every clock unless the opcode / next-state decode below raises them.
+    // The helper tasks write the X and Y strobe of one port pair together with a single non-blocking assignment.
+    task _update_wide_xy_rd_en;   input _en; {rd_wide_xy_ena_x,   rd_wide_xy_ena_y  } <= {2{_en}}; endtask
+    task _update_narrow_xy_rd_en; input _en; {rd_narrow_xy_ena_x, rd_narrow_xy_ena_y} <= {2{_en}}; endtask
+    
+    task enable_wide_xy_rd_en;  _update_wide_xy_rd_en(1'b1); endtask
+    task disable_wide_xy_rd_en; _update_wide_xy_rd_en(1'b0); endtask
+    
+    task enable_narrow_xy_rd_en;  _update_narrow_xy_rd_en(1'b1); endtask
+    task disable_narrow_xy_rd_en; _update_narrow_xy_rd_en(1'b0); endtask
+    
+    always @(posedge clk)
+        //
+        if (rst) begin
+            // synchronous reset: all read strobes low
+            disable_wide_xy_rd_en;
+            disable_narrow_xy_rd_en;
+            /*
+            rd_wide_xy_ena_x    <= 1'b0;
+            rd_wide_xy_ena_y    <= 1'b0;
+            rd_narrow_xy_ena_x  <= 1'b0;
+            rd_narrow_xy_ena_y  <= 1'b0;
+            */
+        end else begin
+            // default for this clock: no reads (a later enable_* call in the same clock overrides this)
+            disable_wide_xy_rd_en;
+            disable_narrow_xy_rd_en;
+            //
+            //rd_wide_xy_ena_x    <= 1'b0;
+            //rd_wide_xy_ena_y    <= 1'b0;
+            //rd_narrow_xy_ena_x  <= 1'b0;
+            //rd_narrow_xy_ena_y  <= 1'b0;
+            //
+            case (opcode)
+                // these two opcodes read the narrow ports only
+                UOP_OPCODE_PROPAGATE_CARRIES,
+                UOP_OPCODE_OUTPUT_FROM_NARROW:
+                    // decoded from the next-state value, not from the current state
+                    case (wrk_fsm_state_next_one_pass)
+                        //
+                        WRK_FSM_STATE_LATENCY_PRE1,
+                        WRK_FSM_STATE_LATENCY_PRE2,
+                        WRK_FSM_STATE_BUSY:
+                            //
+                            enable_narrow_xy_rd_en;
+                            //{rd_narrow_xy_ena_x, rd_narrow_xy_ena_y} <= {2{1'b1}};
+                            //
+                        //
+                    endcase
+                    //
+                // the copy opcode reads both the wide and the narrow ports
+                UOP_OPCODE_COPY_CRT_Y2X:
+                    //
+                    case (wrk_fsm_state_next_one_pass)
+                        //
+                        WRK_FSM_STATE_LATENCY_PRE1,
+                        WRK_FSM_STATE_LATENCY_PRE2,
+                        WRK_FSM_STATE_BUSY: begin
+                            //
+                            enable_narrow_xy_rd_en;
+                            enable_wide_xy_rd_en;
+                            //
+                        end
+                        //
+                    endcase
+                //
+            endcase
+            //
+        end
+
+
+    //
+    // Write Enable Logic
+    // Registered write strobes: cleared every clock unless the opcode / current-state decode below raises them.
+    // The helper tasks write the X and Y strobe of one port pair together with a single non-blocking assignment.
+    task _update_wide_xy_wr_en;   input _en; {wr_wide_xy_ena_x,   wr_wide_xy_ena_y  } <= {2{_en}}; endtask
+    task _update_narrow_xy_wr_en; input _en; {wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{_en}}; endtask
+    
+    task enable_wide_xy_wr_en;  _update_wide_xy_wr_en(1'b1); endtask
+    task disable_wide_xy_wr_en; _update_wide_xy_wr_en(1'b0); endtask
+    
+    task enable_narrow_xy_wr_en;  _update_narrow_xy_wr_en(1'b1); endtask
+    task disable_narrow_xy_wr_en; _update_narrow_xy_wr_en(1'b0); endtask
+    
+    always @(posedge clk)
+        //
+        if (rst) begin
+            // synchronous reset: all write strobes low
+            disable_wide_xy_wr_en;
+            disable_narrow_xy_wr_en;
+            //wr_wide_xy_ena_x    <= 1'b0;
+            //wr_wide_xy_ena_y    <= 1'b0;
+            //wr_narrow_xy_ena_x  <= 1'b0;
+            //wr_narrow_xy_ena_y  <= 1'b0;
+            //
+        end else begin
+            // default for this clock: no writes (a later enable_* call in the same clock overrides this)
+            disable_wide_xy_wr_en;
+            disable_narrow_xy_wr_en;
+            //
+            //wr_wide_xy_ena_x    <= 1'b0;
+            //wr_wide_xy_ena_y    <= 1'b0;
+            //wr_narrow_xy_ena_x  <= 1'b0;
+            //wr_narrow_xy_ena_y  <= 1'b0;
+            //
+            case (opcode)
+                // carry propagation writes the narrow ports only
+                UOP_OPCODE_PROPAGATE_CARRIES:
+                    // decoded from the current FSM state
+                    case (wrk_fsm_state)
+                        //
+                        WRK_FSM_STATE_BUSY,
+                        WRK_FSM_STATE_LATENCY_POST1,
+                        WRK_FSM_STATE_LATENCY_POST2:
+                            //
+                            enable_narrow_xy_wr_en;                            
+                            //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}};
+                            //
+                        //
+                    endcase
+                    // the copy opcode writes both the wide and the narrow ports
+                UOP_OPCODE_COPY_CRT_Y2X:
+                    //
+                    case (wrk_fsm_state)
+                        //
+                        WRK_FSM_STATE_BUSY,
+                        WRK_FSM_STATE_LATENCY_POST1,
+                        WRK_FSM_STATE_LATENCY_POST2: begin
+                            //
+                            enable_wide_xy_wr_en;
+                            enable_narrow_xy_wr_en;
+                            //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}};
+                            //
+                        end
+                        //
+                    endcase
+                // UOP_OPCODE_OUTPUT_FROM_NARROW has no entry: it never raises a write strobe in this process
+            endcase
+            //
+        end
+
+
+    //
+    // Data Logic
+    // Produces the registered write data: carry-propagated narrow words, or a copy of the Y-side read data.
+    reg [CARRY_W -1:0] rd_narrow_x_din_x_cry_r;     // carry kept from the previous word (upper bits of the sum below)
+    reg [CARRY_W -1:0] rd_narrow_y_din_x_cry_r;
+    reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r;
+    reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r;
+    // read word plus saved carry, carry zero-extended by WORD_W bits (NOTE(review): presumably CARRY_W + WORD_W == WORD_EXT_W - confirm)
+    wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};  
+    wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r};
+    wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
+    wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};
+    // low WORD_W bits of the sum, with the upper CARRY_W bits forced to zero
+    wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W -1:0]};
+    wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W -1:0]};
+    wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W -1:0]};
+    wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W -1:0]};
+    
+    always @(posedge clk) begin
+        // default write data is WORD_EXT_DNC (defined outside this chunk; presumably a don't-care pattern); the carry registers have no default and hold
+        wr_wide_x_dout_x    <= WORD_EXT_DNC;
+        wr_wide_y_dout_x    <= WORD_EXT_DNC;
+        wr_wide_x_dout_y    <= WORD_EXT_DNC;
+        wr_wide_y_dout_y    <= WORD_EXT_DNC;
+        wr_narrow_x_dout_x  <= WORD_EXT_DNC;
+        wr_narrow_y_dout_x  <= WORD_EXT_DNC;
+        wr_narrow_x_dout_y  <= WORD_EXT_DNC;
+        wr_narrow_y_dout_y  <= WORD_EXT_DNC;
+        //
+        case (opcode)
+            //
+            UOP_OPCODE_PROPAGATE_CARRIES:
+                //
+                case (wrk_fsm_state)
+                    // carries are cleared in PRE2, the state that precedes BUSY
+                    WRK_FSM_STATE_LATENCY_PRE2: begin
+                        rd_narrow_x_din_x_cry_r <= CARRY_ZERO;
+                        rd_narrow_y_din_x_cry_r <= CARRY_ZERO;
+                        rd_narrow_x_din_y_cry_r <= CARRY_ZERO;
+                        rd_narrow_y_din_y_cry_r <= CARRY_ZERO;
+                    end
+                    //
+                    WRK_FSM_STATE_BUSY,
+                    WRK_FSM_STATE_LATENCY_POST1,
+                    WRK_FSM_STATE_LATENCY_POST2: begin // TODO: post2 doesn't need update of carry, since that's the last word
+                        // next carry = the bits of the sum at and above position WORD_W
+                        rd_narrow_x_din_x_cry_r <= rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W];
+                        rd_narrow_y_din_x_cry_r <= rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W];
+                        rd_narrow_x_din_y_cry_r <= rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W];
+                        rd_narrow_y_din_y_cry_r <= rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W];
+                        // write data = the sum with its carry bits stripped
+                        wr_narrow_x_dout_x <= rd_narrow_x_din_x_w_cry_reduced;
+                        wr_narrow_y_dout_x <= rd_narrow_y_din_x_w_cry_reduced;
+                        wr_narrow_x_dout_y <= rd_narrow_x_din_y_w_cry_reduced;
+                        wr_narrow_y_dout_y <= rd_narrow_y_din_y_w_cry_reduced;                       
+                        //
+                    end
+                    //
+                endcase
+                // copy: the *_x and *_y write data are both taken from the *_din_y read data
+            UOP_OPCODE_COPY_CRT_Y2X:
+                //
+                case (wrk_fsm_state)
+                    //
+                    WRK_FSM_STATE_BUSY,
+                    WRK_FSM_STATE_LATENCY_POST1,
+                    WRK_FSM_STATE_LATENCY_POST2: begin
+                        //
+                        wr_wide_x_dout_x   <= wrk_rd_wide_x_din_y;
+                        wr_wide_y_dout_x   <= wrk_rd_wide_y_din_y;
+                        wr_wide_x_dout_y   <= wrk_rd_wide_x_din_y;
+                        wr_wide_y_dout_y   <= wrk_rd_wide_y_din_y;
+                        //
+                        wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_y;
+                        wr_narrow_y_dout_x <= wrk_rd_narrow_y_din_y;
+                        wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y;
+                        wr_narrow_y_dout_y <= wrk_rd_narrow_y_din_y;                       
+                        //
+                    end
+                    //
+                endcase
+            //
+        endcase
+        //
+    end
+
+
+    //
+    // Write Address Logic
+    // Registered {bank, address} pairs for the write side; real values are driven only in BUSY, POST1 and POST2.
+    always @(posedge clk) begin
+        // default for this clock: BANK_DNC / OP_ADDR_DNC (constants defined outside this chunk)
+        {wr_wide_xy_bank_x,   wr_wide_xy_addr_x }  <= {BANK_DNC, OP_ADDR_DNC};
+        {wr_wide_xy_bank_y,   wr_wide_xy_addr_y }  <= {BANK_DNC, OP_ADDR_DNC};
+        {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
+        {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+        //
+        case (opcode)
+            //
+            UOP_OPCODE_PROPAGATE_CARRIES,
+            UOP_OPCODE_COPY_CRT_Y2X:
+                //
+                case (wrk_fsm_state)
+                    //
+                    WRK_FSM_STATE_BUSY,
+                    WRK_FSM_STATE_LATENCY_POST1,
+                    WRK_FSM_STATE_LATENCY_POST2: begin
+                        // wide writes go to bank sel_wide_out and reuse the narrow read address delayed by two clocks
+                        {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly2};
+                        {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly2};                        
+                        // narrow writes go to bank sel_narrow_out at the same delayed address
+                        {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly2};
+                        {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly2};
+                        //
+                    end
+                    //
+                endcase
+                //
+            //
+        endcase
+        //
+    end
+ 
+    
+    //
+    // Read Address Logic
+    // Registered {bank, address} pairs for the read side, plus the counters holding the address to issue next.
+    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next;  // address to be issued on the following read; also used for the wide port
+    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next;
+
+    assign rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
+    assign rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
+    
+    always @(posedge clk) begin
+        // default for this clock: BANK_DNC / OP_ADDR_DNC; the *_next counters have no default and hold their value
+        {rd_wide_xy_bank_x,   rd_wide_xy_addr_x  } <= {BANK_DNC, OP_ADDR_DNC}; // TODO: Add same default path for io_manager ??
+        {rd_wide_xy_bank_y,   rd_wide_xy_addr_y  } <= {BANK_DNC, OP_ADDR_DNC};
+        {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
+        {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+        //
+        case (opcode)
+            //
+            UOP_OPCODE_PROPAGATE_CARRIES,
+            UOP_OPCODE_OUTPUT_FROM_NARROW,
+            UOP_OPCODE_COPY_CRT_Y2X:
+                // decoded from the next-state value, not from the current state
+                case (wrk_fsm_state_next_one_pass)
+                    // first read: address zero is issued and the counters are preloaded with one
+                    WRK_FSM_STATE_LATENCY_PRE1: begin
+                        //
+                        {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO};
+                        {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO};
+                        //
+                        {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
+                        {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
+                        //
+                        rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
+                        rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
+                        //
+                    end
+                    // following reads: issue the pending address, then advance the counters by one
+                    WRK_FSM_STATE_LATENCY_PRE2,
+                    WRK_FSM_STATE_BUSY: begin
+                        //
+                        {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_narrow_xy_addr_x_next};
+                        {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_narrow_xy_addr_y_next};                        
+                        //
+                        {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
+                        {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
+                        //
+                        rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
+                        rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
+                        //
+                    end
+                    //
+                endcase
+                //
+            //
+        endcase
+        //
+    end
+    
+
+    //
+    // FSM Process
+    //
+    // State register. It follows the one-pass next-state value while one of the
+    // three opcodes below is selected; reset or any other opcode forces IDLE.
+    //
+    always @(posedge clk)
+        if (rst)                                                wrk_fsm_state <= WRK_FSM_STATE_IDLE;
+        else if ((opcode == UOP_OPCODE_PROPAGATE_CARRIES ) ||
+                 (opcode == UOP_OPCODE_OUTPUT_FROM_NARROW) ||
+                 (opcode == UOP_OPCODE_COPY_CRT_Y2X      ))     wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+        else                                                    wrk_fsm_state <= WRK_FSM_STATE_IDLE;
+    
+  
+    //
+    // Busy Exit Logic
+    //
+    // wrk_fsm_done_one_pass is a registered flag, recomputed every clock. It is
+    // set only while one of the three opcodes below is selected, the FSM is in
+    // BUSY and the pending X or Y read address equals word_index_last; in all
+    // other cases it is cleared. The transition logic uses it to leave BUSY.
+    //
+    reg wrk_fsm_done_one_pass = 1'b0;
+
+    always @(posedge clk) begin
+        //
+        // default: flag low
+        wrk_fsm_done_one_pass <= 1'b0;
+        //
+        if ((opcode == UOP_OPCODE_PROPAGATE_CARRIES ) ||
+            (opcode == UOP_OPCODE_OUTPUT_FROM_NARROW) ||
+            (opcode == UOP_OPCODE_COPY_CRT_Y2X      ))
+            //
+            if (wrk_fsm_state == WRK_FSM_STATE_BUSY)
+                //
+                // either side reaching the last word ends the pass
+                // TODO: Check, whether both are necessary...
+                if (rd_narrow_xy_addr_x_next_is_last || rd_narrow_xy_addr_y_next_is_last)
+                    wrk_fsm_done_one_pass <= 1'b1;
+        //
+    end
+    
+        
+    //
+    // FSM Transition Logic
+    // Combinational next-state decode: IDLE -> PRE1 (on ena) -> PRE2 -> BUSY -> POST1 (on done) -> POST2 -> STOP -> IDLE.
+    always @* begin
+        wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE; // fallback: an unlisted state code returns to IDLE instead of inferring a latch
+        case (wrk_fsm_state)
+            WRK_FSM_STATE_IDLE:          wrk_fsm_state_next_one_pass = ena                   ? WRK_FSM_STATE_LATENCY_PRE1  : WRK_FSM_STATE_IDLE ;
+            WRK_FSM_STATE_LATENCY_PRE1:  wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_LATENCY_PRE2  ;
+            WRK_FSM_STATE_LATENCY_PRE2:  wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_BUSY          ;
+            WRK_FSM_STATE_BUSY:          wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ;
+            WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_LATENCY_POST2 ;
+            WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_STOP          ;
+            WRK_FSM_STATE_STOP:          wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_IDLE          ; 
+        endcase
+        //
+    end
+    
+
+    //
+    // Ready Logic
+    //
+    // rdy is high after reset, follows ~ena while the FSM is in IDLE and is
+    // raised again in STOP; in every other state it keeps its value.
+    //
+    reg rdy_reg = 1'b1;
+
+    assign rdy = rdy_reg;
+
+    always @(posedge clk)
+        if      (rst)                                  rdy_reg <= 1'b1;
+        else if (wrk_fsm_state == WRK_FSM_STATE_IDLE)  rdy_reg <= ~ena;
+        else if (wrk_fsm_state == WRK_FSM_STATE_STOP)  rdy_reg <= 1'b1;
+
+
+endmodule
diff --git a/rtl/modexpng_io_block.v b/rtl/modexpng_io_block.v
index 68d13c4..d7dd72e 100644
--- a/rtl/modexpng_io_block.v
+++ b/rtl/modexpng_io_block.v
@@ -84,13 +84,17 @@ module modexpng_io_block
     wire                                    bus_data_wr_input_1 = bus_data_wr && (bus_addr_msb == 2'd0);
     wire                                    bus_data_wr_input_2 = bus_data_wr && (bus_addr_msb == 2'd1);
 
+    wire bus_cs_input_1 = bus_cs && (bus_addr_msb == 2'b00);
+    wire bus_cs_input_2 = bus_cs && (bus_addr_msb == 2'b01);
+    wire bus_cs_output  = bus_cs && (bus_addr_msb == 2'b10);
+
     /* INPUT_1 */
     modexpng_tdp_36k_x16_x32_wrapper bram_input_1
     (
         .clk        (clk),                  // core clock
         .clk_bus    (clk_bus),              // bus clock
     
-        .ena        (bus_cs),               // bus side read-write
+        .ena        (bus_cs_input_1),       // bus side read-write
         .wea        (bus_data_wr_input_1),  //
         .addra      (bus_addr_lsb),         //
         .dina       (bus_data_wr),          //
@@ -109,7 +113,7 @@ module modexpng_io_block
         .clk        (clk),                  // core clock
         .clk_bus    (clk_bus),              // bus clock
     
-        .ena        (bus_cs),               // bus side write-only
+        .ena        (bus_cs_input_2),       // bus side write-only
         .wea        (bus_data_wr_input_2),  //
         .addra      (bus_addr_lsb),         //
         .dina       (bus_data_wr),          //
@@ -132,7 +136,7 @@ module modexpng_io_block
         .addra      (out_addr),             //
         .dina       (out_din),              //
     
-        .enb        (bus_cs),               // bus side read-only
+        .enb        (bus_cs_output),        // bus side read-only
         .addrb      (bus_addr_lsb),         //
         .doutb      (bus_data_rd_output)    //
     );
diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v
index 81f582f..dfbd676 100644
--- a/rtl/modexpng_io_manager.v
+++ b/rtl/modexpng_io_manager.v
@@ -15,42 +15,45 @@ module modexpng_io_manager
     
     word_index_last,
     
-    ext_wide_xy_ena_x,
-    ext_wide_xy_bank_x,
-    ext_wide_xy_addr_x,
-    ext_wide_x_din_x,
-    ext_wide_y_din_x,
-
-    ext_narrow_xy_ena_x,
-    ext_narrow_xy_bank_x,
-    ext_narrow_xy_addr_x,
-    ext_narrow_x_din_x,
-    ext_narrow_y_din_x,
-
-    ext_wide_xy_ena_y,
-    ext_wide_xy_bank_y,
-    ext_wide_xy_addr_y,
-    ext_wide_x_din_y,
-    ext_wide_y_din_y,
-
-    ext_narrow_xy_ena_y,
-    ext_narrow_xy_bank_y,
-    ext_narrow_xy_addr_y,
-    ext_narrow_x_din_y,
-    ext_narrow_y_din_y,
+    io_wide_xy_ena_x,
+    io_wide_xy_bank_x,
+    io_wide_xy_addr_x,
+    io_wide_x_din_x,
+    io_wide_y_din_x,
+
+    io_narrow_xy_ena_x,
+    io_narrow_xy_bank_x,
+    io_narrow_xy_addr_x,
+    io_narrow_x_din_x,
+    io_narrow_y_din_x,
+
+    io_wide_xy_ena_y,
+    io_wide_xy_bank_y,
+    io_wide_xy_addr_y,
+    io_wide_x_din_y,
+    io_wide_y_din_y,
+
+    io_narrow_xy_ena_y,
+    io_narrow_xy_bank_y,
+    io_narrow_xy_addr_y,
+    io_narrow_x_din_y,
+    io_narrow_y_din_y,
     
     io_in_1_en,
     io_in_1_addr,
-    io_in_1_dout,
+    io_in_1_din,
     
     io_in_2_en,
     io_in_2_addr,
-    io_in_2_dout,
+    io_in_2_din,
     
     io_out_en,
     io_out_we,
     io_out_addr,
-    io_out_din
+    io_out_dout,
+    
+    wrk_narrow_x_din_x_trunc,
+    wrk_narrow_x_din_y_trunc
 );
 
     //
@@ -78,42 +81,45 @@ module modexpng_io_manager
     
     input  [              OP_ADDR_W    -1:0] word_index_last;
     
-    output                                   ext_wide_xy_ena_x;
-    output [              BANK_ADDR_W  -1:0] ext_wide_xy_bank_x;
-    output [              OP_ADDR_W    -1:0] ext_wide_xy_addr_x;
-    output [              WORD_EXT_W   -1:0] ext_wide_x_din_x;
-    output [              WORD_EXT_W   -1:0] ext_wide_y_din_x;
-
-    output                                   ext_narrow_xy_ena_x;
-    output [              BANK_ADDR_W  -1:0] ext_narrow_xy_bank_x;
-    output [              OP_ADDR_W    -1:0] ext_narrow_xy_addr_x;
-    output [              WORD_EXT_W   -1:0] ext_narrow_x_din_x;
-    output [              WORD_EXT_W   -1:0] ext_narrow_y_din_x;
-    
-    output                                   ext_wide_xy_ena_y;
-    output [              BANK_ADDR_W  -1:0] ext_wide_xy_bank_y;
-    output [              OP_ADDR_W    -1:0] ext_wide_xy_addr_y;
-    output [              WORD_EXT_W   -1:0] ext_wide_x_din_y;
-    output [              WORD_EXT_W   -1:0] ext_wide_y_din_y;
-
-    output                                   ext_narrow_xy_ena_y;
-    output [              BANK_ADDR_W  -1:0] ext_narrow_xy_bank_y;
-    output [              OP_ADDR_W    -1:0] ext_narrow_xy_addr_y;
-    output [              WORD_EXT_W   -1:0] ext_narrow_x_din_y;
-    output [              WORD_EXT_W   -1:0] ext_narrow_y_din_y;
+    output                                   io_wide_xy_ena_x;
+    output [              BANK_ADDR_W  -1:0] io_wide_xy_bank_x;
+    output [              OP_ADDR_W    -1:0] io_wide_xy_addr_x;
+    output [              WORD_EXT_W   -1:0] io_wide_x_din_x;
+    output [              WORD_EXT_W   -1:0] io_wide_y_din_x;
+
+    output                                   io_narrow_xy_ena_x;
+    output [              BANK_ADDR_W  -1:0] io_narrow_xy_bank_x;
+    output [              OP_ADDR_W    -1:0] io_narrow_xy_addr_x;
+    output [              WORD_EXT_W   -1:0] io_narrow_x_din_x;
+    output [              WORD_EXT_W   -1:0] io_narrow_y_din_x;
+    
+    output                                   io_wide_xy_ena_y;
+    output [              BANK_ADDR_W  -1:0] io_wide_xy_bank_y;
+    output [              OP_ADDR_W    -1:0] io_wide_xy_addr_y;
+    output [              WORD_EXT_W   -1:0] io_wide_x_din_y;
+    output [              WORD_EXT_W   -1:0] io_wide_y_din_y;
+
+    output                                   io_narrow_xy_ena_y;
+    output [              BANK_ADDR_W  -1:0] io_narrow_xy_bank_y;
+    output [              OP_ADDR_W    -1:0] io_narrow_xy_addr_y;
+    output [              WORD_EXT_W   -1:0] io_narrow_x_din_y;
+    output [              WORD_EXT_W   -1:0] io_narrow_y_din_y;
 
     output                                   io_in_1_en;
     output [BANK_ADDR_W + OP_ADDR_W    -1:0] io_in_1_addr;
-    input  [              WORD_W       -1:0] io_in_1_dout;
+    input  [              WORD_W       -1:0] io_in_1_din;
     
     output                                   io_in_2_en;
     output [BANK_ADDR_W + OP_ADDR_W    -1:0] io_in_2_addr;
-    input  [              WORD_W       -1:0] io_in_2_dout;
+    input  [              WORD_W       -1:0] io_in_2_din;
     
     output                                   io_out_en;
     output                                   io_out_we;
     output [BANK_ADDR_W + OP_ADDR_W    -1:0] io_out_addr;
-    output [              WORD_W       -1:0] io_out_din;
+    output [              WORD_W       -1:0] io_out_dout;
+    
+    output [              WORD_W       -1:0] wrk_narrow_x_din_x_trunc;
+    output [              WORD_W       -1:0] wrk_narrow_x_din_y_trunc;
 
 
     //
@@ -126,6 +132,7 @@ module modexpng_io_manager
     localparam [2:0] IO_FSM_STATE_EXTRA         = 3'b100;
     localparam [2:0] IO_FSM_STATE_LATENCY_POST1 = 3'b101;
     localparam [2:0] IO_FSM_STATE_LATENCY_POST2 = 3'b110;
+    localparam [2:0] IO_FSM_STATE_STOP          = 3'b111;
     
     reg [2:0] io_fsm_state = IO_FSM_STATE_IDLE;
     reg [2:0] io_fsm_state_next;
@@ -143,10 +150,11 @@ module modexpng_io_manager
     reg [  OP_ADDR_W -1:0] in_2_addr_op;
     
     reg                    out_en = 1'b0;
-    reg                    out_we;
     reg [BANK_ADDR_W -1:0] out_addr_bank;
     reg [  OP_ADDR_W -1:0] out_addr_op;
-    reg [     WORD_W -1:0] out_din;
+    reg [     WORD_W -1:0] out_dout;
+    
+    reg [  OP_ADDR_W -1:0] dummy_addr_op;
     
     
     //
@@ -187,51 +195,54 @@ module modexpng_io_manager
     assign io_in_2_addr = {in_2_addr_bank, in_2_addr_op};
     
     assign io_out_en    = out_en;
-    assign io_out_we    = out_we;
+    assign io_out_we    = io_out_en; // we can only write!
     assign io_out_addr  = {out_addr_bank, out_addr_op};
-    assign io_out_din   = out_din;
+    assign io_out_dout  = out_dout;
 
 
     //
     // Mapping
     //
-    assign ext_wide_xy_ena_x    = wide_xy_ena_x;
-    assign ext_wide_xy_bank_x   = wide_xy_bank_x;
-    assign ext_wide_xy_addr_x   = wide_xy_addr_x;
-    assign ext_wide_x_din_x     = wide_x_din_x;
-    assign ext_wide_y_din_x     = wide_y_din_x;
+    assign io_wide_xy_ena_x    = wide_xy_ena_x;
+    assign io_wide_xy_bank_x   = wide_xy_bank_x;
+    assign io_wide_xy_addr_x   = wide_xy_addr_x;
+    assign io_wide_x_din_x     = wide_x_din_x;
+    assign io_wide_y_din_x     = wide_y_din_x;
 
-    assign ext_narrow_xy_ena_x  = narrow_xy_ena_x; 
-    assign ext_narrow_xy_bank_x = narrow_xy_bank_x;
-    assign ext_narrow_xy_addr_x = narrow_xy_addr_x;
-    assign ext_narrow_x_din_x   = narrow_x_din_x;
-    assign ext_narrow_y_din_x   = narrow_y_din_x;
+    assign io_narrow_xy_ena_x  = narrow_xy_ena_x; 
+    assign io_narrow_xy_bank_x = narrow_xy_bank_x;
+    assign io_narrow_xy_addr_x = narrow_xy_addr_x;
+    assign io_narrow_x_din_x   = narrow_x_din_x;
+    assign io_narrow_y_din_x   = narrow_y_din_x;
 
-    assign ext_wide_xy_ena_y    = wide_xy_ena_y;
-    assign ext_wide_xy_bank_y   = wide_xy_bank_y;
-    assign ext_wide_xy_addr_y   = wide_xy_addr_y;
-    assign ext_wide_x_din_y     = wide_x_din_y;
-    assign ext_wide_y_din_y     = wide_y_din_y;
+    assign io_wide_xy_ena_y    = wide_xy_ena_y;
+    assign io_wide_xy_bank_y   = wide_xy_bank_y;
+    assign io_wide_xy_addr_y   = wide_xy_addr_y;
+    assign io_wide_x_din_y     = wide_x_din_y;
+    assign io_wide_y_din_y     = wide_y_din_y;
 
-    assign ext_narrow_xy_ena_y  = narrow_xy_ena_y;
-    assign ext_narrow_xy_bank_y = narrow_xy_bank_y;
-    assign ext_narrow_xy_addr_y = narrow_xy_addr_y;
-    assign ext_narrow_x_din_y   = narrow_x_din_y;
-    assign ext_narrow_y_din_y   = narrow_y_din_y;
+    assign io_narrow_xy_ena_y  = narrow_xy_ena_y;
+    assign io_narrow_xy_bank_y = narrow_xy_bank_y;
+    assign io_narrow_xy_addr_y = narrow_xy_addr_y;
+    assign io_narrow_x_din_y   = narrow_x_din_y;
+    assign io_narrow_y_din_y   = narrow_y_din_y;
 
 
     //
     // Delays
     //    
-    reg [  OP_ADDR_W -1:0] in_1_addr_op_dly1;
-    reg [  OP_ADDR_W -1:0] in_1_addr_op_dly2;
-    reg [  OP_ADDR_W -1:0] in_2_addr_op_dly1;
-    reg [  OP_ADDR_W -1:0] in_2_addr_op_dly2;
+    reg [OP_ADDR_W -1:0] in_1_addr_op_dly1;
+    reg [OP_ADDR_W -1:0] in_1_addr_op_dly2;
+    reg [OP_ADDR_W -1:0] in_2_addr_op_dly1;
+    reg [OP_ADDR_W -1:0] in_2_addr_op_dly2;
+    reg [OP_ADDR_W -1:0] dummy_addr_op_dly1;
+    reg [OP_ADDR_W -1:0] dummy_addr_op_dly2;
     
     always @(posedge clk) begin
         //
-        {in_1_addr_op_dly2, in_1_addr_op_dly1} <= {in_1_addr_op_dly1, in_1_addr_op};
-        {in_2_addr_op_dly2, in_2_addr_op_dly1} <= {in_2_addr_op_dly1, in_2_addr_op};
+        {in_1_addr_op_dly2,  in_1_addr_op_dly1}  <= {in_1_addr_op_dly1,  in_1_addr_op};
+        {in_2_addr_op_dly2,  in_2_addr_op_dly1}  <= {in_2_addr_op_dly1,  in_2_addr_op};
+        {dummy_addr_op_dly2, dummy_addr_op_dly1} <= {dummy_addr_op_dly1, dummy_addr_op};
         //
     end
 
@@ -241,10 +252,14 @@ module modexpng_io_manager
     //
     wire opcode_is_input = (opcode == UOP_OPCODE_INPUT_TO_WIDE) || (opcode == UOP_OPCODE_INPUT_TO_NARROW);
 
-    wire opcode_is_wide   = (opcode == UOP_OPCODE_INPUT_TO_WIDE  );
-    wire opcode_is_narrow = (opcode == UOP_OPCODE_INPUT_TO_NARROW); 
+    wire opcode_is_output = opcode == UOP_OPCODE_OUTPUT_FROM_NARROW;
+
+    wire opcode_is_input_wide   = opcode == UOP_OPCODE_INPUT_TO_WIDE;
+    wire opcode_is_input_narrow = opcode == UOP_OPCODE_INPUT_TO_NARROW; 
 
-    wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF);
+    wire sel_in_needs_extra = (sel_in == BANK_IN_1_N_COEFF) ||
+                              (sel_in == BANK_IN_2_P_COEFF) ||
+                              (sel_in == BANK_IN_2_Q_COEFF) ;
 
     wire sel_crt_is_x = sel_crt == UOP_CRT_X;
     wire sel_crt_is_y = sel_crt == UOP_CRT_Y;
@@ -252,22 +267,18 @@ module modexpng_io_manager
     wire sel_aux_is_1 = sel_aux == UOP_AUX_1;
     wire sel_aux_is_2 = sel_aux == UOP_AUX_2;
 
-    wire in_1_addr_op_is_last;
-    wire in_2_addr_op_is_last; 
-
-    wire in_1_addr_next_op_is_last;
-    wire in_2_addr_next_op_is_last; 
-
+    wire in_1_addr_op_next_is_last;
+    wire in_2_addr_op_next_is_last;
+    wire dummy_addr_op_next_is_last;
 
     //
-    // Enable Logic
+    // Source Enable Logic
     //
     always @(posedge clk)
         //
         if (rst) begin
             in_1_en <= 1'b0;
             in_2_en <= 1'b0;
-            out_en  <= 1'b0;
         end else case (io_fsm_state_next)
             //
             IO_FSM_STATE_LATENCY_PRE1,
@@ -279,7 +290,7 @@ module modexpng_io_manager
             //
             IO_FSM_STATE_EXTRA: begin
                 in_1_en <= opcode_is_input && sel_aux_is_1 && sel_in_needs_extra;
-                in_2_en <= 1'b0;
+                in_2_en <= opcode_is_input && sel_aux_is_2 && sel_in_needs_extra;
             end
             //
             default: begin
@@ -290,7 +301,7 @@ module modexpng_io_manager
         endcase
 
     //
-    // Enable Logic
+    // Destination Enable Logic
     //    
     always @(posedge clk)
         //
@@ -301,38 +312,52 @@ module modexpng_io_manager
             narrow_xy_ena_x <= 1'b0;
             narrow_xy_ena_y <= 1'b0;
             //
+            out_en <= 1'b0;
+            //
         end else case (io_fsm_state)
             //
             IO_FSM_STATE_BUSY,
             IO_FSM_STATE_EXTRA,
             IO_FSM_STATE_LATENCY_POST1: begin
-                wide_xy_ena_x   <= opcode_is_wide   && sel_crt_is_x;
-                wide_xy_ena_y   <= opcode_is_wide   && sel_crt_is_y;
-                narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x;
-                narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y;
+                //
+                wide_xy_ena_x   <= opcode_is_input_wide   && sel_crt_is_x;
+                wide_xy_ena_y   <= opcode_is_input_wide   && sel_crt_is_y;
+                narrow_xy_ena_x <= opcode_is_input_narrow && sel_crt_is_x;
+                narrow_xy_ena_y <= opcode_is_input_narrow && sel_crt_is_y;
+                //
+                out_en <= opcode_is_output;
+                //
             end
             //
             IO_FSM_STATE_LATENCY_POST2: begin
+                //
                 wide_xy_ena_x   <= 1'b0;
                 wide_xy_ena_y   <= 1'b0;
-                narrow_xy_ena_x <= opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra;
-                narrow_xy_ena_y <= opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra;
+                narrow_xy_ena_x <= opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra;
+                narrow_xy_ena_y <= opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra;
+                //
+                out_en <= opcode_is_output;
+                //
             end
             //
             default: begin
+                //
                 wide_xy_ena_x   <= 1'b0;
                 wide_xy_ena_y   <= 1'b0;
                 narrow_xy_ena_x <= 1'b0;
                 narrow_xy_ena_y <= 1'b0;
+                //
+                out_en <= 1'b0;
+                //
             end
             //
         endcase
 
 
     //
-    // Data Logic
+    // Output Data Logic
     //
-    wire [WORD_EXT_W -1:0] io_in_dout_mux = {{(WORD_EXT_W-WORD_W){1'b0}}, sel_aux_is_1 ? io_in_1_dout : io_in_2_dout};
+    wire [WORD_EXT_W -1:0] io_in_dout_mux = {{(WORD_EXT_W-WORD_W){1'b0}}, sel_aux_is_1 ? io_in_1_din : io_in_2_din};
 
     always @(posedge clk) begin
         //
@@ -345,20 +370,30 @@ module modexpng_io_manager
         narrow_x_din_y <= WORD_EXT_DNC;
         narrow_y_din_y <= WORD_EXT_DNC;
         //
+        out_dout       <= WORD_DNC;
+        //
         case (io_fsm_state)
             //
             IO_FSM_STATE_BUSY,
             IO_FSM_STATE_EXTRA,
             IO_FSM_STATE_LATENCY_POST1: begin
-                if (opcode_is_wide   && sel_crt_is_x) {wide_x_din_x,   wide_y_din_x}   <= {2{io_in_dout_mux}};
-                if (opcode_is_wide   && sel_crt_is_y) {wide_x_din_y,   wide_y_din_y}   <= {2{io_in_dout_mux}};
-                if (opcode_is_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
-                if (opcode_is_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
+                //
+                if (opcode_is_input_wide   && sel_crt_is_x) {wide_x_din_x,   wide_y_din_x}   <= {2{io_in_dout_mux}};    // TODO: Make external ports smaller (WORD_W, not WORD_EXT_W)??
+                if (opcode_is_input_wide   && sel_crt_is_y) {wide_x_din_y,   wide_y_din_y}   <= {2{io_in_dout_mux}};
+                if (opcode_is_input_narrow && sel_crt_is_x) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
+                if (opcode_is_input_narrow && sel_crt_is_y) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
+                //
+                if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_x_din_x_trunc : wrk_narrow_x_din_y_trunc;
+                //
             end
             //
             IO_FSM_STATE_LATENCY_POST2: begin
-                if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
-                if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
+            //
+                if (opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_x_din_x, narrow_y_din_x} <= {2{io_in_dout_mux}};
+                if (opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_x_din_y, narrow_y_din_y} <= {2{io_in_dout_mux}};
+                //
+                if (opcode_is_output) out_dout <= sel_crt_is_x ? wrk_narrow_x_din_x_trunc : wrk_narrow_x_din_y_trunc;
+                //
             end            
             //
         endcase
@@ -367,7 +402,7 @@ module modexpng_io_manager
 
 
     //
-    // Address Logic
+    // Destination Address Logic
     //
     wire [OP_ADDR_W -1:0] in_addr_op_dly2_mux =
         sel_aux_is_1 ? in_1_addr_op_dly2 : in_2_addr_op_dly2;
@@ -378,21 +413,24 @@ module modexpng_io_manager
         {wide_xy_bank_y,   wide_xy_addr_y  } <= {BANK_DNC, OP_ADDR_DNC};
         {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
         {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+        {out_addr_bank,    out_addr_op     } <= {BANK_DNC, OP_ADDR_DNC};
         //
         case (io_fsm_state)
             //
             IO_FSM_STATE_BUSY,
             IO_FSM_STATE_EXTRA,
             IO_FSM_STATE_LATENCY_POST1: begin
-                if (opcode_is_wide   && sel_crt_is_x) {wide_xy_bank_x,   wide_xy_addr_x  } <= {sel_out, in_addr_op_dly2_mux};
-                if (opcode_is_wide   && sel_crt_is_y) {wide_xy_bank_y,   wide_xy_addr_y  } <= {sel_out, in_addr_op_dly2_mux};
-                if (opcode_is_narrow && sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {sel_out, in_addr_op_dly2_mux};
-                if (opcode_is_narrow && sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {sel_out, in_addr_op_dly2_mux};
+                if (opcode_is_input_wide   && sel_crt_is_x) {wide_xy_bank_x,   wide_xy_addr_x  } <= {sel_out, in_addr_op_dly2_mux};
+                if (opcode_is_input_wide   && sel_crt_is_y) {wide_xy_bank_y,   wide_xy_addr_y  } <= {sel_out, in_addr_op_dly2_mux};
+                if (opcode_is_input_narrow && sel_crt_is_x) {narrow_xy_bank_x, narrow_xy_addr_x} <= {sel_out, in_addr_op_dly2_mux};
+                if (opcode_is_input_narrow && sel_crt_is_y) {narrow_xy_bank_y, narrow_xy_addr_y} <= {sel_out, in_addr_op_dly2_mux};
+                if (opcode_is_output                      ) {out_addr_bank,    out_addr_op}      <= {sel_out, dummy_addr_op_dly2};
             end
             //
             IO_FSM_STATE_LATENCY_POST2: begin
-                if (opcode_is_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
-                if (opcode_is_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF};
+                if (opcode_is_input_narrow && sel_crt_is_x && sel_in_needs_extra) {narrow_xy_bank_x, narrow_xy_addr_x} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF };
+                if (opcode_is_input_narrow && sel_crt_is_y && sel_in_needs_extra) {narrow_xy_bank_y, narrow_xy_addr_y} <= {BANK_NARROW_EXT, OP_ADDR_EXT_COEFF };
+                if (opcode_is_output                                            ) {out_addr_bank,    out_addr_op     } <= {sel_out,         dummy_addr_op_dly2};
             end            
             //
         endcase
@@ -401,21 +439,19 @@ module modexpng_io_manager
         
     
     //
-    // Address Logic
+    // Source Address Logic
     //
     reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr_next; 
     reg [BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr_next;
-    reg [BANK_ADDR_W + OP_ADDR_W -1:0] out_addr_next;
+    reg [              OP_ADDR_W -1:0] dummy_addr_next;
 
-    wire [OP_ADDR_W -1:0] in_1_addr_next_op = in_1_addr_next[OP_ADDR_W -1:0];
-    wire [OP_ADDR_W -1:0] in_2_addr_next_op = in_2_addr_next[OP_ADDR_W -1:0];
-    wire [OP_ADDR_W -1:0] out_addr_next_op  = out_addr_next [OP_ADDR_W -1:0]; 
+    wire [OP_ADDR_W -1:0] in_1_addr_op_next  = in_1_addr_next[OP_ADDR_W -1:0];
+    wire [OP_ADDR_W -1:0] in_2_addr_op_next  = in_2_addr_next[OP_ADDR_W -1:0];
+    wire [OP_ADDR_W -1:0] dummy_addr_op_next = dummy_addr_next;  
     
-    assign in_1_addr_op_is_last = in_1_addr_op == word_index_last;
-    assign in_2_addr_op_is_last = in_2_addr_op == word_index_last;  
-
-    assign in_1_addr_next_op_is_last = in_1_addr_next_op == word_index_last;
-    assign in_2_addr_next_op_is_last = in_2_addr_next_op == word_index_last;  
+    assign in_1_addr_op_next_is_last  = in_1_addr_op_next  == word_index_last;
+    assign in_2_addr_op_next_is_last  = in_2_addr_op_next  == word_index_last;  
+    assign dummy_addr_op_next_is_last = dummy_addr_op_next == word_index_last; 
     
     always @(posedge clk)
         //
@@ -423,36 +459,42 @@ module modexpng_io_manager
             //
             IO_FSM_STATE_LATENCY_PRE1: begin
                 //
-                {in_1_addr_bank, in_1_addr_op} <= {sel_in,  OP_ADDR_ZERO};
-                {in_2_addr_bank, in_2_addr_op} <= {sel_in,  OP_ADDR_ZERO};
-                {out_addr_bank,  out_addr_op } <= {sel_out, OP_ADDR_ZERO};
+                {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO};
+                {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO};
+                {                dummy_addr_op} <= {        OP_ADDR_ZERO};
                 //
-                in_1_addr_next <= {sel_in,  OP_ADDR_ONE};
-                in_2_addr_next <= {sel_in,  OP_ADDR_ONE};
-                out_addr_next  <= {sel_out, OP_ADDR_ONE};
+                in_1_addr_next  <= {sel_in,  OP_ADDR_ONE};
+                in_2_addr_next  <= {sel_in,  OP_ADDR_ONE};
+                dummy_addr_next <= {         OP_ADDR_ONE};
                 //
             end
             //
             IO_FSM_STATE_LATENCY_PRE2, 
             IO_FSM_STATE_BUSY: begin
                 //
-                {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
-                {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next;
-                {out_addr_bank,  out_addr_op } <= out_addr_next;
+                {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
+                {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
+                {                dummy_addr_op} <= dummy_addr_next;
                 //
-                in_1_addr_next <= in_1_addr_next + 1'b1;
-                in_2_addr_next <= in_2_addr_next + 1'b1;
-                out_addr_next  <= out_addr_next  + 1'b1;
+                in_1_addr_next  <= in_1_addr_next  + 1'b1;
+                in_2_addr_next  <= in_2_addr_next  + 1'b1;
+                dummy_addr_next <= dummy_addr_next + 1'b1;
                 //
             end
             //
             IO_FSM_STATE_EXTRA:
                 //
-                if (opcode_is_input && sel_aux_is_1 && (sel_in == BANK_IN_1_N_COEFF)) begin
+                if (opcode_is_input && sel_in_needs_extra) begin
                     //
-                    {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
+                    if (sel_aux_is_1) begin
+                        {in_1_addr_bank, in_1_addr_op} <= in_1_addr_next;
+                        in_1_addr_next <= in_1_addr_next + 1'b1;
+                    end
                     //
-                    in_1_addr_next <= in_1_addr_next + 1'b1;
+                    if (sel_aux_is_2) begin
+                        {in_2_addr_bank, in_2_addr_op} <= in_2_addr_next;
+                        in_2_addr_next <= in_2_addr_next + 1'b1;
+                    end                    
                     // 
                 end
             //
@@ -481,28 +523,33 @@ module modexpng_io_manager
         if (io_fsm_state == IO_FSM_STATE_BUSY) begin
             //
             if (opcode_is_input) begin
-                if (sel_aux_is_1 && in_1_addr_next_op_is_last) io_fsm_done <= 1'b1;
-                if (sel_aux_is_2 && in_2_addr_next_op_is_last) io_fsm_done <= 1'b1;
+                if (sel_aux_is_1 && in_1_addr_op_next_is_last) io_fsm_done <= 1'b1;
+                if (sel_aux_is_2 && in_2_addr_op_next_is_last) io_fsm_done <= 1'b1;
+            end else if (opcode_is_output) begin
+                if (dummy_addr_op_next_is_last)                io_fsm_done <= 1'b1;
             end
-             
+            //
         end
         //
     end
     
-        
+    
     //
     // FSM Transition Logic
     //
+    wire [2:0] io_fsm_state_after_busy = opcode_is_input ? IO_FSM_STATE_EXTRA : IO_FSM_STATE_LATENCY_POST1;
+    
     always @* begin
         //
         case (io_fsm_state)
             IO_FSM_STATE_IDLE:          io_fsm_state_next = ena         ? IO_FSM_STATE_LATENCY_PRE1  : IO_FSM_STATE_IDLE ;
             IO_FSM_STATE_LATENCY_PRE1:  io_fsm_state_next =               IO_FSM_STATE_LATENCY_PRE2  ;
             IO_FSM_STATE_LATENCY_PRE2:  io_fsm_state_next =               IO_FSM_STATE_BUSY          ;
-            IO_FSM_STATE_BUSY:          io_fsm_state_next = io_fsm_done ? IO_FSM_STATE_EXTRA         : IO_FSM_STATE_BUSY ;
+            IO_FSM_STATE_BUSY:          io_fsm_state_next = io_fsm_done ? io_fsm_state_after_busy    : IO_FSM_STATE_BUSY ;
             IO_FSM_STATE_EXTRA:         io_fsm_state_next =               IO_FSM_STATE_LATENCY_POST1 ;
             IO_FSM_STATE_LATENCY_POST1: io_fsm_state_next =               IO_FSM_STATE_LATENCY_POST2 ;
-            IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next =               IO_FSM_STATE_IDLE          ;
+            IO_FSM_STATE_LATENCY_POST2: io_fsm_state_next =               IO_FSM_STATE_STOP          ;
+            IO_FSM_STATE_STOP:          io_fsm_state_next =               IO_FSM_STATE_IDLE          ;
         endcase
         //
     end
@@ -517,10 +564,10 @@ module modexpng_io_manager
     
     always @(posedge clk)
         //
-        if (rst)                        rdy_reg <= 1'b1;
+        if (rst)                 rdy_reg <= 1'b1;
         else case (io_fsm_state)
-            IO_FSM_STATE_IDLE:          rdy_reg <= ~ena;
-            IO_FSM_STATE_LATENCY_POST2: rdy_reg <= 1'b1;
+            IO_FSM_STATE_IDLE:   rdy_reg <= ~ena;
+            IO_FSM_STATE_STOP:   rdy_reg <= 1'b1;
         endcase
 
 
diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh
index 1465c48..2e591e7 100644
--- a/rtl/modexpng_microcode.vh
+++ b/rtl/modexpng_microcode.vh
@@ -32,8 +32,19 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW    = 4'd2;
 */
 
 localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3;
+/* CRT selects which side (X or Y) of the dual MMM to read from
+ * NPQ specifies the width of the operand
+ * AUX is don't care
+ * LADDER is don't care
+ * source and destination WIDE are don't care
+ */
 
-//localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X  = 4'd0;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X  = 4'd4;
+/* CRT is don't care
+ * NPQ specifies the width of the operand
+ * AUX is don't care
+ * LADDER is don't care
+ */
 
 localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY   = 4'd8;
 /* CRT is don't care
@@ -42,6 +53,14 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY   = 4'd8;
  * LADDER specifies Montgomery ladder mode 
  */
 
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 4'd11;
+/* CRT is don't care
+ * NPQ specifies the width of the operand
+ * AUX is don't care
+ * LADDER is don't care
+ * source and destination WIDE are don't care
+ */ 
+
 // CRT
 localparam [UOP_CRT_W -1:0] UOP_CRT_X   = 1'b0;
 localparam [UOP_CRT_W -1:0] UOP_CRT_Y   = 1'b1;
diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v
index b9b41e8..14f1b47 100644
--- a/rtl/modexpng_mmm_dual.v
+++ b/rtl/modexpng_mmm_dual.v
@@ -17,16 +17,16 @@ module modexpng_mmm_dual
     rd_wide_xy_bank_aux,
     rd_wide_xy_addr,
     rd_wide_xy_addr_aux,
-    rd_wide_x_dout,
-    rd_wide_y_dout,
-    rd_wide_x_dout_aux,
-    rd_wide_y_dout_aux,
+    rd_wide_x_din,
+    rd_wide_y_din,
+    rd_wide_x_din_aux,
+    rd_wide_y_din_aux,
     
     rd_narrow_xy_ena,
     rd_narrow_xy_bank,
     rd_narrow_xy_addr,
-    rd_narrow_x_dout,
-    rd_narrow_y_dout,
+    rd_narrow_x_din,
+    rd_narrow_y_din,
     
     rcmb_wide_xy_bank,
     rcmb_wide_xy_addr,
@@ -82,16 +82,16 @@ module modexpng_mmm_dual
     output  [             BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
     output  [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr;
     output  [           8-1:0] rd_wide_xy_addr_aux;
-    input  [18*NUM_MULTS/2-1:0] rd_wide_x_dout;
-    input  [18*NUM_MULTS/2-1:0] rd_wide_y_dout;
-    input  [          18-1:0] rd_wide_x_dout_aux;
-    input  [          18-1:0] rd_wide_y_dout_aux;
+    input  [18*NUM_MULTS/2-1:0] rd_wide_x_din;
+    input  [18*NUM_MULTS/2-1:0] rd_wide_y_din;
+    input  [          18-1:0] rd_wide_x_din_aux;
+    input  [          18-1:0] rd_wide_y_din_aux;
 
     output                    rd_narrow_xy_ena;
     output [             BANK_ADDR_W -1:0] rd_narrow_xy_bank;
     output [ 7:0] rd_narrow_xy_addr;
-    input  [18-1:0] rd_narrow_x_dout;
-    input  [18-1:0] rd_narrow_y_dout;
+    input  [18-1:0] rd_narrow_x_din;
+    input  [18-1:0] rd_narrow_y_din;
 
     output [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
     output [ 7:0] rcmb_wide_xy_addr;
@@ -626,8 +626,8 @@ module modexpng_mmm_dual
         //end
     //endgenerate
     
-    assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout};
-    assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout};
+    assign dsp_x_a = {rd_wide_x_din_aux, rd_wide_x_din};
+    assign dsp_y_a = {rd_wide_y_din_aux, rd_wide_y_din};
     
     //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux;
     //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux;
@@ -730,25 +730,25 @@ module modexpng_mmm_dual
     //
     // On-the-fly Carry Recombination
     //
-    wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry};
-    wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry};
-    wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry;
+    wire [17:0] rd_narrow_x_din_carry = rd_narrow_x_din + {{16{1'b0}}, dsp_xy_b_carry};
+    wire [17:0] rd_narrow_y_din_carry = rd_narrow_y_din + {{16{1'b0}}, dsp_xy_b_carry};
+    wire [17:0] rd_narrow_xy_din_carry_mux = ladder_mode ? rd_narrow_y_din_carry : rd_narrow_x_din_carry;
   
     wire [15:0] rd_narrow_xy_dout_carry_mux_or_unity = !force_unity_b ?
-        rd_narrow_xy_dout_carry_mux[15:0] : dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO;
+        rd_narrow_xy_din_carry_mux[15:0] : dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO;
   
     always @(posedge clk)
         //
         if (narrow_xy_ena_dly2) begin // rewrite
             //
             if (!dsp_merge_xy_b) begin
-                dsp_x_b <= rd_narrow_x_dout[15:0];
-                dsp_y_b <= rd_narrow_y_dout[15:0];
+                dsp_x_b <= rd_narrow_x_din[15:0];
+                dsp_y_b <= rd_narrow_y_din[15:0];
                 dsp_xy_b_carry <= 2'b00;
             end else begin
                 dsp_x_b <= rd_narrow_xy_dout_carry_mux_or_unity;
                 dsp_y_b <= rd_narrow_xy_dout_carry_mux_or_unity;
-                dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16];
+                dsp_xy_b_carry <= rd_narrow_xy_din_carry_mux[17:16];
             end                 
             //
         end else begin
@@ -845,6 +845,7 @@ module modexpng_mmm_dual
     modexpng_recombinator_block recombinator_block
     (
         .clk                            (clk),
+        .rst                            (rst),
         
         .ena                            (rcmb_ena),
         .rdy                            (rcmb_rdy),
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index e610e47..6e6c3ca 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -31,9 +31,9 @@ localparam COL_INDEX_W = OP_ADDR_W - cryptech_clog2(NUM_MULTS);
 
 localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS);
 
-localparam RDCT_CARRY_W = WORD_EXT_W - WORD_W;
+localparam CARRY_W = WORD_EXT_W - WORD_W;
 
-localparam [RDCT_CARRY_W-1:0] RDCT_CARRY_ZEROES = {RDCT_CARRY_W{1'b0}};
+localparam [CARRY_W-1:0] CARRY_ZERO = {CARRY_W{1'b0}};
 
 localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0;
 localparam [BANK_ADDR_W-1:0] BANK_WIDE_B = 3'd1;
diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v
index a37333e..c100b8b 100644
--- a/rtl/modexpng_reductor.v
+++ b/rtl/modexpng_reductor.v
@@ -4,8 +4,8 @@ module modexpng_reductor
     ena, rdy,
     word_index_last,
     sel_wide_out, sel_narrow_out,
-    rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_dout_aux, rd_wide_y_dout_aux,
-    rcmb_final_xy_bank,  rcmb_final_xy_addr,  rcmb_final_x_dout,  rcmb_final_y_dout,  rcmb_final_xy_valid,
+    rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_din_aux, rd_wide_y_din_aux,
+    rcmb_final_xy_bank,  rcmb_final_xy_addr,  rcmb_final_x_din,  rcmb_final_y_din,  rcmb_final_xy_valid,
     rdct_wide_xy_bank,   rdct_wide_xy_addr,   rdct_wide_x_dout,   rdct_wide_y_dout,   rdct_wide_xy_valid,
     rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid
 );
@@ -41,13 +41,13 @@ module modexpng_reductor
     */
     input  [    BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
     input  [     7:0] rd_wide_xy_addr_aux;
-    input  [    17:0] rd_wide_x_dout_aux;
-    input  [    17:0] rd_wide_y_dout_aux;
+    input  [    17:0] rd_wide_x_din_aux;
+    input  [    17:0] rd_wide_y_din_aux;
     //
     input  [     BANK_ADDR_W -1:0] rcmb_final_xy_bank;
     input  [     7:0] rcmb_final_xy_addr;
-    input  [    17:0] rcmb_final_x_dout;
-    input  [    17:0] rcmb_final_y_dout;
+    input  [    17:0] rcmb_final_x_din;
+    input  [    17:0] rcmb_final_y_din;
     input             rcmb_final_xy_valid;
 
     output [     2:0] rdct_wide_xy_bank;
@@ -121,8 +121,8 @@ module modexpng_reductor
         if (rcmb_final_xy_valid) begin
             rcmb_xy_bank_dly1 <= rcmb_final_xy_bank;
             rcmb_xy_addr_dly1 <= rcmb_final_xy_addr;
-            rcmb_x_dout_dly1  <= rcmb_final_x_dout;
-            rcmb_y_dout_dly1  <= rcmb_final_y_dout;
+            rcmb_x_dout_dly1  <= rcmb_final_x_din;
+            rcmb_y_dout_dly1  <= rcmb_final_y_din;
         end
         //
         if (rcmb_xy_valid_dly1) begin
@@ -167,14 +167,14 @@ module modexpng_reductor
             case (rcmb_xy_bank_dly3)    
         
                 BANK_RCMB_ML: begin
-                    {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry;
-                    {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry;
+                    {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry;
+                    {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry;
                 end
                     
                 BANK_RCMB_MH:
                     if (rcmb_xy_addr_dly3 == 8'd0) begin
-                        {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry;
-                        {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry;
+                        {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry;
+                        {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry;
                     end
                     
             endcase
@@ -273,8 +273,8 @@ module modexpng_reductor
     //
     //
     //
-    wire [17:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_dout_aux;
-    wire [17:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_dout_aux;
+    wire [17:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_din_aux;
+    wire [17:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_din_aux;
     
     wire [17:0] sum_rdct_x_carry = sum_rdct_x + {16'h0000, rcmb_x_lsb_carry};
     wire [17:0] sum_rdct_y_carry = sum_rdct_y + {16'h0000, rcmb_y_lsb_carry};
diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v
index f1d5ae2..19601ef 100644
--- a/rtl/modexpng_storage_block.v
+++ b/rtl/modexpng_storage_block.v
@@ -1,6 +1,6 @@
 module modexpng_storage_block
 (
-    clk, clk_bus, rst,
+    clk, rst,
     
     wr_wide_xy_ena,
     wr_wide_xy_bank,
@@ -29,7 +29,19 @@ module modexpng_storage_block
     rd_narrow_xy_bank,
     rd_narrow_xy_addr,
     rd_narrow_x_dout,
-    rd_narrow_y_dout
+    rd_narrow_y_dout,
+    
+    wrk_wide_xy_ena,
+    wrk_wide_xy_bank,
+    wrk_wide_xy_addr,
+    wrk_wide_x_dout,
+    wrk_wide_y_dout,
+    
+    wrk_narrow_xy_ena,
+    wrk_narrow_xy_bank,
+    wrk_narrow_xy_addr,
+    wrk_narrow_x_dout,
+    wrk_narrow_y_dout
 );
 
     //
@@ -42,7 +54,6 @@ module modexpng_storage_block
     // Ports
     //
     input                                         clk;
-    input                                         clk_bus;
     input                                         rst;
 
     input                                         wr_wide_xy_ena;
@@ -74,6 +85,18 @@ module modexpng_storage_block
     output [                  WORD_EXT_W    -1:0] rd_narrow_x_dout;
     output [                  WORD_EXT_W    -1:0] rd_narrow_y_dout;
 
+    input                                         wrk_wide_xy_ena;
+    input  [                  BANK_ADDR_W   -1:0] wrk_wide_xy_bank;
+    input  [                  OP_ADDR_W     -1:0] wrk_wide_xy_addr;
+    output [                  WORD_EXT_W    -1:0] wrk_wide_x_dout;
+    output [                  WORD_EXT_W    -1:0] wrk_wide_y_dout;
+    
+    input                                         wrk_narrow_xy_ena;
+    input  [                  BANK_ADDR_W   -1:0] wrk_narrow_xy_bank;
+    input  [                  OP_ADDR_W     -1:0] wrk_narrow_xy_addr;
+    output [                  WORD_EXT_W    -1:0] wrk_narrow_x_dout;
+    output [                  WORD_EXT_W    -1:0] wrk_narrow_y_dout;
+    
     
     //
     // Internal Registers
@@ -81,6 +104,8 @@ module modexpng_storage_block
     reg rd_wide_xy_reg_ena     = 1'b0;
     reg rd_wide_xy_reg_ena_aux = 1'b0;
     reg rd_narrow_xy_reg_ena   = 1'b0;
+    reg wrk_wide_xy_reg_ena    = 1'b0;
+    reg wrk_narrow_xy_reg_ena  = 1'b0;
 
     always @(posedge clk)
         //
@@ -88,10 +113,14 @@ module modexpng_storage_block
             rd_wide_xy_reg_ena     <= 1'b0;
             rd_wide_xy_reg_ena_aux <= 1'b0;
             rd_narrow_xy_reg_ena   <= 1'b0;
+            wrk_wide_xy_reg_ena    <= 1'b0;
+            wrk_narrow_xy_reg_ena  <= 1'b0;
         end else begin
             rd_wide_xy_reg_ena     <= rd_wide_xy_ena;
             rd_wide_xy_reg_ena_aux <= rd_wide_xy_ena_aux;
             rd_narrow_xy_reg_ena   <= rd_narrow_xy_ena;
+            wrk_wide_xy_reg_ena    <= wrk_wide_xy_ena;
+            wrk_narrow_xy_reg_ena  <= wrk_narrow_xy_ena;
         end
     
     //
@@ -102,22 +131,26 @@ module modexpng_storage_block
     wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset;
     wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset;
     wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset;
+    wire [BANK_ADDR_W + OP_ADDR_W -1:0] wrk_wide_xy_offset;
+    wire [BANK_ADDR_W + OP_ADDR_W -1:0] wrk_narrow_xy_offset;
 
     assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux};
-    assign rd_narrow_xy_offset   = {rd_narrow_xy_bank,   rd_narrow_xy_addr};
-    assign wr_wide_xy_offset     = {wr_wide_xy_bank,     wr_wide_xy_addr};
-    assign wr_narrow_xy_offset   = {wr_narrow_xy_bank,   wr_narrow_xy_addr};
+    assign rd_narrow_xy_offset   = {rd_narrow_xy_bank,   rd_narrow_xy_addr  };
+    assign wr_wide_xy_offset     = {wr_wide_xy_bank,     wr_wide_xy_addr    };
+    assign wr_narrow_xy_offset   = {wr_narrow_xy_bank,   wr_narrow_xy_addr  };
+    assign wrk_wide_xy_offset    = {wrk_wide_xy_bank,    wrk_wide_xy_addr   };
+    assign wrk_narrow_xy_offset  = {wrk_narrow_xy_bank,  wrk_narrow_xy_addr };
 
     //
     // "Wide" Storage
     //
     genvar z;
     generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
-        begin : gen_wide_bram
+        begin : gen_wide
             //
             assign rd_wide_xy_offset[z] = {1'b0, rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]};
             //
-            modexpng_sdp_36k_x18_wrapper wide_bram_x
+            modexpng_sdp_36k_x18_wrapper wide_x
             (
                 .clk    (clk),
                 
@@ -132,7 +165,7 @@ module modexpng_storage_block
                 .doutb  (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W])
             );
             //
-            modexpng_sdp_36k_x18_wrapper wide_bram_y
+            modexpng_sdp_36k_x18_wrapper wide_y
             (
                 .clk    (clk),
 
@@ -151,9 +184,42 @@ module modexpng_storage_block
     endgenerate
     
     //
-    // Auxilary Storage
+    // Worker "Wide" Storage
+    //
+    modexpng_sdp_36k_x18_wrapper wrk_wide_x
+    (
+        .clk    (clk),
+        
+        .ena    (wr_wide_xy_ena),
+        .wea    (wr_wide_xy_ena),
+        .addra  (wr_wide_xy_offset),
+        .dina   (wr_wide_x_din),
+        
+        .enb    (wrk_wide_xy_ena),
+        .regceb (wrk_wide_xy_reg_ena),
+        .addrb  (wrk_wide_xy_offset),
+        .doutb  (wrk_wide_x_dout)
+    );
     //
-    modexpng_sdp_36k_x18_wrapper wide_bram_x_aux
+    modexpng_sdp_36k_x18_wrapper wrk_wide_y
+    (
+        .clk    (clk),
+
+        .ena    (wr_wide_xy_ena),
+        .wea    (wr_wide_xy_ena),
+        .addra  (wr_wide_xy_offset),
+        .dina   (wr_wide_y_din),
+    
+        .enb    (wrk_wide_xy_ena),
+        .regceb (wrk_wide_xy_reg_ena),
+        .addrb  (wrk_wide_xy_offset),
+        .doutb  (wrk_wide_y_dout)
+    );
+    
+    //
+    // Auxiliary "Wide" Storage
+    //
+    modexpng_sdp_36k_x18_wrapper wide_x_aux
     (
         .clk    (clk),
 
@@ -168,7 +234,7 @@ module modexpng_storage_block
         .doutb  (rd_wide_x_dout_aux)
     );
     //
-    modexpng_sdp_36k_x18_wrapper wide_bram_y_aux
+    modexpng_sdp_36k_x18_wrapper wide_y_aux
     (
         .clk    (clk),
 
@@ -186,7 +252,7 @@ module modexpng_storage_block
     //
     // "Narrow" Storage
     //
-    modexpng_sdp_36k_x18_wrapper narrow_bram_x
+    modexpng_sdp_36k_x18_wrapper narrow_x
     (
         .clk    (clk),
 
@@ -201,7 +267,7 @@ module modexpng_storage_block
         .doutb  (rd_narrow_x_dout)
     );
 
-    modexpng_sdp_36k_x18_wrapper narrow_bram_y
+    modexpng_sdp_36k_x18_wrapper narrow_y
     (
         .clk    (clk),
 
@@ -215,7 +281,39 @@ module modexpng_storage_block
         .addrb  (rd_narrow_xy_offset),
         .doutb  (rd_narrow_y_dout)
     );
+    
+    //
+    // Worker "Narrow" Storage
+    //
+    modexpng_sdp_36k_x18_wrapper wrk_narrow_x
+    (
+        .clk    (clk),
+
+        .ena    (wr_narrow_xy_ena),
+        .wea    (wr_narrow_xy_ena),
+        .addra  (wr_narrow_xy_offset),
+        .dina   (wr_narrow_x_din),
+    
+        .enb    (wrk_narrow_xy_ena),
+        .regceb (wrk_narrow_xy_reg_ena),
+        .addrb  (wrk_narrow_xy_offset),
+        .doutb  (wrk_narrow_x_dout)
+    );
+
+    modexpng_sdp_36k_x18_wrapper wrk_narrow_y
+    (
+        .clk    (clk),
 
+        .ena    (wr_narrow_xy_ena),
+        .wea    (wr_narrow_xy_ena),
+        .addra  (wr_narrow_xy_offset),
+        .dina   (wr_narrow_y_din),
+    
+        .enb    (wrk_narrow_xy_ena),
+        .regceb (wrk_narrow_xy_reg_ena),
+        .addrb  (wrk_narrow_xy_offset),
+        .doutb  (wrk_narrow_y_dout)
+    );
 
 endmodule
 
diff --git a/rtl/modexpng_storage_manager.v b/rtl/modexpng_storage_manager.v
index 6b34bed..c39e07a 100644
--- a/rtl/modexpng_storage_manager.v
+++ b/rtl/modexpng_storage_manager.v
@@ -2,53 +2,20 @@ module modexpng_storage_manager
 (
     clk, rst,
     
-    wr_wide_xy_ena,
-    wr_wide_xy_bank,
-    wr_wide_xy_addr,
-    wr_wide_x_din,
-    wr_wide_y_din,
+    wr_wide_xy_ena,      wr_wide_xy_bank,     wr_wide_xy_addr,     wr_wide_x_dout,    wr_wide_y_dout,
+    wr_narrow_xy_ena,    wr_narrow_xy_bank,   wr_narrow_xy_addr,   wr_narrow_x_dout,  wr_narrow_y_dout,    
 
-    wr_narrow_xy_ena,
-    wr_narrow_xy_bank,
-    wr_narrow_xy_addr,
-    wr_narrow_x_din,
-    wr_narrow_y_din,
-    
-    ext_wide_xy_ena,
-    ext_wide_xy_bank,
-    ext_wide_xy_addr,
-    ext_wide_x_din,
-    ext_wide_y_din,
-
-    ext_narrow_xy_ena,
-    ext_narrow_xy_bank,
-    ext_narrow_xy_addr,
-    ext_narrow_x_din,
-    ext_narrow_y_din,
-    
-    rcmb_wide_xy_ena,
-    rcmb_wide_xy_bank,
-    rcmb_wide_xy_addr,
-    rcmb_wide_x_din,
-    rcmb_wide_y_din,
-
-    rcmb_narrow_xy_ena,
-    rcmb_narrow_xy_bank,
-    rcmb_narrow_xy_addr,
-    rcmb_narrow_x_din,
-    rcmb_narrow_y_din,
-    
-    rdct_wide_xy_bank,
-    rdct_wide_xy_addr,
-    rdct_wide_x_din,
-    rdct_wide_y_din,
-    rdct_wide_xy_valid,
-
-    rdct_narrow_xy_bank,
-    rdct_narrow_xy_addr,
-    rdct_narrow_x_din,
-    rdct_narrow_y_din,
-    rdct_narrow_xy_valid
+    io_narrow_xy_ena,    io_narrow_xy_bank,   io_narrow_xy_addr,   io_narrow_x_din,   io_narrow_y_din,
+    io_wide_xy_ena,      io_wide_xy_bank,     io_wide_xy_addr,     io_wide_x_din,     io_wide_y_din,
+    
+    rcmb_wide_xy_ena,    rcmb_wide_xy_bank,   rcmb_wide_xy_addr,   rcmb_wide_x_din,   rcmb_wide_y_din,
+    rcmb_narrow_xy_ena,  rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din,
+    
+    rdct_wide_xy_bank,   rdct_wide_xy_addr,   rdct_wide_x_din,     rdct_wide_y_din,   rdct_wide_xy_valid,
+    rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_din,   rdct_narrow_y_din, rdct_narrow_xy_valid,
+    
+    wrk_wide_xy_ena,     wrk_wide_xy_bank,    wrk_wide_xy_addr,    wrk_wide_x_din,    wrk_wide_y_din,
+    wrk_narrow_xy_ena,   wrk_narrow_xy_bank,  wrk_narrow_xy_addr,  wrk_narrow_x_din,  wrk_narrow_y_din
 );
 
 
@@ -67,51 +34,67 @@ module modexpng_storage_manager
     output                    wr_wide_xy_ena;
     output [BANK_ADDR_W -1:0] wr_wide_xy_bank;
     output [  OP_ADDR_W -1:0] wr_wide_xy_addr;
-    output [ WORD_EXT_W -1:0] wr_wide_x_din;
-    output [ WORD_EXT_W -1:0] wr_wide_y_din;
+    output [ WORD_EXT_W -1:0] wr_wide_x_dout;
+    output [ WORD_EXT_W -1:0] wr_wide_y_dout;
 
     output                    wr_narrow_xy_ena;
     output [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
     output [  OP_ADDR_W -1:0] wr_narrow_xy_addr;
-    output [ WORD_EXT_W -1:0] wr_narrow_x_din;
-    output [ WORD_EXT_W -1:0] wr_narrow_y_din;
+    output [ WORD_EXT_W -1:0] wr_narrow_x_dout;
+    output [ WORD_EXT_W -1:0] wr_narrow_y_dout;
    
-    input                     ext_wide_xy_ena;
-    input  [BANK_ADDR_W -1:0] ext_wide_xy_bank;
-    input  [  OP_ADDR_W -1:0] ext_wide_xy_addr;
-    input  [ WORD_EXT_W -1:0] ext_wide_x_din;
-    input  [ WORD_EXT_W -1:0] ext_wide_y_din;
-
-    input                     ext_narrow_xy_ena;
-    input  [BANK_ADDR_W -1:0] ext_narrow_xy_bank;
-    input  [  OP_ADDR_W -1:0] ext_narrow_xy_addr;
-    input  [ WORD_EXT_W -1:0] ext_narrow_x_din;
-    input  [ WORD_EXT_W -1:0] ext_narrow_y_din;
+    input                     io_wide_xy_ena;
+    input  [BANK_ADDR_W -1:0] io_wide_xy_bank;
+    input  [  OP_ADDR_W -1:0] io_wide_xy_addr;
+    input  [ WORD_EXT_W -1:0] io_wide_x_din;
+    input  [ WORD_EXT_W -1:0] io_wide_y_din;
+
+    input                     io_narrow_xy_ena;
+    input  [BANK_ADDR_W -1:0] io_narrow_xy_bank;
+    input  [  OP_ADDR_W -1:0] io_narrow_xy_addr;
+    input  [ WORD_EXT_W -1:0] io_narrow_x_din;
+    input  [ WORD_EXT_W -1:0] io_narrow_y_din;
     
     input                     rcmb_wide_xy_ena;
     input  [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
-    input  [ 7:0] rcmb_wide_xy_addr;
-    input  [17:0] rcmb_wide_x_din;
-    input  [17:0] rcmb_wide_y_din;
+    input  [  OP_ADDR_W -1:0] rcmb_wide_xy_addr;
+    input  [ WORD_EXT_W -1:0] rcmb_wide_x_din;
+    input  [ WORD_EXT_W -1:0] rcmb_wide_y_din;
 
     input                     rcmb_narrow_xy_ena;
     input  [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
-    input  [ 7:0] rcmb_narrow_xy_addr;
-    input  [17:0] rcmb_narrow_x_din;
-    input  [17:0] rcmb_narrow_y_din;
-    
-    input  [     2:0] rdct_wide_xy_bank;
-    input  [     7:0] rdct_wide_xy_addr;
-    input  [    17:0] rdct_wide_x_din;
-    input  [    17:0] rdct_wide_y_din;
-    input             rdct_wide_xy_valid;
-
-    input  [     2:0] rdct_narrow_xy_bank;
-    input  [     7:0] rdct_narrow_xy_addr;
-    input  [    17:0] rdct_narrow_x_din;
-    input  [    17:0] rdct_narrow_y_din;
-    input             rdct_narrow_xy_valid;
+    input  [  OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
+    input  [ WORD_EXT_W -1:0] rcmb_narrow_x_din;
+    input  [ WORD_EXT_W -1:0] rcmb_narrow_y_din;
+    
+    input  [BANK_ADDR_W -1:0] rdct_wide_xy_bank;
+    input  [  OP_ADDR_W -1:0] rdct_wide_xy_addr;
+    input  [ WORD_EXT_W -1:0] rdct_wide_x_din;
+    input  [ WORD_EXT_W -1:0] rdct_wide_y_din;
+    input                     rdct_wide_xy_valid;
+
+    input  [BANK_ADDR_W -1:0] rdct_narrow_xy_bank;
+    input  [  OP_ADDR_W -1:0] rdct_narrow_xy_addr;
+    input  [ WORD_EXT_W -1:0] rdct_narrow_x_din;
+    input  [ WORD_EXT_W -1:0] rdct_narrow_y_din;
+    input                     rdct_narrow_xy_valid;
     
+    input                     wrk_wide_xy_ena;
+    input  [BANK_ADDR_W -1:0] wrk_wide_xy_bank;
+    input  [  OP_ADDR_W -1:0] wrk_wide_xy_addr;
+    input  [ WORD_EXT_W -1:0] wrk_wide_x_din;
+    input  [ WORD_EXT_W -1:0] wrk_wide_y_din;
+
+    input                     wrk_narrow_xy_ena;
+    input  [BANK_ADDR_W -1:0] wrk_narrow_xy_bank;
+    input  [  OP_ADDR_W -1:0] wrk_narrow_xy_addr;
+    input  [ WORD_EXT_W -1:0] wrk_narrow_x_din;
+    input  [ WORD_EXT_W -1:0] wrk_narrow_y_din;
+    
+
+    //
+    // Output Registers
+    //
     reg                    wr_wide_xy_ena_reg = 1'b0;
     reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_reg;
     reg [  OP_ADDR_W -1:0] wr_wide_xy_addr_reg;
@@ -186,40 +169,54 @@ module modexpng_storage_manager
         end
     endtask
     
+    
+    //
+    // "Wide" Write Arbiter
+    //
     always @(posedge clk)
         //
         if (rst)                         disable_wide;
         else begin
             //
-            if      (ext_wide_xy_ena)    enable_wide(ext_wide_xy_bank,  ext_wide_xy_addr,  ext_wide_x_din,  ext_wide_y_din);
+            if      (io_wide_xy_ena)     enable_wide(io_wide_xy_bank,   io_wide_xy_addr,   io_wide_x_din,   io_wide_y_din);
             else if (rcmb_wide_xy_ena)   enable_wide(rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din);
             else if (rdct_wide_xy_valid) enable_wide(rdct_wide_xy_bank, rdct_wide_xy_addr, rdct_wide_x_din, rdct_wide_y_din);
+            else if (wrk_wide_xy_ena)    enable_wide(wrk_wide_xy_bank,  wrk_wide_xy_addr,  wrk_wide_x_din,  wrk_wide_y_din);
             else                         disable_wide;
             //
         end
             
+    
+    //
+    // "Narrow" Write Arbiter
+    //
     always @(posedge clk)
         //
         if (rst)                           disable_narrow;
         else begin
             //
-            if      (ext_narrow_xy_ena)    enable_narrow(ext_narrow_xy_bank,  ext_narrow_xy_addr,  ext_narrow_x_din,  ext_narrow_y_din);
+            if      (io_narrow_xy_ena)     enable_narrow(io_narrow_xy_bank,   io_narrow_xy_addr,   io_narrow_x_din,   io_narrow_y_din);
             else if (rcmb_narrow_xy_ena)   enable_narrow(rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din);
             else if (rdct_narrow_xy_valid) enable_narrow(rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_din, rdct_narrow_y_din);
+            else if (wrk_narrow_xy_ena)    enable_narrow(wrk_narrow_xy_bank,  wrk_narrow_xy_addr,  wrk_narrow_x_din,  wrk_narrow_y_din);
             else                           disable_narrow;
             //
         end
 
+    
+    //
+    // Port Mapping
+    //
     assign wr_wide_xy_ena  = wr_wide_xy_ena_reg;
     assign wr_wide_xy_bank = wr_wide_xy_bank_reg;
     assign wr_wide_xy_addr = wr_wide_xy_addr_reg;
-    assign wr_wide_x_din   = wr_wide_x_din_reg;
-    assign wr_wide_y_din   = wr_wide_y_din_reg;
+    assign wr_wide_x_dout  = wr_wide_x_din_reg;
+    assign wr_wide_y_dout  = wr_wide_y_din_reg;
 
     assign wr_narrow_xy_ena  = wr_narrow_xy_ena_reg;
     assign wr_narrow_xy_bank = wr_narrow_xy_bank_reg;
     assign wr_narrow_xy_addr = wr_narrow_xy_addr_reg;
-    assign wr_narrow_x_din   = wr_narrow_x_din_reg;
-    assign wr_narrow_y_din   = wr_narrow_y_din_reg;
+    assign wr_narrow_x_dout  = wr_narrow_x_din_reg;
+    assign wr_narrow_y_dout  = wr_narrow_y_din_reg;
     
 endmodule
diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v
index 73b3142..016b1b0 100644
--- a/rtl/modexpng_uop_rom.v
+++ b/rtl/modexpng_uop_rom.v
@@ -15,25 +15,48 @@ module modexpng_uop_rom
     always @(posedge clk)
         //
         case (addr)
-            6'd00:      data <= {UOP_OPCODE_INPUT_TO_WIDE,    UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N,        BANK_WIDE_N,    BANK_DNC         };
-            6'd01:      data <= {UOP_OPCODE_INPUT_TO_WIDE,    UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N,        BANK_WIDE_N,    BANK_DNC         };
-            6'd02:      data <= {UOP_OPCODE_INPUT_TO_WIDE,    UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_X,        BANK_WIDE_A,    BANK_DNC         };
-            6'd03:      data <= {UOP_OPCODE_INPUT_TO_WIDE,    UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_Y,        BANK_WIDE_A,    BANK_DNC         };
-            6'd04:      data <= {UOP_OPCODE_INPUT_TO_WIDE,    UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_WIDE_E,    BANK_DNC         };
-            6'd05:      data <= {UOP_OPCODE_INPUT_TO_WIDE,    UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_WIDE_E,    BANK_DNC         };
-
-            6'd06:      data <= {UOP_OPCODE_INPUT_TO_NARROW,  UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_COEFF,  BANK_DNC,       BANK_NARROW_COEFF};
-            6'd07:      data <= {UOP_OPCODE_INPUT_TO_NARROW,  UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_COEFF,  BANK_DNC,       BANK_NARROW_COEFF};
-            6'd08:      data <= {UOP_OPCODE_INPUT_TO_NARROW,  UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_FACTOR, BANK_DNC,       BANK_NARROW_A    };
-            6'd09:      data <= {UOP_OPCODE_INPUT_TO_NARROW,  UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_FACTOR, BANK_DNC,       BANK_NARROW_A    };
-            6'd10:      data <= {UOP_OPCODE_INPUT_TO_NARROW,  UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_DNC,       BANK_NARROW_E    };
-            6'd11:      data <= {UOP_OPCODE_INPUT_TO_NARROW,  UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_DNC,       BANK_NARROW_E    };
-            
-            6'd12:      data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_A, BANK_NARROW_A,      BANK_WIDE_B,    BANK_NARROW_B    };
-            6'd13:      data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_B, BANK_NARROW_B,      BANK_WIDE_C,    BANK_NARROW_C    };
-            6'd14:      data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_2,   UOP_LADDER_11,  BANK_WIDE_C, BANK_DNC,           BANK_WIDE_D,    BANK_NARROW_D    };
-            
-            default:    data <= {UOP_OPCODE_STOP,             UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL};
+            6'd00:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N,        BANK_WIDE_N,    BANK_DNC         }; //
+            6'd01:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N,        BANK_WIDE_N,    BANK_DNC         }; //
+            6'd02:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_X,        BANK_WIDE_A,    BANK_DNC         }; //
+            6'd03:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_Y,        BANK_WIDE_A,    BANK_DNC         }; //
+            6'd04:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_WIDE_E,    BANK_DNC         }; //
+            6'd05:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_WIDE_E,    BANK_DNC         }; //
+                                                                                                                                                                                         //
+            6'd06:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_COEFF,  BANK_DNC,       BANK_NARROW_COEFF}; //
+            6'd07:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_COEFF,  BANK_DNC,       BANK_NARROW_COEFF}; //
+            6'd08:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_FACTOR, BANK_DNC,       BANK_NARROW_A    }; //
+            6'd09:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_FACTOR, BANK_DNC,       BANK_NARROW_A    }; //
+            6'd10:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_DNC,       BANK_NARROW_E    }; //
+            6'd11:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_DNC,       BANK_NARROW_E    }; //
+                                                                                                                                                                                         //
+            6'd12:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,   UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_A, BANK_NARROW_A,      BANK_WIDE_B,    BANK_NARROW_B    }; //
+            6'd13:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,   UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_B, BANK_NARROW_B,      BANK_WIDE_C,    BANK_NARROW_C    }; //
+            6'd14:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,   UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_2,   UOP_LADDER_11,  BANK_WIDE_C, BANK_DNC,           BANK_WIDE_D,    BANK_NARROW_D    }; //
+                                                                                                                                                                                         //
+            6'd15:   data <= {UOP_OPCODE_PROPAGATE_CARRIES,  UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_D,      BANK_DNC,       BANK_NARROW_D    }; //
+                                                                                                                                                                                         //
+            6'd16:   data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_D,      BANK_DNC,       BANK_OUT_XM      }; //
+            6'd17:   data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_D,      BANK_DNC,       BANK_OUT_YM      }; //
+                                                                                                                                                                                         //            
+            6'd18:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,   UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_E, BANK_NARROW_B,      BANK_WIDE_C,    BANK_NARROW_C    }; //
+                                                                                                                                                                                         //
+            6'd19:   data <= {UOP_OPCODE_PROPAGATE_CARRIES,  UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_C,      BANK_DNC,       BANK_NARROW_C    }; //
+                                                                                                                                                                                         //            
+            6'd20:   data <= {UOP_OPCODE_COPY_CRT_Y2X,       UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C,      BANK_WIDE_C,    BANK_NARROW_C    }; //
+                                                                                                                                                                                         //
+            6'd21:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_P,        BANK_WIDE_N,    BANK_DNC         }; //
+            6'd22:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_Y,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q,        BANK_WIDE_N,    BANK_DNC         }; //
+            6'd23:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_P_FACTOR, BANK_WIDE_A,    BANK_DNC         }; //
+            6'd24:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_Y,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q_FACTOR, BANK_WIDE_A,    BANK_DNC         }; //
+            6'd25:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_QINV,     BANK_WIDE_E,    BANK_DNC         }; //
+                                                                                                                                                                                         //
+            6'd26:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_P_COEFF,  BANK_DNC,       BANK_NARROW_COEFF}; //
+            6'd27:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_Y,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q_COEFF,  BANK_DNC,       BANK_NARROW_COEFF}; //
+            6'd28:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_P_FACTOR, BANK_DNC,       BANK_NARROW_A    }; //
+            6'd29:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_Y,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q_FACTOR, BANK_DNC,       BANK_NARROW_A    }; //
+            6'd30:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_QINV,     BANK_DNC,       BANK_NARROW_E    }; //
+                                                                                                                                                                                         //
+            default: data <= {UOP_OPCODE_STOP,               UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL                                                   }; //                                            
         endcase
 
 endmodule



More information about the Commits mailing list