[Cryptech-Commits] [user/shatov/modexpng] 07/13: Further work: - added core wrapper - fixed module resets across entire core (all the resets are now consistently active-low) - continued refactoring

git at cryptech.is git at cryptech.is
Wed Oct 23 16:22:06 UTC 2019


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexpng.

commit 584393ac5fc9bbe80887702ec2fc97bee999c5e7
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Mon Oct 21 15:13:01 2019 +0300

    Further work:
     - added core wrapper
     - fixed module resets across entire core (all the resets are now consistently
       active-low)
     - continued refactoring
---
 bench/tb_core_full_512.v                           |   4 +-
 bench/tb_wrapper.v                                 | 221 +++++++++
 rtl/modexpng_core_top.v                            |  28 +-
 rtl/modexpng_dsp_array_block.v                     |  76 +---
 rtl/modexpng_general_worker.v                      |  20 +-
 rtl/modexpng_io_block.v                            |   9 +-
 rtl/modexpng_io_manager.v                          |  24 +-
 rtl/modexpng_mmm_dual.v                            | 496 ++++++++++-----------
 ...odexpng_mmm_fsm.vh => modexpng_mmm_dual_fsm.vh} |  64 +--
 rtl/modexpng_parameters.vh                         |   2 +-
 rtl/modexpng_recombinator_block.v                  |  51 ++-
 rtl/modexpng_reductor.v                            |  29 +-
 rtl/modexpng_storage_block.v                       |   8 +-
 rtl/modexpng_storage_manager.v                     |  12 +-
 rtl/modexpng_uop_engine.v                          |  18 +-
 rtl/modexpng_wrapper.v                             | 393 ++++++++++++++++
 16 files changed, 997 insertions(+), 458 deletions(-)

diff --git a/bench/tb_core_full_512.v b/bench/tb_core_full_512.v
index e2604f0..cbcdd1d 100644
--- a/bench/tb_core_full_512.v
+++ b/bench/tb_core_full_512.v
@@ -238,7 +238,7 @@ module tb_core_full_512;
         
             sync_clk_bus;           // switch to slow bus clock
             core_set_input;         // write to core input banks
-            /*
+            
             sync_clk;               // switch to fast core clock
             core_set_crt_mode(1);   // enable CRT signing
             core_pulse_next;        // assert 'next' bit for one cycle
@@ -247,7 +247,7 @@ module tb_core_full_512;
             sync_clk_bus;           // switch to slow bus clock
             core_get_output;        // read from core output banks
             core_verify_output;     // check, whether core output matches precomputed known good refrence values
-            */
+            
             sync_clk;               // switch to fast core clock
             core_set_crt_mode(0);   // disable CRT signing
             core_pulse_next;        // assert 'next' bit for one cycle
diff --git a/bench/tb_wrapper.v b/bench/tb_wrapper.v
new file mode 100644
index 0000000..2000ec5
--- /dev/null
+++ b/bench/tb_wrapper.v
@@ -0,0 +1,221 @@
+`timescale 1ns / 1ps
+
+module tb_wrapper;
+
+
+    //
+    // Clocks
+    //
+    `define CLK_FREQUENCY_MHZ     (100.0)
+    `define CLK_PERIOD_NS         (1000.0 / `CLK_FREQUENCY_MHZ)
+    `define CLK_PERIOD_HALF_NS    (0.5    * `CLK_PERIOD_NS)
+    `define CLK_PERIOD_QUARTER_NS (0.5    * `CLK_PERIOD_HALF_NS)
+
+    `define CLK_BUS_FREQUENCY_MHZ     (25.0)
+    `define CLK_BUS_PERIOD_NS         (1000.0 / `CLK_BUS_FREQUENCY_MHZ)
+    `define CLK_BUS_PERIOD_HALF_NS    (0.5    * `CLK_BUS_PERIOD_NS)
+    
+	reg  clk          = 1'b1;
+	reg  clk_bus      = 1'b1;
+	reg  clk_bus_dly  = 1'b0;
+	wire clk_bus_idle = clk_bus & clk_bus_dly;
+
+    always #`CLK_PERIOD_HALF_NS     clk     <= ~clk;
+    always #`CLK_BUS_PERIOD_HALF_NS clk_bus <= ~clk_bus;
+    
+    always @(clk_bus) clk_bus_dly <= #(`CLK_BUS_PERIOD_HALF_NS - `CLK_PERIOD_QUARTER_NS) clk_bus;  
+    
+    
+    //
+    // Clock Sync
+    //
+    task sync_clk_bus;
+        while (clk_bus_idle !== 1) _wait_quarter_clk_tick;
+    endtask
+    
+    
+    //
+    // Reset
+    //
+    reg rst_n = 1'b0;   
+   
+    
+    //
+    // System Bus
+    //
+    reg         bus_cs = 1'b0;
+    reg         bus_we = 1'b0;
+    reg  [11:0] bus_addr;
+    reg  [31:0] bus_data_wr;
+    wire [31:0] bus_data_rd;
+
+
+    //
+    // UUT
+    //
+    modexpng_wrapper uut
+    (
+        .clk        (clk_bus),
+        .rst_n      (rst_n),
+    
+        .clk_core   (clk),
+
+        .cs         (bus_cs),
+        .we         (bus_we),
+
+        .address    (bus_addr),
+        .write_data (bus_data_wr),
+        .read_data  (bus_data_rd)
+    );
+
+
+    //
+    // Script
+    //
+    initial main;
+    
+    
+    //
+    // Main Routine (Control/Status, Bus)
+    //
+    reg [31:0] ti, to;
+    task main;
+        begin
+        
+            sync_clk_bus;
+            wait_clk_bus_ticks(10);
+            rst_n = 1'b1;
+            wait_clk_bus_ticks(10);
+            
+                                            bus_read('h11, to); $display("modulus_bits = %0d",            to);
+            ti =  100; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti =  510; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti =  511; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti =  512; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti =  513; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti =  514; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            
+            
+            ti = 1022; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti = 1023; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti = 1024; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti = 1025; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti = 1026; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            
+            ti = 4094; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti = 4095; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti = 4096; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti = 4097; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+            ti = 4098; bus_write('h11, ti); bus_read('h11, to); $display("modulus_bits = %0d -> %0d", ti, to);
+
+
+                                            bus_read('h12, to); $display("exponent_bits = %0d",            to);
+            ti =    0; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =    1; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =    2; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =    3; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =    4; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =    5; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =    6; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =    7; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =    8; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+
+            ti =  100; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =  510; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =  511; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =  512; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =  513; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti =  514; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            
+            
+            ti = 1022; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti = 1023; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti = 1024; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti = 1025; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti = 1026; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            
+            ti = 4094; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti = 4095; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti = 4096; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti = 4097; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+            ti = 4098; bus_write('h12, ti); bus_read('h12, to); $display("exponent_bits = %0d -> %0d", ti, to);
+
+
+        end
+    endtask
+    
+
+    //
+    // _bus_drive()
+    //    
+    task _bus_drive;
+        input cs;
+        input we;
+        input [11:0] addr;
+        input [31:0] data;
+        {bus_cs, bus_we, bus_addr, bus_data_wr} <= {cs, we, addr, data};
+    endtask
+    
+    
+    //
+    // bus_write()
+    //
+    task bus_write;
+        input  [ 9:0] offset;
+        input  [31:0] data;
+        begin
+            _bus_drive(1'b1, 1'b1, {2'b00, offset}, data);
+            wait_clk_bus_tick;
+            _bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX);
+        end
+    endtask
+    
+    
+    //
+    // bus_read()
+    //
+    task bus_read;
+        input  [ 9:0] offset;
+        output [31:0] data;
+        begin
+            _bus_drive(1'b1, 1'b0, {2'b00, offset}, 32'hXXXXXXXX);
+            wait_clk_bus_tick;
+            data = bus_data_rd;
+            _bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX);
+        end
+    endtask
+    
+
+    //
+    // Variables
+    //    
+    integer _n;
+    
+    
+    //
+    // _wait_quarter_clk_tick()
+    //
+    task _wait_quarter_clk_tick;
+        #`CLK_PERIOD_QUARTER_NS;
+    endtask
+    
+
+    //
+    // wait_clk_bus_tick()
+    //
+    task wait_clk_bus_tick;
+        #`CLK_BUS_PERIOD_NS;
+    endtask
+    
+
+    //
+    // wait_clk_bus_ticks()
+    //    
+    task wait_clk_bus_ticks;
+        input integer num_ticks;
+        for (_n=0; _n<num_ticks; _n=_n+1)
+            wait_clk_bus_tick;
+    endtask
+
+
+endmodule
diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
index a991c61..18c32bb 100644
--- a/rtl/modexpng_core_top.v
+++ b/rtl/modexpng_core_top.v
@@ -1,7 +1,7 @@
 module modexpng_core_top
 (
     clk, clk_bus,
-    rst,
+    rst_n,
     next, valid,
     crt_mode,
     word_index_last_n,
@@ -28,7 +28,7 @@ module modexpng_core_top
     input                                         clk;
     input                                         clk_bus;
     
-    input                                         rst;
+    input                                         rst_n;
     
     input                                         next;
     output                                        valid;
@@ -118,7 +118,7 @@ module modexpng_core_top
     modexpng_uop_engine uop_engine
     (
         .clk                            (clk),
-        .rst                            (rst),
+        .rst_n                          (rst_n),
         
         .ena                            (next),
         .rdy                            (valid),
@@ -419,7 +419,7 @@ module modexpng_core_top
     modexpng_storage_block storage_block_x
     (
         .clk                    (clk),
-        .rst                    (rst),
+        .rst_n                  (rst_n),
 
         .wr_wide_xy_ena         (wr_wide_xy_ena_x),
         .wr_wide_xy_bank        (wr_wide_xy_bank_x),
@@ -466,7 +466,7 @@ module modexpng_core_top
     modexpng_storage_block storage_block_y
     (
         .clk                    (clk),
-        .rst                    (rst),
+        .rst_n                  (rst_n),
 
         .wr_wide_xy_ena         (wr_wide_xy_ena_y),
         .wr_wide_xy_bank        (wr_wide_xy_bank_y),
@@ -518,7 +518,7 @@ module modexpng_core_top
     modexpng_storage_manager storage_manager_x
     (
         .clk                    (clk),
-        .rst                    (rst),
+        .rst_n                  (rst_n),
         
         .wr_wide_xy_ena         (wr_wide_xy_ena_x),
         .wr_wide_xy_bank        (wr_wide_xy_bank_x),
@@ -584,7 +584,7 @@ module modexpng_core_top
     modexpng_storage_manager storage_manager_y
     (
         .clk                    (clk),
-        .rst                    (rst),
+        .rst_n                  (rst_n),
         
         .wr_wide_xy_ena         (wr_wide_xy_ena_y),
         .wr_wide_xy_bank        (wr_wide_xy_bank_y),
@@ -656,7 +656,7 @@ module modexpng_core_top
         .clk            (clk),
         .clk_bus        (clk_bus),
         
-        .rst            (rst),
+        .rst_n          (rst_n),
         
         .bus_cs         (bus_cs),
         .bus_we         (bus_we),
@@ -685,7 +685,7 @@ module modexpng_core_top
     modexpng_io_manager io_manager
     (
         .clk                        (clk),
-        .rst                        (rst),
+        .rst_n                      (rst_n),
     
         .ena                        (io_mgr_ena),
         .rdy                        (io_mgr_rdy),
@@ -755,7 +755,7 @@ module modexpng_core_top
     modexpng_mmm_dual mmm_x
     (
         .clk                        (clk),
-        .rst                        (rst),
+        .rst_n                      (rst_n),
         
         .ena                        (mmm_ena_x),
         .rdy                        (mmm_rdy_x),
@@ -812,7 +812,7 @@ module modexpng_core_top
     modexpng_mmm_dual mmm_y
     (
         .clk                        (clk),
-        .rst                        (rst),
+        .rst_n                      (rst_n),
         
         .ena                        (mmm_ena_y),
         .rdy                        (mmm_rdy_y),
@@ -872,7 +872,7 @@ module modexpng_core_top
     modexpng_reductor reductor_x
     (
         .clk                    (clk),
-        .rst                    (rst),
+        .rst_n                  (rst_n),
         
         .ena                    (rdct_ena_x),
         .rdy                    (rdct_rdy_x),
@@ -909,7 +909,7 @@ module modexpng_core_top
     modexpng_reductor reductor_y
     (
         .clk                    (clk),
-        .rst                    (rst),
+        .rst_n                  (rst_n),
         
         .ena                    (rdct_ena_y),
         .rdy                    (rdct_rdy_y),
@@ -950,7 +950,7 @@ module modexpng_core_top
     modexpng_general_worker general_worker
     (
         .clk                        (clk),
-        .rst                        (rst),
+        .rst_n                      (rst_n),
         
         .ena                        (wrk_ena),
         .rdy                        (wrk_rdy),
diff --git a/rtl/modexpng_dsp_array_block.v b/rtl/modexpng_dsp_array_block.v
index 6b4ad3c..1444aa7 100644
--- a/rtl/modexpng_dsp_array_block.v
+++ b/rtl/modexpng_dsp_array_block.v
@@ -38,81 +38,7 @@ module modexpng_dsp_array_block
         ce_a1 <= ce_a0;
         ce_a2 <= ce_a1;
         ce_b1 <= ce_b0;
-    end
-    
-    ///
-    wire [46:0] p_debug_direct;
-    wire [17:0] casc_a_debug_direct;
-    wire [15:0] casc_b_debug_direct;
-    
-    wire [46:0] p_debug_cascade;
-
-    wire [46:0] p_ref_direct  = p[    0 +: MAC_W];
-    wire [46:0] p_ref_cascade = p[MAC_W +: MAC_W];
-    
-    modexpng_dsp_slice_wrapper_xilinx #
-    (
-        .AB_INPUT("DIRECT"),
-        .B_REG(2)
-    )
-    dsp_debug_direct
-    (
-        .clk            (clk),
-                
-        .ce_a1          (ce_a0),
-        .ce_b1          (ce_b0),
-        .ce_a2          (ce_a1),
-        .ce_b2          (ce_b1),
-        .ce_m           (ce_m),
-        .ce_p           (ce_p),
-        .ce_mode        (ce_mode),
-                
-        .a              (a[0 +: 18]),
-        .b              (b),
-        .p              (p_debug_direct),
-                
-        .inmode         ({DSP48E1_INMODE_W{1'b0}}),
-        .opmode         ({1'b0, mode_z[0], 1'b0, 2'b01, 2'b01}),
-        .alumode        ({DSP48E1_ALUMODE_W{1'b0}}),
-                
-        .casc_a_in      (WORD_EXT_ZERO),
-        .casc_b_in      (WORD_ZERO),
-                
-        .casc_a_out     (casc_a_debug_direct),
-        .casc_b_out     (casc_b_debug_direct)
-    );
-            
-    modexpng_dsp_slice_wrapper_xilinx #
-    (
-        .AB_INPUT("CASCADE"),
-        .B_REG(1)
-    )
-    dsp_debug_cascade
-    (
-        .clk            (clk),
-                
-        .ce_a1          (ce_a1),
-        .ce_b1          (1'b0),
-        .ce_a2          (ce_a2),
-        .ce_b2          (ce_b1),
-        .ce_m           (ce_m),
-        .ce_p           (ce_p),
-        .ce_mode        (ce_mode),
-                
-        .a              (a[0 +: 18]),
-        .b              (b),
-        .p              (p_debug_cascade),
-                
-        .inmode         ({DSP48E1_INMODE_W{1'b0}}),
-        .opmode         ({1'b0, mode_z[1], 1'b0, 2'b01, 2'b01}),
-        .alumode        ({DSP48E1_ALUMODE_W{1'b0}}),
-                
-        .casc_a_in      (casc_a_debug_direct),
-        .casc_b_in      (casc_b_debug_direct),
-                
-        .casc_a_out     (),
-        .casc_b_out     ()
-    );    
+    end    
     
     genvar z;
     generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index 4202066..ba8342f 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -1,6 +1,6 @@
 module modexpng_general_worker
 (
-    clk, rst,
+    clk, rst_n,
     ena, rdy,
     sel_narrow_in, sel_narrow_out,
     sel_wide_in,   sel_wide_out,
@@ -28,7 +28,7 @@ module modexpng_general_worker
     // Ports
     //
     input                                    clk;
-    input                                    rst;
+    input                                    rst_n;
 
     input                                    ena;
     output                                   rdy;
@@ -301,9 +301,9 @@ module modexpng_general_worker
     task enable_narrow_xy_rd_en;  _update_narrow_xy_rd_en(1'b1); endtask
     task disable_narrow_xy_rd_en; _update_narrow_xy_rd_en(1'b0); endtask
     
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) begin
+        if (!rst_n) begin
             //
             disable_wide_xy_rd_en;
             disable_narrow_xy_rd_en;
@@ -412,9 +412,9 @@ module modexpng_general_worker
     task enable_narrow_xy_wr_en;  _update_narrow_xy_wr_en(1'b1); endtask
     task disable_narrow_xy_wr_en; _update_narrow_xy_wr_en(1'b0); endtask
     
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) begin
+        if (!rst_n) begin
             //
             disable_wide_xy_wr_en;
             disable_narrow_xy_wr_en;
@@ -1007,9 +1007,9 @@ module modexpng_general_worker
     // FSM Process
     //
 
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) wrk_fsm_state <= WRK_FSM_STATE_IDLE;
+        if (!rst_n) wrk_fsm_state <= WRK_FSM_STATE_IDLE;
         else case (opcode)
             UOP_OPCODE_PROPAGATE_CARRIES,
             UOP_OPCODE_OUTPUT_FROM_NARROW,
@@ -1156,9 +1156,9 @@ module modexpng_general_worker
     
     assign rdy = rdy_reg;
     
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst)                  rdy_reg <= 1'b1;
+        if (!rst_n)               rdy_reg <= 1'b1;
         else case (wrk_fsm_state)
             WRK_FSM_STATE_IDLE:   rdy_reg <= ~ena;
             WRK_FSM_STATE_STOP:   rdy_reg <= 1'b1;
diff --git a/rtl/modexpng_io_block.v b/rtl/modexpng_io_block.v
index 6d008aa..622c25e 100644
--- a/rtl/modexpng_io_block.v
+++ b/rtl/modexpng_io_block.v
@@ -1,6 +1,6 @@
 module modexpng_io_block
 (
-    clk, clk_bus, rst,
+    clk, rst_n, clk_bus,
         
     bus_cs,
     bus_we,
@@ -33,8 +33,9 @@ module modexpng_io_block
     // Ports
     //
     input                                         clk;
+    input                                         rst_n;
+
     input                                         clk_bus;
-    input                                         rst;
 
     input                                         bus_cs;
     input                                         bus_we;
@@ -62,9 +63,9 @@ module modexpng_io_block
     reg in_1_reg_en            = 1'b0;
     reg in_2_reg_en            = 1'b0;
 
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) begin
+        if (!rst_n) begin
             in_1_reg_en            <= 1'b0;
             in_2_reg_en            <= 1'b0;
         end else begin
diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v
index 5bce191..26661d4 100644
--- a/rtl/modexpng_io_manager.v
+++ b/rtl/modexpng_io_manager.v
@@ -1,7 +1,7 @@
 module modexpng_io_manager
 (
     clk,
-    rst,
+    rst_n,
     
     ena,
     rdy,
@@ -75,7 +75,7 @@ module modexpng_io_manager
     // Ports
     //
     input                                    clk;
-    input                                    rst;
+    input                                    rst_n;
 
     input                                    ena;
     output                                   rdy;
@@ -376,9 +376,9 @@ module modexpng_io_manager
     //
     // Source Enable Logic
     //
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) begin
+        if (!rst_n) begin
             in_1_en <= 1'b0;
             in_2_en <= 1'b0;
         end else case (io_fsm_state_next)
@@ -405,9 +405,9 @@ module modexpng_io_manager
     //
     // Destination Enable Logic
     //    
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) begin
+        if (!rst_n) begin
             //
             wide_xy_ena_x   <= 1'b0;
             wide_xy_ena_y   <= 1'b0;
@@ -556,7 +556,7 @@ module modexpng_io_manager
     
     assign in_1_addr_op_next_is_last  = in_1_addr_op_next  == word_index_last;
     assign in_2_addr_op_next_is_last  = in_2_addr_op_next  == word_index_last;
-    assign in_2_addr_op_next_is_one   = in_2_addr_op_next  == OP_ADDR_ONE;
+//  assign in_2_addr_op_next_is_one   = in_2_addr_op_next  == OP_ADDR_ONE;
     assign dummy_addr_op_next_is_last = dummy_addr_op_next == word_index_last; 
     
     always @(posedge clk) begin
@@ -635,10 +635,10 @@ module modexpng_io_manager
     //
     // FSM Process
     //
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) io_fsm_state <= IO_FSM_STATE_IDLE;
-        else     io_fsm_state <= io_fsm_state_next;
+        if (!rst_n) io_fsm_state <= IO_FSM_STATE_IDLE;
+        else        io_fsm_state <= io_fsm_state_next;
     
     
     //
@@ -692,9 +692,9 @@ module modexpng_io_manager
     
     assign rdy = rdy_reg;
     
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst)                 rdy_reg <= 1'b1;
+        if (!rst_n)              rdy_reg <= 1'b1;
         else case (io_fsm_state)
             IO_FSM_STATE_IDLE:   rdy_reg <= ~ena;
             IO_FSM_STATE_STOP:   rdy_reg <= 1'b1;
diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v
index 13a8773..a868362 100644
--- a/rtl/modexpng_mmm_dual.v
+++ b/rtl/modexpng_mmm_dual.v
@@ -1,6 +1,6 @@
 module modexpng_mmm_dual
 (
-    clk, rst,
+    clk, rst_n,
     
     ena, rdy,
         
@@ -56,16 +56,14 @@ module modexpng_mmm_dual
     // Headers
     //
     `include "modexpng_parameters.vh"
-    `include "../rtl_1/modexpng_mmm_fsm_old.vh"
-    //`include "../rtl_1/modexpng_parameters_old.vh"
-    //`include "../rtl_1/modexpng_parameters_x8_old.vh"
+    `include "modexpng_mmm_dual_fsm.vh"
 
 
     //
     // Ports
     //
     input                        clk;
-    input                        rst;
+    input                        rst_n;
     
     input                        ena;
     output                       rdy;
@@ -122,23 +120,23 @@ module modexpng_mmm_dual
     //
     // FSM Declaration
     //
-    reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
-    reg [FSM_STATE_WIDTH-1:0] fsm_state_next;
+    reg [MMM_FSM_STATE_W-1:0] fsm_state = MMM_FSM_STATE_IDLE;
+    reg [MMM_FSM_STATE_W-1:0] fsm_state_next;
 
-    wire [FSM_STATE_WIDTH-1:0] fsm_state_after_idle;    
-    wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
-    wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
-    wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;
-    wire [FSM_STATE_WIDTH-1:0] fsm_state_after_square_holdoff;
+    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_idle;    
+    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_square;
+    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_triangle;
+    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_rectangle;
+    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_square_holdoff;
 
     
     //
     // FSM Process
     //
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) fsm_state <= FSM_STATE_IDLE;
-        else     fsm_state <= fsm_state_next;
+        if (!rst_n) fsm_state <= MMM_FSM_STATE_IDLE;
+        else        fsm_state <= fsm_state_next;
 
         
     //
@@ -193,9 +191,9 @@ module modexpng_mmm_dual
         //
         case (fsm_state_next)
             //
-            FSM_STATE_MULT_SQUARE_COL_0_INIT,
-            FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-            FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
                 col_index       <= 5'd0;
                 col_index_last  <= word_index_last[7:3];
                 col_index_next  <= 5'd1;
@@ -203,9 +201,9 @@ module modexpng_mmm_dual
                 
             end
             //
-            FSM_STATE_MULT_SQUARE_COL_N_INIT,
-            FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-            FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
                 col_index <= col_index_next;
                 col_is_last <= col_index_next == col_index_last;
                 col_index_next <= col_index_next == col_index_last ? 5'd0 : col_index_next + 5'd1;   
@@ -242,8 +240,8 @@ module modexpng_mmm_dual
         //
         case (fsm_state)
             //
-            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-            FSM_STATE_MULT_SQUARE_COL_N_BUSY:
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:
                 square_almost_done_flop <= square_almost_done_comb;
             //
             default:
@@ -262,8 +260,8 @@ module modexpng_mmm_dual
         //
         case (fsm_state)
             //
-            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
                 triangle_almost_done_flop <= triangle_almost_done_comb;
             //
             default:
@@ -283,8 +281,8 @@ module modexpng_mmm_dual
         //
         case (fsm_state)
             //
-            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
                 rectangle_almost_done_flop <= rectangle_almost_done_comb;
             //
             default:
@@ -301,36 +299,36 @@ module modexpng_mmm_dual
     //
     // Narrow Storage Control Logic
     //
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) narrow_xy_ena <= 1'b0;
+        if (!rst_n) narrow_xy_ena <= 1'b0;
         else begin
             //
             // Narrow Address
             //
             case (fsm_state_next)
                 //
-                FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                FSM_STATE_MULT_SQUARE_COL_N_INIT:   narrow_xy_addr <= 8'd0;
-                FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                FSM_STATE_MULT_SQUARE_COL_N_BUSY:   narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0;
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:   narrow_xy_addr <= 8'd0;
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0;
                 //
-                FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
-                FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ?
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ?
                     8'd0 :  narrow_xy_addr + 1'b1;
                 //
-                FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
-                FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
                     8'd1 :  narrow_xy_addr + 1'b1;            
                 //
                 default:                            narrow_xy_addr <= 8'dX;
@@ -341,27 +339,27 @@ module modexpng_mmm_dual
             //
             case (fsm_state_next)
                 //
-                FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                FSM_STATE_MULT_SQUARE_COL_N_INIT,
-                FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                FSM_STATE_MULT_SQUARE_COL_N_BUSY:   narrow_xy_bank <= sel_narrow_in;
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   narrow_xy_bank <= sel_narrow_in;
                 //
-                FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ?
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ?
                     BANK_NARROW_EXT : BANK_NARROW_COEFF;
                 //
-                FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
                     BANK_NARROW_EXT : BANK_NARROW_Q;            
                 //
                 default:                            narrow_xy_bank <= 2'bXX;
@@ -370,24 +368,24 @@ module modexpng_mmm_dual
             //
             case (fsm_state_next)
                 //
-                FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                FSM_STATE_MULT_SQUARE_COL_N_INIT,
-                FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_N_TRIG:   narrow_xy_ena <= 1'b1;
-                FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                FSM_STATE_MULT_SQUARE_COL_N_BUSY:   narrow_xy_ena <= ~square_almost_done_flop;
-                FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:   narrow_xy_ena <= 1'b1;
-                FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop; 
-                FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:   narrow_xy_ena <= 1'b1;
-                FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   narrow_xy_ena <= ~rectangle_surely_done_flop;
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:   narrow_xy_ena <= 1'b1;
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   narrow_xy_ena <= ~square_almost_done_flop;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:   narrow_xy_ena <= 1'b1;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop; 
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:   narrow_xy_ena <= 1'b1;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   narrow_xy_ena <= ~rectangle_surely_done_flop;
                 //
                 default:                              narrow_xy_ena <= 1'b0;
                 //
@@ -420,9 +418,9 @@ module modexpng_mmm_dual
     endfunction
     
     integer j;
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) begin
+        if (!rst_n) begin
             wide_xy_ena <= 1'b0;
             wide_xy_ena_aux <= 1'b0;
         end else begin
@@ -435,26 +433,26 @@ module modexpng_mmm_dual
                     //
                     // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
                     //
-                    FSM_STATE_MULT_SQUARE_COL_0_INIT:   wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
-                    FSM_STATE_MULT_SQUARE_COL_N_INIT:   wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
-                    FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                    FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                    FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                    FSM_STATE_MULT_SQUARE_COL_N_BUSY:   wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
+                    MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:   wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:   wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                    MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                    MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                    MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
                     //
-                    FSM_STATE_MULT_TRIANGLE_COL_0_INIT:   wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
-                    FSM_STATE_MULT_TRIANGLE_COL_N_INIT:   wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
-                    FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                    FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-                    FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                    FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:   wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:   wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
                     //
-                    FSM_STATE_MULT_RECTANGLE_COL_0_INIT:   wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
-                    FSM_STATE_MULT_RECTANGLE_COL_N_INIT:   wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
-                    FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                    FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-                    FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                    FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:   wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT:   wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
                     //
                     default:                            wide_xy_addr[j] <= 8'dX;
                 endcase
@@ -465,27 +463,27 @@ module modexpng_mmm_dual
                 //
                 // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
                 //
-                FSM_STATE_MULT_SQUARE_COL_0_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
-                FSM_STATE_MULT_SQUARE_COL_N_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
-                FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                FSM_STATE_MULT_SQUARE_COL_N_BUSY:   wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
                 //
-                FSM_STATE_MULT_TRIANGLE_COL_0_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
-                FSM_STATE_MULT_TRIANGLE_COL_N_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
-                FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
                 //
-                FSM_STATE_MULT_RECTANGLE_COL_0_INIT:   wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0};
-                FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
-                FSM_STATE_MULT_RECTANGLE_HOLDOFF:    wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:   wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0};
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:    wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX;
                 //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ?
                     //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4];
                 //
@@ -495,49 +493,49 @@ module modexpng_mmm_dual
             // Wide Bank
             //
             case (fsm_state_next)
-                FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                FSM_STATE_MULT_SQUARE_COL_N_INIT,
-                FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                FSM_STATE_MULT_SQUARE_COL_N_BUSY:    wide_xy_bank <= sel_wide_in;
-                FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:  wide_xy_bank <= BANK_WIDE_L;
-                FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_bank <= BANK_WIDE_L;
-                FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,    
-                FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   wide_xy_bank <= BANK_WIDE_N;            
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:    wide_xy_bank <= sel_wide_in;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:  wide_xy_bank <= BANK_WIDE_L;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_bank <= BANK_WIDE_L;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,    
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   wide_xy_bank <= BANK_WIDE_N;            
                 default:                             wide_xy_bank <= 3'bXXX;
             endcase
             //
             // Wide Aux Bank
             //
             case (fsm_state_next)
-                FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                FSM_STATE_MULT_SQUARE_COL_N_INIT,
-                FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                FSM_STATE_MULT_SQUARE_COL_N_BUSY:   wide_xy_bank_aux <= sel_wide_in;
-                FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H;
-                FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_bank_aux <= BANK_WIDE_L;
-                FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,    
-                FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
-                FSM_STATE_MULT_RECTANGLE_HOLDOFF:   if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's)
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   wide_xy_bank_aux <= sel_wide_in;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_bank_aux <= BANK_WIDE_L;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,    
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:   if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's)
                     case (rcmb_xy_bank)
                         BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L;
                         BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H;
@@ -551,43 +549,43 @@ module modexpng_mmm_dual
             // Wide Enable
             //
             case (fsm_state_next)
-                FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                FSM_STATE_MULT_SQUARE_COL_N_INIT,
-                FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-                FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                FSM_STATE_MULT_SQUARE_COL_N_BUSY,
-                FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
-                FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   wide_xy_ena <= 1'b1;
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   wide_xy_ena <= 1'b1;
                 default:                               wide_xy_ena <= 1'b0;
             endcase
             //
             // Wide Aux Enable
             //
             case (fsm_state_next)
-                FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-                FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-                FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  wide_xy_ena_aux <= 1'b1;
-                FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1;
-                FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-                FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-                FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
-                FSM_STATE_MULT_RECTANGLE_HOLDOFF:   wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML);
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  wide_xy_ena_aux <= 1'b1;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
+                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:   wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML);
                 default:                            wide_xy_ena_aux <= 1'b0;
             endcase
             //
@@ -686,9 +684,9 @@ module modexpng_mmm_dual
     reg narrow_xy_ena_dly1 = 1'b0;
     reg narrow_xy_ena_dly2 = 1'b0;
     
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) begin
+        if (!rst_n) begin
             //
             narrow_xy_ena_dly1 <= 1'b0;
             narrow_xy_ena_dly2 <= 1'b0;
@@ -721,13 +719,13 @@ module modexpng_mmm_dual
     always @(posedge clk) begin
         //
         case (fsm_state)
-            FSM_STATE_MULT_SQUARE_COL_0_TRIG:   dsp_merge_xy_b <= 1'b1;
-            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0;
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG:   dsp_merge_xy_b <= 1'b1;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0;
         endcase
         //
         case (fsm_state)
-            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-            FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_merge_xy_b_first <= 1'b1;
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_merge_xy_b_first <= 1'b1;
             default:                          dsp_merge_xy_b_first <= 1'b0;
         endcase
         //
@@ -815,18 +813,18 @@ module modexpng_mmm_dual
     always @(posedge clk)
         //
         case (fsm_state_next)
-            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-            FSM_STATE_MULT_SQUARE_COL_N_TRIG:       dsp_xy_mode_z_adv4 <= {9{1'b0}};
-            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-            FSM_STATE_MULT_SQUARE_COL_N_BUSY:       dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly);
-            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}};    // so easy
-            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     dsp_xy_mode_z_adv4 <= {9{1'b1}};
-            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}};    // so easy
-            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:     dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly);
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:       dsp_xy_mode_z_adv4 <= {9{1'b0}};    // next state is a SQUARE TRIG state: all nine bits cleared
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly);    // value from helper function defined outside this hunk
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}};    // so easy
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     dsp_xy_mode_z_adv4 <= {9{1'b1}};    // TRIANGLE BUSY: all nine bits set, same value as the default branch
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}};    // so easy
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:     dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly);    // value from helper function defined outside this hunk
             default:                                dsp_xy_mode_z_adv4 <= {9{1'b1}};
         endcase
 
@@ -851,7 +849,7 @@ module modexpng_mmm_dual
     modexpng_recombinator_block recombinator_block
     (
         .clk                            (clk),
-        .rst                            (rst),
+        .rst_n                          (rst_n),
         
         .ena                            (rcmb_ena),
         .rdy                            (rcmb_rdy),
@@ -894,10 +892,10 @@ module modexpng_mmm_dual
     //
     // Recombinator Enable Logic
     //    
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)    // rst_n in the sensitivity list: the reset is now asynchronous
         //
-        if (rst) rcmb_ena <= 1'b0;
-        else     rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p;
+        if (!rst_n) rcmb_ena <= 1'b0;    // active-low reset holds the recombinator enable low
+        else        rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p;    // registered: set when ce_a is high while ce_b, ce_m and ce_p are all low
 
         
     //
@@ -911,55 +909,55 @@ module modexpng_mmm_dual
     //
     // FSM Transition Logic
     //
-    assign fsm_state_after_idle = !only_reduce ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_INIT;
-    assign fsm_state_after_mult_square    = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF   : FSM_STATE_MULT_SQUARE_COL_N_INIT;
-    assign fsm_state_after_mult_triangle  = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
-    assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
-    assign fsm_state_after_square_holdoff = just_multiply ? FSM_STATE_STOP : FSM_STATE_MULT_TRIANGLE_COL_0_INIT;
+    assign fsm_state_after_idle = !only_reduce ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT;    // only_reduce starts directly at the TRIANGLE phase, skipping SQUARE
+    assign fsm_state_after_mult_square    = col_is_last ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF   : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT;    // last column -> HOLDOFF, otherwise start the next column
+    assign fsm_state_after_mult_triangle  = col_is_last ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT;    // last column -> HOLDOFF, otherwise start the next column
+    assign fsm_state_after_mult_rectangle = col_is_last ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT;    // last column -> HOLDOFF, otherwise start the next column
+    assign fsm_state_after_square_holdoff = just_multiply ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT;    // just_multiply goes to STOP after SQUARE instead of entering TRIANGLE
      
 
     always @* begin
         //
-        fsm_state_next = FSM_STATE_IDLE;
+        fsm_state_next = MMM_FSM_STATE_IDLE;
         //
         case (fsm_state)
-            FSM_STATE_IDLE:                   fsm_state_next = ena                   ? fsm_state_after_idle /*FSM_STATE_MULT_SQUARE_COL_0_INIT*/ : FSM_STATE_IDLE;
+            MMM_FSM_STATE_IDLE:                   fsm_state_next = ena                   ? fsm_state_after_idle /*MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT*/ : MMM_FSM_STATE_IDLE;
                         
-            FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next =                         FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
-            FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next =                         FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
-            FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY;
             
-            FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next =                         FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
-            FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next =                         FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
-            FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square    : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square    : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY;
             
-            FSM_STATE_MULT_SQUARE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? fsm_state_after_square_holdoff : FSM_STATE_MULT_SQUARE_HOLDOFF;
+            MMM_FSM_STATE_MULT_SQUARE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? fsm_state_after_square_holdoff : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF;
 
-            FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
-            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
-            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? FSM_STATE_MULT_TRIANGLE_COL_N_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;     
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;     
             
-            FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ;
-            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
-            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
             
-            FSM_STATE_MULT_TRIANGLE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF;
+            MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF;
 
-            FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next =                         FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
-            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next =                         FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
-            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;     
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;     
             
-            FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next =                         FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
-            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next =                         FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
-            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
             
-            FSM_STATE_MULT_RECTANGLE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? FSM_STATE_WAIT_REDUCTOR : FSM_STATE_MULT_RECTANGLE_HOLDOFF;
+            MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? MMM_FSM_STATE_WAIT_REDUCTOR : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF;
             
-            FSM_STATE_WAIT_REDUCTOR:             fsm_state_next =                         rdct_rdy ? FSM_STATE_STOP : FSM_STATE_WAIT_REDUCTOR;
+            MMM_FSM_STATE_WAIT_REDUCTOR:             fsm_state_next =                         rdct_rdy ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_WAIT_REDUCTOR;
             
-            FSM_STATE_STOP:                     fsm_state_next =                         FSM_STATE_IDLE                   ;
+            MMM_FSM_STATE_STOP:                     fsm_state_next =                         MMM_FSM_STATE_IDLE                   ;
             
-            default:                             fsm_state_next =                         FSM_STATE_IDLE                   ;
+            default:                             fsm_state_next =                         MMM_FSM_STATE_IDLE                   ;
 
         endcase
         //
@@ -973,11 +971,11 @@ module modexpng_mmm_dual
 
     assign rdct_ena = rdct_ena_reg; 
     
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst)                                rdct_ena_reg <= 1'b0;
+        if (!rst_n)                                rdct_ena_reg <= 1'b0;
         else case (fsm_state)
-           FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1;
+           MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1;
            default:                             rdct_ena_reg <= 1'b0;
         endcase
     
@@ -989,12 +987,12 @@ module modexpng_mmm_dual
     
     assign rdy = rdy_reg;
     
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) rdy_reg <= 1'b1;
+        if (!rst_n) rdy_reg <= 1'b1;
         else begin
             if (rdy && ena) rdy_reg <= 1'b0;
-            if (!rdy && (fsm_state == FSM_STATE_STOP)) rdy_reg <= 1'b1; 
+            if (!rdy && (fsm_state == MMM_FSM_STATE_STOP)) rdy_reg <= 1'b1; 
         end
     
 endmodule
diff --git a/rtl/modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_dual_fsm.vh
similarity index 59%
rename from rtl/modexpng_mmm_fsm.vh
rename to rtl/modexpng_mmm_dual_fsm.vh
index 1c2a57b..a0e5efc 100644
--- a/rtl/modexpng_mmm_fsm.vh
+++ b/rtl/modexpng_mmm_dual_fsm.vh
@@ -1,43 +1,47 @@
-localparam MMM_FSM_STATE_W = 32;
+`define MMM_FSM_STATE_W_DEF 6
 
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_IDLE                      = 0;
+localparam MMM_FSM_STATE_W = `MMM_FSM_STATE_W_DEF;
+
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_IDLE                      = `MMM_FSM_STATE_W_DEF'd00;
     
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_1               = 1;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_2               = 2;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_3               = 3;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_1               = `MMM_FSM_STATE_W_DEF'd01;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_2               = `MMM_FSM_STATE_W_DEF'd02;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_T1T2_3               = `MMM_FSM_STATE_W_DEF'd03;
 
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_1           = 4;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_2           = 5;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_3           = 6;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_1           = `MMM_FSM_STATE_W_DEF'd04;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_2           = `MMM_FSM_STATE_W_DEF'd05;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_LOAD_NN_COEFF_3           = `MMM_FSM_STATE_W_DEF'd06;
 
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT    = 11;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG    = 12;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY    = 13;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT    = `MMM_FSM_STATE_W_DEF'd11;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG    = `MMM_FSM_STATE_W_DEF'd12;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY    = `MMM_FSM_STATE_W_DEF'd13;
 
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT    = 14;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG    = 15;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY    = 16;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT    = `MMM_FSM_STATE_W_DEF'd14;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG    = `MMM_FSM_STATE_W_DEF'd15;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY    = `MMM_FSM_STATE_W_DEF'd16;
     
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_HOLDOFF       = 17;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_SQUARE_HOLDOFF       = `MMM_FSM_STATE_W_DEF'd17;
 
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT  = 21;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG  = 22;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY  = 23;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT  = `MMM_FSM_STATE_W_DEF'd21;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG  = `MMM_FSM_STATE_W_DEF'd22;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY  = `MMM_FSM_STATE_W_DEF'd23;
 
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT  = 24;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG  = 25;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY  = 26;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT  = `MMM_FSM_STATE_W_DEF'd24;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG  = `MMM_FSM_STATE_W_DEF'd25;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY  = `MMM_FSM_STATE_W_DEF'd26;
     
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF     = 27;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF     = `MMM_FSM_STATE_W_DEF'd27;
+
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT = `MMM_FSM_STATE_W_DEF'd31;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = `MMM_FSM_STATE_W_DEF'd32;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = `MMM_FSM_STATE_W_DEF'd33;
 
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT = `MMM_FSM_STATE_W_DEF'd34;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = `MMM_FSM_STATE_W_DEF'd35;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = `MMM_FSM_STATE_W_DEF'd36;
 
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35;
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF    = `MMM_FSM_STATE_W_DEF'd37;
 
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF    = 37;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_WAIT_REDUCTOR             = `MMM_FSM_STATE_W_DEF'd41;
 
-localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_STOP                      = 40;
+localparam [MMM_FSM_STATE_W-1:0] MMM_FSM_STATE_STOP                      = `MMM_FSM_STATE_W_DEF'd51;
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index 7d2837d..5320c1b 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -9,7 +9,7 @@
 //
 
 /* Only define this for simulation, must be turned off to synthesize properly! */
-`define MODEXPNG_ENABLE_DEBUG
+//`define MODEXPNG_ENABLE_DEBUG
 
 //
 // Core Settings
diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
index f698c18..7a9154a 100644
--- a/rtl/modexpng_recombinator_block.v
+++ b/rtl/modexpng_recombinator_block.v
@@ -1,6 +1,6 @@
 module modexpng_recombinator_block
 (
-    clk, rst,
+    clk, rst_n,
     ena, rdy,
     fsm_state_next,
     word_index_last,
@@ -18,15 +18,14 @@ module modexpng_recombinator_block
     // Headers
     //
     `include "modexpng_parameters.vh"
-    `include "../rtl_1/modexpng_mmm_fsm_old.vh"
-    //`include "../rtl_1/modexpng_parameters_x8_old.vh"
+    `include "modexpng_mmm_dual_fsm.vh"
 
 
     input                        clk;
-    input                        rst;
+    input                        rst_n;
     input                        ena;
     output                       rdy;
-    input  [FSM_STATE_WIDTH-1:0] fsm_state_next;
+    input  [MMM_FSM_STATE_W-1:0] fsm_state_next;
     input [7:0]                  word_index_last;
     input                        dsp_xy_ce_p;
     input  [9*47-1:0] dsp_x_p;
@@ -83,10 +82,10 @@ module modexpng_recombinator_block
     //
     reg dsp_xy_ce_p_dly1 = 1'b0;
 
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) dsp_xy_ce_p_dly1 <= 1'b0;
-        else     dsp_xy_ce_p_dly1 <= dsp_xy_ce_p;
+        if (!rst_n) dsp_xy_ce_p_dly1 <= 1'b0;
+        else        dsp_xy_ce_p_dly1 <= dsp_xy_ce_p;
 
 
     //
@@ -144,9 +143,9 @@ module modexpng_recombinator_block
        if (ena)
            //
            case (fsm_state_next)
-               FSM_STATE_MULT_SQUARE_COL_0_BUSY:        rcmb_mode <= 2'd1;
-               FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:      rcmb_mode <= 2'd2;
-               FSM_STATE_MULT_RECTANGLE_COL_0_BUSY:     rcmb_mode <= 2'd3;
+               MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY:        rcmb_mode <= 2'd1;
+               MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:      rcmb_mode <= 2'd2;
+               MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY:     rcmb_mode <= 2'd3;
                default:                                 rcmb_mode <= 2'd0;
            endcase
 
@@ -583,7 +582,7 @@ module modexpng_recombinator_block
             rcmb_xy_lsb_ce_purge <= {1'b0, rcmb_xy_lsb_ce_purge[2:1]};
         //
         if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1])
-            rcmb_xy_msb_ce_purge = 2'b11;
+            rcmb_xy_msb_ce_purge <= 2'b11;
         else
             rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]};
         //
@@ -628,10 +627,10 @@ module modexpng_recombinator_block
         //
         case (fsm_state_next)
             //
-            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-            FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-            FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
                 //
                 xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                 xy_aux_lsb_adv   [6] <= 1'b0;
@@ -645,10 +644,10 @@ module modexpng_recombinator_block
                 //
             end
             //
-            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
                 //
                 xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                 xy_aux_lsb_adv   [6] <= calc_triangle_aux_lsb   (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
@@ -662,10 +661,10 @@ module modexpng_recombinator_block
                 //
             end
             //
-            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
                 //
                 xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
                 xy_aux_lsb_adv   [6] <= 1'b0;
@@ -772,9 +771,9 @@ module modexpng_recombinator_block
     reg rcmb_xy_lsb_valid = 1'b0;
     reg rcmb_xy_msb_valid = 1'b0;
 
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) begin
+        if (!rst_n) begin
             rcmb_xy_lsb_valid <= 1'b0;
             rcmb_xy_msb_valid <= 1'b0;        
         end else begin
diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v
index c9de32d..9a95f55 100644
--- a/rtl/modexpng_reductor.v
+++ b/rtl/modexpng_reductor.v
@@ -1,6 +1,6 @@
 module modexpng_reductor
 (
-    clk, rst,
+    clk, rst_n,
     ena, rdy,
     word_index_last,
     sel_wide_out, sel_narrow_out,
@@ -14,13 +14,10 @@ module modexpng_reductor
     // Headers
     //
     `include "modexpng_parameters.vh"
-    //`include "../rtl_1/modexpng_mmm_fsm.vh"
-    
-    //`include "../rtl_1/modexpng_parameters_x8.vh"
 
 
     input                        clk;
-    input                        rst;
+    input                        rst_n;
     input                        ena;
     output                       rdy;
     /*
@@ -70,11 +67,11 @@ module modexpng_reductor
 
     assign rdy = rdy_reg;
     
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) rdy_reg <= 1'b1;
+        if (!rst_n)                rdy_reg <= 1'b1;
         else begin
-            if (rdy && ena) rdy_reg <= 1'b0;
+            if (rdy && ena)        rdy_reg <= 1'b0;
             if (!rdy && !busy_now) rdy_reg <= 1'b1;
         end
         
@@ -103,9 +100,9 @@ module modexpng_reductor
     reg [17:0] rcmb_y_dout_dly2;
     reg [17:0] rcmb_y_dout_dly3;
     
-    always @(posedge clk) 
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) begin
+        if (!rst_n) begin
             rcmb_xy_valid_dly1 <= 1'b0;
             rcmb_xy_valid_dly2 <= 1'b0;
             rcmb_xy_valid_dly3 <= 1'b0;
@@ -283,9 +280,9 @@ module modexpng_reductor
     //
     //
     //
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) begin
+        if (!rst_n) begin
             clear_rdct_wide;
             clear_rdct_narrow;
         end else begin
@@ -325,17 +322,17 @@ module modexpng_reductor
 
     assign busy_now = busy_now_shreg[2];
     
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) busy_now_shreg <= {3{1'b0}};
+        if (!rst_n)         busy_now_shreg <= {3{1'b0}};
         else begin
             if (rdy && ena) busy_now_shreg <= {3{1'b1}};
             else            busy_now_shreg <= {busy_now_shreg[1:0], busy_next};
         end
     
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) busy_next <= 1'b0;
+        if (!rst_n) busy_next <= 1'b0;
         else begin
             if (rdy && ena) busy_next <= 1'b1;
             if (!rdy && rcmb_xy_valid_dly3 && (rcmb_xy_bank_dly3 == BANK_RCMB_EXT)) busy_next <= 1'b0;    
diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v
index 5a03b24..4b81e0b 100644
--- a/rtl/modexpng_storage_block.v
+++ b/rtl/modexpng_storage_block.v
@@ -1,6 +1,6 @@
 module modexpng_storage_block
 (
-    clk, rst,
+    clk, rst_n,
     
     wr_wide_xy_ena,
     wr_wide_xy_bank,
@@ -55,7 +55,7 @@ module modexpng_storage_block
     // Ports
     //
     input                                         clk;
-    input                                         rst;
+    input                                         rst_n;
 
     input                                         wr_wide_xy_ena;
     input  [                  BANK_ADDR_W   -1:0] wr_wide_xy_bank;
@@ -108,9 +108,9 @@ module modexpng_storage_block
     reg wrk_wide_xy_reg_ena    = 1'b0;
     reg wrk_narrow_xy_reg_ena  = 1'b0;
 
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) begin
+        if (!rst_n) begin
             rd_wide_xy_reg_ena     <= 1'b0;
             rd_wide_xy_reg_ena_aux <= 1'b0;
             rd_narrow_xy_reg_ena   <= 1'b0;
diff --git a/rtl/modexpng_storage_manager.v b/rtl/modexpng_storage_manager.v
index c39e07a..eb4da4c 100644
--- a/rtl/modexpng_storage_manager.v
+++ b/rtl/modexpng_storage_manager.v
@@ -1,6 +1,6 @@
 module modexpng_storage_manager
 (
-    clk, rst,
+    clk, rst_n,
     
     wr_wide_xy_ena,      wr_wide_xy_bank,     wr_wide_xy_addr,     wr_wide_x_dout,    wr_wide_y_dout,
     wr_narrow_xy_ena,    wr_narrow_xy_bank,   wr_narrow_xy_addr,   wr_narrow_x_dout,  wr_narrow_y_dout,    
@@ -29,7 +29,7 @@ module modexpng_storage_manager
     // Ports
     //
     input                     clk;
-    input                     rst;
+    input                     rst_n;
 
     output                    wr_wide_xy_ena;
     output [BANK_ADDR_W -1:0] wr_wide_xy_bank;
@@ -173,9 +173,9 @@ module modexpng_storage_manager
     //
     // Write Arbiter
     //
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst)                         disable_wide;
+        if (!rst_n)                      disable_wide;
         else begin
             //
             if      (io_wide_xy_ena)     enable_wide(io_wide_xy_bank,   io_wide_xy_addr,   io_wide_x_din,   io_wide_y_din);
@@ -190,9 +190,9 @@ module modexpng_storage_manager
     //
     // Read Arbiter
     //
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst)                           disable_narrow;
+        if (!rst_n)                        disable_narrow;
         else begin
             //
             if      (io_narrow_xy_ena)     enable_narrow(io_narrow_xy_bank,   io_narrow_xy_addr,   io_narrow_x_din,   io_narrow_y_din);
diff --git a/rtl/modexpng_uop_engine.v b/rtl/modexpng_uop_engine.v
index 8ad2122..c7b064a 100644
--- a/rtl/modexpng_uop_engine.v
+++ b/rtl/modexpng_uop_engine.v
@@ -1,7 +1,7 @@
 module modexpng_uop_engine
 (
     clk,
-    rst,
+    rst_n,
 
     ena,
     rdy,
@@ -83,7 +83,7 @@ module modexpng_uop_engine
     // Ports
     //
     input                      clk;
-    input                      rst;
+    input                      rst_n;
     
     input                      ena;
     output                     rdy;
@@ -369,9 +369,9 @@ module modexpng_uop_engine
     //
     // UOP Trigger Logic
     //
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) begin
+        if (!rst_n) begin
             io_mgr_ena_r <= 1'b0;
             mmm_ena_x_r  <= 1'b0;
             mmm_ena_y_r  <= 1'b0;
@@ -618,10 +618,10 @@ module modexpng_uop_engine
     //
     // UOP FSM Process
     //
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst) uop_fsm_state <= UOP_FSM_STATE_IDLE;
-        else     uop_fsm_state <= uop_fsm_state_next;
+        if (!rst_n) uop_fsm_state <= UOP_FSM_STATE_IDLE;
+        else        uop_fsm_state <= uop_fsm_state_next;
             
     
     //
@@ -645,9 +645,9 @@ module modexpng_uop_engine
     reg rdy_r = 1'b1;
     assign rdy = rdy_r;
 
-    always @(posedge clk)
+    always @(posedge clk or negedge rst_n)
         //
-        if (rst)                  rdy_r <= 1'b1;
+        if (!rst_n)               rdy_r <= 1'b1;
         else case (uop_fsm_state)
             UOP_FSM_STATE_IDLE:   rdy_r <= ~ena;
             UOP_FSM_STATE_DECODE: rdy_r <= uop_opcode_is_stop;
diff --git a/rtl/modexpng_wrapper.v b/rtl/modexpng_wrapper.v
new file mode 100644
index 0000000..687a963
--- /dev/null
+++ b/rtl/modexpng_wrapper.v
@@ -0,0 +1,393 @@
+//======================================================================
+//
+// Copyright (c) 2019, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module modexpng_wrapper
+(
+    input           clk,
+    input           rst_n,
+    
+    input           clk_core,
+
+    input           cs,
+    input           we,
+
+    input   [11:0]  address,
+    input   [31:0]  write_data,
+    output  [31:0]  read_data
+);
+
+    //
+    // Headers
+    //
+    `include "modexpng_parameters.vh"
+
+
+    //
+    // Address Decoder
+    //
+    localparam ADDR_MSB_WRAP = 2'b00;
+
+    wire [1:0] addr_msb = address[11:10];
+    wire [9:0] addr_lsb = address[ 9: 0];
+    
+    wire addr_msb_is_wrap = addr_msb == ADDR_MSB_WRAP;
+
+
+    //
+    // Register Addresses
+    //
+    localparam ADDR_NAME0         = 10'h000;
+    localparam ADDR_NAME1         = 10'h001;
+    localparam ADDR_VERSION       = 10'h002;
+
+    localparam ADDR_CONTROL       = 10'h008; // {next, init}
+    localparam ADDR_STATUS        = 10'h009; // {valid, ready}
+    localparam ADDR_MODE          = 10'h010; // {crt, dummy}
+    localparam ADDR_MODULUS_BITS  = 10'h011; // number of bits in modulus
+    localparam ADDR_EXPONENT_BITS = 10'h012; // number of bits in exponent
+    localparam ADDR_BANK_BITS     = 10'h013; // largest supported number of bits
+    localparam ADDR_NUM_MULTS     = 10'h014; // number of parallel multipliers
+
+
+    //
+    // Register Bits
+    //
+    
+    // localparam CONTROL_INIT_BIT    = 0; -- not used
+    localparam CONTROL_NEXT_BIT       = 1;
+
+    // localparam STATUS_READY_BIT    = 0; -- hardcoded to always read 1
+    localparam STATUS_VALID_BIT       = 1;
+
+    // localparam MODE_FASTUNSAFE_BIT = 0; -- not used
+    localparam MODE_FULLCRT_BIT       = 1;
+
+    
+    //
+    // Default Values
+    //
+    `define MODEXPNG_DEFAULT_NAME0   32'h6D6F6465
+    `define MODEXPNG_DEFAULT_NAME1   32'h78706E67
+    `define MODEXPNG_DEFAULT_VERSION 32'h302E3130
+
+    `define MODEXPNG_DEFAULT_CONTROL 1'b0
+    `define MODEXPNG_DEFAULT_MODE    1'b0
+
+    `define MODEXPNG_DEFAULT_MODULUS_BITS  13'd1024
+    `define MODEXPNG_DEFAULT_EXPONENT_BITS 13'd17
+    
+    
+    //
+    // Handy Values
+    //
+    localparam MIN_OP_W = 2 * NUM_MULTS * WORD_W * 2;
+    localparam MIN_EXP_W = 2 * 2;
+    localparam ZEROES_BIT_INDEX_W = 1 + cryptech_clog2(NUM_MULTS) + cryptech_clog2(WORD_W);
+    
+    
+    //
+    // Register Values
+    //
+    localparam CORE_NAME0   = `MODEXPNG_DEFAULT_NAME0;   // "mode"
+    localparam CORE_NAME1   = `MODEXPNG_DEFAULT_NAME1;   // "xpng"
+    localparam CORE_VERSION = `MODEXPNG_DEFAULT_VERSION; // "0.10"
+
+
+    //
+    // Registers
+    //
+    reg wrap_reg_control     = `MODEXPNG_DEFAULT_CONTROL;
+    reg sync_reg_control     = `MODEXPNG_DEFAULT_CONTROL;
+    reg sync_reg_control_dly = `MODEXPNG_DEFAULT_CONTROL;
+    reg core_reg_control     = `MODEXPNG_DEFAULT_CONTROL;
+    reg wrap_reg_mode        = `MODEXPNG_DEFAULT_MODE;
+    reg sync_reg_mode;
+    reg core_reg_mode;
+        
+    reg [BIT_INDEX_W:ZEROES_BIT_INDEX_W] wrap_modulus_bits_msb;
+    reg [BIT_INDEX_W:                 0] wrap_exponent_bits;
+
+    initial write_modulus_bits(`MODEXPNG_DEFAULT_MODULUS_BITS);
+    initial write_exponent_bits(`MODEXPNG_DEFAULT_EXPONENT_BITS);
+
+    wire sync_reg_control_rising = sync_reg_control & ~sync_reg_control_dly;
+
+
+    //
+    // Status Resync (core_reg_status -> sync_reg_status -> wrap_reg_status)
+    //
+    reg  wrap_reg_status = 1'b1;    // second stage; this is the bit returned when ADDR_STATUS is read
+    reg  sync_reg_status = 1'b1;    // first stage; samples core_reg_status on clk
+    wire core_reg_status;           // driven by the .valid output of the core instance
+    
+    always @(posedge clk or negedge rst_n)
+        // two-stage shift on clk; rst_n low asynchronously forces both stages to 1
+        if (!rst_n) {wrap_reg_status, sync_reg_status} <= {           1'b1,            1'b1};
+        else        {wrap_reg_status, sync_reg_status} <= {sync_reg_status, core_reg_status};
+
+
+    //
+    // Output Mux Sources (wrapper registers / core memory, selected onto read_data)
+    //
+    reg  [31: 0] wrap_read_data;
+    wire [31: 0] core_read_data;
+    
+    
+    //
+    // Reset Resync: rst_n asserts core_rst_n asynchronously, its release is re-timed to clk_core
+    //
+    wire core_rst_n;
+    
+    reg [15: 0] core_rst_shreg = {16{1'b0}};
+
+    always @(posedge clk_core or negedge rst_n)
+        // cleared at once while rst_n is low; afterwards a 1 is shifted in on every clk_core edge
+        if (!rst_n) core_rst_shreg <= {16{1'b0}};
+        else        core_rst_shreg <= {core_rst_shreg[14:0], 1'b1};
+
+    assign core_rst_n = core_rst_shreg[15]; // goes high 16 clk_core edges after rst_n is released
+    
+
+    //
+    // Parameters Resync
+    //
+    reg [OP_ADDR_W   -1:0] wrap_word_index_last_n;
+    reg [OP_ADDR_W   -1:0] wrap_word_index_last_pq;
+
+    reg [BIT_INDEX_W -1:0] wrap_bit_index_last_n;
+    reg [BIT_INDEX_W -1:0] wrap_bit_index_last_pq;
+    
+    reg [OP_ADDR_W   -1:0] sync_word_index_last_n;
+    reg [OP_ADDR_W   -1:0] sync_word_index_last_pq;
+
+    reg [BIT_INDEX_W -1:0] sync_bit_index_last_n;
+    reg [BIT_INDEX_W -1:0] sync_bit_index_last_pq;
+    
+    reg [OP_ADDR_W   -1:0] core_word_index_last_n;
+    reg [OP_ADDR_W   -1:0] core_word_index_last_pq;
+
+    reg [BIT_INDEX_W -1:0] core_bit_index_last_n;
+    reg [BIT_INDEX_W -1:0] core_bit_index_last_pq;
+    
+    always @(posedge clk_core) begin
+        // first stage: sample the bus-side (wrap_*) settings on every clk_core edge
+        sync_reg_mode <= wrap_reg_mode;
+        //
+        sync_word_index_last_n  <= wrap_word_index_last_n;
+        sync_word_index_last_pq <= wrap_word_index_last_pq;
+        //
+        sync_bit_index_last_n  <= wrap_bit_index_last_n;
+        sync_bit_index_last_pq <= wrap_bit_index_last_pq;
+        //
+    end
+    
+    always @(posedge clk_core)
+        // second stage: the values wired to the core are replaced only when sync_reg_control_rising is high
+        if (sync_reg_control_rising) begin
+            // NOTE(review): the multi-bit sync_* buses are presumably stable well before the trigger - confirm
+            core_reg_mode <= sync_reg_mode;
+            //
+            core_word_index_last_n  <= sync_word_index_last_n;
+            core_word_index_last_pq <= sync_word_index_last_pq;
+            //
+            core_bit_index_last_n  <= sync_bit_index_last_n;
+            core_bit_index_last_pq <= sync_bit_index_last_pq;
+            //
+        end
+    
+    
+    //
+    // Trigger Logic: wrap_reg_control is written on clk, so it crosses into clk_core through two flops
+    // (meta, then sync); the edge detector (sync & ~dly) therefore never sees the first sampling flop.
+    reg sync_reg_control_meta = `MODEXPNG_DEFAULT_CONTROL; // first synchronizer stage, not used in any logic
+
+    always @(posedge clk_core or negedge rst_n)
+        if (!rst_n) {sync_reg_control_dly, sync_reg_control, sync_reg_control_meta} <= {3{`MODEXPNG_DEFAULT_CONTROL}};
+        else        {sync_reg_control_dly, sync_reg_control, sync_reg_control_meta} <= {sync_reg_control, sync_reg_control_meta, wrap_reg_control};
+
+    always @(posedge clk_core or negedge rst_n)
+        if (!rst_n) core_reg_control <= `MODEXPNG_DEFAULT_CONTROL; // registered copy of the edge pulse, wired to .next
+        else        core_reg_control <= sync_reg_control_rising;
+    
+    
+    //
+    // ModExpNG
+    //
+    modexpng_core_top modexpng
+    (
+        .clk                (clk_core),
+        .clk_bus            (clk),
+        
+        .rst_n              (core_rst_n),
+        
+        .next               (core_reg_control),
+        .valid              (core_reg_status),
+        
+        .crt_mode           (core_reg_mode),
+        
+        .word_index_last_n  (core_word_index_last_n),
+        .word_index_last_pq (core_word_index_last_pq),
+        
+        .bit_index_last_n   (core_bit_index_last_n),
+        .bit_index_last_pq  (core_bit_index_last_pq),
+        
+        .bus_cs             (cs),
+        .bus_we             (we),
+        .bus_addr           (address),
+        .bus_data_wr        (write_data),
+        .bus_data_rd        (core_read_data)
+    );
+
+
+    //
+    // Write Interface
+    //
+    always @(posedge clk or negedge rst_n)
+        // bus-side register file: asynchronous reset to defaults, otherwise written on clk
+        if (!rst_n) begin
+            //
+            wrap_reg_control   <= `MODEXPNG_DEFAULT_CONTROL;
+            wrap_reg_mode      <= `MODEXPNG_DEFAULT_MODE;
+            // the tasks also reload the derived last-word / last-bit index registers
+            write_modulus_bits(`MODEXPNG_DEFAULT_MODULUS_BITS);
+            write_exponent_bits(`MODEXPNG_DEFAULT_EXPONENT_BITS);
+            //
+        end else if (cs && we && addr_msb_is_wrap)
+            // only one bit of CONTROL and of MODE is stored; writes to any other address are ignored
+            case (addr_lsb)
+                ADDR_CONTROL:       wrap_reg_control <= write_data[CONTROL_NEXT_BIT];
+                ADDR_MODE:          wrap_reg_mode    <= write_data[MODE_FULLCRT_BIT];
+                ADDR_MODULUS_BITS:  write_modulus_bits(write_data[BIT_INDEX_W:0]);
+                ADDR_EXPONENT_BITS: write_exponent_bits(write_data[BIT_INDEX_W:0]);
+            endcase
+
+
+    //
+    // Update modulus width: the request is clamped to [MIN_OP_W, MAX_OP_W] and only bits
+    // [BIT_INDEX_W:ZEROES_BIT_INDEX_W] are kept (the read interface pads the low bits with zeroes)
+    function [BIT_INDEX_W:ZEROES_BIT_INDEX_W] fix_modulus_bits;            // returns the clamped, truncated width
+       input [BIT_INDEX_W:                 0] width;
+            if      (width < MIN_OP_W) fix_modulus_bits = MIN_OP_W[BIT_INDEX_W:ZEROES_BIT_INDEX_W];
+            else if (width > MAX_OP_W) fix_modulus_bits = MAX_OP_W[BIT_INDEX_W:ZEROES_BIT_INDEX_W];
+            else                       fix_modulus_bits = width   [BIT_INDEX_W:ZEROES_BIT_INDEX_W];
+    endfunction
+    
+    function [OP_ADDR_W-1:                 0] calc_modulus_num_words_n;    // zero-padded width minus one
+       input [BIT_INDEX_W:ZEROES_BIT_INDEX_W] width;
+       calc_modulus_num_words_n = {width, {(ZEROES_BIT_INDEX_W-WORD_MUX_W){1'b0}}} - 1'b1; // truncates msb
+    endfunction
+    
+    function [OP_ADDR_W-1:                 0] calc_modulus_num_words_pq;   // same, with one padding bit fewer (half the count) before the decrement
+       input [BIT_INDEX_W:ZEROES_BIT_INDEX_W] width;
+       calc_modulus_num_words_pq = {width, {(ZEROES_BIT_INDEX_W-WORD_MUX_W-1){1'b0}}} - 1'b1; // fits exactly
+    endfunction
+   
+    task write_modulus_bits;                                               // stores the fixed width and both derived last-word indices
+        input [BIT_INDEX_W:0] width;
+        begin
+            wrap_modulus_bits_msb   <= fix_modulus_bits(width);
+            wrap_word_index_last_n  <= calc_modulus_num_words_n(fix_modulus_bits(width));
+            wrap_word_index_last_pq <= calc_modulus_num_words_pq(fix_modulus_bits(width));
+        end
+    endtask
+    
+    
+    //
+    // Update exponent width: the request is clamped to [MIN_EXP_W, MAX_OP_W]
+    //
+    function [BIT_INDEX_W:0] fix_exponent_bits;                 // returns the clamped width
+       input [BIT_INDEX_W:0] width;
+            if      (width < MIN_EXP_W) fix_exponent_bits = MIN_EXP_W;
+            else if (width > MAX_OP_W ) fix_exponent_bits = MAX_OP_W;
+            else                        fix_exponent_bits = width;
+    endfunction
+    
+    function [BIT_INDEX_W-1:0] calc_exponent_num_bits_n;        // width minus one
+       input [BIT_INDEX_W  :0] width;
+       calc_exponent_num_bits_n = width - 1'b1;  // truncates msb
+    endfunction
+    
+    function [BIT_INDEX_W-1:0] calc_exponent_num_bits_pq;       // width with bit 0 dropped (halved), minus one
+       input [BIT_INDEX_W:  0] width;
+       calc_exponent_num_bits_pq = width[BIT_INDEX_W:1] - 1'b1; // fits exactly
+    endfunction
+   
+    task write_exponent_bits;                                   // stores the fixed width and both derived last-bit indices
+        input [BIT_INDEX_W:0] width;
+        begin
+            wrap_exponent_bits     <= fix_exponent_bits(width);
+            wrap_bit_index_last_n  <= calc_exponent_num_bits_n(fix_exponent_bits(width));
+            wrap_bit_index_last_pq <= calc_exponent_num_bits_pq(fix_exponent_bits(width));
+        end
+    endtask
+    
+    
+    //
+    // Read Interface
+    //
+    always @(posedge clk)
+        // registered read: wrap_read_data is refreshed only while cs is high and address[11:10] selects the wrapper
+        if (cs && addr_msb_is_wrap)
+            // decode on addr_lsb (10 bits, same width as the ADDR_* constants), exactly like the write interface
+            case (addr_lsb)
+                //
+                ADDR_NAME0:         wrap_read_data <= CORE_NAME0;
+                ADDR_NAME1:         wrap_read_data <= CORE_NAME1;
+                ADDR_VERSION:       wrap_read_data <= CORE_VERSION;
+                ADDR_CONTROL:       wrap_read_data <= {{30{1'b0}}, wrap_reg_control, 1'b0};
+                ADDR_STATUS:        wrap_read_data <= {{30{1'b0}}, wrap_reg_status,  1'b1};
+                // bit 0 of CONTROL and MODE always reads 0 (unused); bit 0 of STATUS is hardcoded to 1
+                ADDR_MODE:          wrap_read_data <= {{30{1'b0}}, wrap_reg_mode, 1'b0};
+                ADDR_MODULUS_BITS:  wrap_read_data <= {{(31-BIT_INDEX_W){1'b0}}, wrap_modulus_bits_msb, {ZEROES_BIT_INDEX_W{1'b0}}};
+                ADDR_EXPONENT_BITS: wrap_read_data <= {{(31-BIT_INDEX_W){1'b0}}, wrap_exponent_bits};
+                ADDR_BANK_BITS:     wrap_read_data <= MAX_OP_W;
+                ADDR_NUM_MULTS:     wrap_read_data <= NUM_MULTS;
+                // unmapped wrapper addresses read as zero
+                default:            wrap_read_data <= 32'h00000000;
+                //
+            endcase
+
+
+    //
+    // Register / Core Memory Selector: read_data comes from the wrapper registers or from the core
+    //
+    reg [1:0] addr_msb_last;    // address[11:10] delayed by one clk
+    
+    wire addr_msb_last_is_wrap = addr_msb_last == ADDR_MSB_WRAP;
+    
+    always @(posedge clk)       // updated on every clk edge, independent of cs
+        addr_msb_last <= addr_msb;
+
+    assign read_data = addr_msb_last_is_wrap ? wrap_read_data : core_read_data; // NOTE(review): presumably core_read_data also has one cycle of read latency - confirm in modexpng_core_top
+
+
+endmodule



More information about the Commits mailing list