[Cryptech-Commits] [user/shatov/modexpng] 19/21: Refactored the MMM module, now uses meaningful constant names from the include file, not hardcoded widths.

git at cryptech.is git at cryptech.is
Mon Jan 20 21:18:20 UTC 2020


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexpng.

commit b0cbd33df04f024a7dea928756f4937b79c91631
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Tue Jan 21 00:13:33 2020 +0300

    Refactored the MMM module, now uses meaningful constant names from the include
    file, not hardcoded widths.
---
 rtl/modexpng_mmm_dual.v | 733 +++++++++++++++++++++++++-----------------------
 1 file changed, 380 insertions(+), 353 deletions(-)

diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v
index 8d8b83d..bb1a55c 100644
--- a/rtl/modexpng_mmm_dual.v
+++ b/rtl/modexpng_mmm_dual.v
@@ -94,72 +94,73 @@ module modexpng_mmm_dual
     //
     // Ports
     //
-    input                        clk;
-    input                        rst_n;
+    input                                      clk;
+    input                                      rst_n;
     
-    input                        ena;
-    output                       rdy;
+    input                                      ena;
+    output                                     rdy;
     
-    input                   ladder_mode;
-    input [7:0] word_index_last;
-    input [7:0] word_index_last_minus1;
-    input       force_unity_b;
-    input only_reduce;
-    input just_multiply;
+    input                                      ladder_mode;
+    input  [  OP_ADDR_W                  -1:0] word_index_last;
+    input  [  OP_ADDR_W                  -1:0] word_index_last_minus1;
+    input                                      force_unity_b;
+    input                                      only_reduce;
+    input                                      just_multiply;
         
-    input [BANK_ADDR_W-1:0] sel_wide_in;
-    input [BANK_ADDR_W-1:0] sel_narrow_in;
+    input  [BANK_ADDR_W                  -1:0] sel_wide_in;
+    input  [BANK_ADDR_W                  -1:0] sel_narrow_in;
     
-    output                     rd_wide_xy_ena;
-    output                     rd_wide_xy_ena_aux;
-    output  [             BANK_ADDR_W -1:0] rd_wide_xy_bank;
-    output  [             BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
-    output  [ 8*NUM_MULTS/2-1:0] rd_wide_xy_addr;
-    output  [           8-1:0] rd_wide_xy_addr_aux;
-    input  [18*NUM_MULTS/2-1:0] rd_wide_x_din;
-    input  [18*NUM_MULTS/2-1:0] rd_wide_y_din;
-    input  [          18-1:0] rd_wide_x_din_aux;
-    input  [          18-1:0] rd_wide_y_din_aux;
+    output                                     rd_wide_xy_ena;
+    output                                     rd_wide_xy_ena_aux;
+    output [BANK_ADDR_W                  -1:0] rd_wide_xy_bank;
+    output [BANK_ADDR_W                  -1:0] rd_wide_xy_bank_aux;
+    
+    output [  OP_ADDR_W * NUM_MULTS_HALF -1:0] rd_wide_xy_addr;
+    output [  OP_ADDR_W                  -1:0] rd_wide_xy_addr_aux;
+    input  [ WORD_EXT_W * NUM_MULTS_HALF -1:0] rd_wide_x_din;
+    input  [ WORD_EXT_W * NUM_MULTS_HALF -1:0] rd_wide_y_din;    
+    input  [ WORD_EXT_W                  -1:0] rd_wide_x_din_aux;
+    input  [ WORD_EXT_W                  -1:0] rd_wide_y_din_aux;
 
-    output                    rd_narrow_xy_ena;
-    output [             BANK_ADDR_W -1:0] rd_narrow_xy_bank;
-    output [ 7:0] rd_narrow_xy_addr;
-    input  [18-1:0] rd_narrow_x_din;
-    input  [18-1:0] rd_narrow_y_din;
+    output                                     rd_narrow_xy_ena;
+    output [BANK_ADDR_W                  -1:0] rd_narrow_xy_bank;
+    output [  OP_ADDR_W                  -1:0] rd_narrow_xy_addr;
+    input  [ WORD_EXT_W                  -1:0] rd_narrow_x_din;
+    input  [ WORD_EXT_W                  -1:0] rd_narrow_y_din;
 
-    output [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
-    output [ 7:0] rcmb_wide_xy_addr;
-    output [17:0] rcmb_wide_x_dout;
-    output [17:0] rcmb_wide_y_dout;
-    output        rcmb_wide_xy_valid;
+    output [BANK_ADDR_W                  -1:0] rcmb_wide_xy_bank;
+    output [  OP_ADDR_W                  -1:0] rcmb_wide_xy_addr;
+    output [ WORD_EXT_W                  -1:0] rcmb_wide_x_dout;
+    output [ WORD_EXT_W                  -1:0] rcmb_wide_y_dout;
+    output                                     rcmb_wide_xy_valid;
 
-    output [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
-    output [ 7:0] rcmb_narrow_xy_addr;
-    output [17:0] rcmb_narrow_x_dout;
-    output [17:0] rcmb_narrow_y_dout;
-    output        rcmb_narrow_xy_valid;
+    output [BANK_ADDR_W                  -1:0] rcmb_narrow_xy_bank;
+    output [  OP_ADDR_W                  -1:0] rcmb_narrow_xy_addr;
+    output [ WORD_EXT_W                  -1:0] rcmb_narrow_x_dout;
+    output [ WORD_EXT_W                  -1:0] rcmb_narrow_y_dout;
+    output                                     rcmb_narrow_xy_valid;
 
-    output [BANK_ADDR_W -1:0] rcmb_xy_bank;
-    output [ 7:0] rcmb_xy_addr;
-    output [17:0] rcmb_x_dout;
-    output [17:0] rcmb_y_dout;
-    output        rcmb_xy_valid;
+    output [BANK_ADDR_W                  -1:0] rcmb_xy_bank;
+    output [  OP_ADDR_W                  -1:0] rcmb_xy_addr;
+    output [ WORD_EXT_W                  -1:0] rcmb_x_dout;
+    output [ WORD_EXT_W                  -1:0] rcmb_y_dout;
+    output                                     rcmb_xy_valid;
     
-    output        rdct_ena;
-    input         rdct_rdy;
+    output                                     rdct_ena;
+    input                                      rdct_rdy;
 
     
     //
     // FSM Declaration
     //
-    reg [MMM_FSM_STATE_W-1:0] fsm_state = MMM_FSM_STATE_IDLE;
-    reg [MMM_FSM_STATE_W-1:0] fsm_state_next;
+    reg  [MMM_FSM_STATE_W -1:0] fsm_state = MMM_FSM_STATE_IDLE;
+    reg  [MMM_FSM_STATE_W -1:0] fsm_state_next;
 
-    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_idle;    
-    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_square;
-    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_triangle;
-    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_rectangle;
-    wire [MMM_FSM_STATE_W-1:0] fsm_state_after_square_holdoff;
+    wire [MMM_FSM_STATE_W -1:0] fsm_state_after_idle;    
+    wire [MMM_FSM_STATE_W -1:0] fsm_state_after_mult_square;
+    wire [MMM_FSM_STATE_W -1:0] fsm_state_after_mult_triangle;
+    wire [MMM_FSM_STATE_W -1:0] fsm_state_after_mult_rectangle;
+    wire [MMM_FSM_STATE_W -1:0] fsm_state_after_square_holdoff;
 
     
     //
@@ -174,48 +175,55 @@ module modexpng_mmm_dual
     //
     // Storage Control Interface
     //
-    reg         wide_xy_ena = 1'b0;
-    reg         wide_xy_ena_aux = 1'b0;
-    reg  [ BANK_ADDR_W -1:0] wide_xy_bank;
-    reg  [ BANK_ADDR_W -1:0] wide_xy_bank_aux;
-    reg  [ 8-1:0] wide_xy_addr[0:3];
-    reg  [ 8-1:0] wide_xy_addr_aux;
+    reg                     wide_xy_ena = 1'b0;
+    reg                     wide_xy_ena_aux = 1'b0;
+    reg  [BANK_ADDR_W -1:0] wide_xy_bank;
+    reg  [BANK_ADDR_W -1:0] wide_xy_bank_aux;
+    reg  [  OP_ADDR_W -1:0] wide_xy_addr[0:NUM_MULTS_HALF-1];
+    reg  [  OP_ADDR_W -1:0] wide_xy_addr_aux;
+    
+    reg                     narrow_xy_ena = 1'b0;
+    reg  [BANK_ADDR_W -1:0] narrow_xy_bank;
+    reg  [  OP_ADDR_W -1:0] narrow_xy_addr;
+    reg  [  OP_ADDR_W -1:0] narrow_xy_addr_dly;
+    wire [  OP_ADDR_W -1:0] narrow_xy_addr_inc = narrow_xy_addr + 1'b1;
     
-    reg         narrow_xy_ena = 1'b0;
-    reg  [ BANK_ADDR_W -1:0] narrow_xy_bank;
-    reg  [ 7:0] narrow_xy_addr;
-    reg  [ 7:0] narrow_xy_addr_dly;
     
-    assign rd_wide_xy_ena  = wide_xy_ena;
+    //
+    // Outmap Port Mapping
+    //
+    assign rd_wide_xy_ena      = wide_xy_ena;
     assign rd_wide_xy_ena_aux  = wide_xy_ena_aux;
-    assign rd_wide_xy_bank = wide_xy_bank;
+    assign rd_wide_xy_bank     = wide_xy_bank;
     assign rd_wide_xy_bank_aux = wide_xy_bank_aux;
     assign rd_wide_xy_addr_aux = wide_xy_addr_aux;
 
-    assign rd_narrow_xy_ena  = narrow_xy_ena;
-    assign rd_narrow_xy_bank = narrow_xy_bank;
-    assign rd_narrow_xy_addr = narrow_xy_addr;
+    assign rd_narrow_xy_ena    = narrow_xy_ena;
+    assign rd_narrow_xy_bank   = narrow_xy_bank;
+    assign rd_narrow_xy_addr   = narrow_xy_addr;
 
     genvar z;
-    generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+    generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
         begin : gen_rd_wide_xy_addr
-            assign rd_wide_xy_addr[8*z+:8] = wide_xy_addr[z];
+            assign rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W] = wide_xy_addr[z];
         end
     endgenerate
         
+    
     //
     // Column Counter
     //
-    reg  [4:0] col_index;       // current column index
-    reg  [4:0] col_index_prev;  // delayed column index value
-    reg  [4:0] col_index_last;  // index of the very last column
-    reg  [4:0] col_index_next;  // precomputed next column index
-    reg        col_is_last;     // flag set during the very last column
+    reg [COL_INDEX_W -1:0] col_index;      // current column index
+    reg [COL_INDEX_W -1:0] col_index_prev; // delayed column index value
+    reg [COL_INDEX_W -1:0] col_index_last; // index of the very last column
+    reg [COL_INDEX_W -1:0] col_index_next; // precomputed next column index
+    reg                    col_is_last;    // flag set during the very last column
 
     always @(posedge clk)
         //
         col_index_prev <= col_index;
 
+    
     //
     // Column Counter Increment Logic
     //
@@ -226,9 +234,9 @@ module modexpng_mmm_dual
             MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
             MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
-                col_index       <= 5'd0;
-                col_index_last  <= word_index_last[7:3];
-                col_index_next  <= 5'd1;
+                col_index       <= COL_INDEX_ZERO;
+                col_index_last  <= word_index_last[OP_ADDR_W-1:MAC_INDEX_W];
+                col_index_next  <= COL_INDEX_ONE;
                 col_is_last     <= 1'b0;
                 
             end
@@ -236,9 +244,9 @@ module modexpng_mmm_dual
             MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
             MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
             MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
-                col_index <= col_index_next;
-                col_is_last <= col_index_next == col_index_last;
-                col_index_next <= col_index_next == col_index_last ? 5'd0 : col_index_next + 5'd1;   
+                col_index      <= col_index_next;
+                col_is_last    <= col_index_next == col_index_last;
+                col_index_next <= col_index_next == col_index_last ? COL_INDEX_ZERO : col_index_next + 1'b1;
             end
             //
         endcase
@@ -261,9 +269,10 @@ module modexpng_mmm_dual
     reg  rectangle_surely_done_flop = 1'b0;
     reg  rectangle_tardy_done_flop = 1'b0;
 
-    assign square_almost_done_comb = narrow_xy_addr == word_index_last_minus1;
-    assign triangle_almost_done_comb = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index);
+    assign square_almost_done_comb    = narrow_xy_addr == word_index_last_minus1;
+    assign triangle_almost_done_comb  = narrow_xy_addr == {col_index, word_index_last_minus1[MAC_INDEX_W-1:0]};
     assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1;
+    
 
     //
     // Square Completion Flags
@@ -273,11 +282,9 @@ module modexpng_mmm_dual
         case (fsm_state)
             //
             MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:
-                square_almost_done_flop <= square_almost_done_comb;
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: square_almost_done_flop <= square_almost_done_comb;
             //
-            default:
-               square_almost_done_flop <= 1'b0;
+            default:                              square_almost_done_flop <= 1'b0;
            //
         endcase
         //
@@ -293,11 +300,9 @@ module modexpng_mmm_dual
         case (fsm_state)
             //
             MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
-                triangle_almost_done_flop <= triangle_almost_done_comb;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: triangle_almost_done_flop <= triangle_almost_done_comb;
             //
-            default:
-                triangle_almost_done_flop <= 1'b0;
+            default:                                triangle_almost_done_flop <= 1'b0;
             //
         endcase
         //
@@ -314,11 +319,9 @@ module modexpng_mmm_dual
         case (fsm_state)
             //
             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
-                rectangle_almost_done_flop <= rectangle_almost_done_comb;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: rectangle_almost_done_flop <= rectangle_almost_done_comb;
             //
-            default:
-                rectangle_almost_done_flop <= 1'b0;
+            default:                                 rectangle_almost_done_flop <= 1'b0;
             //
         endcase
         //
@@ -341,29 +344,28 @@ module modexpng_mmm_dual
             case (fsm_state_next)
                 //
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:   narrow_xy_addr <= 8'd0;
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= OP_ADDR_ZERO;
+                //
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0;
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr_inc : OP_ADDR_ZERO;
                 //
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ?
-                    8'd0 :  narrow_xy_addr + 1'b1;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ? OP_ADDR_ZERO : narrow_xy_addr_inc;
                 //
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
-                    8'd1 :  narrow_xy_addr + 1'b1;            
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ? OP_ADDR_ONE : narrow_xy_addr_inc;
                 //
-                default:                            narrow_xy_addr <= 8'dX;
+                default: narrow_xy_addr <= OP_ADDR_DNC;
                 //
             endcase
             //
@@ -376,50 +378,53 @@ module modexpng_mmm_dual
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   narrow_xy_bank <= sel_narrow_in;
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= sel_narrow_in;
                 //
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ?
-                    BANK_NARROW_EXT : BANK_NARROW_COEFF;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ? BANK_NARROW_EXT : BANK_NARROW_COEFF;
                 //
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
-                    BANK_NARROW_EXT : BANK_NARROW_Q;            
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ? BANK_NARROW_EXT : BANK_NARROW_Q;            
                 //
-                default:                            narrow_xy_bank <= 2'bXX;
+                default: narrow_xy_bank <= BANK_DNC;
                 //
             endcase        
             //
+            // Narrow Enable
+            //
             case (fsm_state_next)
                 //
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:   narrow_xy_ena <= 1'b1;
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   narrow_xy_ena <= ~square_almost_done_flop;
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:   narrow_xy_ena <= 1'b1;
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop; 
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:   narrow_xy_ena <= 1'b1;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
+                //
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop;
+                //
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop; 
+                //
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   narrow_xy_ena <= ~rectangle_surely_done_flop;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop;
                 //
-                default:                              narrow_xy_ena <= 1'b0;
+                default: narrow_xy_ena <= 1'b0;
                 //
             endcase
             //
@@ -430,23 +435,19 @@ module modexpng_mmm_dual
     // Wide Storage Control Logic
     //
 
-    wire [2:0] wide_offset_rom[0:3];
+    wire [MAC_INDEX_W-1:0] wide_offset_rom[0:NUM_MULTS_HALF-1];
     
     generate for (z=1; z<NUM_MULTS; z=z+2)
         begin : gen_wide_offset_rom
-            assign wide_offset_rom[(z-1)/2] = z[2:0];
+            assign wide_offset_rom[(z-1)/2] = z[MAC_INDEX_W-1:0];
         end
     endgenerate    
 
-    function  [7:0] wide_xy_addr_next;
-        input [7:0] wide_xy_addr_current;
-        input [7:0] wide_xy_addr_last;
-        begin
-            if (wide_xy_addr_current > 8'd0)
-                wide_xy_addr_next = wide_xy_addr_current - 1'b1;
-            else
-                wide_xy_addr_next = wide_xy_addr_last;
-        end
+    function  [OP_ADDR_W-1:0] wide_xy_addr_next;
+        input [OP_ADDR_W-1:0] wide_xy_addr_current;
+        input [OP_ADDR_W-1:0] wide_xy_addr_last;
+        if (wide_xy_addr_current > OP_ADDR_ZERO) wide_xy_addr_next = wide_xy_addr_current - 1'b1;
+        else                                     wide_xy_addr_next = wide_xy_addr_last;
     endfunction
     
     integer j;
@@ -459,128 +460,143 @@ module modexpng_mmm_dual
             //
             // Wide Address
             //        
-            for (j=0; j<(NUM_MULTS/2); j=j+1)
+            for (j=0; j<NUM_MULTS_HALF; j=j+1)
                 //
                 case (fsm_state_next)
                     //
-                    // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
+                    // another way to code this is to extend the look-up table to 8 entries and just use shifts
+                    // instead of subtractions (this requires further research, let's maybe not do it right now)
+                    //
+                    MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {OP_ADDR_ZERO, wide_offset_rom[j]};
+                    //
+                    MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
                     //
-                    MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:   wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
-                    MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:   wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
                     MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                     MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                     MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                    MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
-                    //
-                    MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:   wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
-                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:   wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
                     MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                     MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                     MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
-                    //
-                    MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:   wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
-                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT:   wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
+                    MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
                     MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                     MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                     MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
+                    MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
+                    //
+                    default: wide_xy_addr[j] <= OP_ADDR_DNC;
                     //
-                    default:                            wide_xy_addr[j] <= 8'dX;
                 endcase
             //
             // Wide Aux Address
             //
             case (fsm_state_next)
                 //
-                // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
+                // there's a potentially more efficient way to code the switch (see above) 
+                //
+                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= OP_ADDR_ONE;
                 //
-                MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
-                //
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:   wide_xy_addr_aux <= {5'd0, 3'd1};
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
+                //
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= OP_ADDR_DNC;
                 //
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT:   wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0};
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:    wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX;
-                //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ?
-                    //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4];
+                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : OP_ADDR_DNC;
+                //
+                default: wide_xy_addr_aux <= OP_ADDR_DNC;
                 //
-                default:                            wide_xy_addr_aux <= 8'dX;
             endcase
             //
             // Wide Bank
             //
             case (fsm_state_next)
+                //
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:    wide_xy_bank <= sel_wide_in;
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank <= sel_wide_in;
+                //
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:  wide_xy_bank <= BANK_WIDE_L;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank <= BANK_WIDE_L;
+                //
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_bank <= BANK_WIDE_L;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_L;
+                //
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,    
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   wide_xy_bank <= BANK_WIDE_N;            
-                default:                             wide_xy_bank <= 3'bXXX;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_bank <= BANK_WIDE_N;
+                //
+                default: wide_xy_bank <= BANK_DNC;
+                //
             endcase
             //
             // Wide Aux Bank
             //
             case (fsm_state_next)
+                //
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:   wide_xy_bank_aux <= sel_wide_in;
+                MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= sel_wide_in;
+                //
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H;
+                //
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   wide_xy_bank_aux <= BANK_WIDE_L;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_L;
+                //
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,    
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:   if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's)
-                    case (rcmb_xy_bank)
-                        BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L;
-                        BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H;
-                        //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX
-                        default: wide_xy_bank_aux <= 3'bXXX; 
-                     endcase
-                     else wide_xy_bank_aux <= 3'bXXX;
-                default:                            wide_xy_bank_aux <= 3'bXXX;
+                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: begin
+                    wide_xy_bank_aux <= BANK_DNC;
+                    if (rcmb_xy_valid)
+                        case (rcmb_xy_bank)
+                            BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L;
+                            BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H;
+                        endcase
+                    end
+                    //
+                default: wide_xy_bank_aux <= BANK_DNC;
+                //
             endcase
             //
             // Wide Enable
             //
             case (fsm_state_next)
+                //
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
@@ -598,27 +614,34 @@ module modexpng_mmm_dual
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   wide_xy_ena <= 1'b1;
-                default:                               wide_xy_ena <= 1'b0;
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1;
+                //
+                default: wide_xy_ena <= 1'b0;
+                //
             endcase
             //
             // Wide Aux Enable
             //
             case (fsm_state_next)
+                //
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  wide_xy_ena_aux <= 1'b1;
-                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1;
+                MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1;
+                //
+                MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;
+                //
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
                 MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
-                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:   wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML);
-                default:                            wide_xy_ena_aux <= 1'b0;
+                MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;
+                //
+                default: wide_xy_ena_aux <= 1'b0;
+                //
             endcase
             //
         end
@@ -635,79 +658,66 @@ module modexpng_mmm_dual
     //
     // DSP Array Logic
     //
-    reg             dsp_xy_ce_a = 1'b0;
-    reg             dsp_xy_ce_b = 1'b0;
+    reg             dsp_xy_ce_a     = 1'b0;
+    reg             dsp_xy_ce_b     = 1'b0;
     reg             dsp_xy_ce_b_dly = 1'b0;
-    reg             dsp_xy_ce_m = 1'b0;
-    reg             dsp_xy_ce_p = 1'b0;
-    reg             dsp_xy_ce_mode = 1'b0;
+    reg             dsp_xy_ce_m     = 1'b0;
+    reg             dsp_xy_ce_p     = 1'b0;
+    reg             dsp_xy_ce_mode  = 1'b0;
     
-    reg  [9   -1:0] dsp_xy_mode_z = {9{1'b1}};
+    reg  [             NUM_MULTS_AUX      -1:0] dsp_xy_mode_z = {NUM_MULTS_AUX{1'b1}};
     
-    wire [5*18-1:0] dsp_x_a;
-    wire [5*18-1:0] dsp_y_a;
+    wire [WORD_EXT_W * NUM_MULTS_HALF_AUX -1:0] dsp_x_a;
+    wire [WORD_EXT_W * NUM_MULTS_HALF_AUX -1:0] dsp_y_a;
 
-    reg  [1*16-1:0] dsp_x_b;
-    reg  [1*16-1:0] dsp_y_b;
+    reg  [WORD_W                          -1:0] dsp_x_b;
+    reg  [WORD_W                          -1:0] dsp_y_b;
     
-    reg  [ 1:0] dsp_xy_b_carry;
+    reg  [CARRY_W                         -1:0] dsp_xy_b_carry;
 
-    wire [9*47-1:0] dsp_x_p;            
-    wire [9*47-1:0] dsp_y_p;
-        
-    //generate for (z=0; z<(NUM_MULTS/2); z=z+1)
-        //begin : gen_dsp_xy_a_split
-            //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z];
-            //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z];
-        //end
-    //endgenerate
+    wire [MAC_W      * NUM_MULTS_AUX      -1:0] dsp_x_p;            
+    wire [MAC_W      * NUM_MULTS_AUX      -1:0] dsp_y_p;
     
     assign dsp_x_a = {rd_wide_x_din_aux, rd_wide_x_din};
     assign dsp_y_a = {rd_wide_y_din_aux, rd_wide_y_din};
-    
-    //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux;
-    //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux;
             
     always @(posedge clk)
         //
         dsp_xy_ce_b_dly <= dsp_xy_ce_b;
-    
 
     modexpng_dsp_array_block dsp_array_block_x
     (
-        .clk            (clk),
+        .clk        (clk),
         
-        .ce_a           (dsp_xy_ce_a),
-        .ce_b           (dsp_xy_ce_b),
-        .ce_m           (dsp_xy_ce_m),
-        .ce_p           (dsp_xy_ce_p),
-        .ce_mode        (dsp_xy_ce_mode),
+        .ce_a       (dsp_xy_ce_a),
+        .ce_b       (dsp_xy_ce_b),
+        .ce_m       (dsp_xy_ce_m),
+        .ce_p       (dsp_xy_ce_p),
+        .ce_mode    (dsp_xy_ce_mode),
 
-        .mode_z         (dsp_xy_mode_z),
+        .mode_z     (dsp_xy_mode_z),
         
-        .a              (dsp_x_a),
-        .b              (dsp_x_b),
-        .p              (dsp_x_p)
+        .a          (dsp_x_a),
+        .b          (dsp_x_b),
+        .p          (dsp_x_p)
     );
 
     modexpng_dsp_array_block dsp_array_block_y
     (
-        .clk            (clk),
+        .clk        (clk),
         
-        .ce_a           (dsp_xy_ce_a),
-        .ce_b           (dsp_xy_ce_b),
-        .ce_m           (dsp_xy_ce_m),
-        .ce_p           (dsp_xy_ce_p),
-        .ce_mode        (dsp_xy_ce_mode),
+        .ce_a       (dsp_xy_ce_a),
+        .ce_b       (dsp_xy_ce_b),
+        .ce_m       (dsp_xy_ce_m),
+        .ce_p       (dsp_xy_ce_p),
+        .ce_mode    (dsp_xy_ce_mode),
 
-        .mode_z         (dsp_xy_mode_z),
+        .mode_z     (dsp_xy_mode_z),
         
-        .a              (dsp_y_a),
-        .b              (dsp_y_b),
-        .p              (dsp_y_p)
+        .a          (dsp_y_a),
+        .b          (dsp_y_b),
+        .p          (dsp_y_p)
     );
-
-
    
 
     //
@@ -742,6 +752,7 @@ module modexpng_mmm_dual
             //
         end    
         
+    
     //
     // DSP Feed Logic
     //
@@ -758,76 +769,87 @@ module modexpng_mmm_dual
         case (fsm_state)
             MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
             MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_merge_xy_b_first <= 1'b1;
-            default:                          dsp_merge_xy_b_first <= 1'b0;
+            default:                              dsp_merge_xy_b_first <= 1'b0;
         endcase
         //
     end
 
+    
     //
-    // On-the-fly Carry Recombination
+    // On-the-fly Carry Recombination Logic
     //
-    wire [17:0] rd_narrow_x_din_carry = rd_narrow_x_din + {{16{1'b0}}, dsp_xy_b_carry};
-    wire [17:0] rd_narrow_y_din_carry = rd_narrow_y_din + {{16{1'b0}}, dsp_xy_b_carry};
-    wire [17:0] rd_narrow_xy_din_carry_mux = ladder_mode ? rd_narrow_y_din_carry : rd_narrow_x_din_carry;
+    wire [WORD_EXT_W-1:0] rd_narrow_x_din_carry = rd_narrow_x_din + {WORD_ZERO, dsp_xy_b_carry};
+    wire [WORD_EXT_W-1:0] rd_narrow_y_din_carry = rd_narrow_y_din + {WORD_ZERO, dsp_xy_b_carry};
+    wire [WORD_EXT_W-1:0] rd_narrow_xy_din_carry_mux = ladder_mode ? rd_narrow_y_din_carry : rd_narrow_x_din_carry;
   
-    wire [15:0] rd_narrow_xy_dout_carry_mux_or_unity = !force_unity_b ?
-        rd_narrow_xy_din_carry_mux[15:0] : dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO;
+    wire [WORD_W-1:0] rd_narrow_xy_dout_carry_mux_or_unity = !force_unity_b ?
+        rd_narrow_xy_din_carry_mux[WORD_W-1:0] : dsp_merge_xy_b_first ? WORD_ONE : WORD_ZERO;
   
-    always @(posedge clk)
+    always @(posedge clk) begin // registers the B operands of both DSP arrays and the running carry
         //
-        if (narrow_xy_ena_dly2) begin // rewrite
+        dsp_x_b <= WORD_DNC;
+        dsp_y_b <= WORD_DNC;
+        // default: no pending carry (overridden below in merge mode)
+        dsp_xy_b_carry <= CARRY_ZERO;
+        // the defaults above hold whenever narrow_xy_ena_dly2 is low
+        if (narrow_xy_ena_dly2) begin
             //
             if (!dsp_merge_xy_b) begin
-                dsp_x_b <= rd_narrow_x_din[15:0];
-                dsp_y_b <= rd_narrow_y_din[15:0];
-                dsp_xy_b_carry <= 2'b00;
+                // plain mode: forward the lower WORD_W bits of each narrow word unchanged
+                dsp_x_b <= rd_narrow_x_din[WORD_W-1:0];
+                dsp_y_b <= rd_narrow_y_din[WORD_W-1:0];
+                //
+                dsp_xy_b_carry <= CARRY_ZERO;
+                //
             end else begin
+                // merge mode: X and Y both receive the same carry-adjusted (or forced-unity) word
                 dsp_x_b <= rd_narrow_xy_dout_carry_mux_or_unity;
                 dsp_y_b <= rd_narrow_xy_dout_carry_mux_or_unity;
-                dsp_xy_b_carry <= rd_narrow_xy_din_carry_mux[17:16];
+                // keep the bits above WORD_W; they are added to the narrow words on the next cycle
+                dsp_xy_b_carry <= rd_narrow_xy_din_carry_mux[WORD_EXT_W-1:WORD_W];
+                //
             end                 
             //
-        end else begin
-            //
-            dsp_x_b <= WORD_DNC;
-            dsp_y_b <= WORD_DNC;
-            //
-            dsp_xy_b_carry <= 2'b00;
-            //
         end
+        //
+    end
 
+
+    //
+    // DSP Mode Logic
+    //
+    reg  [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv1 = {NUM_MULTS_AUX{1'b1}};
+    reg  [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv2 = {NUM_MULTS_AUX{1'b1}};
+    reg  [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv3 = {NUM_MULTS_AUX{1'b1}};
+    reg  [NUM_MULTS_AUX -1:0] dsp_xy_mode_z_adv4 = {NUM_MULTS_AUX{1'b1}};
         
-    reg  [9   -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}};
-    reg  [9   -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}};
-    reg  [9   -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}};
-    reg  [9   -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}};
-        
-         function  [NUM_MULTS:0] calc_mac_mode_z_square;
-        input [        4:0] col_index_value;
-        input [        7:0] narrow_xy_addr_value;
-        begin
-            if (narrow_xy_addr_value[7:3] == col_index_value)
-                case (narrow_xy_addr_value[2:0])
-                    3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110};
-                    3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
-                    3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
-                    3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
-                    3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
-                    3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
-                    3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
-                    3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
-                endcase
-            else
-                calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
-        end
+    function  [NUM_MULTS_AUX     -1:0] calc_mac_mode_z_square;
+        // Returns the value loaded into dsp_xy_mode_z_adv4 during the square BUSY
+        // states: all ones, except that when the upper address bits equal
+        // col_index_value the bit selected by the low MAC_INDEX_W address bits is
+        // cleared. The top bit can never be cleared while 2**MAC_INDEX_W is less
+        // than NUM_MULTS_AUX (3-bit index, 9-bit vector in the replaced code).
+        //
+        // col_index_value is sized to match the address slice it is compared with,
+        // rather than reusing NUM_MULTS_HALF_AUX, which sizes the DSP "a" buses.
+        input [OP_ADDR_W-MAC_INDEX_W-1:0] col_index_value;
+        input [OP_ADDR_W            -1:0] narrow_xy_addr_value;
+        //
+        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
+            calc_mac_mode_z_square = ~({{(NUM_MULTS_AUX-1){1'b0}}, 1'b1}
+                                       << narrow_xy_addr_value[MAC_INDEX_W-1:0]);
+        else
+            calc_mac_mode_z_square = {NUM_MULTS_AUX{1'b1}};
+    endfunction
     
-    function  [NUM_MULTS:0] calc_mac_mode_z_rectangle;
-        input [        4:0] col_index_value;
-        input [        7:0] narrow_xy_addr_value;
+    function  [NUM_MULTS_AUX      -1:0] calc_mac_mode_z_rectangle;
+        //
+        input [NUM_MULTS_HALF_AUX -1:0] col_index_value;
+        input [OP_ADDR_W          -1:0] narrow_xy_addr_value;
+        //
         begin
-            if (narrow_xy_addr_value[7:3] == col_index_value)
-                case (narrow_xy_addr_value[2:0])
+            if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
+                case (narrow_xy_addr_value[MAC_INDEX_W-1:0])
                     3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110};
                     3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101};
                     3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011};
@@ -837,41 +859,45 @@ module modexpng_mmm_dual
                     3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111};
                     3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111};
                 endcase
-            else
-                calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}};
+            else            calc_mac_mode_z_rectangle = {1'b1, 8'b11111111};
         end
     endfunction
         
     always @(posedge clk)
         //
         case (fsm_state_next)
+            // every TRIG state (square, triangle, rectangle) loads all zeroes
             MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:       dsp_xy_mode_z_adv4 <= {9{1'b0}};
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:       dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly);
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
             MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}};    // so easy
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     dsp_xy_mode_z_adv4 <= {9{1'b1}};
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}};    // so easy
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {NUM_MULTS_AUX{1'b0}};
+            // square BUSY: pattern computed from the previous column index and the delayed narrow address
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly);
+            // triangle BUSY: all ones
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {NUM_MULTS_AUX{1'b1}};
+            // rectangle BUSY: same inputs, evaluated by the rectangle helper function
             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:     dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly);
-            default:                                dsp_xy_mode_z_adv4 <= {9{1'b1}};
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly);
+            // any other next state: all ones
+            default: dsp_xy_mode_z_adv4 <= {NUM_MULTS_AUX{1'b1}};
+            //
         endcase
 
     always @(posedge clk) begin
+        // register chain: adv4 -> adv3 -> adv2 -> adv1 -> dsp_xy_mode_z, one stage per clock
         dsp_xy_mode_z <= dsp_xy_mode_z_adv1;
         //
         dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2;
         dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3;
         dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4;
+        //
     end
-        
 
 
-    
-    
     //
     // Recombinator
     //
@@ -880,43 +906,43 @@ module modexpng_mmm_dual
 
     modexpng_recombinator_block recombinator_block
     (
-        .clk                            (clk),
-        .rst_n                          (rst_n),
+        .clk                    (clk),
+        .rst_n                  (rst_n),
         
-        .ena                            (rcmb_ena),
-        .rdy                            (rcmb_rdy),
+        .ena                    (rcmb_ena),
+        .rdy                    (rcmb_rdy),
         
-        .fsm_state_next                 (fsm_state_next),
+        .fsm_state_next         (fsm_state_next),
         
-        .word_index_last                (word_index_last),
+        .word_index_last        (word_index_last),
         
-        .dsp_xy_ce_p                    (dsp_xy_ce_p),
-        .dsp_x_p                        (dsp_x_p),
-        .dsp_y_p                        (dsp_y_p),
+        .dsp_xy_ce_p            (dsp_xy_ce_p),
+        .dsp_x_p                (dsp_x_p),
+        .dsp_y_p                (dsp_y_p),
         
-        .col_index                      (col_index),
-        .col_index_last                 (col_index_last),
+        .col_index              (col_index),
+        .col_index_last         (col_index_last),
         
-        .rd_narrow_xy_addr                 (narrow_xy_addr),
-        .rd_narrow_xy_bank                 (narrow_xy_bank),
+        .rd_narrow_xy_addr      (narrow_xy_addr),
+        .rd_narrow_xy_bank      (narrow_xy_bank),
         
-        .rcmb_wide_xy_bank          (rcmb_wide_xy_bank),
-        .rcmb_wide_xy_addr          (rcmb_wide_xy_addr),
-        .rcmb_wide_x_dout           (rcmb_wide_x_dout),
-        .rcmb_wide_y_dout           (rcmb_wide_y_dout),
-        .rcmb_wide_xy_valid         (rcmb_wide_xy_valid),
+        .rcmb_wide_xy_bank      (rcmb_wide_xy_bank),
+        .rcmb_wide_xy_addr      (rcmb_wide_xy_addr),
+        .rcmb_wide_x_dout       (rcmb_wide_x_dout),
+        .rcmb_wide_y_dout       (rcmb_wide_y_dout),
+        .rcmb_wide_xy_valid     (rcmb_wide_xy_valid),
         
-        .rcmb_narrow_xy_bank        (rcmb_narrow_xy_bank),
-        .rcmb_narrow_xy_addr        (rcmb_narrow_xy_addr),
-        .rcmb_narrow_x_dout         (rcmb_narrow_x_dout),
-        .rcmb_narrow_y_dout         (rcmb_narrow_y_dout),
-        .rcmb_narrow_xy_valid       (rcmb_narrow_xy_valid),
+        .rcmb_narrow_xy_bank    (rcmb_narrow_xy_bank),
+        .rcmb_narrow_xy_addr    (rcmb_narrow_xy_addr),
+        .rcmb_narrow_x_dout     (rcmb_narrow_x_dout),
+        .rcmb_narrow_y_dout     (rcmb_narrow_y_dout),
+        .rcmb_narrow_xy_valid   (rcmb_narrow_xy_valid),
         
-        .rdct_narrow_xy_bank        (rcmb_xy_bank),
-        .rdct_narrow_xy_addr        (rcmb_xy_addr),
-        .rdct_narrow_x_dout         (rcmb_x_dout),
-        .rdct_narrow_y_dout         (rcmb_y_dout),
-        .rdct_narrow_xy_valid       (rcmb_xy_valid)
+        .rdct_narrow_xy_bank    (rcmb_xy_bank),
+        .rdct_narrow_xy_addr    (rcmb_xy_addr),
+        .rdct_narrow_x_dout     (rcmb_x_dout),
+        .rdct_narrow_y_dout     (rcmb_y_dout),
+        .rdct_narrow_xy_valid   (rcmb_xy_valid)
 
     );
     
@@ -941,11 +967,11 @@ module modexpng_mmm_dual
     //
     // FSM Transition Logic
     //
-    assign fsm_state_after_idle = !only_reduce ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT;
-    assign fsm_state_after_mult_square    = col_is_last ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF   : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT;
-    assign fsm_state_after_mult_triangle  = col_is_last ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
-    assign fsm_state_after_mult_rectangle = col_is_last ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
-    assign fsm_state_after_square_holdoff = just_multiply ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT;
+    assign fsm_state_after_idle           = !only_reduce   ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT  ;
+    assign fsm_state_after_mult_square    =  col_is_last   ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF    : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT    ;
+    assign fsm_state_after_mult_triangle  =  col_is_last   ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF  : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT  ;
+    assign fsm_state_after_mult_rectangle =  col_is_last   ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT ;
+    assign fsm_state_after_square_holdoff =  just_multiply ? MMM_FSM_STATE_STOP                   : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT  ;
      
 
     always @* begin
@@ -953,43 +979,43 @@ module modexpng_mmm_dual
         fsm_state_next = MMM_FSM_STATE_IDLE;
         //
         case (fsm_state)
-            MMM_FSM_STATE_IDLE:                   fsm_state_next = ena                   ? fsm_state_after_idle /*MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT*/ : MMM_FSM_STATE_IDLE;
+            MMM_FSM_STATE_IDLE:                      fsm_state_next = ena ?            fsm_state_after_idle                    : MMM_FSM_STATE_IDLE;
                         
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
-            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY;
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT:    fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG    ;
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG:    fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY    ;
+            MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY:    fsm_state_next = square_done ?    MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT    : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY;
             
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
-            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square    : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY;
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT:    fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG    ;
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG:    fsm_state_next =                  MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY    ;
+            MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:    fsm_state_next = square_done ?    fsm_state_after_mult_square             : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY;
             
-            MMM_FSM_STATE_MULT_SQUARE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? fsm_state_after_square_holdoff : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF;
+            MMM_FSM_STATE_MULT_SQUARE_HOLDOFF:       fsm_state_next = rcmb_rdy ?       fsm_state_after_square_holdoff          : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF;
 
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;     
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT:  fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG  ;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG:  fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY  ;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:  fsm_state_next = triangle_done ?  MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT  : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;     
             
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ;
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
-            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT:  fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG  ;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:  fsm_state_next =                  MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY  ;
+            MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:  fsm_state_next = triangle_done ?  fsm_state_after_mult_triangle           : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
             
-            MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF;
+            MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF:     fsm_state_next = rcmb_rdy ?       MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF;
 
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
             MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;     
             
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next =                         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next =                         MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
-            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next =                  MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
+            MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle          : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
             
-            MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? MMM_FSM_STATE_WAIT_REDUCTOR : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF;
+            MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF:    fsm_state_next = rcmb_rdy ?       MMM_FSM_STATE_WAIT_REDUCTOR             : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF;
             
-            MMM_FSM_STATE_WAIT_REDUCTOR:             fsm_state_next =                         rdct_rdy ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_WAIT_REDUCTOR;
+            MMM_FSM_STATE_WAIT_REDUCTOR:             fsm_state_next = rdct_rdy ?       MMM_FSM_STATE_STOP                      : MMM_FSM_STATE_WAIT_REDUCTOR;
             
-            MMM_FSM_STATE_STOP:                     fsm_state_next =                         MMM_FSM_STATE_IDLE                   ;
+            MMM_FSM_STATE_STOP:                      fsm_state_next =                  MMM_FSM_STATE_IDLE                      ;
             
-            default:                             fsm_state_next =                         MMM_FSM_STATE_IDLE                   ;
+            default:                                 fsm_state_next =                  MMM_FSM_STATE_IDLE                      ;
 
         endcase
         //
@@ -1005,10 +1031,10 @@ module modexpng_mmm_dual
     
     always @(posedge clk or negedge rst_n)
         //
-        if (!rst_n)                                rdct_ena_reg <= 1'b0;
+        if (!rst_n)                                 rdct_ena_reg <= 1'b0; // asynchronous, active-low reset
         else case (fsm_state)
            MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1;
-           default:                             rdct_ena_reg <= 1'b0;
+           default:                                 rdct_ena_reg <= 1'b0; // set only for the clock following RECTANGLE_COL_0_INIT
         endcase
     
     
@@ -1021,9 +1047,9 @@ module modexpng_mmm_dual
     
     always @(posedge clk or negedge rst_n)
         //
-        if (!rst_n) rdy_reg <= 1'b1;
+        if (!rst_n)                                        rdy_reg <= 1'b1; // asynchronous reset leaves the flag set
         else begin
-            if (rdy && ena) rdy_reg <= 1'b0;
+            if (rdy && ena)                                rdy_reg <= 1'b0; // cleared when ena arrives while rdy is high
             if (!rdy && (fsm_state == MMM_FSM_STATE_STOP)) rdy_reg <= 1'b1; 
         end
 
@@ -1032,11 +1058,12 @@ module modexpng_mmm_dual
     // Debug
     //
     `ifdef MODEXPNG_ENABLE_DEBUG
-        real load_cyc_mult = 0.0;
-        always @(posedge clk)
-            //
-            if (dsp_xy_ce_m)
-                load_cyc_mult <= load_cyc_mult + 1.0;    
+    real load_cyc_mult = 0.0;
+    always @(posedge clk)
+        //
+        if (dsp_xy_ce_m)
+            load_cyc_mult <= load_cyc_mult + 1.0;    
     `endif
     
+    
 endmodule



More information about the Commits mailing list