[Cryptech-Commits] [user/shatov/modexpng] 18/21: Refactored MMM recombinator module, accommodated the changes in DSP slice wrapper names.

git at cryptech.is git at cryptech.is
Mon Jan 20 21:18:19 UTC 2020


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexpng.

commit c6029af4482192c3e25ef0f6561bfbefa76e75ec
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Tue Jan 21 00:11:42 2020 +0300

    Refactored MMM recombinator module, accommodated the changes in DSP slice
    wrapper names.
---
 rtl/modexpng_recombinator_block.v | 1244 ++++++++++++++++++-------------------
 rtl/modexpng_recombinator_cell.v  |   94 ++-
 2 files changed, 676 insertions(+), 662 deletions(-)

diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
index 077ae47..f6e23e5 100644
--- a/rtl/modexpng_recombinator_block.v
+++ b/rtl/modexpng_recombinator_block.v
@@ -53,58 +53,58 @@ module modexpng_recombinator_block
     `include "modexpng_mmm_dual_fsm.vh"
 
 
-    input                        clk;
-    input                        rst_n;
-    input                        ena;
-    output                       rdy;
-    input  [MMM_FSM_STATE_W-1:0] fsm_state_next;
-    input [7:0]                  word_index_last;
-    input                        dsp_xy_ce_p;
-    input  [9*47-1:0] dsp_x_p;
-    input  [9*47-1:0] dsp_y_p;
-    input  [     4:0] col_index;
-    input  [     4:0] col_index_last;
+    input                                         clk;
+    input                                         rst_n;
+    input                                         ena;
+    output                                        rdy;
+    input  [MMM_FSM_STATE_W                 -1:0] fsm_state_next;
+    input  [      OP_ADDR_W                 -1:0] word_index_last;
+    input                                         dsp_xy_ce_p;
+    input  [          MAC_W * NUM_MULTS_AUX -1:0] dsp_x_p;
+    input  [          MAC_W * NUM_MULTS_AUX -1:0] dsp_y_p;
+    input  [    COL_INDEX_W                 -1:0] col_index;
+    input  [    COL_INDEX_W                 -1:0] col_index_last;
     
-    input  [     BANK_ADDR_W -1:0] rd_narrow_xy_bank;
-    input  [     7:0] rd_narrow_xy_addr;
+    input  [    BANK_ADDR_W                 -1:0] rd_narrow_xy_bank;
+    input  [      OP_ADDR_W                 -1:0] rd_narrow_xy_addr;
 
-    output [     BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
-    output [     7:0] rcmb_wide_xy_addr;
-    output [    17:0] rcmb_wide_x_dout;
-    output [    17:0] rcmb_wide_y_dout;
-    output            rcmb_wide_xy_valid;
+    output [    BANK_ADDR_W                 -1:0] rcmb_wide_xy_bank;
+    output [      OP_ADDR_W                 -1:0] rcmb_wide_xy_addr;
+    output [     WORD_EXT_W                 -1:0] rcmb_wide_x_dout;
+    output [     WORD_EXT_W                 -1:0] rcmb_wide_y_dout;
+    output                                        rcmb_wide_xy_valid;
 
-    output [    BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
-    output [     7:0] rcmb_narrow_xy_addr;
-    output [    17:0] rcmb_narrow_x_dout;
-    output [    17:0] rcmb_narrow_y_dout;
-    output            rcmb_narrow_xy_valid;
+    output [    BANK_ADDR_W                 -1:0] rcmb_narrow_xy_bank;
+    output [      OP_ADDR_W                 -1:0] rcmb_narrow_xy_addr;
+    output [     WORD_EXT_W                 -1:0] rcmb_narrow_x_dout;
+    output [     WORD_EXT_W                 -1:0] rcmb_narrow_y_dout;
+    output                                        rcmb_narrow_xy_valid;
 
-    output [     BANK_ADDR_W -1:0] rdct_narrow_xy_bank;
-    output [     7:0] rdct_narrow_xy_addr;
-    output [    17:0] rdct_narrow_x_dout;
-    output [    17:0] rdct_narrow_y_dout;
-    output            rdct_narrow_xy_valid;
+    output [    BANK_ADDR_W                 -1:0] rdct_narrow_xy_bank;
+    output [      OP_ADDR_W                 -1:0] rdct_narrow_xy_addr;
+    output [     WORD_EXT_W                 -1:0] rdct_narrow_x_dout;
+    output [     WORD_EXT_W                 -1:0] rdct_narrow_y_dout;
+    output                                        rdct_narrow_xy_valid;
 
 
     //
     // Latches
     //
-    reg  [1*47-1:0] dsp_x_p_latch[0:8];
-    reg  [1*47-1:0] dsp_y_p_latch[0:8];
+    reg [MAC_W-1:0] dsp_x_p_latch[0:NUM_MULTS_AUX-1];
+    reg [MAC_W-1:0] dsp_y_p_latch[0:NUM_MULTS_AUX-1];
 
 
     //
     // Mapping
     //
-    wire [46:0] dsp_x_p_split[0:8];
-    wire [46:0] dsp_y_p_split[0:8];
+    wire [MAC_W-1:0] dsp_x_p_split[0:NUM_MULTS_AUX-1];
+    wire [MAC_W-1:0] dsp_y_p_split[0:NUM_MULTS_AUX-1];
     
     genvar z;
-    generate for (z=0; z<(NUM_MULTS+1); z=z+1)
+    generate for (z=0; z<NUM_MULTS_AUX; z=z+1)
         begin : gen_dsp_xy_p_split
-            assign dsp_x_p_split[z] = dsp_x_p[47*z+:47];
-            assign dsp_y_p_split[z] = dsp_y_p[47*z+:47];
+            assign dsp_x_p_split[z] = dsp_x_p[z*MAC_W +: MAC_W];
+            assign dsp_y_p_split[z] = dsp_y_p[z*MAC_W +: MAC_W];
         end
     endgenerate
 
@@ -125,211 +125,200 @@ module modexpng_recombinator_block
     //
     
     // valid
-    reg       xy_valid_lsb = 1'b0;
-    reg       xy_aux_lsb   = 1'b0;
-    reg       xy_valid_msb = 1'b0;
+    reg xy_valid_lsb = 1'b0;
+    reg xy_aux_lsb   = 1'b0;
+    reg xy_valid_msb = 1'b0;
     
     // bitmap
-    reg [7:0] xy_bitmap_lsb = {8{1'b0}};
-    reg [7:0] xy_bitmap_msb = {8{1'b0}};
+    reg [NUM_MULTS-1:0] xy_bitmap_lsb = {NUM_MULTS{1'b0}};
+    reg [NUM_MULTS-1:0] xy_bitmap_msb = {NUM_MULTS{1'b0}};
     
     // index
-    reg [2:0] xy_index_lsb = 3'dX;
+    reg [MAC_INDEX_W-1:0] xy_index_lsb;
     
     // purge
-    reg       xy_purge_lsb = 1'b0;
-    reg       xy_purge_msb = 1'b0;
+    reg xy_purge_lsb = 1'b0;
+    reg xy_purge_msb = 1'b0;
     
     // valid - latch
-    reg       xy_valid_latch_lsb = 1'b0;
+    reg xy_valid_latch_lsb = 1'b0;
     
     // aux - latch
-    reg       xy_aux_latch_lsb = 1'b0;
+    reg xy_aux_latch_lsb = 1'b0;
     
     // bitmap - latch
-    reg [7:0] xy_bitmap_latch_lsb = {8{1'b0}};
-    reg [7:0] xy_bitmap_latch_msb = {8{1'b0}};
+    reg [NUM_MULTS-1:0] xy_bitmap_latch_lsb = {NUM_MULTS{1'b0}};
+    reg [NUM_MULTS-1:0] xy_bitmap_latch_msb = {NUM_MULTS{1'b0}};
 
     // index - latch
-    reg [2:0] xy_index_latch_lsb = 3'dX;
+    reg [MAC_INDEX_W-1:0] xy_index_latch_lsb;
     
     // purge - index
     reg       xy_purge_latch_lsb = 1'b0;
     reg       xy_purge_latch_msb = 1'b0;
 
-    // 
-    reg       xy_valid_lsb_adv[1:6];
-    reg       xy_valid_msb_adv[1:6];
-    reg       xy_aux_lsb_adv[1:6];
-    reg [7:0] xy_bitmap_lsb_adv[1:6];
-    reg [7:0] xy_bitmap_msb_adv[1:6];
-    reg [2:0] xy_index_lsb_adv[1:6];
-    reg [2:0] xy_index_msb_adv[1:6];
-    reg       xy_purge_lsb_adv[1:6];
-    reg       xy_purge_msb_adv[1:6];
+
+    //
+    // Anticipatory Values
+    //
+    reg                    xy_valid_lsb_adv [1:6];
+    reg                    xy_valid_msb_adv [1:6];
+    reg                    xy_aux_lsb_adv   [1:6];
+    reg [NUM_MULTS   -1:0] xy_bitmap_lsb_adv[1:6];
+    reg [NUM_MULTS   -1:0] xy_bitmap_msb_adv[1:6];
+    reg [MAC_INDEX_W -1:0] xy_index_lsb_adv [1:6];
+    reg [MAC_INDEX_W -1:0] xy_index_msb_adv [1:6];
+    reg                    xy_purge_lsb_adv [1:6];
+    reg                    xy_purge_msb_adv [1:6];
     
-    reg [1:0] rcmb_mode;
-       
+    
+    //
+    // Recombination Mode
+    //
+    localparam [1:0] RCMB_MODE_UNUSED    = 2'd0;
+    localparam [1:0] RCMB_MODE_SQUARE    = 2'd1;
+    localparam [1:0] RCMB_MODE_TRIANGLE  = 2'd2;
+    localparam [1:0] RCMB_MODE_RECTANGLE = 2'd3;
+
+    reg [1:0] rcmb_mode = RCMB_MODE_UNUSED;
+           
     always @(posedge clk)
        //
        if (ena)
            //
            case (fsm_state_next)
-               MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY:        rcmb_mode <= 2'd1;
-               MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:      rcmb_mode <= 2'd2;
-               MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY:     rcmb_mode <= 2'd3;
-               default:                                 rcmb_mode <= 2'd0;
+               MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY:    rcmb_mode <= RCMB_MODE_SQUARE;
+               MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:  rcmb_mode <= RCMB_MODE_TRIANGLE;
+               MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= RCMB_MODE_RECTANGLE;
+               default:                                 rcmb_mode <= RCMB_MODE_UNUSED;
            endcase
 
-               
+
+    //
+    // Initialization
+    //
     integer i;
-    initial for (i=1; i<6; i=i+1) begin
-        xy_valid_lsb_adv[i] = 1'b0;
-        xy_valid_msb_adv[i] = 1'b0;
-        xy_aux_lsb_adv[i] = 1'b0;
-        xy_bitmap_lsb_adv[i] = {8{1'b0}};
-        xy_bitmap_msb_adv[i] = {8{1'b0}};
-        xy_index_lsb_adv[i] = 3'dX;
-        xy_index_msb_adv[i] = 3'dX;
-        xy_purge_lsb_adv[i] = 1'b0;
-        xy_purge_msb_adv[i] = 1'b0;
+    initial for (i=1; i<=6; i=i+1) begin
+        xy_valid_lsb_adv [i] = 1'b0;
+        xy_valid_msb_adv [i] = 1'b0;
+        xy_aux_lsb_adv   [i] = 1'b0;
+        xy_bitmap_lsb_adv[i] = {NUM_MULTS{1'b0}};
+        xy_bitmap_msb_adv[i] = {NUM_MULTS{1'b0}};
+        xy_index_lsb_adv [i] = {MAC_INDEX_W{1'bX}};
+        xy_index_msb_adv [i] = {MAC_INDEX_W{1'bX}};
+        xy_purge_lsb_adv [i] = 1'b0;
+        xy_purge_msb_adv [i] = 1'b0;
     end
     
-    function        calc_square_triangle_valid_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function                     calc_square_triangle_valid_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            //
-            if (narrow_xy_addr_value[7:3] == col_index_value)
-                calc_square_triangle_valid_lsb = 1'b1;
-            else
-                calc_square_triangle_valid_lsb = 1'b0;
-            //
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        //
+        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) calc_square_triangle_valid_lsb = 1'b1;
+        else                                                                  calc_square_triangle_valid_lsb = 1'b0;
+        //
     endfunction
 
-    function        calc_square_valid_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function                     calc_square_valid_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);   
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);   
     endfunction
 
-    function        calc_triangle_valid_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function                     calc_triangle_valid_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);   
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);   
     endfunction
     
-    function        calc_rectangle_valid_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function                     calc_rectangle_valid_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            //
-            if (narrow_xy_addr_value[7:3] == col_index_value) 
-                calc_rectangle_valid_lsb = narrow_xy_bank_value != BANK_NARROW_EXT;
-            else
-                calc_rectangle_valid_lsb = 1'b0;
-            //
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        //
+        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) calc_rectangle_valid_lsb = narrow_xy_bank_value != BANK_NARROW_EXT;
+        else                                                                  calc_rectangle_valid_lsb = 1'b0;
+        //
     endfunction
 
-    function        calc_triangle_aux_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function                     calc_triangle_aux_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            //
-            if (narrow_xy_bank_value == BANK_NARROW_EXT)
-                calc_triangle_aux_lsb = 1'b1;
-            else
-                calc_triangle_aux_lsb = 1'b0;
-            //
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        //
+        if (narrow_xy_bank_value == BANK_NARROW_EXT) calc_triangle_aux_lsb = 1'b1;
+        else calc_triangle_aux_lsb = 1'b0;
+        //
     endfunction
     
-    function  [7:0] calc_square_triangle_bitmap_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    //
+    // TODO: This will need some generic replacement defined in modexpng_parameters.vh
+    //       in case anything different from NUM_MULTS = 8 is used.
+    //
+    function  [  NUM_MULTS -1:0] calc_square_triangle_bitmap_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            //
-            if (narrow_xy_addr_value[7:3] == col_index_value)
-                //
-                case (narrow_xy_addr_value[2:0])
-                    3'b000: calc_square_triangle_bitmap_lsb = 8'b00000001;
-                    3'b001: calc_square_triangle_bitmap_lsb = 8'b00000010;
-                    3'b010: calc_square_triangle_bitmap_lsb = 8'b00000100;
-                    3'b011: calc_square_triangle_bitmap_lsb = 8'b00001000;
-                    3'b100: calc_square_triangle_bitmap_lsb = 8'b00010000;
-                    3'b101: calc_square_triangle_bitmap_lsb = 8'b00100000;
-                    3'b110: calc_square_triangle_bitmap_lsb = 8'b01000000;
-                    3'b111: calc_square_triangle_bitmap_lsb = 8'b10000000;
-                endcase
-                //
-            else
-                calc_square_triangle_bitmap_lsb = {8{1'b0}};
-            //
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        //
+        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
+            case (narrow_xy_addr_value[MAC_INDEX_W-1:0])
+                3'b000: calc_square_triangle_bitmap_lsb = 8'b00000001;
+                3'b001: calc_square_triangle_bitmap_lsb = 8'b00000010;
+                3'b010: calc_square_triangle_bitmap_lsb = 8'b00000100;
+                3'b011: calc_square_triangle_bitmap_lsb = 8'b00001000;
+                3'b100: calc_square_triangle_bitmap_lsb = 8'b00010000;
+                3'b101: calc_square_triangle_bitmap_lsb = 8'b00100000;
+                3'b110: calc_square_triangle_bitmap_lsb = 8'b01000000;
+                3'b111: calc_square_triangle_bitmap_lsb = 8'b10000000;
+            endcase
+        else            calc_square_triangle_bitmap_lsb = 8'b00000000;
+        //
     endfunction
 
-    function  [7:0] calc_square_bitmap_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function  [  NUM_MULTS -1:0] calc_square_bitmap_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
     endfunction
 
-    function  [7:0] calc_triangle_bitmap_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function  [  NUM_MULTS -1:0] calc_triangle_bitmap_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
     endfunction
 
-    function  [7:0] calc_rectangle_bitmap_lsb;
-       input [4:0] col_index_value;
-       input [4:0] col_index_last_value;
-       input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-       input [7:0] narrow_xy_addr_value;
-       begin
-           //
-           if ((narrow_xy_addr_value[7:3] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT))
-               //
-               case (narrow_xy_addr_value[2:0])
-                   3'b000: calc_rectangle_bitmap_lsb = 8'b00000001;
-                   3'b001: calc_rectangle_bitmap_lsb = 8'b00000010;
-                   3'b010: calc_rectangle_bitmap_lsb = 8'b00000100;
-                   3'b011: calc_rectangle_bitmap_lsb = 8'b00001000;
-                   3'b100: calc_rectangle_bitmap_lsb = 8'b00010000;
-                   3'b101: calc_rectangle_bitmap_lsb = 8'b00100000;
-                   3'b110: calc_rectangle_bitmap_lsb = 8'b01000000;
-                   3'b111: calc_rectangle_bitmap_lsb = 8'b10000000;
-               endcase
-               //
-           else
-               calc_rectangle_bitmap_lsb = {8{1'b0}};
-           //
-        end
+    function  [  NUM_MULTS -1:0] calc_rectangle_bitmap_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
+        input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        //
+        if ((narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT))
+            case (narrow_xy_addr_value[MAC_INDEX_W-1:0])
+                3'b000: calc_rectangle_bitmap_lsb = 8'b00000001;
+                3'b001: calc_rectangle_bitmap_lsb = 8'b00000010;
+                3'b010: calc_rectangle_bitmap_lsb = 8'b00000100;
+                3'b011: calc_rectangle_bitmap_lsb = 8'b00001000;
+                3'b100: calc_rectangle_bitmap_lsb = 8'b00010000;
+                3'b101: calc_rectangle_bitmap_lsb = 8'b00100000;
+                3'b110: calc_rectangle_bitmap_lsb = 8'b01000000;
+                3'b111: calc_rectangle_bitmap_lsb = 8'b10000000;
+            endcase
+        else            calc_rectangle_bitmap_lsb = 8'b00000000;
+        //
     endfunction
        
        /*
@@ -337,269 +326,243 @@ module modexpng_recombinator_block
         * rectangle is that the bank is checked or not). A universal function would
         * accept a parameter that tells it whether it should check the bank or not.
         * Let's do it later, too early to optimize now, it seems.
-        *
-        *
         */
        
-    function  [2:0] calc_square_triangle_index_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function  [MAC_INDEX_W -1:0] calc_square_triangle_index_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            //
-            if (narrow_xy_addr_value[7:3] == col_index_value)
-                //
-                case (narrow_xy_addr_value[2:0])
-                    3'b000: calc_square_triangle_index_lsb = 3'd0;
-                    3'b001: calc_square_triangle_index_lsb = 3'd1;
-                    3'b010: calc_square_triangle_index_lsb = 3'd2;
-                    3'b011: calc_square_triangle_index_lsb = 3'd3;
-                    3'b100: calc_square_triangle_index_lsb = 3'd4;
-                    3'b101: calc_square_triangle_index_lsb = 3'd5;
-                    3'b110: calc_square_triangle_index_lsb = 3'd6;
-                    3'b111: calc_square_triangle_index_lsb = 3'd7;
-                endcase
-                //
-            else
-                calc_square_triangle_index_lsb = 3'dX;
-            //
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        //
+        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value)
+            case (narrow_xy_addr_value[MAC_INDEX_W-1:0])
+                3'b000: calc_square_triangle_index_lsb = 3'd0;
+                3'b001: calc_square_triangle_index_lsb = 3'd1;
+                3'b010: calc_square_triangle_index_lsb = 3'd2;
+                3'b011: calc_square_triangle_index_lsb = 3'd3;
+                3'b100: calc_square_triangle_index_lsb = 3'd4;
+                3'b101: calc_square_triangle_index_lsb = 3'd5;
+                3'b110: calc_square_triangle_index_lsb = 3'd6;
+                3'b111: calc_square_triangle_index_lsb = 3'd7;
+            endcase
+        else            calc_square_triangle_index_lsb = 3'dX;
+        //
     endfunction
 
-    function  [2:0] calc_square_index_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function  [MAC_INDEX_W -1:0] calc_square_index_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
     endfunction
 
-    function  [2:0] calc_triangle_index_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function  [MAC_INDEX_W -1:0] calc_triangle_index_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
     endfunction
 
-    function  [2:0] calc_rectangle_index_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function  [MAC_INDEX_W -1:0] calc_rectangle_index_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] slim_bram_xy_bank_value;
-        input [7:0] slim_bram_xy_addr_value;
-        begin
-            //
-            if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_NARROW_EXT))
-                //
-                case (slim_bram_xy_addr_value[2:0])
-                    3'b000: calc_rectangle_index_lsb = 3'd0;
-                    3'b001: calc_rectangle_index_lsb = 3'd1;
-                    3'b010: calc_rectangle_index_lsb = 3'd2;
-                    3'b011: calc_rectangle_index_lsb = 3'd3;
-                    3'b100: calc_rectangle_index_lsb = 3'd4;
-                    3'b101: calc_rectangle_index_lsb = 3'd5;
-                    3'b110: calc_rectangle_index_lsb = 3'd6;
-                    3'b111: calc_rectangle_index_lsb = 3'd7;
-                endcase
-                //
-            else
-                calc_rectangle_index_lsb = 3'dX;
-            //
-        end
+        input [  OP_ADDR_W -1:0] slim_bram_xy_addr_value;
+        //
+        if ((slim_bram_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) && (slim_bram_xy_bank_value != BANK_NARROW_EXT))
+            case (slim_bram_xy_addr_value[MAC_INDEX_W-1:0])
+                3'b000: calc_rectangle_index_lsb = 3'd0;
+                3'b001: calc_rectangle_index_lsb = 3'd1;
+                3'b010: calc_rectangle_index_lsb = 3'd2;
+                3'b011: calc_rectangle_index_lsb = 3'd3;
+                3'b100: calc_rectangle_index_lsb = 3'd4;
+                3'b101: calc_rectangle_index_lsb = 3'd5;
+                3'b110: calc_rectangle_index_lsb = 3'd6;
+                3'b111: calc_rectangle_index_lsb = 3'd7;
+            endcase
+        else            calc_rectangle_index_lsb = 3'dX;
+        //
     endfunction
     
-    function        calc_square_rectangle_purge_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function                     calc_square_rectangle_purge_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            //
-            if (narrow_xy_addr_value[7:3] == col_index_value)
-                calc_square_rectangle_purge_lsb = narrow_xy_addr_value[7:3] == col_index_last_value;
-            else
-                calc_square_rectangle_purge_lsb = 1'b0;
-            //
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        //
+        if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) calc_square_rectangle_purge_lsb = narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_last_value;
+        else                                                                  calc_square_rectangle_purge_lsb = 1'b0;
+        //
     endfunction
 
-    function        calc_square_purge_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function                     calc_square_purge_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
     endfunction
 
-    function        calc_rectangle_purge_lsb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function                     calc_rectangle_purge_lsb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        begin
-            calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
     endfunction
 
-    function        calc_square_valid_msb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function                     calc_square_valid_msb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        input [7:0] index_last_value;
-        begin
-            //
-            if (narrow_xy_addr_value == index_last_value)
-                calc_square_valid_msb = 1'b1;
-            else
-                calc_square_valid_msb = 1'b0;
-            //
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        input [  OP_ADDR_W -1:0] index_last_value;
+        //
+        if (narrow_xy_addr_value == index_last_value) calc_square_valid_msb = 1'b1;
+        else                                          calc_square_valid_msb = 1'b0;
+        //
     endfunction
 
-    function        calc_rectangle_valid_msb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function                     calc_rectangle_valid_msb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        input [7:0] index_last_value;
-        begin
-            //
-            if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT))
-                calc_rectangle_valid_msb = 1'b1;
-            else
-                calc_rectangle_valid_msb = 1'b0;
-            //
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        input [  OP_ADDR_W -1:0] index_last_value;
+        //
+        if ((narrow_xy_addr_value == OP_ADDR_ONE) && (narrow_xy_bank_value == BANK_NARROW_EXT)) calc_rectangle_valid_msb = 1'b1;
+        else                                                                                    calc_rectangle_valid_msb = 1'b0;
+        //
     endfunction
     
-    function  [7:0] calc_square_bitmap_msb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function  [  NUM_MULTS -1:0] calc_square_bitmap_msb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        input [7:0] index_last_value;
-        begin
-            //
-            if (narrow_xy_addr_value == index_last_value) begin
-                calc_square_bitmap_msb[7] = col_index_value != col_index_last_value;
-                calc_square_bitmap_msb[6:0] = 7'b1111111;
-            end else
-                calc_square_bitmap_msb[7:0] = 8'b00000000;
-            //
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        input [  OP_ADDR_W -1:0] index_last_value;
+        // At the last narrow address: the top bit is set unless this is the last column, all lower bits are set; otherwise all zeroes. Widths follow NUM_MULTS (narrow_xy_bank_value is not read here).
+        if (narrow_xy_addr_value == index_last_value) calc_square_bitmap_msb = {col_index_value != col_index_last_value, {(NUM_MULTS-1){1'b1}}};
+        else                                          calc_square_bitmap_msb = {NUM_MULTS{1'b0}};
+        //
     endfunction
 
-    function  [7:0] calc_rectangle_bitmap_msb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function  [  NUM_MULTS -1:0] calc_rectangle_bitmap_msb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        input [7:0] index_last_value;
-        begin
-            //
-            if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) begin
-                calc_rectangle_bitmap_msb[7:0] = 8'b11111111;
-            end else
-                calc_rectangle_bitmap_msb[7:0] = 8'b00000000;
-            //
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        input [  OP_ADDR_W -1:0] index_last_value;
+        // All NUM_MULTS bits are set when the narrow address equals OP_ADDR_ONE in bank BANK_NARROW_EXT, otherwise all zeroes; col_index_* and index_last_value are not read here.
+        if ((narrow_xy_addr_value == OP_ADDR_ONE) && (narrow_xy_bank_value == BANK_NARROW_EXT)) calc_rectangle_bitmap_msb = {NUM_MULTS{1'b1}};
+        else                                                                                    calc_rectangle_bitmap_msb = {NUM_MULTS{1'b0}};
+        //
     endfunction
 
-    function        calc_square_purge_msb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function                     calc_square_purge_msb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        input [7:0] index_last_value;
-        begin
-            //
-            if (narrow_xy_addr_value == index_last_value)
-                calc_square_purge_msb = col_index_value == col_index_last_value;
-            else
-                calc_square_purge_msb = 1'b0;
-            //
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        input [  OP_ADDR_W -1:0] index_last_value;
+        // Returns 1 only when the narrow address equals index_last_value AND the column index equals the last column index; narrow_xy_bank_value is not read here.
+        if (narrow_xy_addr_value == index_last_value) calc_square_purge_msb = col_index_value == col_index_last_value;
+        else                                          calc_square_purge_msb = 1'b0;
+        //
     endfunction
 
-    function        calc_rectangle_purge_msb;
-        input [4:0] col_index_value;
-        input [4:0] col_index_last_value;
+    function                     calc_rectangle_purge_msb;
+        input [COL_INDEX_W -1:0] col_index_value;
+        input [COL_INDEX_W -1:0] col_index_last_value;
         input [BANK_ADDR_W -1:0] narrow_xy_bank_value;
-        input [7:0] narrow_xy_addr_value;
-        input [7:0] index_last_value;
-        begin
-            //
-            if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT))
-                calc_rectangle_purge_msb = col_index_value == col_index_last_value;
-            else
-                calc_rectangle_purge_msb = 1'b0;
-            //
-        end
+        input [  OP_ADDR_W -1:0] narrow_xy_addr_value;
+        input [  OP_ADDR_W -1:0] index_last_value;
+        // Returns 1 only when the narrow address equals OP_ADDR_ONE in bank BANK_NARROW_EXT AND the column index equals the last column index; index_last_value is not read here.
+        if ((narrow_xy_addr_value == OP_ADDR_ONE) && (narrow_xy_bank_value == BANK_NARROW_EXT)) calc_rectangle_purge_msb = col_index_value == col_index_last_value;
+        else                                                                                    calc_rectangle_purge_msb = 1'b0;
+        //
     endfunction
 
+
+    //
+    // Recombinator Cell Instances
+    //
+    reg [WORD_W -1:0] rcmb_x_msb_carry_0;
+    reg [WORD_W -1:0] rcmb_y_msb_carry_0;
+    reg [WORD_W -1:0] rcmb_x_msb_carry_1;
+    reg [WORD_W -1:0] rcmb_y_msb_carry_1;
     
     reg         rcmb_xy_lsb_ce = 1'b0;
-    reg         rcmb_xy_lsb_ce_aux;
+    reg         rcmb_xy_lsb_ce_aux = 1'b0;
     reg  [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000;
     wire        rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0];
     reg         rcmb_xy_lsb_clr;
+    wire        rcmb_xy_lsb_cry = !rcmb_xy_lsb_ce_purge[2] && (rcmb_xy_lsb_ce_purge[1] || rcmb_xy_lsb_ce_purge[0]);
 
-    reg  [46:0] rcmb_x_lsb_din;
-    reg  [46:0] rcmb_y_lsb_din;
-    wire [15:0] rcmb_x_lsb_dout;
-    wire [15:0] rcmb_y_lsb_dout;
+    reg  [ MAC_W     -1:0] rcmb_x_lsb_din;
+    reg  [ MAC_W     -1:0] rcmb_y_lsb_din;
+    wire [WORD_W     -1:0] rcmb_x_lsb_dout;
+    wire [WORD_W     -1:0] rcmb_y_lsb_dout;
+    wire [WORD_EXT_W -2:0] rcmb_x_lsb_dout_ext;
+    wire [WORD_EXT_W -2:0] rcmb_y_lsb_dout_ext;    
 
     reg         rcmb_xy_msb_ce = 1'b0;
     reg  [ 1:0] rcmb_xy_msb_ce_purge = 2'b00;
     wire        rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0];
     reg         rcmb_xy_msb_clr;
     
-    reg  [46:0] rcmb_x_msb_din;
-    reg  [46:0] rcmb_y_msb_din;
-    wire [15:0] rcmb_x_msb_dout;
-    wire [15:0] rcmb_y_msb_dout;
+    reg  [ MAC_W -1:0] rcmb_x_msb_din;
+    reg  [ MAC_W -1:0] rcmb_y_msb_din;
+    wire [WORD_W -1:0] rcmb_x_msb_dout;
+    wire [WORD_W -1:0] rcmb_y_msb_dout;
     
     modexpng_recombinator_cell recomb_x_lsb
     (
-        .clk    (clk),
-        .ce     (rcmb_xy_lsb_ce_combined),
-        .clr    (rcmb_xy_lsb_clr),
-        .din    (rcmb_x_lsb_din),
-        .dout   (rcmb_x_lsb_dout)
+        .clk      (clk),
+        .ce       (rcmb_xy_lsb_ce_combined),
+        .clr      (rcmb_xy_lsb_clr),
+        .cry      (rcmb_xy_lsb_cry),
+        .cin      (rcmb_x_msb_carry_1),
+        .din      (rcmb_x_lsb_din),
+        .dout     (rcmb_x_lsb_dout),
+        .dout_ext (rcmb_x_lsb_dout_ext)
     );
     modexpng_recombinator_cell recomb_y_lsb
     (
-        .clk    (clk),
-        .ce     (rcmb_xy_lsb_ce_combined),
-        .clr    (rcmb_xy_lsb_clr),
-        .din    (rcmb_y_lsb_din),
-        .dout   (rcmb_y_lsb_dout)
+        .clk      (clk),
+        .ce       (rcmb_xy_lsb_ce_combined),
+        .clr      (rcmb_xy_lsb_clr),
+        .cry      (rcmb_xy_lsb_cry),
+        .cin      (rcmb_y_msb_carry_1),
+        .din      (rcmb_y_lsb_din),
+        .dout     (rcmb_y_lsb_dout),
+        .dout_ext (rcmb_y_lsb_dout_ext)
     );
 
     modexpng_recombinator_cell recomb_x_msb
     (
-        .clk    (clk),
-        .ce     (rcmb_xy_msb_ce_combined),
-        .clr    (rcmb_xy_msb_clr),
-        .din    (rcmb_x_msb_din),
-        .dout   (rcmb_x_msb_dout)
+        .clk      (clk),
+        .ce       (rcmb_xy_msb_ce_combined),
+        .clr      (rcmb_xy_msb_clr),
+        .cry      (1'b0),
+        .cin      (WORD_ZERO),
+        .din      (rcmb_x_msb_din),
+        .dout     (rcmb_x_msb_dout),
+        .dout_ext ()
     );
     
     modexpng_recombinator_cell recomb_y_msb
     (
-        .clk    (clk),
-        .ce     (rcmb_xy_msb_ce_combined),
-        .clr    (rcmb_xy_msb_clr),
-        .din    (rcmb_y_msb_din),
-        .dout   (rcmb_y_msb_dout)
+        .clk      (clk),
+        .ce       (rcmb_xy_msb_ce_combined),
+        .clr      (rcmb_xy_msb_clr),
+        .cry      (1'b0),
+        .cin      (WORD_ZERO),
+        .din      (rcmb_y_msb_din),
+        .dout     (rcmb_y_msb_dout),
+        .dout_ext ()
     );
 
     always @(posedge clk) begin
@@ -608,19 +571,14 @@ module modexpng_recombinator_block
         rcmb_xy_lsb_ce_aux <= xy_aux_latch_lsb;
         rcmb_xy_msb_ce <= xy_bitmap_latch_msb[0];
         //
-        if (xy_purge_latch_lsb)
-            rcmb_xy_lsb_ce_purge <= 3'b111;
-        else
-            rcmb_xy_lsb_ce_purge <= {1'b0, rcmb_xy_lsb_ce_purge[2:1]};
+        if (xy_purge_latch_lsb) rcmb_xy_lsb_ce_purge <= 3'b111;
+        else                    rcmb_xy_lsb_ce_purge <= {1'b0, rcmb_xy_lsb_ce_purge[2:1]};
         //
-        if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1])
-            rcmb_xy_msb_ce_purge <= 2'b11;
-        else
-            rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]};
+        if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1]) rcmb_xy_msb_ce_purge <= 2'b11;
+        else                                                                         rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]};
         //
     end
 
-
     always @(posedge clk)
         //
         if (ena) begin
@@ -637,11 +595,11 @@ module modexpng_recombinator_block
             rcmb_x_lsb_din <= dsp_x_p_latch[xy_index_latch_lsb];
             rcmb_y_lsb_din <= dsp_y_p_latch[xy_index_latch_lsb];
         end else if (xy_aux_latch_lsb) begin
-            rcmb_x_lsb_din <= dsp_x_p_latch[8];
-            rcmb_y_lsb_din <= dsp_y_p_latch[8];
+            rcmb_x_lsb_din <= dsp_x_p_latch[NUM_MULTS_AUX-1];
+            rcmb_y_lsb_din <= dsp_y_p_latch[NUM_MULTS_AUX-1];
         end else begin
-            rcmb_x_lsb_din <= {47{1'b0}};
-            rcmb_y_lsb_din <= {47{1'b0}};
+            rcmb_x_lsb_din <= {MAC_W{1'b0}};
+            rcmb_y_lsb_din <= {MAC_W{1'b0}};
         end
 
     always @(posedge clk)
@@ -650,8 +608,8 @@ module modexpng_recombinator_block
             rcmb_x_msb_din <= dsp_x_p_latch[0];
             rcmb_y_msb_din <= dsp_y_p_latch[0];
         end else begin
-            rcmb_x_msb_din <= {47{1'b0}};
-            rcmb_y_msb_din <= {47{1'b0}};
+            rcmb_x_msb_din <= {MAC_W{1'b0}};
+            rcmb_y_msb_din <= {MAC_W{1'b0}};
         end
 
 
@@ -688,7 +646,7 @@ module modexpng_recombinator_block
                 xy_purge_lsb_adv [6] <= 1'b0;
                 //
                 xy_valid_msb_adv [6] <= 1'b0;
-                xy_bitmap_msb_adv[6] <= {8{1'b0}};
+                xy_bitmap_msb_adv[6] <= {NUM_MULTS{1'b0}};
                 xy_purge_msb_adv [6] <= 1'b0;
                 //
             end
@@ -714,12 +672,12 @@ module modexpng_recombinator_block
                 //
                 xy_valid_lsb_adv [6] <= 1'b0;
                 xy_aux_lsb_adv   [6] <= 1'b0;
-                xy_bitmap_lsb_adv[6] <= {8{1'b0}};
-                xy_index_lsb_adv [6] <= 3'dX;
+                xy_bitmap_lsb_adv[6] <= {NUM_MULTS{1'b0}};
+                xy_index_lsb_adv [6] <= {MAC_INDEX_W{1'bX}};
                 xy_purge_lsb_adv [6] <= 1'b0;
                 //
                 xy_valid_msb_adv [6] <= 1'b0;
-                xy_bitmap_msb_adv[6] <= {8{1'b0}};
+                xy_bitmap_msb_adv[6] <= {NUM_MULTS{1'b0}};
                 xy_purge_msb_adv [6] <= 1'b0;
                 //
             end
@@ -749,7 +707,7 @@ module modexpng_recombinator_block
             xy_bitmap_latch_msb <= xy_bitmap_msb;
             xy_purge_latch_msb  <= xy_purge_msb;
         end else begin
-            xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[7:1]};
+            xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[NUM_MULTS-1:1]};
         end
         //
         //
@@ -771,19 +729,19 @@ module modexpng_recombinator_block
         //
         if (xy_bitmap_latch_msb[1])   // only shift 7 times
             //
-            for (i=0; i<8; i=i+1)
+            for (i=0; i<NUM_MULTS; i=i+1)
                 //            
-                if (i < 7) begin
+                if (i < (NUM_MULTS-1)) begin
                     dsp_x_p_latch[i] <= dsp_x_p_latch[i+1];
                     dsp_y_p_latch[i] <= dsp_y_p_latch[i+1];
                 end else begin
-                    dsp_x_p_latch[i] <= {47{1'bX}};
-                    dsp_y_p_latch[i] <= {47{1'bX}};
+                    dsp_x_p_latch[i] <= {MAC_W{1'bX}};
+                    dsp_y_p_latch[i] <= {MAC_W{1'bX}};
                 end
             //
         else if (dsp_xy_ce_p_dly1) begin
             //
-            for (i=0; i<8; i=i+1)
+            for (i=0; i<NUM_MULTS; i=i+1)
                 //
                 if (xy_bitmap_lsb[i]) begin
                     dsp_x_p_latch[i] <= dsp_x_p_split[i];
@@ -794,12 +752,26 @@ module modexpng_recombinator_block
                 end
             //
             if (xy_aux_lsb) begin
-                dsp_x_p_latch[8] <= dsp_x_p_split[8];
-                dsp_y_p_latch[8] <= dsp_y_p_split[8];
+                dsp_x_p_latch[NUM_MULTS_AUX-1] <= dsp_x_p_split[NUM_MULTS_AUX-1];
+                dsp_y_p_latch[NUM_MULTS_AUX-1] <= dsp_y_p_split[NUM_MULTS_AUX-1];
             end
             //
         end
 
+
+    reg rcmb_xy_lsb_ce_combined_dly = 1'b0;
+    reg rcmb_xy_msb_ce_combined_dly = 1'b0;
+
+    always @(posedge clk or negedge rst_n)
+        // One-clock delayed copies of the combined LSB/MSB clock enables; both are cleared asynchronously while rst_n is low.
+        if (!rst_n) begin
+            rcmb_xy_lsb_ce_combined_dly <= 1'b0;
+            rcmb_xy_msb_ce_combined_dly <= 1'b0;        
+        end else begin
+            rcmb_xy_lsb_ce_combined_dly <= rcmb_xy_lsb_ce_combined;
+            rcmb_xy_msb_ce_combined_dly <= rcmb_xy_msb_ce_combined;
+        end        
+
     reg rcmb_xy_lsb_valid = 1'b0;
     reg rcmb_xy_msb_valid = 1'b0;
 
@@ -809,50 +781,58 @@ module modexpng_recombinator_block
             rcmb_xy_lsb_valid <= 1'b0;
             rcmb_xy_msb_valid <= 1'b0;        
         end else begin
-            rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined;
-            rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined;
+            rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly;
+            rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly;
         end        
 
 
-    reg [ BANK_ADDR_W -1:0] wide_xy_bank;
-    reg [ 7:0] wide_xy_addr;
-    reg [17:0] wide_x_dout;
-    reg [17:0] wide_y_dout;
-    reg        wide_xy_valid = 1'b0;
+    //
+    // Output Registers
+    //
+    reg [BANK_ADDR_W -1:0] wide_xy_bank;
+    reg [  OP_ADDR_W -1:0] wide_xy_addr;
+    reg [ WORD_EXT_W -1:0] wide_x_dout;
+    reg [ WORD_EXT_W -1:0] wide_y_dout;
+    reg                    wide_xy_valid = 1'b0;
+
+    reg [BANK_ADDR_W -1:0] narrow_xy_bank;
+    reg [  OP_ADDR_W -1:0] narrow_xy_addr;
+    reg [ WORD_EXT_W -1:0] narrow_x_dout;
+    reg [ WORD_EXT_W -1:0] narrow_y_dout;
+    reg                    narrow_xy_valid = 1'b0;
 
-    reg [ BANK_ADDR_W -1:0] narrow_xy_bank;
-    reg [ 7:0] narrow_xy_addr;
-    reg [17:0] narrow_x_dout;
-    reg [17:0] narrow_y_dout;
-    reg        narrow_xy_valid = 1'b0;
+    reg [BANK_ADDR_W -1:0] rdct_xy_bank;
+    reg [  OP_ADDR_W -1:0] rdct_xy_addr;
+    reg [ WORD_EXT_W -1:0] rdct_x_dout;
+    reg [ WORD_EXT_W -1:0] rdct_y_dout;
+    reg                    rdct_xy_valid = 1'b0;
 
-    reg [ BANK_ADDR_W -1:0] rdct_xy_bank;
-    reg [ 7:0] rdct_xy_addr;
-    reg [17:0] rdct_x_dout;
-    reg [17:0] rdct_y_dout;
-    reg        rdct_xy_valid = 1'b0;
 
-    reg [ 7:0] cnt_lsb;
-    reg [ 7:0] cnt_msb;
+    //
+    // Internal Counters
+    //
+    reg [OP_ADDR_W -1:0] cnt_lsb;
+    reg [OP_ADDR_W -1:0] cnt_msb;
     
-    reg        cnt_lsb_wrapped;
-    reg        cnt_msb_wrapped;
-
-    reg [31:0] rcmb_xy_msb_carry_0;
-    reg [31:0] rcmb_xy_msb_carry_1;
+    reg cnt_lsb_wrapped;
+    reg cnt_msb_wrapped;
     
     reg [31:0] rcmb_xy_msb_delay_0;
     reg [31:0] rcmb_xy_msb_delay_1;
     reg [31:0] rcmb_xy_msb_delay_2;
     
-    reg [ 7:0] rcmb_msb_cnt_delay_0 = 8'd0;
-    reg [ 7:0] rcmb_msb_cnt_delay_1 = 8'd0;
-    reg [ 7:0] rcmb_msb_cnt_delay_2 = 8'd0;
+    reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_0 = OP_ADDR_ZERO;
+    reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_1 = OP_ADDR_ZERO;
+    reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_2 = OP_ADDR_ZERO;
 
-    reg        rcmb_msb_flag_delay_0;
-    reg        rcmb_msb_flag_delay_1;
-    reg        rcmb_msb_flag_delay_2;
+    reg rcmb_msb_flag_delay_0 = 1'b0;
+    reg rcmb_msb_flag_delay_1 = 1'b0;
+    reg rcmb_msb_flag_delay_2 = 1'b0;
     
+    
+    //
+    // Output Port Mapping
+    //
     assign rcmb_wide_xy_bank  = wide_xy_bank;
     assign rcmb_wide_xy_addr  = wide_xy_addr;
     assign rcmb_wide_x_dout   = wide_x_dout;
@@ -870,23 +850,30 @@ module modexpng_recombinator_block
     assign rdct_narrow_x_dout   = rdct_x_dout;
     assign rdct_narrow_y_dout   = rdct_y_dout;
     assign rdct_narrow_xy_valid = rdct_xy_valid;
-    
+
+
+    //
+    // Ready Logic
+    //
     reg rdy_reg = 1'b1;
     reg rdy_adv = 1'b1;
-    
+
     assign rdy = rdy_reg;
-    
-    
+
     always @(posedge clk)
         //
         if (ena) rdy_reg <= 1'b0;
         else     rdy_reg <= rdy_adv;
-            
+
+
+    //
+    // Helper Tasks
+    //
     task advance_rcmb_msb_delay;
-        input [15:0] dout_x;
-        input [15:0] dout_y;
-        input [ 7:0] cnt;
-        input        flag;
+        input [   WORD_W -1:0] dout_x;
+        input [   WORD_W -1:0] dout_y;
+        input [OP_ADDR_W -1:0] cnt;
+        input                  flag;
         begin
             //
             rcmb_xy_msb_delay_0 <= {dout_y, dout_x};
@@ -905,134 +892,119 @@ module modexpng_recombinator_block
     endtask
          
     task shift_rcmb_msb_carry;
-        input [15:0] dout_x;
-        input [15:0] dout_y;
+        input [WORD_W -1:0] dout_x; // word shifted into the X carry pipeline
+        input [WORD_W -1:0] dout_y; // word shifted into the Y carry pipeline
         begin
-            rcmb_xy_msb_carry_0 <= {dout_y, dout_x};
-            rcmb_xy_msb_carry_1 <= rcmb_xy_msb_carry_0;
+            rcmb_x_msb_carry_0 <= dout_x;             // stage 0 takes the new X word
+            rcmb_y_msb_carry_0 <= dout_y;             // stage 0 takes the new Y word
+            rcmb_x_msb_carry_1 <= rcmb_x_msb_carry_0; // stage 1 takes the previous stage-0 X value (non-blocking); it drives .cin of recomb_x_lsb
+            rcmb_y_msb_carry_1 <= rcmb_y_msb_carry_0; // stage 1 takes the previous stage-0 Y value (non-blocking); it drives .cin of recomb_y_lsb
         end
     endtask
     
     task _update_wide;
-        input [ BANK_ADDR_W -1:0] bank;
-        input [ 7:0] addr;
-        input [17:0] dout_x;
-        input [17:0] dout_y;
-        input        valid;
+        input [BANK_ADDR_W -1:0] bank;   // value for wide_xy_bank
+        input [  OP_ADDR_W -1:0] addr;   // value for wide_xy_addr
+        input [ WORD_EXT_W -1:0] dout_x; // value for wide_x_dout
+        input [ WORD_EXT_W -1:0] dout_y; // value for wide_y_dout
+        input                    valid;  // value for wide_xy_valid
         begin
-            wide_xy_bank       <= bank;
-            wide_xy_addr       <= addr;
-            wide_x_dout        <= dout_x;
-            wide_y_dout        <= dout_y;
+            wide_xy_bank  <= bank;   // all five "wide" output registers are loaded together, non-blocking
+            wide_xy_addr  <= addr;
+            wide_x_dout   <= dout_x;
+            wide_y_dout   <= dout_y;
             wide_xy_valid <= valid;
         end
     endtask
     
     task _update_narrow;
-        input [ BANK_ADDR_W -1:0] bank;
-        input [ 7:0] addr;
-        input [17:0] dout_x;
-        input [17:0] dout_y;
-        input        valid;
+        input [BANK_ADDR_W -1:0] bank;   // value for narrow_xy_bank
+        input [  OP_ADDR_W -1:0] addr;   // value for narrow_xy_addr
+        input [ WORD_EXT_W -1:0] dout_x; // value for narrow_x_dout
+        input [ WORD_EXT_W -1:0] dout_y; // value for narrow_y_dout
+        input                    valid;  // value for narrow_xy_valid
         begin
-            narrow_xy_bank       <= bank;
-            narrow_xy_addr       <= addr;
-            narrow_x_dout        <= dout_x;
-            narrow_y_dout        <= dout_y;
+            narrow_xy_bank  <= bank;   // all five "narrow" output registers are loaded together, non-blocking
+            narrow_xy_addr  <= addr;
+            narrow_x_dout   <= dout_x;
+            narrow_y_dout   <= dout_y;
             narrow_xy_valid <= valid;
         end
     endtask
 
     task _update_rdct;
-        input [ BANK_ADDR_W -1:0] bank;
-        input [ 7:0] addr;
-        input [17:0] dout_x;
-        input [17:0] dout_y;
-        input        valid;
+        input [BANK_ADDR_W -1:0] bank;   // value for rdct_xy_bank
+        input [  OP_ADDR_W -1:0] addr;   // value for rdct_xy_addr
+        input [ WORD_EXT_W -1:0] dout_x; // value for rdct_x_dout
+        input [ WORD_EXT_W -1:0] dout_y; // value for rdct_y_dout
+        input                    valid;  // value for rdct_xy_valid
         begin
-            rdct_xy_bank       <= bank;
-            rdct_xy_addr       <= addr;
-            rdct_x_dout        <= dout_x;
-            rdct_y_dout        <= dout_y;
+            rdct_xy_bank  <= bank;   // all five "rdct" output registers are loaded together, non-blocking
+            rdct_xy_addr  <= addr;
+            rdct_x_dout   <= dout_x;
+            rdct_y_dout   <= dout_y;
             rdct_xy_valid <= valid;
         end
     endtask
             
     task set_wide;
-        input [ BANK_ADDR_W -1:0] bank;
-        input [ 7:0] addr;
-        input [17:0] dout_x;
-        input [17:0] dout_y;
-        begin
-            _update_wide(bank, addr, dout_x, dout_y, 1'b1);
-        end
+        input [BANK_ADDR_W -1:0] bank;
+        input [  OP_ADDR_W -1:0] addr;
+        input [ WORD_EXT_W -1:0] dout_x;
+        input [ WORD_EXT_W -1:0] dout_y;
+        _update_wide(bank, addr, dout_x, dout_y, 1'b1); // load the "wide" output registers with the given values and valid = 1
     endtask
     
     task set_narrow;
-        input [ BANK_ADDR_W -1:0] bank;
-        input [ 7:0] addr;
-        input [17:0] dout_x;
-        input [17:0] dout_y;
-        begin
-            _update_narrow(bank, addr, dout_x, dout_y, 1'b1);
-        end
+        input [BANK_ADDR_W -1:0] bank;
+        input [  OP_ADDR_W -1:0] addr;
+        input [ WORD_EXT_W -1:0] dout_x;
+        input [ WORD_EXT_W -1:0] dout_y;
+        _update_narrow(bank, addr, dout_x, dout_y, 1'b1); // load the "narrow" output registers with the given values and valid = 1
     endtask
     
     task set_rdct;
-        input [ BANK_ADDR_W -1:0] bank;
-        input [ 7:0] addr;
-        input [17:0] dout_x;
-        input [17:0] dout_y;
-        begin
-            _update_rdct(bank, addr, dout_x, dout_y, 1'b1);
-        end
+        input [BANK_ADDR_W -1:0] bank;
+        input [  OP_ADDR_W -1:0] addr;
+        input [ WORD_EXT_W -1:0] dout_x;
+        input [ WORD_EXT_W -1:0] dout_y;
+        _update_rdct(bank, addr, dout_x, dout_y, 1'b1); // load the "rdct" output registers with the given values and valid = 1
     endtask
     
     task clear_wide;
-        begin
-            _update_wide(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
-        end
+        _update_wide(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0); // valid = 0; bank/addr/data get the *_DNC constants (presumably "do not care" values - confirm in the parameter header)
     endtask
 
     task clear_narrow;
-        begin
-            _update_narrow(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
-        end
+        _update_narrow(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0); // valid = 0; bank/addr/data get the *_DNC constants (presumably "do not care" values - confirm in the parameter header)
     endtask
 
     task clear_rdct;
-        begin
-            _update_rdct(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0);
-        end
+        _update_rdct(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0); // valid = 0; bank/addr/data get the *_DNC constants (presumably "do not care" values - confirm in the parameter header)
     endtask
     
     task _set_cnt_lsb;
-        input [7:0] cnt;
-        input       wrapped;
-        begin
-            cnt_lsb <= cnt;
-            cnt_lsb_wrapped <= wrapped;
-        end
+        input [OP_ADDR_W-1:0] cnt;     // new value for cnt_lsb
+        input                 wrapped; // new value for cnt_lsb_wrapped
+        {cnt_lsb, cnt_lsb_wrapped} <= {cnt, wrapped}; // counter and its wrap flag are loaded together in one non-blocking assignment
     endtask
     
     task _set_cnt_msb;
-        input [7:0] cnt;
-        input       wrapped;
-        begin
-            cnt_msb <= cnt;
-            cnt_msb_wrapped <= wrapped;
-        end
+        input [OP_ADDR_W-1:0] cnt;     // new value for cnt_msb
+        input                 wrapped; // new value for cnt_msb_wrapped
+        {cnt_msb, cnt_msb_wrapped} <= {cnt, wrapped}; // counter and its wrap flag are loaded together in one non-blocking assignment
     endtask    
 
     task inc_cnt_lsb;
-        begin
-            if (cnt_lsb == word_index_last)
-                _set_cnt_lsb(8'd0, 1'b1);
-            else
-                _set_cnt_lsb(cnt_lsb + 1'b1, cnt_lsb_wrapped);
-        end
+        if (cnt_lsb == word_index_last) _set_cnt_lsb(OP_ADDR_ZERO, 1'b1);              // at the last word index: restart from OP_ADDR_ZERO and set the wrap flag
+        else                            _set_cnt_lsb(cnt_lsb + 1'b1, cnt_lsb_wrapped); // otherwise: increment and keep the wrap flag as it is
     endtask
     
+    task inc_cnt_msb;
+        if (cnt_msb == word_index_last) _set_cnt_msb(OP_ADDR_ZERO, 1'b1);              // at the last word index: restart from OP_ADDR_ZERO and set the wrap flag
+        else                            _set_cnt_msb(cnt_msb + 1'b1, cnt_msb_wrapped); // otherwise: increment and keep the wrap flag as it is
+    endtask
+
     task inc_cnt_both;
         begin
             inc_cnt_lsb;
@@ -1040,31 +1012,18 @@ module modexpng_recombinator_block
         end
     endtask
     
-    task inc_cnt_msb;
-        begin
-            if (cnt_msb == word_index_last)
-                _set_cnt_msb(8'd0, 1'b1);
-            else
-                _set_cnt_msb(cnt_msb + 1'b1, cnt_msb_wrapped);
-        end
-    endtask
-    
     task clr_cnt_lsb;
-        begin
-            _set_cnt_lsb(8'd0, 1'b0);
-        end
+        _set_cnt_lsb(OP_ADDR_ZERO, 1'b0); // counter back to OP_ADDR_ZERO and wrap flag cleared
     endtask
     
     task clr_cnt_msb;
-        begin
-            _set_cnt_msb(8'd0, 1'b0);
-        end
+        _set_cnt_msb(OP_ADDR_ZERO, 1'b0); // counter back to OP_ADDR_ZERO and wrap flag cleared
     endtask
+       
     
-   
-
-    wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid}; 
-    
+    //
+    // Main Process
+    //
     always @(posedge clk)
         //
         if (ena) begin
@@ -1073,58 +1032,75 @@ module modexpng_recombinator_block
         end else if (!rdy)
             //
             case (rcmb_mode)
-                2'd1: recombine_square();
-                2'd2: recombine_triangle();
-                2'd3: recombine_rectangle();
+                RCMB_MODE_SQUARE:    recombine_square();
+                RCMB_MODE_TRIANGLE:  recombine_triangle();
+                RCMB_MODE_RECTANGLE: recombine_rectangle();
             endcase
            
-    wire [17:0] rcmb_x_lsb_dout_pad = {2'b00, rcmb_x_lsb_dout};
-    wire [17:0] rcmb_y_lsb_dout_pad = {2'b00, rcmb_y_lsb_dout};
+           
+    //
+    // Padding
+    //
+    wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_pad = {CARRY_ZERO, rcmb_x_lsb_dout};
+    wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_pad = {CARRY_ZERO, rcmb_y_lsb_dout};
+    
+    wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_ext_pad = {1'b0, rcmb_x_lsb_dout_ext};
+    wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_ext_pad = {1'b0, rcmb_y_lsb_dout_ext};
+
+    wire [WORD_EXT_W-1:0] rcmb_x_msb_dout_pad = {CARRY_ZERO, rcmb_x_msb_dout};
+    wire [WORD_EXT_W-1:0] rcmb_y_msb_dout_pad = {CARRY_ZERO, rcmb_y_msb_dout};
 
-    wire [17:0] rcmb_x_msb_dout_pad = {2'b00, rcmb_x_msb_dout};
-    wire [17:0] rcmb_y_msb_dout_pad = {2'b00, rcmb_y_msb_dout};
+    wire [WORD_EXT_W-1:0] rcmb_x_msb_delay_2_pad = {CARRY_ZERO, rcmb_xy_msb_delay_2[15: 0]};
+    wire [WORD_EXT_W-1:0] rcmb_y_msb_delay_2_pad = {CARRY_ZERO, rcmb_xy_msb_delay_2[31:16]};
     
-    wire [17:0] rcmb_x_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[15: 0]};
-    wire [17:0] rcmb_y_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[31:16]};
+    
+    //
+    // Handy Signal
+    //
+    wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid}; 
 
-    wire [17:0] rcmb_x_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_x_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[15: 0]}};
-    wire [17:0] rcmb_y_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_y_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[31:16]}};
-           
-           
+
+    //
+    // Recombination Task - Square
+    //
     task recombine_square;
         //
         begin
             //
             case (rcmb_xy_valid)
-                //
                 2'b01: inc_cnt_lsb; 
                 2'b10: inc_cnt_msb;
                 2'b11: inc_cnt_both;
-                //
             endcase            
             //
             case (rcmb_xy_valid)
                 //
-                2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
-                       else                       clear_wide;
+                2'b00: if (rcmb_msb_flag_delay_2)       set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
+                       else                             clear_wide;
                 //
-                2'b01:                            set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+                2'b01:                                  set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
                 //
-                2'b10: if (cnt_msb < 8'd2)        clear_wide;                        
-                       else                       set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
+                2'b10: if (cnt_msb < OP_ADDR_TWO)       clear_wide;                        
+                       else                             set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
                 //
-                2'b11: if (cnt_lsb_wrapped)       set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); 
-                       else                       set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad,        rcmb_y_lsb_dout_pad);
+                2'b11: if (!cnt_lsb_wrapped)            set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+                       else begin
+                           if (cnt_lsb == OP_ADDR_ZERO) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+                           else                         set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_ext_pad, rcmb_y_lsb_dout_ext_pad);
+                       end
                 //
             endcase            
             //
             case (rcmb_xy_valid)
                 //
-                2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
-                2'b10: if (cnt_msb < 8'd2)        shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
+                2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
+                //
+                2'b01: if (rcmb_xy_lsb_cry)       shift_rcmb_msb_carry(WORD_DNC, WORD_DNC);
+                //
+                2'b10: if (cnt_msb < OP_ADDR_TWO) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
                 //
                 2'b11: begin                      advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
-                       if (cnt_lsb_wrapped)       shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}});
+                       if (rcmb_xy_lsb_cry)       shift_rcmb_msb_carry(WORD_DNC, WORD_DNC);
                        end
                 //
             endcase
@@ -1134,23 +1110,27 @@ module modexpng_recombinator_block
     endtask
     
     
+    //
+    // Recombination Task - Triangle
+    //
     task recombine_triangle;
         //
         begin
             //
             case (rcmb_xy_valid)
-                //
                 2'b01: inc_cnt_lsb(); 
-               //
             endcase            
             //
             case (rcmb_xy_valid)
                 //
-                2'b00:                        clear_narrow;
-                2'b01:  if (!cnt_lsb_wrapped) set_narrow(BANK_NARROW_Q,   cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); 
-                        else                  set_narrow(BANK_NARROW_EXT, 8'd1,    rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
-                2'b10:                        clear_narrow;
-                2'b11:                        clear_narrow;
+                2'b00:                       clear_narrow;
+                //
+                2'b01: if (!cnt_lsb_wrapped) set_narrow(BANK_NARROW_Q,   cnt_lsb,       rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); 
+                       else                  set_narrow(BANK_NARROW_EXT, OP_ADDR_EXT_Q, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+                //
+                2'b10:                       clear_narrow;
+                //
+                2'b11:                       clear_narrow;
                 //
             endcase
             //        
@@ -1159,44 +1139,49 @@ module modexpng_recombinator_block
     endtask
 
 
+    //
+    // Recombination Task - Rectangle
+    //
     task recombine_rectangle;
         //
         begin
             //
             case (rcmb_xy_valid)
-                //
-                2'b01:  inc_cnt_lsb; 
-                2'b10:  inc_cnt_msb;
-                2'b11:  inc_cnt_both;
-                //
+                2'b01: inc_cnt_lsb; 
+                2'b10: inc_cnt_msb;
+                2'b11: inc_cnt_both;
             endcase
-//            //
+            //
             case (rcmb_xy_valid)
-//                //
+                //
                 2'b00:  if (rcmb_msb_flag_delay_2)  set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
                         else                        clear_rdct;
+                //
                 2'b01:                              set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); 
-                2'b10:  if (!cnt_msb_wrapped) begin 
-                            if (cnt_msb < 8'd2)     clear_rdct;                        
-                            else                    set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
-                        end else                    set_rdct(BANK_RCMB_EXT, 8'd0, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
-                            
-                2'b11:  set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); 
-//                //
+                //
+                2'b10: if (!cnt_msb_wrapped) begin 
+                       if (cnt_msb < OP_ADDR_TWO)   clear_rdct;                        
+                       else                         set_rdct(BANK_RCMB_MH,  cnt_msb,      rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
+                       end else                     set_rdct(BANK_RCMB_EXT, OP_ADDR_ZERO, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
+                //          
+                2'b11: if (cnt_lsb == OP_ADDR_ZERO) set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_pad,     rcmb_y_lsb_dout_pad);
+                       else                         set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_ext_pad, rcmb_y_lsb_dout_ext_pad);
+                //
             endcase            
-//            //
+            //
             case (rcmb_xy_valid)
-//                //
-                2'b00:  if (rcmb_msb_flag_delay_2)  advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
-                2'b10:  begin 
-                            if ((cnt_msb < 8'd2) && !cnt_msb_wrapped) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
-                            if (cnt_msb_wrapped) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
-                        end
-//                //
-                2'b11:  begin  advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
-                                   shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}});
-                        end
-//                //
+                //
+                2'b00: if (rcmb_msb_flag_delay_2)                       advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
+                //
+                2'b01: if (rcmb_xy_lsb_cry)                             shift_rcmb_msb_carry(WORD_DNC, WORD_DNC);
+                //
+                2'b10: begin 
+                       if ((cnt_msb < OP_ADDR_TWO) && !cnt_msb_wrapped) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
+                       if (cnt_msb_wrapped)                             advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
+                       end
+                //
+                2'b11:                                                  advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
+                //
             endcase
             //
         end
@@ -1204,50 +1189,19 @@ module modexpng_recombinator_block
     endtask
     
     
+    //
+    // Completion Logic
+    //
     always @(posedge clk)
         //
-        if (ena) begin
-            rdy_adv <= 1'b0;
-        end else if (!rdy_reg) begin
+        if (ena) rdy_adv <= 1'b0;
+        else if (!rdy_reg)
             //
             case (rcmb_mode)
-                //
-                2'd1:   case (rcmb_xy_valid)
-                            //
-                            2'b00: begin
-                                //
-                                if (rcmb_msb_flag_delay_2) begin
-                                    //
-                                    rdy_adv <= ~rcmb_msb_flag_delay_1;
-                                    //
-                                end
-                                //
-                            end
-                            //
-                        endcase
-                //
-                2'd2:   case (rcmb_xy_valid)
-                            //
-                            2'b01: rdy_adv <= cnt_lsb_wrapped;                                //
-                            //
-                        endcase
-                //
-                2'd3: case (rcmb_xy_valid)
-                                            //
-                                            2'b00: begin
-                                                //
-                                                if (rcmb_msb_flag_delay_2) begin
-                                                    //
-                                                    rdy_adv <= ~rcmb_msb_flag_delay_1;
-                                                    //
-                                                end
-                                                //
-                                            end
-                                            //
-                                        endcase
-                //
+                RCMB_MODE_SQUARE:    case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_delay_2) rdy_adv <= ~rcmb_msb_flag_delay_1; endcase
+                RCMB_MODE_TRIANGLE:  case (rcmb_xy_valid) 2'b01:                            rdy_adv <= cnt_lsb_wrapped;        endcase
+                RCMB_MODE_RECTANGLE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_delay_2) rdy_adv <= ~rcmb_msb_flag_delay_1; endcase
             endcase
-            //        
-        end
-        
+
+
 endmodule
diff --git a/rtl/modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v
index ef0ca2d..9761d9c 100644
--- a/rtl/modexpng_recombinator_cell.v
+++ b/rtl/modexpng_recombinator_cell.v
@@ -33,14 +33,19 @@
 module modexpng_recombinator_cell
 (
     clk,
-    ce, clr,
-    din, dout
+    ce, clr, cry,
+    cin,
+    din, dout, dout_ext
 );
 
+
     //
     // Headers
     //
-    `include "../rtl/modexpng_parameters.vh"
+    `include "modexpng_parameters.vh"
+    `include "modexpng_dsp48e1.vh"
+    `include "modexpng_dsp_slice_primitives.vh"
+
     
     //
     // Ports
@@ -48,25 +53,80 @@ module modexpng_recombinator_cell
     input                clk;
     input                ce;
     input                clr;
+    input                cry;
+    input  [WORD_W -1:0] cin;
     input  [ MAC_W -1:0] din;
     output [WORD_W -1:0] dout;
+    output [WORD_W   :0] dout_ext;
+    
+    
+    //
+    // din <=> {z[13:0], y[15:0], x[15:0]}
+    //
+    wire [WORD_W -3:0] din_z = din[3 * WORD_W -3 : 2 * WORD_W]; // [45:32]
+    wire [WORD_W -1:0] din_y = din[2 * WORD_W -1 :     WORD_W]; // [31:16]
+    wire [WORD_W -1:0] din_x = din[    WORD_W -1 :          0]; // [15: 0]
+    
+    
+    //
+    // Delayed Clock Enable
+    //
+    reg ce_dly = 1'b0;
+    always @(posedge clk) ce_dly <= ce;
+    
+    
+    //
+    // DSP Slice Buses
+    //
+    wire [DSP48E1_A_W-1:0] a_int;
+    wire [DSP48E1_B_W-1:0] b_int;
+    wire [DSP48E1_C_W-1:0] c_int;
+    wire [DSP48E1_P_W-1:0] p_int;
 
-    reg [WORD_W -2:0] z;
-    reg [WORD_W   :0] y;
-    reg [WORD_W +1:0] x;
-
-    assign dout = x[WORD_W-1:0];
+    assign {a_int, b_int} = {{(DSP48E1_C_W-WORD_W){1'b0}}, cin};
+    assign {c_int}        = {din_z, 1'b0, din_y, 1'b1, din_x};
+    
     
-    wire [WORD_W -2:0] din_z = din[3*WORD_W -2 :2*WORD_W];  // [46:32]
-    wire [WORD_W -1:0] din_y = din[2*WORD_W -1 :  WORD_W];  // [31:16]
-    wire [WORD_W -1:0] din_x = din[  WORD_W -1 :       0];  // [15: 0]
+    //
+    // Combinational OPMODE Switch
+    //
+    reg [DSP48E1_OPMODE_W-1:0] opmode;
     
-    always @(posedge clk)
+    always @(clr, cry)
         //
-        if (ce) begin
-            z <= din_z;
-            y <= clr ? {1'b0,  din_y} : {1'b0,  din_y} + {2'b00, z};
-            x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0,  y} + {WORD_ZERO, x[WORD_EXT_W-1:WORD_W]};        
-        end
+        casez ({clr, cry})  // clr has priority over cry!
+            2'b1?: opmode = DSP48E1_OPMODE_Z0_YC_X0;
+            2'b00: opmode = DSP48E1_OPMODE_ZP17_YC_X0;
+            2'b01: opmode = DSP48E1_OPMODE_ZP17_YC_XAB;
+        endcase
+
+
+    //
+    // DSP Slice Instance
+    //
+    `MODEXPNG_DSP_SLICE_ADDSUB dsp_inst
+    (
+        .clk          (clk),
+        .ce_abc       (ce),
+        .ce_p         (ce_dly),
+        .ce_ctrl      (ce),
+        .x            ({a_int, b_int}),
+        .y            (c_int),
+        .p            (p_int),
+        .op_mode      (opmode),
+        .alu_mode     (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN),
+        .carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN),
+        .casc_p_in    (),
+        .casc_p_out   (),
+        .carry_out    ()
+    );
+
+
+    //
+    // Output Mapping
+    //
+    assign dout     = {p_int[WORD_W-1:0]};
+    assign dout_ext = {p_int[WORD_W+1], dout};
+    
     
 endmodule



More information about the Commits mailing list