[Cryptech-Commits] [core/math/modexpng] 25/92: Further work on the Montgomery modular multiplier. Can now do the "triangular" part of multiplication, i.e. compute the "magic" reduction coefficient Q = LSB(AB) * N_COEFF.

git at cryptech.is git at cryptech.is
Sat Mar 14 18:19:04 UTC 2020


This is an automated email from the git hooks/post-receive script.

paul at psgd.org pushed a commit to branch master
in repository core/math/modexpng.

commit 9e9689d7b00ecdcc1c651f5e369e00a53d62df3c
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Tue Oct 1 15:05:11 2019 +0300

    Further work on the Montgomery modular multiplier. Can now do the "triangular"
    part of multiplication, i.e. compute the "magic" reduction coefficient
    Q = LSB(AB) * N_COEFF.
---
 bench/tb_square.v                | 391 ++++++++++++++++++++++++++-------
 rtl/dsp/dsp_array.v              |  42 +++-
 rtl/modexpng_mmm_fsm.vh          |  10 +
 rtl/modexpng_part_recombinator.v | 455 ++++++++++++++++++++++++++++-----------
 4 files changed, 691 insertions(+), 207 deletions(-)

diff --git a/bench/tb_square.v b/bench/tb_square.v
index 61e5d8a..23831db 100644
--- a/bench/tb_square.v
+++ b/bench/tb_square.v
@@ -39,6 +39,8 @@ module tb_square;
     reg [17:0] T1[0:31];
     reg [17:0] T2[0:31];
     reg [17:0] AB[0:63];
+    reg [17:0] N_COEFF[0:32];
+    reg [17:0] Q[0:32];
 
 
     //
@@ -81,6 +83,26 @@ module tb_square;
         AB[56] = 18'h0e1c0; AB[57] = 18'h00989; AB[58] = 18'h01201; AB[59] = 18'h0e194;
         AB[60] = 18'h07f93; AB[61] = 18'h0e739; AB[62] = 18'h07cf6; AB[63] = 18'h019df;
         //
+        N_COEFF[ 0] = 18'h05a97; N_COEFF[ 1] = 18'h0ac69; N_COEFF[ 2] = 18'h0d51e; N_COEFF[ 3] = 18'h07326;
+        N_COEFF[ 4] = 18'h01053; N_COEFF[ 5] = 18'h0f68a; N_COEFF[ 6] = 18'h09c70; N_COEFF[ 7] = 18'h064f7;
+        N_COEFF[ 8] = 18'h01041; N_COEFF[ 9] = 18'h0c2bf; N_COEFF[10] = 18'h0f01f; N_COEFF[11] = 18'h01842;
+        N_COEFF[12] = 18'h0e69a; N_COEFF[13] = 18'h037ea; N_COEFF[14] = 18'h0b4a0; N_COEFF[15] = 18'h0c1ab;
+        N_COEFF[16] = 18'h0bd5b; N_COEFF[17] = 18'h09e5e; N_COEFF[18] = 18'h039bd; N_COEFF[19] = 18'h06430;
+        N_COEFF[20] = 18'h0b460; N_COEFF[21] = 18'h08bd4; N_COEFF[22] = 18'h09fcd; N_COEFF[23] = 18'h05391;
+        N_COEFF[24] = 18'h0fa45; N_COEFF[25] = 18'h08892; N_COEFF[26] = 18'h0732c; N_COEFF[27] = 18'h0baf6;
+        N_COEFF[28] = 18'h067a9; N_COEFF[29] = 18'h0b184; N_COEFF[30] = 18'h02089; N_COEFF[31] = 18'h0297b;
+        N_COEFF[32] = 18'h01810;
+        //
+        Q[ 0] = 18'h0ac02; Q[ 1] = 18'h0a026; Q[ 2] = 18'h06825; Q[ 3] = 18'h08f06;
+        Q[ 4] = 18'h03783; Q[ 5] = 18'h04cb5; Q[ 6] = 18'h0e8ea; Q[ 7] = 18'h083d2;
+        Q[ 8] = 18'h0fec9; Q[ 9] = 18'h066d9; Q[10] = 18'h0edad; Q[11] = 18'h06c12;
+        Q[12] = 18'h0a5fb; Q[13] = 18'h07295; Q[14] = 18'h06a0c; Q[15] = 18'h081a5;
+        Q[16] = 18'h03493; Q[17] = 18'h0a393; Q[18] = 18'h03da6; Q[19] = 18'h0beb1;
+        Q[20] = 18'h0d138; Q[21] = 18'h02815; Q[22] = 18'h0f191; Q[23] = 18'h03617;
+        Q[24] = 18'h08d4f; Q[25] = 18'h0f641; Q[26] = 18'h00e82; Q[27] = 18'h01774;
+        Q[28] = 18'h0bf39; Q[29] = 18'h0929d; Q[30] = 18'h05273; Q[31] = 18'h0c30a;
+        Q[32] = 18'h0eef3;
+        //
     end
     
 
@@ -102,9 +124,10 @@ module tb_square;
     reg         mac_fat_bram_xy_ena = 1'b0;
     reg         mac_fat_bram_xy_reg_ena = 1'b0;
     reg  [ 2:0] mac_fat_bram_xy_bank;
-    reg  [ 7:0] mac_fat_bram_xy_addr[0:3];
-    wire [17:0] mac_fat_bram_x_dout[0:3];
-    wire [17:0] mac_fat_bram_y_dout[0:3];
+    reg  [ 2:0] mac_fat_bram_xy_bank_aux;
+    reg  [ 7:0] mac_fat_bram_xy_addr[0:4];
+    wire [17:0] mac_fat_bram_x_dout[0:4];
+    wire [17:0] mac_fat_bram_y_dout[0:4];
     
     reg        tb_slim_bram_xy_ena = 1'b0;
     reg [ 1:0] tb_slim_bram_xy_bank;
@@ -124,14 +147,14 @@ module tb_square;
         //
         mac_slim_bram_xy_addr_dly <= mac_slim_bram_xy_addr;
     
-    reg         mac_slim_bram_xy_reg_ena_dly = 1'b0;
+    reg mac_slim_bram_xy_reg_ena_dly = 1'b0;
     always @(posedge clk)
         mac_slim_bram_xy_reg_ena_dly <= mac_slim_bram_xy_reg_ena;
     
     
     
     genvar z;
-    generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+    generate for (z=0; z<((NUM_MULTS/2)+1); z=z+1)
         begin : gen_fat_bram
             //
             ip_bram_36k fat_bram_x
@@ -145,7 +168,8 @@ module tb_square;
                 .clkb   (clk),
                 .enb    (mac_fat_bram_xy_ena),
                 .regceb (mac_fat_bram_xy_reg_ena),
-                .addrb  ({mac_fat_bram_xy_bank, mac_fat_bram_xy_addr[z]}),
+                .addrb  ({(z < (NUM_MULTS/2) ?
+                    mac_fat_bram_xy_bank : mac_fat_bram_xy_bank_aux), mac_fat_bram_xy_addr[z]}),
                 .doutb  (mac_fat_bram_x_dout[z])
             );
             //
@@ -160,7 +184,8 @@ module tb_square;
                 .clkb   (clk),
                 .enb    (mac_fat_bram_xy_ena),
                 .regceb (mac_fat_bram_xy_reg_ena),
-                .addrb  ({mac_fat_bram_xy_bank, mac_fat_bram_xy_addr[z]}),
+                .addrb  ({z < (NUM_MULTS/2) ?
+                    mac_fat_bram_xy_bank : mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_addr[z]}),
                 .doutb  (mac_fat_bram_y_dout[z])
             );
             //
@@ -232,6 +257,23 @@ module tb_square;
             wait_clock_tick;
         end
 
+        for (i=0; i<32; i=i+1) begin
+            tb_slim_bram_xy_bank = BANK_SLIM_N_COEFF;
+            tb_slim_bram_xy_addr = i[7:0];
+            tb_slim_bram_x_din = N_COEFF[i];
+            tb_slim_bram_y_din = N_COEFF[i];
+            
+            wait_clock_tick;
+        end
+        for (i=32; i<33; i=i+1) begin
+            tb_slim_bram_xy_bank = BANK_SLIM_N_COEFF_EXT;
+            tb_slim_bram_xy_addr = 0;
+            tb_slim_bram_x_din = N_COEFF[i];
+            tb_slim_bram_y_din = N_COEFF[i];
+            
+            wait_clock_tick;
+        end
+
         tb_fat_bram_xy_ena = 1'b0;        
         tb_slim_bram_xy_ena = 1'b0;
         
@@ -256,6 +298,7 @@ module tb_square;
             wait_clock_tick;
             
         verify_ab;
+        verify_q;
 
     end
 
@@ -270,11 +313,11 @@ module tb_square;
     reg             dsp_x_ce_p;
     reg             dsp_x_ce_mode;
     
-    reg  [8   -1:0] dsp_x_mode_z = {8{1'b1}};
+    reg  [9   -1:0] dsp_x_mode_z = {9{1'b1}};
     
-    wire [4*18-1:0] dsp_x_a;
+    wire [5*18-1:0] dsp_x_a;
     reg  [1*17-1:0] dsp_x_b;
-    wire [8*47-1:0] dsp_x_p;
+    wire [9*47-1:0] dsp_x_p;
 
     reg             dsp_y_ce_a;
     reg             dsp_y_ce_b;
@@ -283,13 +326,13 @@ module tb_square;
     reg             dsp_y_ce_p;
     reg             dsp_y_ce_mode;
     
-    reg  [8   -1:0] dsp_y_mode_z = {8{1'b1}};
+    reg  [9   -1:0] dsp_y_mode_z = {9{1'b1}};
         
-    wire [4*18-1:0] dsp_y_a;
+    wire [5*18-1:0] dsp_y_a;
     reg  [1*17-1:0] dsp_y_b;
-    wire [8*47-1:0] dsp_y_p;
+    wire [9*47-1:0] dsp_y_p;
         
-    generate for (z=0; z<(NUM_MULTS/2); z=z+1)
+    generate for (z=0; z<((NUM_MULTS/2)+1); z=z+1)
         begin : gen_dsp_xy_a_split
             assign dsp_x_a[18*z+:18] = mac_fat_bram_x_dout[z];
             assign dsp_y_a[18*z+:18] = mac_fat_bram_y_dout[z];
@@ -301,10 +344,10 @@ module tb_square;
         {dsp_y_ce_b_dly, dsp_x_ce_b_dly} <= {dsp_y_ce_b, dsp_x_ce_b};
     
 
-    reg  [8   -1:0] dsp_xy_mode_z_adv1 = {8{1'b1}};
-    reg  [8   -1:0] dsp_xy_mode_z_adv2 = {8{1'b1}};
-    reg  [8   -1:0] dsp_xy_mode_z_adv3 = {8{1'b1}};
-    reg  [8   -1:0] dsp_xy_mode_z_adv4 = {8{1'b1}};
+    reg  [9   -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}};
+    reg  [9   -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}};
+    reg  [9   -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}};
+    reg  [9   -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}};
     
     dsp_array dsp_x
     (
@@ -355,16 +398,47 @@ module tb_square;
 
 
     localparam [7:0] index_last = 8'd31;
+    localparam [7:0] index_last_minus1 = index_last - 1'b1;
+
+
+    //
+    // Column
+    //
+    reg  [4:0] col_index;       // current column index
+    reg  [4:0] col_index_prev;  // delayed column index value
+    reg  [4:0] col_index_last;  // index of the very last column
+    reg  [4:0] col_index_next1;  // precomputed next column index
+    //reg  [4:0] col_index_next2;  // precomputed next column index after next column index
+    reg        col_is_last;     // flag set during the very last column
+
+    always @(posedge clk)
+        //
+        col_index_prev <= col_index;
     
 
     wire mult_square_addr_almost_done_comb;
     reg  mult_square_addr_almost_done_flop;
+        
+    //wire mult_square_addr_surely_done_comb;
+    reg  mult_square_addr_surely_done_flop; 
+
+    reg  mult_triangle_addr_almost_done_comb;
+    reg  mult_triangle_addr_almost_done_flop;
+        
+    //wire mult_triangle_addr_surely_done_comb;
+    reg  mult_triangle_addr_surely_done_flop;
+    reg  mult_triangle_addr_tardy_done_flop;
     
-    wire mult_square_addr_surely_done_comb;
-    reg  mult_square_addr_surely_done_flop;
+    assign mult_square_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1;
+
+    always @*
+        //
+        //if (!col_is_last)
+            mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last_minus1[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
+        //else
+            //mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
+            
     
-    assign mult_square_addr_almost_done_comb = mac_slim_bram_xy_addr == (index_last - 8'd1);
-    assign mult_square_addr_surely_done_comb = mac_slim_bram_xy_addr == index_last;
     
     always @(posedge clk)
         //
@@ -372,60 +446,130 @@ module tb_square;
         
             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
             FSM_STATE_MULT_SQUARE_COL_N_BUSY:
-                {mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <= 
-                {mult_square_addr_surely_done_comb, mult_square_addr_almost_done_comb};
-                
+                mult_square_addr_almost_done_flop <= mult_square_addr_almost_done_comb;
+                //{mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <= 
+                //{mult_square_addr_surely_done_comb, mult_square_addr_almost_done_comb};
             default:
-                {mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <= 2'b00;
+               mult_square_addr_almost_done_flop <= 1'b0;
+                //{mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <= 2'b00;
             
         endcase
 
-
-    //
-    // Column
-    //
-    reg  [4:0] col_index;
-    reg  [4:0] col_index_prev;
-    reg  [4:0] col_index_last;
+    always @(posedge clk)
+        //
+        mult_square_addr_surely_done_flop <= mult_square_addr_almost_done_flop;
 
     always @(posedge clk)
         //
-        col_index_prev <= col_index;
+        case (fsm_state)
+        
+            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
+                mult_triangle_addr_almost_done_flop <= mult_triangle_addr_almost_done_comb;
+                //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <= 
+                //{mult_triangle_addr_surely_done_comb, mult_triangle_addr_almost_done_comb};
+                
+            default:
+                mult_triangle_addr_almost_done_flop <= 1'b0;
+                //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <= 2'b00;
+            
+        endcase
+
+    always @(posedge clk) begin
+        //
+        mult_triangle_addr_surely_done_flop <= mult_triangle_addr_almost_done_flop;
+        mult_triangle_addr_tardy_done_flop  <= mult_triangle_addr_surely_done_flop;
+        //
+    end
+        
+
 
     //
     // FSM Transition Logic
     //
     wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
+    wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
     
     
-
     always @(posedge clk)
         //
         case (fsm_state_next)
+            //
             FSM_STATE_MULT_SQUARE_COL_0_INIT,
             FSM_STATE_MULT_SQUARE_COL_N_INIT:   mac_slim_bram_xy_addr <= 8'd0;
             FSM_STATE_MULT_SQUARE_COL_0_TRIG,
             FSM_STATE_MULT_SQUARE_COL_N_TRIG,
             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
             FSM_STATE_MULT_SQUARE_COL_N_BUSY:   mac_slim_bram_xy_addr <= !mult_square_addr_almost_done_flop ? mac_slim_bram_xy_addr + 1'b1 : 8'd0;
+            //
+            FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+            FSM_STATE_MULT_TRIANGLE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0;
+            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_addr <= mult_triangle_addr_almost_done_flop || (col_is_last && mult_triangle_addr_surely_done_flop) ?
+                8'd0 :  mac_slim_bram_xy_addr + 1'b1;
+            //
             default:                            mac_slim_bram_xy_addr <= 8'dX;
         endcase
 
+
+    wire [2:0] fat_bram_offset_rom[0:3];
+    
+    generate for (z=1; z<NUM_MULTS; z=z+2)
+        begin : gen_fat_bram_offset
+            assign fat_bram_offset_rom[(z-1)/2] = z[2:0];
+        end
+    endgenerate    
+        
     integer j;
-    always @(posedge clk)
+    always @(posedge clk) begin
         //
         for (j=0; j<(NUM_MULTS/2); j=j+1)
+            //
             case (fsm_state_next)
-                FSM_STATE_MULT_SQUARE_COL_0_INIT:   mac_fat_bram_xy_addr[j] <= 1 + 2 * j;
-                FSM_STATE_MULT_SQUARE_COL_N_INIT:   mac_fat_bram_xy_addr[j] <= 8 * (col_index + 1) + 1 + 2 * j;
+                //
+                // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
+                //
+                FSM_STATE_MULT_SQUARE_COL_0_INIT:   mac_fat_bram_xy_addr[j] <= {5'd0, fat_bram_offset_rom[j]};
+                FSM_STATE_MULT_SQUARE_COL_N_INIT:   mac_fat_bram_xy_addr[j] <= {col_index_next1, fat_bram_offset_rom[j]};
                 FSM_STATE_MULT_SQUARE_COL_0_TRIG,
                 FSM_STATE_MULT_SQUARE_COL_N_TRIG,
                 FSM_STATE_MULT_SQUARE_COL_0_BUSY,
                 FSM_STATE_MULT_SQUARE_COL_N_BUSY:   mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
+                //
+                FSM_STATE_MULT_TRIANGLE_COL_0_INIT:   mac_fat_bram_xy_addr[j] <= {5'd0, fat_bram_offset_rom[j]};
+                FSM_STATE_MULT_TRIANGLE_COL_N_INIT:   mac_fat_bram_xy_addr[j] <= {col_index_next1, fat_bram_offset_rom[j]};
+                FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+                FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+                FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+                FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);                
+                //
                 default:                            mac_fat_bram_xy_addr[j] <= 8'dX;
             endcase
-
-
+            //
+        case (fsm_state_next)
+            //
+            // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
+            //
+            FSM_STATE_MULT_SQUARE_COL_0_INIT:   mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+            FSM_STATE_MULT_SQUARE_COL_N_INIT:   mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+            FSM_STATE_MULT_SQUARE_COL_0_TRIG,
+            FSM_STATE_MULT_SQUARE_COL_N_TRIG,
+            FSM_STATE_MULT_SQUARE_COL_0_BUSY,
+            FSM_STATE_MULT_SQUARE_COL_N_BUSY:   mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
+            //
+            FSM_STATE_MULT_TRIANGLE_COL_0_INIT:   mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+            FSM_STATE_MULT_TRIANGLE_COL_N_INIT:   mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);                
+            //
+            default:                            mac_fat_bram_xy_addr[4] <= 8'dX;
+        endcase
+//
+    end
 
     always @(posedge clk)
         //
@@ -436,6 +580,13 @@ module tb_square;
             FSM_STATE_MULT_SQUARE_COL_N_TRIG,
             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
             FSM_STATE_MULT_SQUARE_COL_N_BUSY:   mac_slim_bram_xy_bank <= BANK_SLIM_T1T2;
+            FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+            FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_bank <= col_is_last && (mult_triangle_addr_almost_done_flop || mult_triangle_addr_surely_done_flop) ?
+                BANK_SLIM_N_COEFF_EXT : BANK_SLIM_N_COEFF;  
             default:                            mac_slim_bram_xy_bank <= 2'bXX;
         endcase
 
@@ -447,8 +598,14 @@ module tb_square;
             FSM_STATE_MULT_SQUARE_COL_0_TRIG,
             FSM_STATE_MULT_SQUARE_COL_N_TRIG,
             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-            FSM_STATE_MULT_SQUARE_COL_N_BUSY:   mac_fat_bram_xy_bank <= BANK_FAT_T1T2;
-            default:                            mac_fat_bram_xy_bank <= 3'bXXX;
+            FSM_STATE_MULT_SQUARE_COL_N_BUSY:   {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_T1T2}};
+            FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+            FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {BANK_FAT_ABH, BANK_FAT_ABL};
+            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_ABL}};
+            default:                            {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{3'bXXX}};
         endcase
 
 
@@ -462,7 +619,13 @@ module tb_square;
             FSM_STATE_MULT_SQUARE_COL_N_TRIG:   mac_slim_bram_xy_ena <= 1'b1;
             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
             FSM_STATE_MULT_SQUARE_COL_N_BUSY:   mac_slim_bram_xy_ena <= ~mult_square_addr_almost_done_flop;
-            default:                            mac_slim_bram_xy_ena <= 1'b0;
+            FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+            FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:   mac_slim_bram_xy_ena <= 1'b1;
+            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   mac_slim_bram_xy_ena <= !col_is_last ? ~mult_triangle_addr_almost_done_flop : ~mult_triangle_addr_surely_done_flop; 
+            default:                              mac_slim_bram_xy_ena <= 1'b0;
         endcase
 
     always @(posedge clk)
@@ -473,7 +636,13 @@ module tb_square;
             FSM_STATE_MULT_SQUARE_COL_0_TRIG,
             FSM_STATE_MULT_SQUARE_COL_N_TRIG,
             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-            FSM_STATE_MULT_SQUARE_COL_N_BUSY:   mac_fat_bram_xy_ena <= 1'b1;
+            FSM_STATE_MULT_SQUARE_COL_N_BUSY,
+            FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+            FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   mac_fat_bram_xy_ena <= 1'b1;
             default:                            mac_fat_bram_xy_ena <= 1'b0;
         endcase
 
@@ -486,7 +655,7 @@ module tb_square;
         //
         mac_fat_bram_xy_reg_ena <= mac_fat_bram_xy_ena;
             
-        
+
     always @(posedge clk)
         //
         if (mac_slim_bram_xy_reg_ena_dly)
@@ -499,7 +668,7 @@ module tb_square;
         input [7:0] mac_fat_bram_xy_addr_current;
         input [7:0] mac_fat_bram_xy_addr_last;
         begin
-            if (mac_fat_bram_xy_addr_current > 0)
+            if (mac_fat_bram_xy_addr_current > 8'd0)
                 mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_current - 1'b1;
             else
                 mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_last;
@@ -541,26 +710,41 @@ module tb_square;
         //
         case (fsm_state_next)
             //
-            FSM_STATE_MULT_SQUARE_COL_0_INIT: begin
-                col_index      <= 5'd0;
-                col_index_last <= index_last[7:3];
+            FSM_STATE_MULT_SQUARE_COL_0_INIT,
+            FSM_STATE_MULT_TRIANGLE_COL_0_INIT: begin
+                col_index       <= 5'd0;
+                col_index_last  <= index_last[7:3];
+                col_index_next1 <= 5'd1;
+                //col_index_next2 <= 5'd2;
+                col_is_last     <= 1'b0;
+                
             end
             //
-            FSM_STATE_MULT_SQUARE_COL_N_INIT:
-                col_index <= col_index + 1'b1;
+            FSM_STATE_MULT_SQUARE_COL_N_INIT,
+            FSM_STATE_MULT_TRIANGLE_COL_N_INIT: begin
+                col_index <= col_index_next1;
+                col_is_last <= col_index_next1 == col_index_last;
+                col_index_next1 <= col_index_next1 == col_index_last ? 5'd0 : col_index_next1 + 5'd1;   
+                //col_index_next2 <= col_index_next2 + 1'b1;
+            end
             //
         endcase
     
-    assign  fsm_state_after_mult_square = (col_index == col_index_last) ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
+    assign  fsm_state_after_mult_square   = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF   : FSM_STATE_MULT_SQUARE_COL_N_INIT;
+    assign  fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
     
     always @(posedge clk)
         //
         case (fsm_state_next)
             FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-            FSM_STATE_MULT_SQUARE_COL_N_TRIG:   dsp_xy_mode_z_adv4 <= {8{1'b0}};
+            FSM_STATE_MULT_SQUARE_COL_N_TRIG:       dsp_xy_mode_z_adv4 <= {9{1'b0}};
             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-            FSM_STATE_MULT_SQUARE_COL_N_BUSY:   dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly);
-            default:                            dsp_xy_mode_z_adv4 <= {8{1'b1}};
+            FSM_STATE_MULT_SQUARE_COL_N_BUSY:       dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly);
+            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}};
+            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     dsp_xy_mode_z_adv4 <= {9{1'b1}};
+            default:                                dsp_xy_mode_z_adv4 <= {9{1'b1}};
         endcase
 
     always @(posedge clk) begin
@@ -571,25 +755,45 @@ module tb_square;
         dsp_xy_mode_z_adv3 <= {dsp_xy_mode_z_adv4};
     end
     
-    function  [NUM_MULTS-1:0] calc_mac_mode_z_square;
-        input [          4:0] col_index_value;
-        input [          7:0] mac_slim_bram_xy_addr_value;
+    function  [NUM_MULTS:0] calc_mac_mode_z_square;
+        input [        4:0] col_index_value;
+        input [        7:0] mac_slim_bram_xy_addr_value;
         begin
             if (mac_slim_bram_xy_addr_value[7:3] == col_index_value)
                 case (mac_slim_bram_xy_addr_value[2:0])
-                    3'b000: calc_mac_mode_z_square = 8'b11111110;
-                    3'b001: calc_mac_mode_z_square = 8'b11111101;
-                    3'b010: calc_mac_mode_z_square = 8'b11111011;
-                    3'b011: calc_mac_mode_z_square = 8'b11110111;
-                    3'b100: calc_mac_mode_z_square = 8'b11101111;
-                    3'b101: calc_mac_mode_z_square = 8'b11011111;
-                    3'b110: calc_mac_mode_z_square = 8'b10111111;
-                    3'b111: calc_mac_mode_z_square = 8'b01111111;
+                    3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110};
+                    3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
+                    3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
+                    3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
+                    3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
+                    3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
+                    3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
+                    3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
                 endcase
             else
-                calc_mac_mode_z_square = {NUM_MULTS{1'b1}};
+                calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
         end
     endfunction
+    /*
+    function  [NUM_MULTS:0] calc_mac_mode_z_triangle;
+        input [        4:0] col_index_value;
+        input [        7:0] mac_slim_bram_xy_addr_value;
+        begin
+            if (mac_slim_bram_xy_addr_value[7:3] == col_index_value)
+                case (mac_slim_bram_xy_addr_value[2:0])
+                    3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110};
+                    3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
+                    3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
+                    3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
+                    3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
+                    3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
+                    3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
+                    3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
+                endcase
+            else
+                calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
+        end
+    endfunction*/
 
     reg recomb_x_ena = 1'b0;
     reg recomb_y_ena = 1'b0;
@@ -623,6 +827,7 @@ module tb_square;
         .col_index                  (col_index),
         .col_index_last             (col_index_last),
         .slim_bram_xy_addr          (mac_slim_bram_xy_addr),
+        .slim_bram_xy_bank          (mac_slim_bram_xy_bank),
         .fat_bram_xy_bank           (recomb_fat_bram_xy_bank),
         .fat_bram_xy_addr           (recomb_fat_bram_xy_addr),
         .fat_bram_x_dout            (recomb_fat_bram_x_dout),
@@ -631,14 +836,17 @@ module tb_square;
     );
     
     reg [17:0] AB_READ[0:63];
+    reg [17:0] Q_READ[0:32];
     
     always @(posedge clk)
         //
         if (recomb_fat_bram_xy_dout_valid)
             //
             case (recomb_fat_bram_xy_bank)
-                3'd1: AB_READ[recomb_fat_bram_xy_addr] <= recomb_fat_bram_x_dout;
-                3'd2: AB_READ[32 + recomb_fat_bram_xy_addr] <= recomb_fat_bram_x_dout;  
+                3'd1: AB_READ[     (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+                3'd2: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+                3'd3: Q_READ [     (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+                3'd4: Q_READ [32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
             endcase
             
 
@@ -663,11 +871,8 @@ module tb_square;
             mgr_fat_bram_x_din   <= {18{1'bX}};
             mgr_fat_bram_y_din   <= {18{1'bX}};
         end
-           
-  
-    
-    
-    
+
+
     task verify_ab;
         reg verify_ab_ok;
         begin
@@ -687,6 +892,28 @@ module tb_square;
     endtask
 
 
+    task verify_q;
+        reg verify_q_ok;
+        begin
+            verify_q_ok = 1;
+            for (i=0; i<33; i=i+1)
+                if (Q_READ[i] === Q[i])
+                    $display("Q / Q_READ [%02d] = 0x%05x / 0x%05x", i, Q[i], Q_READ[i]);
+                else begin
+                    $display("Q / Q_READ [%02d] = 0x%05x / 0x%05x <???>", i, Q[i], Q_READ[i]);
+                    verify_q_ok = 0;
+                end
+            if (verify_q_ok)
+                $display("Q is OK.");
+            else
+                $display("Q is WRONG!");
+        end
+    endtask
+
+
+    wire mult_square_addr_done = mult_square_addr_surely_done_flop;
+    
+    wire mult_triangle_addr_done = !col_is_last ? mult_triangle_addr_surely_done_flop : mult_triangle_addr_tardy_done_flop;
 
     always @* begin
         //
@@ -697,13 +924,23 @@ module tb_square;
                         
             FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next =                         FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
             FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next =                         FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
-            FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_surely_done_flop ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
+            FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_done ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
             
             FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next =                         FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
             FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next =                         FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
-            FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_surely_done_flop ? fsm_state_after_mult_square    : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
+            FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_done ? fsm_state_after_mult_square    : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
+            
+            FSM_STATE_MULT_SQUARE_HOLDOFF:    fsm_state_next =                         recomb_rdy ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT : FSM_STATE_MULT_SQUARE_HOLDOFF;
+
+            FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
+            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
+            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = mult_triangle_addr_done ? FSM_STATE_MULT_TRIANGLE_COL_N_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;     
+            
+            FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ;
+            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
+            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = mult_triangle_addr_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
             
-            FSM_STATE_MULT_SQUARE_HOLDOFF:    fsm_state_next =                         recomb_rdy ? FSM_STATE_IDLE : FSM_STATE_MULT_SQUARE_HOLDOFF;
+            FSM_STATE_MULT_TRIANGLE_HOLDOFF:    fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_HOLDOFF;//recomb_rdy ? FSM_STATE_IDLE : FSM_STATE_MULT_SQUARE_HOLDOFF;
             
             default:                          fsm_state_next =                         FSM_STATE_IDLE                   ;
 
diff --git a/rtl/dsp/dsp_array.v b/rtl/dsp/dsp_array.v
index 178f87f..2a050d4 100644
--- a/rtl/dsp/dsp_array.v
+++ b/rtl/dsp/dsp_array.v
@@ -8,11 +8,11 @@ module dsp_array
     input             ce_p,
     input             ce_mode,
 
-    input  [8   -1:0] mode_z,
+    input  [9   -1:0] mode_z,
     
-    input  [4*18-1:0] a,
+    input  [5*18-1:0] a,
     input  [1*17-1:0] b,
-    output [8*47-1:0] p
+    output [9*47-1:0] p
 );
 
     `include "../modexpng_parameters_x8.vh"
@@ -37,7 +37,7 @@ module dsp_array
     genvar z;
     generate for (z=0; z<(NUM_MULTS/2); z=z+1)
         //
-        begin : DSP48E1
+        begin : gen_DSP48E1
             //        
             dsp_slice #
             (
@@ -64,7 +64,7 @@ module dsp_array
                 .opmode         ({1'b0, mode_z[2*z], 1'b0, 2'b01, 2'b01}),
                 .alumode        (4'b0000),
                 
-                .casc_a_in      ({17{1'b0}}),
+                .casc_a_in      ({18{1'b0}}),
                 .casc_b_in      ({17{1'b0}}),
                 
                 .casc_a_out     (casc_a[z]),
@@ -107,5 +107,37 @@ module dsp_array
         //
     endgenerate
 
+    dsp_slice #                   // extra DSP slice instantiated after (outside of) the generate loop
+    (
+        .AB_INPUT("DIRECT"),
+        .B_REG(2)
+    )
+    dsp_aux
+    (
+        .clk            (clk),
+        
+        .ce_a1          (ce_a0),  // note the naming offset: slice port *1 is driven by ce_*0 ...
+        .ce_b1          (ce_b0),
+        .ce_a2          (ce_a1),  // ... and slice port *2 by ce_*1
+        .ce_b2          (ce_b1),
+        .ce_m           (ce_m),
+        .ce_p           (ce_p),
+        .ce_mode        (ce_mode),
+        
+        .a              (a[4*18+:18]),      // fifth 18-bit word of 'a' (bits 89:72)
+        .b              (b),                // the single 17-bit 'b' operand
+        .p              (p[47*2*4+:47]),    // ninth 47-bit product slot (index 8) of 'p'
+        
+        .inmode         (5'b00000),
+        .opmode         ({1'b0, mode_z[2*4], 1'b0, 2'b01, 2'b01}),  // only variable bit comes from mode_z[8]
+        .alumode        (4'b0000),
+        
+        .casc_a_in      ({18{1'b0}}),       // cascade inputs tied to zero
+        .casc_b_in      ({17{1'b0}}),
+        
+        .casc_a_out     (),                 // cascade outputs left unconnected
+        .casc_b_out     ()
+    );
+
 
 endmodule
diff --git a/rtl/modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_fsm.vh
index c237a0b..2700a42 100644
--- a/rtl/modexpng_mmm_fsm.vh
+++ b/rtl/modexpng_mmm_fsm.vh
@@ -19,6 +19,16 @@ localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_TRIG = 15;
 localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_COL_N_BUSY = 16;
     
 localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_SQUARE_HOLDOFF = 17;
+
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_INIT = 21;  // triangle phase, column 0: INIT -> TRIG -> BUSY
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_TRIG = 22;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_0_BUSY = 23;  // left when mult_triangle_addr_done is set
+
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_INIT = 24;  // triangle phase, COL_N states: INIT -> TRIG -> BUSY
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_TRIG = 25;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26;  // left when mult_triangle_addr_done is set
     
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27;     // next-state logic currently keeps the FSM in this state
+
 localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999;
     
\ No newline at end of file
diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v
index db4774b..c51e7ef 100644
--- a/rtl/modexpng_part_recombinator.v
+++ b/rtl/modexpng_part_recombinator.v
@@ -7,7 +7,8 @@ module modexpng_part_recombinator
     dsp_x_ce_p, dsp_y_ce_p,
     ena_x,   ena_y,
     dsp_x_p, dsp_y_p,
-    col_index, col_index_last, slim_bram_xy_addr,
+    col_index, col_index_last,
+    slim_bram_xy_addr, slim_bram_xy_bank,
     fat_bram_xy_bank, fat_bram_xy_addr, fat_bram_x_dout, fat_bram_y_dout, fat_bram_xy_dout_valid
 );
 
@@ -28,11 +29,12 @@ module modexpng_part_recombinator
     input                        dsp_y_ce_p;
     input                        ena_x;
     input                        ena_y;
-    input  [8*47-1:0] dsp_x_p;
-    input  [8*47-1:0] dsp_y_p;
+    input  [9*47-1:0] dsp_x_p;
+    input  [9*47-1:0] dsp_y_p;
     input  [     4:0] col_index;
     input  [     4:0] col_index_last;
     input  [     7:0] slim_bram_xy_addr;
+    input  [     1:0] slim_bram_xy_bank;
 
     output [     2:0] fat_bram_xy_bank;
     output [     7:0] fat_bram_xy_addr;
@@ -44,18 +46,18 @@ module modexpng_part_recombinator
     //
     // Latches
     //
-    reg  [1*47-1:0] dsp_x_p_latch[0:7];
-    reg  [1*47-1:0] dsp_y_p_latch[0:7];
+    reg  [1*47-1:0] dsp_x_p_latch[0:8];
+    reg  [1*47-1:0] dsp_y_p_latch[0:8];
 
 
     //
     // Mapping
     //
-    wire [46:0] dsp_x_p_split[0:7];
-    wire [46:0] dsp_y_p_split[0:7];
+    wire [46:0] dsp_x_p_split[0:8];
+    wire [46:0] dsp_y_p_split[0:8];
     
     genvar z;
-    generate for (z=0; z<NUM_MULTS; z=z+1)
+    generate for (z=0; z<(NUM_MULTS+1); z=z+1)
         begin : gen_dsp_xy_p_split
             assign dsp_x_p_split[z] = dsp_x_p[47*z+:47];
             assign dsp_y_p_split[z] = dsp_y_p[47*z+:47];
@@ -83,6 +85,8 @@ module modexpng_part_recombinator
     // valid
     reg       x_valid_lsb = 1'b0;
     reg       y_valid_lsb = 1'b0;
+    reg       x_aux_lsb   = 1'b0;
+    reg       y_aux_lsb   = 1'b0;
     reg       x_valid_msb = 1'b0;
     reg       y_valid_msb = 1'b0;
     
@@ -106,6 +110,10 @@ module modexpng_part_recombinator
     reg       x_valid_latch_lsb = 1'b0;
     reg       y_valid_latch_lsb = 1'b0;
     
+    // aux - latch
+    reg       x_aux_latch_lsb = 1'b0;
+    reg       y_aux_latch_lsb = 1'b0;
+    
     // bitmap - latch
     reg [7:0] x_bitmap_latch_lsb = {8{1'b0}};
     reg [7:0] y_bitmap_latch_lsb = {8{1'b0}};
@@ -125,6 +133,7 @@ module modexpng_part_recombinator
     // 
     reg       xy_valid_lsb_adv[1:6];
     reg       xy_valid_msb_adv[1:6];
+    reg       xy_aux_lsb_adv[1:6];
     reg [7:0] xy_bitmap_lsb_adv[1:6];
     reg [7:0] xy_bitmap_msb_adv[1:6];
     reg [2:0] xy_index_lsb_adv[1:6];
@@ -132,11 +141,25 @@ module modexpng_part_recombinator
     reg       xy_purge_lsb_adv[1:6];
     reg       xy_purge_msb_adv[1:6];
     
-    
+    reg [1:0] rcmb_mode;    // recombination mode: 1 = square, 2 = triangle, 0 = none
+       
+    always @(posedge clk)
+       // The mode is only re-sampled while both ena_x and ena_y are high;
+       if (ena_x && ena_y)
+           // otherwise it holds its value.  It is decoded from the *next* FSM state.
+           case (fsm_state_next)
+               FSM_STATE_MULT_SQUARE_COL_0_BUSY:       rcmb_mode <= 2'd1;
+               FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:     rcmb_mode <= 2'd2;
+               //FSM_STATE_MULT_RECTANGLE_COL_0_BUSY:    rcmb_mode <= 2'd3;
+               default:                                rcmb_mode <= 2'd0;
+           endcase
+
+               
     integer i;
     initial for (i=1; i<6; i=i+1) begin
         xy_valid_lsb_adv[i] = 1'b0;
         xy_valid_msb_adv[i] = 1'b0;
+        xy_aux_lsb_adv[i] = 1'b0;
         xy_bitmap_lsb_adv[i] = {8{1'b0}};
         xy_bitmap_msb_adv[i] = {8{1'b0}};
         xy_index_lsb_adv[i] = 3'dX;
@@ -145,7 +168,7 @@ module modexpng_part_recombinator
         xy_purge_msb_adv[i] = 1'b0;
     end
     
-    function  [0:0] calc_square_valid_lsb;
+    function        calc_square_valid_lsb;
         input [4:0] col_index_value;
         input [4:0] col_index_last_value;
         input [7:0] slim_bram_xy_addr_value;
@@ -159,6 +182,40 @@ module modexpng_part_recombinator
         end
     endfunction
     
+    function        calc_triangle_valid_lsb;
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        begin
+            // Returns 1 when address bits [7:3] equal the current column index,
+            // 0 otherwise.  col_index_last_value is accepted but not consulted.
+            calc_triangle_valid_lsb = 1'b0;
+            if (slim_bram_xy_addr_value[7:3] == col_index_value)
+                calc_triangle_valid_lsb = 1'b1;
+            //
+        end
+    endfunction
+
+    function        calc_triangle_aux_lsb;
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        input [1:0] slim_bram_xy_bank_value;
+        begin
+            // Returns 1 only when the slim bank equals BANK_SLIM_N_COEFF_EXT;
+            // the column and address arguments are accepted but not consulted.
+            calc_triangle_aux_lsb = 1'b0;
+            if (slim_bram_xy_bank_value == BANK_SLIM_N_COEFF_EXT)
+                calc_triangle_aux_lsb = 1'b1;
+            // Alternative address-based condition, left disabled:
+            //if (slim_bram_xy_addr_value[7:3] == col_index_value)
+                //calc_triangle_aux_lsb = 1'b1;
+            //else
+                //calc_triangle_aux_lsb = 1'b0;
+            //
+        end
+    endfunction
+    
     function  [7:0] calc_square_bitmap_lsb;
         input [4:0] col_index_value;
         input [4:0] col_index_last_value;
@@ -183,7 +240,32 @@ module modexpng_part_recombinator
             //
         end
     endfunction
-
+    
+    function  [7:0] calc_triangle_bitmap_lsb;
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        begin
+            // One-hot bitmap: address bits [7:3] must match the column, bits [2:0] pick the bit.
+            if (slim_bram_xy_addr_value[7:3] == col_index_value)
+                // address belongs to the current column
+                case (slim_bram_xy_addr_value[2:0])
+                    3'd0: calc_triangle_bitmap_lsb = 8'h01;
+                    3'd1: calc_triangle_bitmap_lsb = 8'h02;
+                    3'd2: calc_triangle_bitmap_lsb = 8'h04;
+                    3'd3: calc_triangle_bitmap_lsb = 8'h08;
+                    3'd4: calc_triangle_bitmap_lsb = 8'h10;
+                    3'd5: calc_triangle_bitmap_lsb = 8'h20;
+                    3'd6: calc_triangle_bitmap_lsb = 8'h40;
+                    3'd7: calc_triangle_bitmap_lsb = 8'h80;
+                endcase
+                // all eight selector values are listed above
+            else
+                calc_triangle_bitmap_lsb = 8'h00;
+            // col_index_last_value is accepted but not consulted
+        end
+    endfunction
+       
     function  [2:0] calc_square_index_lsb;
         input [4:0] col_index_value;
         input [4:0] col_index_last_value;
@@ -208,6 +290,31 @@ module modexpng_part_recombinator
             //
         end
     endfunction
+
+    function  [2:0] calc_triangle_index_lsb;
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        begin
+            // Index is address bits [2:0] when bits [7:3] match the column, don't-care otherwise.
+            if (slim_bram_xy_addr_value[7:3] == col_index_value)
+                // address belongs to the current column
+                case (slim_bram_xy_addr_value[2:0])
+                    3'd0: calc_triangle_index_lsb = 3'b000;
+                    3'd1: calc_triangle_index_lsb = 3'b001;
+                    3'd2: calc_triangle_index_lsb = 3'b010;
+                    3'd3: calc_triangle_index_lsb = 3'b011;
+                    3'd4: calc_triangle_index_lsb = 3'b100;
+                    3'd5: calc_triangle_index_lsb = 3'b101;
+                    3'd6: calc_triangle_index_lsb = 3'b110;
+                    3'd7: calc_triangle_index_lsb = 3'b111;
+                endcase
+                // all eight selector values are listed above
+            else
+                calc_triangle_index_lsb = 3'bxxx;
+            // col_index_last_value is accepted but not consulted
+        end
+    endfunction
     
     function        calc_square_purge_lsb;
         input [4:0] col_index_value;
@@ -271,10 +378,10 @@ module modexpng_part_recombinator
 
     
     reg         recomb_lsb_ce = 1'b0;
+    reg         recomb_lsb_ce_aux;
     reg  [ 2:0] recomb_lsb_ce_purge = 3'b000;
-    wire        recomb_lsb_ce_combined = recomb_lsb_ce | recomb_lsb_ce_purge[0];
+    wire        recomb_lsb_ce_combined = recomb_lsb_ce | recomb_lsb_ce_aux | recomb_lsb_ce_purge[0];
     reg         recomb_lsb_clr;
-    reg         recomb_lsb_vld = 1'b0;
 
     reg  [46:0] recomb_lsb_din;
     wire [15:0] recomb_lsb_dout;
@@ -283,12 +390,7 @@ module modexpng_part_recombinator
     reg  [ 1:0] recomb_msb_ce_purge = 2'b00;
     wire        recomb_msb_ce_combined = recomb_msb_ce | recomb_msb_ce_purge[0];
     reg         recomb_msb_clr;
-    reg         recomb_msb_vld = 1'b0;
     
-    always @(posedge clk)
-        //
-        {recomb_msb_vld, recomb_lsb_vld} <= {recomb_msb_ce_combined, recomb_lsb_ce_combined};
-
     reg  [46:0] recomb_msb_din;
     wire [15:0] recomb_msb_dout;
     
@@ -313,6 +415,7 @@ module modexpng_part_recombinator
     always @(posedge clk) begin
         //
         recomb_lsb_ce <= x_valid_latch_lsb;
+        recomb_lsb_ce_aux <= x_aux_latch_lsb;
         recomb_msb_ce <= x_bitmap_latch_msb[0];
         //
         if (x_purge_latch_lsb)
@@ -342,6 +445,8 @@ module modexpng_part_recombinator
         //
         if (x_valid_latch_lsb)
             recomb_lsb_din <= dsp_x_p_latch[x_index_latch_lsb];
+        else if (x_aux_latch_lsb)
+            recomb_lsb_din <= dsp_x_p_latch[8];
         else
             recomb_lsb_din <= {47{1'b0}};
 
@@ -363,6 +468,7 @@ module modexpng_part_recombinator
             FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
                 //
                 xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, slim_bram_xy_addr);
+                xy_aux_lsb_adv   [6] <= 1'b0;
                 xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr);
                 xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, slim_bram_xy_addr);
                 xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, slim_bram_xy_addr);
@@ -373,9 +479,27 @@ module modexpng_part_recombinator
                 //
             end
             //
+            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
+            FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
+                //
+                xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, slim_bram_xy_addr); /// bank
+                xy_aux_lsb_adv   [6] <= calc_triangle_aux_lsb   (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
+                xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr); //! bank
+                xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr); // ! bank!!!
+                xy_purge_lsb_adv [6] <= 1'b0;
+                //
+                xy_valid_msb_adv [6] <= 1'b0;
+                xy_bitmap_msb_adv[6] <= {8{1'b0}};
+                xy_purge_msb_adv [6] <= 1'b0;
+                //
+            end            
+            //
             default: begin
                 //
                 xy_valid_lsb_adv [6] <= 1'b0;
+                xy_aux_lsb_adv   [6] <= 1'b0;
                 xy_bitmap_lsb_adv[6] <= {8{1'b0}};
                 xy_index_lsb_adv [6] <= 3'dX;
                 xy_purge_lsb_adv [6] <= 1'b0;
@@ -392,11 +516,13 @@ module modexpng_part_recombinator
     always @(posedge clk) begin
         //
         {y_valid_lsb,  x_valid_lsb}  <= {2{xy_valid_lsb_adv [1]}};
+        {y_aux_lsb,    x_aux_lsb}    <= {2{xy_aux_lsb_adv   [1]}};
         {y_bitmap_lsb, x_bitmap_lsb} <= {2{xy_bitmap_lsb_adv[1]}};
         {y_index_lsb,  x_index_lsb}  <= {2{xy_index_lsb_adv [1]}};
         {y_purge_lsb,  x_purge_lsb}  <= {2{xy_purge_lsb_adv [1]}};
         //
         {y_valid_latch_lsb,  x_valid_latch_lsb}  <= {y_valid_lsb,  x_valid_lsb};
+        {y_aux_latch_lsb,    x_aux_latch_lsb}    <= {y_aux_lsb,    x_aux_lsb};
         {y_bitmap_latch_lsb, x_bitmap_latch_lsb} <= {y_bitmap_lsb, x_bitmap_lsb};
         {y_index_latch_lsb,  x_index_latch_lsb}  <= {y_index_lsb,  x_index_lsb};
         {y_purge_latch_lsb,  x_purge_latch_lsb}  <= {y_purge_lsb,  x_purge_lsb};
@@ -415,6 +541,7 @@ module modexpng_part_recombinator
         //
         for (i=1; i<6; i=i+1) begin
             xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
+            xy_aux_lsb_adv   [i] <= xy_aux_lsb_adv   [i+1];
             xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1];
             xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1];
             xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1];
@@ -436,7 +563,7 @@ module modexpng_part_recombinator
                 else
                     dsp_x_p_latch[i] <= {47{1'bX}};
             //
-        else if (dsp_x_ce_p_dly1)
+        else if (dsp_x_ce_p_dly1) begin
             //
             for (i=0; i<8; i=i+1)
                 //
@@ -444,6 +571,11 @@ module modexpng_part_recombinator
                     dsp_x_p_latch[i] <= dsp_x_p_split[i];
                 else if (x_valid_msb && x_bitmap_msb[i])
                     dsp_x_p_latch[i] <= dsp_x_p_split[i];
+            //
+            if (x_aux_lsb)
+                dsp_x_p_latch[8] <= dsp_x_p_split[8];
+            //
+        end
 
     reg recomb_x_lsb_dout_valid = 1'b0;
     reg recomb_x_msb_dout_valid = 1'b0;
@@ -493,12 +625,187 @@ module modexpng_part_recombinator
         else
             rdy_reg <= rdy_adv;
 
+            
+    task advance_recomb_msb_dout_delay;     // step both three-stage MSB delay lines by one
+        input [15:0] dout;                  // value entering data stage 0
+        input [ 7:0] cnt;                   // value entering counter stage 0
+        begin
+            // data line, oldest stage first (nonblocking, so order is irrelevant)
+            recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
+            recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
+            recomb_msb_dout_delay_0 <= dout;
+            recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
+            recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
+            recomb_msb_cnt_delay_0 <= cnt;
+        end
+    endtask
+         
+    task shift_recomb_msb_dout_carry;       // step the two-stage MSB carry register chain
+        input [15:0] dout;                  // value entering stage 0
+        begin
+            recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0;    // stage 0 -> stage 1
+            recomb_msb_dout_carry_0 <= dout;                       // new word -> stage 0
+        end
+    endtask
+    
+    task _update_fat_bram_regs;     // single writer for all FAT BRAM output registers
+        input [ 2:0] bank;          // -> fat_bram_xy_bank_reg
+        input [ 7:0] addr;          // -> fat_bram_xy_addr_reg
+        input [17:0] dout_x;        // -> fat_bram_x_dout_reg
+        input [17:0] dout_y;        // -> fat_bram_y_dout_reg
+        input        valid;         // -> fat_bram_xy_dout_valid_reg
+        begin
+            fat_bram_xy_bank_reg       <= bank;
+            fat_bram_xy_addr_reg       <= addr;
+            fat_bram_x_dout_reg        <= dout_x;
+            fat_bram_y_dout_reg        <= dout_y;
+            fat_bram_xy_dout_valid_reg <= valid;    // was hard-wired to 1'b1, so a "clear" (valid = 1'b0) still asserted valid
+        end
+    endtask
+    
+            
+    task set_fat_bram_regs;     // wrapper: forwards its arguments to _update_fat_bram_regs with valid = 1'b1
+        input [ 2:0] bank;      // destination bank
+        input [ 7:0] addr;      // destination address
+        input [17:0] dout_x;    // X data word
+        input [17:0] dout_y;    // Y data word
+        begin
+            _update_fat_bram_regs(bank, addr, dout_x, dout_y, 1'b1);
+        end
+    endtask
+    
+    task clear_fat_bram_regs;   // wrapper: passes don't-care bank/addr/data and valid = 1'b0
+        begin
+            _update_fat_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
+        end
+    endtask
+    
+    task _set_fat_bram_cnt_lsb;     // low-level setter: loads fat_bram_xy_cnt_lsb (nonblocking)
+        input [7:0] cnt;            // new counter value
+        begin
+            fat_bram_xy_cnt_lsb <= cnt;
+        end
+    endtask
+    task _set_fat_bram_cnt_msb;     // low-level setter: loads fat_bram_xy_cnt_msb (nonblocking)
+        input [7:0] cnt;            // new counter value
+        begin
+            fat_bram_xy_cnt_msb <= cnt;
+        end
+    endtask    
+
+    task inc_fat_bram_cnt_lsb;      // fat_bram_xy_cnt_lsb <= fat_bram_xy_cnt_lsb + 1
+        begin
+            _set_fat_bram_cnt_lsb(fat_bram_xy_cnt_lsb + 1'b1);
+        end
+    endtask
+    task inc_fat_bram_cnt_msb;      // fat_bram_xy_cnt_msb <= fat_bram_xy_cnt_msb + 1
+        begin
+            _set_fat_bram_cnt_msb(fat_bram_xy_cnt_msb + 1'b1);
+        end
+    endtask
+    
+    task clr_fat_bram_cnt_lsb;      // fat_bram_xy_cnt_lsb <= 0
+        begin
+            _set_fat_bram_cnt_lsb(8'd0);
+        end
+    endtask
+    task clr_fat_bram_cnt_msb;      // fat_bram_xy_cnt_msb <= 0
+        begin
+            _set_fat_bram_cnt_msb(8'd0);
+        end
+    endtask
+    
+    
+   
+    
+
+    wire [1:0] rcmb_xy_dout_valid = {recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid};     // bit 1 = MSB word valid, bit 0 = LSB word valid (X channel flags only)
+    
+    always @(posedge clk)
+        // While both enables are high, both FAT BRAM word counters are cleared;
+        if (ena_x & ena_y) begin
+            clr_fat_bram_cnt_lsb();
+            clr_fat_bram_cnt_msb();
+        end else begin  // author's open question: "if not ready???"
+            // otherwise the task matching rcmb_mode runs once per clock.
+            case (rcmb_mode)
+                2'd1: recombine_square();
+                2'd2: recombine_triangle();
+            endcase
+            // rcmb_mode values 0 and 3 run neither task, so nothing is updated here.
+        end
+           
+    task recombine_square;
+        begin
+            // Step 1: advance the LSB/MSB word counters according to which outputs are valid.
+            case (rcmb_xy_dout_valid)
+                // All updates below go through tasks that use nonblocking assignments.
+                2'b01: inc_fat_bram_cnt_lsb(); 
+                2'b10: inc_fat_bram_cnt_msb();
+                2'b11: begin
+                    if (fat_bram_xy_cnt_lsb == index_last) clr_fat_bram_cnt_lsb();
+                    else                                   inc_fat_bram_cnt_lsb();
+                    inc_fat_bram_cnt_msb();
+                end
+                // 2'b00: both counters hold.
+            endcase            
+            // Step 2: choose the bank, address and X data word to present (Y data is always don't-care).
+            case (rcmb_xy_dout_valid)
+                // The counter values tested here are the ones from before this clock edge.
+                2'b00:  if (recomb_msb_cnt_delay_2 > 8'd0)      set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
+                        else                                    clear_fat_bram_regs();
+                2'b01:                                          set_fat_bram_regs(BANK_FAT_ABL,        fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); 
+                2'b10:  if (fat_bram_xy_cnt_msb < 8'd2)         clear_fat_bram_regs();                        
+                        else                                    set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});                        
+                2'b11:  if (fat_bram_xy_cnt_lsb < index_last)   set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}}); 
+                        else                                    set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+                // In the 2'b11 sum each 16-bit operand is zero-extended to 17 bits, so the carry lands in bit 16.
+            endcase            
+            // Step 3: maintain the MSB delay line and the two-stage carry chain.
+            case (rcmb_xy_dout_valid)
+                // 2'b00 shifts a don't-care word with count 0 into the delay line while stage 2 still holds a non-zero count.
+                2'b00:  if (recomb_msb_cnt_delay_2 > 8'd0)      advance_recomb_msb_dout_delay(16'hXXXX, 8'd0);
+                2'b10:  if (fat_bram_xy_cnt_msb < 8'd2)         shift_recomb_msb_dout_carry(recomb_msb_dout);
+                // 2'b11 pushes the current MSB word and its counter into the delay line.
+                2'b11:  begin                                   advance_recomb_msb_dout_delay(recomb_msb_dout, fat_bram_xy_cnt_msb);
+                        if (fat_bram_xy_cnt_lsb < index_last)   shift_recomb_msb_dout_carry({16{1'bX}});
+                        end
+                // 2'b01 leaves both the delay line and the carry chain untouched.
+            endcase
+            //        
+        end
+        //
+    endtask
+    
+    
+    task recombine_triangle;
+        begin
+            // Step 1: counters only move when the LSB word alone is valid (2'b01).
+            case (rcmb_xy_dout_valid)
+                // The MSB counter is bumped once the LSB counter has reached index_last.
+                2'b01: begin                                  inc_fat_bram_cnt_lsb();
+                       if (fat_bram_xy_cnt_lsb == index_last) inc_fat_bram_cnt_msb();
+                end 
+                // NOTE(review): the LSB counter is not cleared at index_last here, so it keeps counting - confirm intended.
+            endcase            
+            // Step 2: write LSB words to BANK_FAT_Q while the MSB counter is zero, to BANK_FAT_Q_EXT afterwards.
+            case (rcmb_xy_dout_valid)
+                // Y data is always don't-care; the address is the pre-edge LSB counter value.
+                2'b00:  clear_fat_bram_regs();
+                2'b01:  if (fat_bram_xy_cnt_msb == 8'd0) set_fat_bram_regs(BANK_FAT_Q,     fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); 
+                        else                             set_fat_bram_regs(BANK_FAT_Q_EXT, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+                // 2'b10 and 2'b11 are not handled: the output registers hold in those cases.
+            endcase
+            //        
+        end
+    endtask
+
+    
+    
     always @(posedge clk)
         //
         if (ena_x & ena_y) begin
             rdy_adv <= 1'b0;
-            fat_bram_xy_cnt_lsb <= 8'd0;
-            fat_bram_xy_cnt_msb <= 8'd0;
         end else begin
             //
             case ({recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid})
@@ -509,115 +816,13 @@ module modexpng_part_recombinator
                         //
                         rdy_adv <= recomb_msb_cnt_delay_1 == 8'd0;
                         //
-                        recomb_msb_dout_delay_0 <= {18{1'bX}};
-                        recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
-                        recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
-                        //
-                        recomb_msb_cnt_delay_0 <= 8'd0;
-                        recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
-                        recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
-                        //
-                        fat_bram_xy_bank_reg        <= BANK_FAT_ABH;
-                        fat_bram_xy_addr_reg        <= recomb_msb_cnt_delay_2;
-                        fat_bram_x_dout_reg         <= recomb_msb_dout_delay_2;
-//                      fat_bram_y_dout_reg         <= {18{1'bX}};
-                        fat_bram_xy_dout_valid_reg  <= 1'b1;
-                        //
-                    end else begin
-                        //
-                        fat_bram_xy_bank_reg        <= 3'bXXX;
-                        fat_bram_xy_addr_reg        <= 8'hXX;
-                        fat_bram_x_dout_reg         <= {18{1'bX}};
-                        fat_bram_y_dout_reg         <= {18{1'bX}};
-                        fat_bram_xy_dout_valid_reg  <= 1'b0;
-                        //
-                    end
-                    //
-                end
-                //
-                2'b01: begin
-                    //
-                    fat_bram_xy_bank_reg        <= BANK_FAT_ABL;
-                    fat_bram_xy_addr_reg        <= fat_bram_xy_cnt_lsb;
-                    fat_bram_x_dout_reg         <= {2'b00, recomb_lsb_dout};
-//                  fat_bram_y_dout_reg
-                    fat_bram_xy_dout_valid_reg  <= 1'b1;
-                    //
-                    fat_bram_xy_cnt_lsb         <= fat_bram_xy_cnt_lsb + 1'b1; 
-                    //
-                end
-                //
-                2'b10: begin
-                    //
-                    if (fat_bram_xy_cnt_msb < 8'd2) begin
-                        //
-                        recomb_msb_dout_carry_0 <= recomb_msb_dout;
-                        recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0;
-                        //
-                        fat_bram_xy_bank_reg        <= 3'bXXX;
-                        fat_bram_xy_addr_reg        <= 8'hXX;
-                        fat_bram_x_dout_reg         <= {18{1'bX}};
-    //                  fat_bram_y_dout_reg
-                        fat_bram_xy_dout_valid_reg  <= 1'b0;
-                        //                        
-                    end else begin
-                        //
-                        fat_bram_xy_bank_reg        <= BANK_FAT_ABH;
-                        fat_bram_xy_addr_reg        <= fat_bram_xy_cnt_msb;
-                        fat_bram_x_dout_reg         <= {2'b00, recomb_msb_dout};
-    //                  fat_bram_y_dout_reg
-                        fat_bram_xy_dout_valid_reg  <= 1'b1;                        
-                        //
-                    end
-                    //
-                    fat_bram_xy_cnt_msb         <= fat_bram_xy_cnt_msb + 1'b1;
-                    //                
-                end
-                //
-                2'b11: begin
-                    //
-                    if (fat_bram_xy_cnt_lsb == index_last) begin
-                        //
-                        fat_bram_xy_bank_reg        <= BANK_FAT_ABL;
-                        fat_bram_xy_addr_reg        <= fat_bram_xy_cnt_lsb;
-                        fat_bram_x_dout_reg         <= {2'b00, recomb_lsb_dout};
-//                      fat_bram_y_dout_reg         <= {18{1'bX}};
-                        fat_bram_xy_dout_valid_reg  <= 1'b1;
-                        //
-                        fat_bram_xy_cnt_lsb         <= 8'd0;
-                        //
-                    end else begin
-                        //
-                        fat_bram_xy_bank_reg        <= BANK_FAT_ABH;
-                        fat_bram_xy_addr_reg        <= fat_bram_xy_cnt_lsb;
-                        fat_bram_x_dout_reg         <= {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}};
-//                      fat_bram_y_dout_reg         <= {18{1'bX}};
-                        fat_bram_xy_dout_valid_reg  <= 1'b1;
-                        //
-                        fat_bram_xy_cnt_lsb         <= fat_bram_xy_cnt_lsb + 1'b1;
-                        //
-                        recomb_msb_dout_carry_0 <= {16{1'bX}};
-                        recomb_msb_dout_carry_1 <= recomb_msb_dout_carry_0;                        
-                        //
                     end
                     //
-                    recomb_msb_dout_delay_0 <= recomb_msb_dout;
-                    recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
-                    recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
-                    //
-                    recomb_msb_cnt_delay_0 <= fat_bram_xy_cnt_msb;
-                    recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
-                    recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
-                    //
-                    fat_bram_xy_cnt_msb         <= fat_bram_xy_cnt_msb + 1'b1;
-                    //
                 end
-                //
             endcase
             //        
         end
-    
-    
-    
+
+
     
 endmodule



More information about the Commits mailing list