[Cryptech-Commits] [user/shatov/modexpng] 03/05: Further work on the Montgomery modular multiplier. Added the third "rectangular" stage of the multiplication process, i.e. computation of how many copies of the modulus N to add to the intermediate product AB to zeroize the lower half: M = Q * N.

git at cryptech.is git at cryptech.is
Tue Oct 1 12:18:47 UTC 2019


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexpng.

commit ecf0374b7bbf1c1ea56fea8f1acaeea85c3612d2
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Tue Oct 1 15:07:56 2019 +0300

    Further work on the Montgomery modular multiplier. Added the third
    "rectangular" stage of the multiplication process, i.e. computation of how many
    copies of the modulus N to add to the intermediate product AB to zeroize the
    lower half: M = Q * N.
---
 bench/tb_square.v                | 362 ++++++++++++++++++++++++++-------
 rtl/modexpng_mmm_fsm.vh          |  11 +-
 rtl/modexpng_parameters.vh       |  26 +--
 rtl/modexpng_part_recombinator.v | 425 ++++++++++++++++++++++++++++++++-------
 4 files changed, 668 insertions(+), 156 deletions(-)

diff --git a/bench/tb_square.v b/bench/tb_square.v
index 23831db..d35a5cc 100644
--- a/bench/tb_square.v
+++ b/bench/tb_square.v
@@ -41,6 +41,8 @@ module tb_square;
     reg [17:0] AB[0:63];
     reg [17:0] N_COEFF[0:32];
     reg [17:0] Q[0:32];
+    reg [17:0] N[0:31];
+    reg [17:0] M[0:64];
 
 
     //
@@ -103,6 +105,33 @@ module tb_square;
         Q[28] = 18'h0bf39; Q[29] = 18'h0929d; Q[30] = 18'h05273; Q[31] = 18'h0c30a;
         Q[32] = 18'h0eef3;
         //
+        N[ 0] = 18'h03ad9; N[ 1] = 18'h046b4; N[ 2] = 18'h0e181; N[ 3] = 18'h0fac7;
+        N[ 4] = 18'h0be72; N[ 5] = 18'h029ab; N[ 6] = 18'h07e51; N[ 7] = 18'h037a8;
+        N[ 8] = 18'h0880c; N[ 9] = 18'h05a7d; N[10] = 18'h043c2; N[11] = 18'h038c9;
+        N[12] = 18'h01275; N[13] = 18'h0aa0d; N[14] = 18'h0c0c1; N[15] = 18'h0d035;
+        N[16] = 18'h04082; N[17] = 18'h0543c; N[18] = 18'h0dcb0; N[19] = 18'h0497c;
+        N[20] = 18'h0b12c; N[21] = 18'h013d4; N[22] = 18'h0b80a; N[23] = 18'h051cf;
+        N[24] = 18'h0286c; N[25] = 18'h0b600; N[26] = 18'h0d838; N[27] = 18'h0af4b;
+        N[28] = 18'h08274; N[29] = 18'h06a07; N[30] = 18'h0beea; N[31] = 18'h0f000;
+        //
+        M[ 0] = 18'h041b2; M[ 1] = 18'h00128; M[ 2] = 18'h06b69; M[ 3] = 18'h08e7e;
+        M[ 4] = 18'h0118c; M[ 5] = 18'h0b96d; M[ 6] = 18'h0ebe5; M[ 7] = 18'h0f873;
+        M[ 8] = 18'h0cf14; M[ 9] = 18'h0de83; M[10] = 18'h09690; M[11] = 18'h05e9a;
+        M[12] = 18'h048ac; M[13] = 18'h0b506; M[14] = 18'h01283; M[15] = 18'h08631;
+        M[16] = 18'h0179c; M[17] = 18'h06820; M[18] = 18'h0867b; M[19] = 18'h0b750;
+        M[20] = 18'h0e680; M[21] = 18'h0df95; M[22] = 18'h0d818; M[23] = 18'h0b4c5;
+        M[24] = 18'h0cced; M[25] = 18'h0c4a9; M[26] = 18'h0bb78; M[27] = 18'h04295;
+        M[28] = 18'h0b1b4; M[29] = 18'h09635; M[30] = 18'h0066b; M[31] = 18'h022b1;
+        M[32] = 18'h04fdb; M[33] = 18'h0efc8; M[34] = 18'h00a14; M[35] = 18'h04bef;
+        M[36] = 18'h006a1; M[37] = 18'h0f1a6; M[38] = 18'h0fc40; M[39] = 18'h0adb5;
+        M[40] = 18'h06e8f; M[41] = 18'h02c60; M[42] = 18'h083e1; M[43] = 18'h0f862;
+        M[44] = 18'h0da61; M[45] = 18'h0dd3d; M[46] = 18'h03381; M[47] = 18'h09db0;
+        M[48] = 18'h05454; M[49] = 18'h07525; M[50] = 18'h0d9c7; M[51] = 18'h0a361;
+        M[52] = 18'h049e0; M[53] = 18'h0a671; M[54] = 18'h0242e; M[55] = 18'h07cb2;
+        M[56] = 18'h02021; M[57] = 18'h0bde1; M[58] = 18'h025aa; M[59] = 18'h0c615;
+        M[60] = 18'h05645; M[61] = 18'h03b46; M[62] = 18'h065d6; M[63] = 18'h0390d;
+        M[64] = 18'h0e005;
+        //  
     end
     
 
@@ -134,6 +163,12 @@ module tb_square;
     reg [ 7:0] tb_slim_bram_xy_addr;
     reg [17:0] tb_slim_bram_x_din;
     reg [17:0] tb_slim_bram_y_din;
+    
+    reg        mgr_slim_bram_xy_ena = 1'b0;
+    reg [ 1:0] mgr_slim_bram_xy_bank;
+    reg [ 7:0] mgr_slim_bram_xy_addr;
+    reg [17:0] mgr_slim_bram_x_din;
+    reg [17:0] mgr_slim_bram_y_din;
 
     reg         mac_slim_bram_xy_ena = 1'b0;
     reg         mac_slim_bram_xy_reg_ena = 1'b0;
@@ -195,10 +230,10 @@ module tb_square;
     ip_bram_18k slim_bram_x
     (
         .clka   (clk),
-        .ena    (tb_slim_bram_xy_ena),
-        .wea    (tb_slim_bram_xy_ena),
-        .addra  ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}),
-        .dina   (tb_slim_bram_x_din),
+        .ena    (mgr_slim_bram_xy_ena),
+        .wea    (mgr_slim_bram_xy_ena),
+        .addra  ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}),
+        .dina   (mgr_slim_bram_x_din),
     
         .clkb   (clk),
         .enb    (mac_slim_bram_xy_ena),
@@ -210,10 +245,10 @@ module tb_square;
     ip_bram_18k slim_bram_y
     (
         .clka   (clk),
-        .ena    (tb_slim_bram_xy_ena),
-        .wea    (tb_slim_bram_xy_ena),
-        .addra  ({tb_slim_bram_xy_bank, tb_slim_bram_xy_addr}),
-        .dina   (tb_slim_bram_y_din),
+        .ena    (mgr_slim_bram_xy_ena),
+        .wea    (mgr_slim_bram_xy_ena),
+        .addra  ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}),
+        .dina   (mgr_slim_bram_y_din),
     
         .clkb   (clk),
         .enb    (mac_slim_bram_xy_ena),
@@ -266,14 +301,23 @@ module tb_square;
             wait_clock_tick;
         end
         for (i=32; i<33; i=i+1) begin
-            tb_slim_bram_xy_bank = BANK_SLIM_N_COEFF_EXT;
-            tb_slim_bram_xy_addr = 0;
+            tb_slim_bram_xy_bank = BANK_SLIM_EXT;
+            tb_slim_bram_xy_addr = 0;   // !
             tb_slim_bram_x_din = N_COEFF[i];
             tb_slim_bram_y_din = N_COEFF[i];
             
             wait_clock_tick;
         end
 
+        for (i=0; i<32; i=i+1) begin
+            tb_fat_bram_xy_bank = BANK_FAT_N;
+            tb_fat_bram_xy_addr = i[7:0];
+            tb_fat_bram_x_din = N[i];
+            tb_fat_bram_y_din = N[i];
+            
+            wait_clock_tick;
+        end
+
         tb_fat_bram_xy_ena = 1'b0;        
         tb_slim_bram_xy_ena = 1'b0;
         
@@ -299,6 +343,7 @@ module tb_square;
             
         verify_ab;
         verify_q;
+        verify_m;
 
     end
 
@@ -418,25 +463,23 @@ module tb_square;
 
     wire mult_square_addr_almost_done_comb;
     reg  mult_square_addr_almost_done_flop;
-        
-    //wire mult_square_addr_surely_done_comb;
     reg  mult_square_addr_surely_done_flop; 
 
-    reg  mult_triangle_addr_almost_done_comb;
-    reg  mult_triangle_addr_almost_done_flop;
-        
-    //wire mult_triangle_addr_surely_done_comb;
+    wire  mult_triangle_addr_almost_done_comb;
+    reg  mult_triangle_addr_almost_done_flop;        
     reg  mult_triangle_addr_surely_done_flop;
     reg  mult_triangle_addr_tardy_done_flop;
+
+    wire  mult_rectangle_addr_almost_done_comb;
+    reg  mult_rectangle_addr_almost_done_flop;        
+    reg  mult_rectangle_addr_surely_done_flop;
+    reg  mult_rectangle_addr_tardy_done_flop;
+
     
     assign mult_square_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1;
+    assign mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last_minus1[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
+    assign mult_rectangle_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1;
 
-    always @*
-        //
-        //if (!col_is_last)
-            mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last_minus1[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
-        //else
-            //mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
             
     
     
@@ -482,6 +525,29 @@ module tb_square;
         //
     end
         
+        
+     always @(posedge clk)
+        // Registers the "almost done" address compare of the rectangle stage; the flop is held at 0 outside the two BUSY states.
+        case (fsm_state)
+        
+            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
+                mult_rectangle_addr_almost_done_flop <= mult_rectangle_addr_almost_done_comb;   // comb is (mac_slim_bram_xy_addr == index_last_minus1)
+                // NOTE(review): the commented-out statements in this block name the triangle-stage flops; they look like copy-paste leftovers.
+                //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <= {mult_triangle_addr_surely_done_comb, mult_triangle_addr_almost_done_comb};
+                
+            default:
+                mult_rectangle_addr_almost_done_flop <= 1'b0;   // every other state clears the flag
+                //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <= 2'b00;
+            
+        endcase
+
+    always @(posedge clk) begin
+        // Two-stage delay line: "surely" lags "almost" by one clock, "tardy" lags "surely" by one more clock.
+        mult_rectangle_addr_surely_done_flop <= mult_rectangle_addr_almost_done_flop;
+        mult_rectangle_addr_tardy_done_flop  <= mult_rectangle_addr_surely_done_flop;
+        // The tardy flop is the one exported as mult_rectangle_addr_done to the FSM next-state logic.
+    end
 
 
     //
@@ -489,6 +555,7 @@ module tb_square;
     //
     wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
     wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
+    wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;
     
     
     always @(posedge clk)
@@ -510,6 +577,14 @@ module tb_square;
             FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_addr <= mult_triangle_addr_almost_done_flop || (col_is_last && mult_triangle_addr_surely_done_flop) ?
                 8'd0 :  mac_slim_bram_xy_addr + 1'b1;
             //
+            FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+            FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0;
+            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_addr <= mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop ?
+                8'd1 :  mac_slim_bram_xy_addr + 1'b1;            
+            //
             default:                            mac_slim_bram_xy_addr <= 8'dX;
         endcase
 
@@ -543,7 +618,14 @@ module tb_square;
                 FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
                 FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
                 FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-                FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);                
+                FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
+                //
+                FSM_STATE_MULT_RECTANGLE_COL_0_INIT:   mac_fat_bram_xy_addr[j] <= {5'd0, fat_bram_offset_rom[j]};
+                FSM_STATE_MULT_RECTANGLE_COL_N_INIT:   mac_fat_bram_xy_addr[j] <= {col_index_next1, fat_bram_offset_rom[j]};
+                FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+                FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+                FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+                FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
                 //
                 default:                            mac_fat_bram_xy_addr[j] <= 8'dX;
             endcase
@@ -564,7 +646,14 @@ module tb_square;
             FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
             FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
             FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);                
+            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
+            //
+            FSM_STATE_MULT_RECTANGLE_COL_0_INIT:   mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+            FSM_STATE_MULT_RECTANGLE_COL_N_INIT:   mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
+            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
             //
             default:                            mac_fat_bram_xy_addr[4] <= 8'dX;
         endcase
@@ -574,19 +663,30 @@ module tb_square;
     always @(posedge clk)
         //
         case (fsm_state_next)
+            //
             FSM_STATE_MULT_SQUARE_COL_0_INIT,
             FSM_STATE_MULT_SQUARE_COL_N_INIT,
             FSM_STATE_MULT_SQUARE_COL_0_TRIG,
             FSM_STATE_MULT_SQUARE_COL_N_TRIG,
             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
             FSM_STATE_MULT_SQUARE_COL_N_BUSY:   mac_slim_bram_xy_bank <= BANK_SLIM_T1T2;
+            //
             FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
             FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
             FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
             FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
             FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
             FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_bank <= col_is_last && (mult_triangle_addr_almost_done_flop || mult_triangle_addr_surely_done_flop) ?
-                BANK_SLIM_N_COEFF_EXT : BANK_SLIM_N_COEFF;  
+                BANK_SLIM_EXT : BANK_SLIM_N_COEFF;
+            //
+            FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+            FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_bank <= mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop ?
+                BANK_SLIM_EXT : BANK_SLIM_Q;            
+            //
             default:                            mac_slim_bram_xy_bank <= 2'bXX;
         endcase
 
@@ -605,6 +705,12 @@ module tb_square;
             FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {BANK_FAT_ABH, BANK_FAT_ABL};
             FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
             FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_ABL}};
+            FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+            FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,    
+            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{BANK_FAT_N}};            
             default:                            {mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_bank} <= {2{3'bXXX}};
         endcase
 
@@ -625,6 +731,12 @@ module tb_square;
             FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:   mac_slim_bram_xy_ena <= 1'b1;
             FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
             FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   mac_slim_bram_xy_ena <= !col_is_last ? ~mult_triangle_addr_almost_done_flop : ~mult_triangle_addr_surely_done_flop; 
+            FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+            FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:   mac_slim_bram_xy_ena <= 1'b1;
+            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   mac_slim_bram_xy_ena <= ~mult_rectangle_addr_surely_done_flop;
             default:                              mac_slim_bram_xy_ena <= 1'b0;
         endcase
 
@@ -642,7 +754,13 @@ module tb_square;
             FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
             FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
             FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:   mac_fat_bram_xy_ena <= 1'b1;
+            FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
+            FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
+            FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
+            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:   mac_fat_bram_xy_ena <= 1'b1;
             default:                            mac_fat_bram_xy_ena <= 1'b0;
         endcase
 
@@ -654,12 +772,30 @@ module tb_square;
     always @(posedge clk)
         //
         mac_fat_bram_xy_reg_ena <= mac_fat_bram_xy_ena;
-            
+          
+    reg ladder_mode = 1'b0; // 0 = X:T1*T2, Y:T2*T2
+                            // 1 = X:T1*T2, Y:T2*T1
+          
 
+    reg dsp_swap_xy;
+    
+    always @(posedge clk)
+        // Latches dsp_swap_xy, which selects how the slim BRAM X/Y outputs are routed onto dsp_x_b / dsp_y_b.
+        case (fsm_state)
+            FSM_STATE_MULT_SQUARE_COL_0_TRIG:   dsp_swap_xy <= 1'b1;    // set when the square stage triggers its first column
+            FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_swap_xy <= 1'b0;    // cleared when the triangle stage triggers its first column
+        endcase // no default: the flag holds its value in all other states (the reg is declared without an initializer)
+  
     always @(posedge clk)
         //
-        if (mac_slim_bram_xy_reg_ena_dly)
-            {dsp_y_b, dsp_x_b} <= {mac_slim_bram_x_dout[16:0], mac_slim_bram_y_dout[16:0]};
+        if (mac_slim_bram_xy_reg_ena_dly) begin // rewrite
+            if (!dsp_swap_xy)
+                {dsp_y_b, dsp_x_b} <= {mac_slim_bram_y_dout[16:0], mac_slim_bram_x_dout[16:0]};
+            else begin
+                if (!ladder_mode) {dsp_y_b, dsp_x_b} <= {mac_slim_bram_x_dout[16:0], mac_slim_bram_y_dout[16:0]};
+                else              {dsp_y_b, dsp_x_b} <= {mac_slim_bram_y_dout[16:0], mac_slim_bram_x_dout[16:0]};
+            end
+        end
         else
             {dsp_y_b, dsp_x_b} <= {2{{17{1'bX}}}};
 
@@ -711,7 +847,8 @@ module tb_square;
         case (fsm_state_next)
             //
             FSM_STATE_MULT_SQUARE_COL_0_INIT,
-            FSM_STATE_MULT_TRIANGLE_COL_0_INIT: begin
+            FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
+            FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
                 col_index       <= 5'd0;
                 col_index_last  <= index_last[7:3];
                 col_index_next1 <= 5'd1;
@@ -721,7 +858,8 @@ module tb_square;
             end
             //
             FSM_STATE_MULT_SQUARE_COL_N_INIT,
-            FSM_STATE_MULT_TRIANGLE_COL_N_INIT: begin
+            FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
+            FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
                 col_index <= col_index_next1;
                 col_is_last <= col_index_next1 == col_index_last;
                 col_index_next1 <= col_index_next1 == col_index_last ? 5'd0 : col_index_next1 + 5'd1;   
@@ -730,8 +868,9 @@ module tb_square;
             //
         endcase
     
-    assign  fsm_state_after_mult_square   = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF   : FSM_STATE_MULT_SQUARE_COL_N_INIT;
-    assign  fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
+    assign fsm_state_after_mult_square    = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF   : FSM_STATE_MULT_SQUARE_COL_N_INIT;
+    assign fsm_state_after_mult_triangle  = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
+    assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
     
     always @(posedge clk)
         //
@@ -741,9 +880,13 @@ module tb_square;
             FSM_STATE_MULT_SQUARE_COL_0_BUSY,
             FSM_STATE_MULT_SQUARE_COL_N_BUSY:       dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly);
             FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}};
+            FSM_STATE_MULT_TRIANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}};    // so easy
             FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
             FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:     dsp_xy_mode_z_adv4 <= {9{1'b1}};
+            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG:     dsp_xy_mode_z_adv4 <= {9{1'b0}};    // so easy
+            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:     dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, mac_slim_bram_xy_addr_dly);
             default:                                dsp_xy_mode_z_adv4 <= {9{1'b1}};
         endcase
 
@@ -774,26 +917,26 @@ module tb_square;
                 calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
         end
     endfunction
-    /*
-    function  [NUM_MULTS:0] calc_mac_mode_z_triangle;
+    
+    function  [NUM_MULTS:0] calc_mac_mode_z_rectangle;
         input [        4:0] col_index_value;
         input [        7:0] mac_slim_bram_xy_addr_value;
         begin
             if (mac_slim_bram_xy_addr_value[7:3] == col_index_value)
                 case (mac_slim_bram_xy_addr_value[2:0])
-                    3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110};
-                    3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
-                    3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
-                    3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
-                    3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
-                    3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
-                    3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
-                    3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
+                    3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110};
+                    3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101};
+                    3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011};
+                    3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111};
+                    3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111};
+                    3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111};
+                    3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111};
+                    3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111};
                 endcase
             else
-                calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}};
+                calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}};
         end
-    endfunction*/
+    endfunction
 
     reg recomb_x_ena = 1'b0;
     reg recomb_y_ena = 1'b0;
@@ -810,44 +953,66 @@ module tb_square;
     wire [17:0] recomb_fat_bram_x_dout;
     wire [17:0] recomb_fat_bram_y_dout;
     wire        recomb_fat_bram_xy_dout_valid;
+    wire [ 2:0] recomb_slim_bram_xy_bank;
+    wire [ 7:0] recomb_slim_bram_xy_addr;
+    wire [17:0] recomb_slim_bram_x_dout;
+    wire [17:0] recomb_slim_bram_y_dout;
+    wire        recomb_slim_bram_xy_dout_valid;
     wire        recomb_rdy;
     
     modexpng_part_recombinator recomb
     (
-        .clk                        (clk),
-        .rdy                        (recomb_rdy),
-        .fsm_state_next             (fsm_state_next),
-        .index_last                 (index_last),
-        .dsp_x_ce_p                 (dsp_x_ce_p),
-        .dsp_y_ce_p                 (dsp_y_ce_p),
-        .ena_x                      (recomb_x_ena),
-        .ena_y                      (recomb_y_ena),
-        .dsp_x_p                    (dsp_x_p),
-        .dsp_y_p                    (dsp_y_p),
-        .col_index                  (col_index),
-        .col_index_last             (col_index_last),
-        .slim_bram_xy_addr          (mac_slim_bram_xy_addr),
-        .slim_bram_xy_bank          (mac_slim_bram_xy_bank),
-        .fat_bram_xy_bank           (recomb_fat_bram_xy_bank),
-        .fat_bram_xy_addr           (recomb_fat_bram_xy_addr),
-        .fat_bram_x_dout            (recomb_fat_bram_x_dout),
-        .fat_bram_y_dout            (recomb_fat_bram_y_dout),
-        .fat_bram_xy_dout_valid     (recomb_fat_bram_xy_dout_valid)
+        .clk                            (clk),
+        .rdy                            (recomb_rdy),
+        .fsm_state_next                 (fsm_state_next),
+        .index_last                     (index_last),
+        .dsp_x_ce_p                     (dsp_x_ce_p),
+        .dsp_y_ce_p                     (dsp_y_ce_p),
+        .ena_x                          (recomb_x_ena),
+        .ena_y                          (recomb_y_ena),
+        .dsp_x_p                        (dsp_x_p),
+        .dsp_y_p                        (dsp_y_p),
+        .col_index                      (col_index),
+        .col_index_last                 (col_index_last),
+        .slim_bram_xy_addr              (mac_slim_bram_xy_addr),
+        .slim_bram_xy_bank              (mac_slim_bram_xy_bank),
+        .rcmb_fat_bram_xy_bank          (recomb_fat_bram_xy_bank),
+        .rcmb_fat_bram_xy_addr          (recomb_fat_bram_xy_addr),
+        .rcmb_fat_bram_x_dout           (recomb_fat_bram_x_dout),
+        .rcmb_fat_bram_y_dout           (recomb_fat_bram_y_dout),
+        .rcmb_fat_bram_xy_dout_valid    (recomb_fat_bram_xy_dout_valid),
+        .rcmb_slim_bram_xy_bank         (recomb_slim_bram_xy_bank),
+        .rcmb_slim_bram_xy_addr         (recomb_slim_bram_xy_addr),
+        .rcmb_slim_bram_x_dout          (recomb_slim_bram_x_dout),
+        .rcmb_slim_bram_y_dout          (recomb_slim_bram_y_dout),
+        .rcmb_slim_bram_xy_dout_valid   (recomb_slim_bram_xy_dout_valid)
     );
     
     reg [17:0] AB_READ[0:63];
     reg [17:0] Q_READ[0:32];
+    reg [17:0] M_READ[0:64];
     
-    always @(posedge clk)
+    always @(posedge clk) begin
         //
         if (recomb_fat_bram_xy_dout_valid)
             //
             case (recomb_fat_bram_xy_bank)
-                3'd1: AB_READ[     (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
-                3'd2: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
-                3'd3: Q_READ [     (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
-                3'd4: Q_READ [32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+                BANK_FAT_ABL: AB_READ[recomb_fat_bram_xy_addr % 32] <= recomb_fat_bram_x_dout;
+                BANK_FAT_ABH: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+                BANK_FAT_ML:  M_READ[recomb_fat_bram_xy_addr % 32] <= recomb_fat_bram_x_dout;
+                BANK_FAT_MH:  M_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
+                BANK_FAT_EXT: M_READ[64 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
             endcase
+            //
+        if (recomb_slim_bram_xy_dout_valid)
+            //
+            case (recomb_slim_bram_xy_bank)
+                BANK_SLIM_Q: Q_READ[recomb_slim_bram_xy_addr] <= recomb_slim_bram_x_dout;
+                BANK_SLIM_EXT: if (recomb_slim_bram_xy_addr == 8'd1)
+                             Q_READ[32] <= recomb_slim_bram_x_dout;
+            endcase
+            //
+    end
             
 
     always @(posedge clk)
@@ -873,6 +1038,29 @@ module tb_square;
         end
 
 
+    always @(posedge clk)
+        // Registered write-port mux for the slim BRAMs: testbench preload has priority over recombinator write-back; idle otherwise.
+        if (tb_slim_bram_xy_ena) begin
+            mgr_slim_bram_xy_ena  <= 1'b1;
+            mgr_slim_bram_xy_bank <= tb_slim_bram_xy_bank;
+            mgr_slim_bram_xy_addr <= tb_slim_bram_xy_addr;
+            mgr_slim_bram_x_din   <= tb_slim_bram_x_din;
+            mgr_slim_bram_y_din   <= tb_slim_bram_y_din;
+        end else if (recomb_slim_bram_xy_dout_valid) begin
+            mgr_slim_bram_xy_ena  <= 1'b1;
+            mgr_slim_bram_xy_bank <= recomb_slim_bram_xy_bank[1:0]; // wire is declared [2:0], bank reg is [1:0]: make the truncation explicit
+            mgr_slim_bram_xy_addr <= recomb_slim_bram_xy_addr;
+            mgr_slim_bram_x_din   <= recomb_slim_bram_x_dout;
+            mgr_slim_bram_y_din   <= recomb_slim_bram_y_dout;
+        end else begin
+            mgr_slim_bram_xy_ena  <= 1'b0;                          // no writer active: disable the port ...
+            mgr_slim_bram_xy_bank <= 2'bXX;                         // ... and drive don't-cares (was 3'bXXX, one bit wider than the reg)
+            mgr_slim_bram_xy_addr <= 8'hXX;
+            mgr_slim_bram_x_din   <= {18{1'bX}};
+            mgr_slim_bram_y_din   <= {18{1'bX}};
+        end
+
+
     task verify_ab;
         reg verify_ab_ok;
         begin
@@ -911,9 +1099,29 @@ module tb_square;
     endtask
 
 
+    task verify_m;  // prints every reference word M[i] next to the captured M_READ[i], then an overall verdict
+        reg all_words_match;    // cleared by any mismatching word
+        begin
+            all_words_match = 1'b1;
+            for (i=0; i<=64; i=i+1)     // 65 words, M[0:64]
+                if (M_READ[i] !== M[i]) begin   // case inequality: X/Z bits are compared literally
+                    $display("M / M_READ [%02d] = 0x%05x / 0x%05x <???>", i, M[i], M_READ[i]);
+                    all_words_match = 1'b0;
+                end else
+                    $display("M / M_READ [%02d] = 0x%05x / 0x%05x", i, M[i], M_READ[i]);
+            //
+            if (!all_words_match)
+                $display("M is WRONG!");
+            else
+                $display("M is OK.");
+        end
+    endtask
+
+
     wire mult_square_addr_done = mult_square_addr_surely_done_flop;
-    
     wire mult_triangle_addr_done = !col_is_last ? mult_triangle_addr_surely_done_flop : mult_triangle_addr_tardy_done_flop;
+    wire mult_rectangle_addr_done = mult_rectangle_addr_tardy_done_flop;
+    
 
     always @* begin
         //
@@ -940,7 +1148,17 @@ module tb_square;
             FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
             FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = mult_triangle_addr_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
             
-            FSM_STATE_MULT_TRIANGLE_HOLDOFF:    fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_HOLDOFF;//recomb_rdy ? FSM_STATE_IDLE : FSM_STATE_MULT_SQUARE_HOLDOFF;
+            FSM_STATE_MULT_TRIANGLE_HOLDOFF:    fsm_state_next =                         recomb_rdy ? FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF;
+
+            FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next =                         FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
+            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next =                         FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
+            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = mult_rectangle_addr_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;     
+            
+            FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next =                         FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
+            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next =                         FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
+            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = mult_rectangle_addr_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
+            
+            FSM_STATE_MULT_RECTANGLE_HOLDOFF:    fsm_state_next =                         recomb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF;
             
             default:                          fsm_state_next =                         FSM_STATE_IDLE                   ;
 
diff --git a/rtl/modexpng_mmm_fsm.vh b/rtl/modexpng_mmm_fsm.vh
index 2700a42..3bdae66 100644
--- a/rtl/modexpng_mmm_fsm.vh
+++ b/rtl/modexpng_mmm_fsm.vh
@@ -30,5 +30,14 @@ localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_COL_N_BUSY = 26;
     
 localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_TRIANGLE_HOLDOFF = 27;
 
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_INIT = 31;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_TRIG = 32;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_0_BUSY = 33;
+
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_INIT = 34;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_TRIG = 35;
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_COL_N_BUSY = 36;
+
+localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_MULT_RECTANGLE_HOLDOFF = 37;
+
 localparam [FSM_STATE_WIDTH-1:0] FSM_STATE_STOP = 999;
-    
\ No newline at end of file
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index f846119..57eef35 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -3,19 +3,19 @@
 
 //localparam BANK_ADDR_WIDTH = 3; // TODO: Replace everywhere!
 
-localparam [2:0] BANK_FAT_T1T2   = 3'd0;
-localparam [2:0] BANK_FAT_ABL    = 3'd1;
-localparam [2:0] BANK_FAT_ABH    = 3'd2;
-localparam [2:0] BANK_FAT_Q      = 3'd3;
-localparam [2:0] BANK_FAT_Q_EXT  = 3'd4;
-localparam [2:0] BANK_FAT_ML     = 3'd5;
-localparam [2:0] BANK_FAT_MH     = 3'd6;
-localparam [2:0] BANK_FAT_MH_EXT = 3'd7;
-
-localparam [1:0] BANK_SLIM_T1T2        = 2'd0;
-localparam [1:0] BANK_SLIM_N           = 2'd1;
-localparam [1:0] BANK_SLIM_N_COEFF     = 2'd2;
-localparam [1:0] BANK_SLIM_N_COEFF_EXT = 2'd3;
+localparam [2:0] BANK_FAT_T1T2      = 3'd0;
+localparam [2:0] BANK_FAT_ABL       = 3'd1;
+localparam [2:0] BANK_FAT_ABH       = 3'd2;
+localparam [2:0] BANK_FAT_N         = 3'd3;
+localparam [2:0] BANK_FAT_ML        = 3'd4;
+localparam [2:0] BANK_FAT_MH        = 3'd5;
+localparam [2:0] BANK_FAT_EXT       = 3'd6; // 0 -> MH'
+localparam [2:0] BANK_FAT_UNUSED    = 3'd7;
+
+localparam [1:0] BANK_SLIM_T1T2     = 2'd0;
+localparam [1:0] BANK_SLIM_N_COEFF  = 2'd1;
+localparam [1:0] BANK_SLIM_Q        = 2'd2;
+localparam [1:0] BANK_SLIM_EXT      = 2'd3; // 0 -> N_COEFF', 1 -> Q'
 
 
 //localparam BANK_Y_T2      = 3'd0;
diff --git a/rtl/modexpng_part_recombinator.v b/rtl/modexpng_part_recombinator.v
index c51e7ef..567ecd5 100644
--- a/rtl/modexpng_part_recombinator.v
+++ b/rtl/modexpng_part_recombinator.v
@@ -9,7 +9,8 @@ module modexpng_part_recombinator
     dsp_x_p, dsp_y_p,
     col_index, col_index_last,
     slim_bram_xy_addr, slim_bram_xy_bank,
-    fat_bram_xy_bank, fat_bram_xy_addr, fat_bram_x_dout, fat_bram_y_dout, fat_bram_xy_dout_valid
+    rcmb_fat_bram_xy_bank,  rcmb_fat_bram_xy_addr,  rcmb_fat_bram_x_dout,  rcmb_fat_bram_y_dout,  rcmb_fat_bram_xy_dout_valid,
+    rcmb_slim_bram_xy_bank, rcmb_slim_bram_xy_addr, rcmb_slim_bram_x_dout, rcmb_slim_bram_y_dout, rcmb_slim_bram_xy_dout_valid
 );
 
 
@@ -36,11 +37,17 @@ module modexpng_part_recombinator
     input  [     7:0] slim_bram_xy_addr;
     input  [     1:0] slim_bram_xy_bank;
 
-    output [     2:0] fat_bram_xy_bank;
-    output [     7:0] fat_bram_xy_addr;
-    output [    17:0] fat_bram_x_dout;
-    output [    17:0] fat_bram_y_dout;
-    output            fat_bram_xy_dout_valid;
+    output [     2:0] rcmb_fat_bram_xy_bank;
+    output [     7:0] rcmb_fat_bram_xy_addr;
+    output [    17:0] rcmb_fat_bram_x_dout;
+    output [    17:0] rcmb_fat_bram_y_dout;
+    output            rcmb_fat_bram_xy_dout_valid;
+
+    output [     2:0] rcmb_slim_bram_xy_bank;
+    output [     7:0] rcmb_slim_bram_xy_addr;
+    output [    17:0] rcmb_slim_bram_x_dout;
+    output [    17:0] rcmb_slim_bram_y_dout;
+    output            rcmb_slim_bram_xy_dout_valid;
 
 
     //
@@ -148,10 +155,10 @@ module modexpng_part_recombinator
        if (ena_x && ena_y)
            //
            case (fsm_state_next)
-               FSM_STATE_MULT_SQUARE_COL_0_BUSY:       rcmb_mode <= 2'd1;
-               FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:     rcmb_mode <= 2'd2;
-               //FSM_STATE_MULT_RECTANGLE_COL_0_BUSY:    rcmb_mode <= 2'd3;
-               default:                                rcmb_mode <= 2'd0;
+               FSM_STATE_MULT_SQUARE_COL_0_BUSY:        rcmb_mode <= 2'd1;
+               FSM_STATE_MULT_TRIANGLE_COL_0_BUSY:      rcmb_mode <= 2'd2;
+               FSM_STATE_MULT_RECTANGLE_COL_0_BUSY:     rcmb_mode <= 2'd3;
+               default:                                 rcmb_mode <= 2'd0;
            endcase
 
                
@@ -203,7 +210,7 @@ module modexpng_part_recombinator
         input [1:0] slim_bram_xy_bank_value;
         begin
             //
-            if (slim_bram_xy_bank_value == BANK_SLIM_N_COEFF_EXT)
+            if (slim_bram_xy_bank_value == BANK_SLIM_EXT)
                 calc_triangle_aux_lsb = 1'b1;
             else
                 calc_triangle_aux_lsb = 1'b0;
@@ -216,6 +223,21 @@ module modexpng_part_recombinator
         end
     endfunction
     
+    function        calc_rectangle_valid_lsb;  // rectangle stage: LSB "valid" flag for the given slim BRAM address/bank
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;  // not read by this function
+        input [7:0] slim_bram_xy_addr_value;
+        input [1:0] slim_bram_xy_bank_value;
+        begin
+            // the flag can only be raised while addr[7:3] equals the column index, and then only for banks other than BANK_SLIM_EXT
+            if (slim_bram_xy_addr_value[7:3] == col_index_value) 
+                calc_rectangle_valid_lsb = slim_bram_xy_bank_value != BANK_SLIM_EXT;
+            else
+                calc_rectangle_valid_lsb = 1'b0;
+            //
+        end
+    endfunction
+    
     function  [7:0] calc_square_bitmap_lsb;
         input [4:0] col_index_value;
         input [4:0] col_index_last_value;
@@ -265,6 +287,32 @@ module modexpng_part_recombinator
                //
            end
        endfunction
+
+    function  [7:0] calc_rectangle_bitmap_lsb;  // rectangle stage: one-hot LSB bitmap decoded from addr[2:0]
+           input [4:0] col_index_value;
+           input [4:0] col_index_last_value;  // not read by this function
+           input [7:0] slim_bram_xy_addr_value;
+           input [1:0] slim_bram_xy_bank_value;
+           begin
+               // a bit is set only when addr[7:3] equals the column index and the bank is not BANK_SLIM_EXT
+               if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT))
+                   // one-hot decode: bit number = addr[2:0]
+                   case (slim_bram_xy_addr_value[2:0])
+                       3'b000: calc_rectangle_bitmap_lsb = 8'b00000001;
+                       3'b001: calc_rectangle_bitmap_lsb = 8'b00000010;
+                       3'b010: calc_rectangle_bitmap_lsb = 8'b00000100;
+                       3'b011: calc_rectangle_bitmap_lsb = 8'b00001000;
+                       3'b100: calc_rectangle_bitmap_lsb = 8'b00010000;
+                       3'b101: calc_rectangle_bitmap_lsb = 8'b00100000;
+                       3'b110: calc_rectangle_bitmap_lsb = 8'b01000000;
+                       3'b111: calc_rectangle_bitmap_lsb = 8'b10000000;
+                   endcase
+                   // otherwise no bit is set
+               else
+                   calc_rectangle_bitmap_lsb = {8{1'b0}};
+               //
+           end
+       endfunction
        
     function  [2:0] calc_square_index_lsb;
         input [4:0] col_index_value;
@@ -315,6 +363,32 @@ module modexpng_part_recombinator
             //
         end
     endfunction
+
+    function  [2:0] calc_rectangle_index_lsb;  // rectangle stage: LSB index, i.e. addr[2:0] when the address/bank qualify
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;  // not read by this function
+        input [7:0] slim_bram_xy_addr_value;
+        input [1:0] slim_bram_xy_bank_value;
+        begin
+            // index is defined only when addr[7:3] equals the column index and the bank is not BANK_SLIM_EXT
+            if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_SLIM_EXT))
+                // the case below maps addr[2:0] onto the same numeric value
+                case (slim_bram_xy_addr_value[2:0])
+                    3'b000: calc_rectangle_index_lsb = 3'd0;
+                    3'b001: calc_rectangle_index_lsb = 3'd1;
+                    3'b010: calc_rectangle_index_lsb = 3'd2;
+                    3'b011: calc_rectangle_index_lsb = 3'd3;
+                    3'b100: calc_rectangle_index_lsb = 3'd4;
+                    3'b101: calc_rectangle_index_lsb = 3'd5;
+                    3'b110: calc_rectangle_index_lsb = 3'd6;
+                    3'b111: calc_rectangle_index_lsb = 3'd7;
+                endcase
+                // otherwise the result is explicitly unknown (X)
+            else
+                calc_rectangle_index_lsb = 3'dX;
+            //
+        end
+    endfunction
     
     function        calc_square_purge_lsb;
         input [4:0] col_index_value;
@@ -330,6 +404,20 @@ module modexpng_part_recombinator
         end
     endfunction
 
+    function        calc_rectangle_purge_lsb;  // rectangle stage: LSB "purge" flag
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        begin
+            // asserted only when addr[7:3] equals both the current column index and the last column index
+            if (slim_bram_xy_addr_value[7:3] == col_index_value)
+                calc_rectangle_purge_lsb = slim_bram_xy_addr_value[7:3] == col_index_last_value;
+            else
+                calc_rectangle_purge_lsb = 1'b0;
+            //
+        end
+    endfunction
+
     function        calc_square_valid_msb;
         input [4:0] col_index_value;
         input [4:0] col_index_last_value;
@@ -344,6 +432,22 @@ module modexpng_part_recombinator
             //
         end
     endfunction
+
+    function        calc_rectangle_valid_msb;  // rectangle stage: MSB "valid" flag
+        input [4:0] col_index_value;       // not read by this function
+        input [4:0] col_index_last_value;  // not read by this function
+        input [7:0] slim_bram_xy_addr_value;
+        input [1:0] slim_bram_xy_bank_value;
+        input [7:0] index_last_value;      // not read by this function
+        begin
+            // asserted exactly when address 1 of bank BANK_SLIM_EXT is being accessed
+            if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT))
+                calc_rectangle_valid_msb = 1'b1;
+            else
+                calc_rectangle_valid_msb = 1'b0;
+            //
+        end
+    endfunction
     
     function  [7:0] calc_square_bitmap_msb;
         input [4:0] col_index_value;
@@ -361,6 +465,22 @@ module modexpng_part_recombinator
         end
     endfunction
 
+    function  [7:0] calc_rectangle_bitmap_msb;  // rectangle stage: MSB bitmap, either all ones or all zeroes
+        input [4:0] col_index_value;       // not read by this function
+        input [4:0] col_index_last_value;  // not read by this function
+        input [7:0] slim_bram_xy_addr_value;
+        input [1:0] slim_bram_xy_bank_value;
+        input [7:0] index_last_value;      // not read by this function
+        begin
+            // all eight bits are set exactly when address 1 of bank BANK_SLIM_EXT is being accessed
+            if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT)) begin
+                calc_rectangle_bitmap_msb[7:0] = 8'b11111111;
+            end else
+                calc_rectangle_bitmap_msb[7:0] = 8'b00000000;
+            //
+        end
+    endfunction
+
     function        calc_square_purge_msb;
         input [4:0] col_index_value;
         input [4:0] col_index_last_value;
@@ -376,6 +496,22 @@ module modexpng_part_recombinator
         end
     endfunction
 
+    function        calc_rectangle_purge_msb;  // rectangle stage: MSB "purge" flag
+        input [4:0] col_index_value;
+        input [4:0] col_index_last_value;
+        input [7:0] slim_bram_xy_addr_value;
+        input [1:0] slim_bram_xy_bank_value;
+        input [7:0] index_last_value;      // not read by this function
+        begin
+            // when address 1 of bank BANK_SLIM_EXT is accessed, purge iff the current column is the last column
+            if ((slim_bram_xy_addr_value == 8'd1) && (slim_bram_xy_bank_value == BANK_SLIM_EXT))
+                calc_rectangle_purge_msb = col_index_value == col_index_last_value;
+            else
+                calc_rectangle_purge_msb = 1'b0;
+            //
+        end
+    endfunction
+
     
     reg         recomb_lsb_ce = 1'b0;
     reg         recomb_lsb_ce_aux;
@@ -494,7 +630,24 @@ module modexpng_part_recombinator
                 xy_bitmap_msb_adv[6] <= {8{1'b0}};
                 xy_purge_msb_adv [6] <= 1'b0;
                 //
-            end            
+            end
+            //
+            FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
+            FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
+            FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
+                //
+                xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
+                xy_aux_lsb_adv   [6] <= 1'b0;
+                xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
+                xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank);
+                xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, slim_bram_xy_addr);
+                //
+                xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
+                xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
+                xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, slim_bram_xy_addr, slim_bram_xy_bank, index_last);
+                //
+            end
             //
             default: begin
                 //
@@ -586,15 +739,24 @@ module modexpng_part_recombinator
     end
         
 
-
     reg [ 2:0] fat_bram_xy_bank_reg;
     reg [ 7:0] fat_bram_xy_addr_reg;
-    reg [ 7:0] fat_bram_xy_cnt_lsb;
-    reg [ 7:0] fat_bram_xy_cnt_msb;
     reg [17:0] fat_bram_x_dout_reg;
     reg [17:0] fat_bram_y_dout_reg;
     reg        fat_bram_xy_dout_valid_reg = 1'b0;
 
+    reg [ 2:0] slim_bram_xy_bank_reg;
+    reg [ 7:0] slim_bram_xy_addr_reg;
+    reg [17:0] slim_bram_x_dout_reg;
+    reg [17:0] slim_bram_y_dout_reg;
+    reg        slim_bram_xy_dout_valid_reg = 1'b0;
+
+    reg [ 7:0] bram_xy_cnt_lsb;
+    reg [ 7:0] bram_xy_cnt_msb;
+    
+    reg        bram_xy_cnt_lsb_wrapped;
+    reg        bram_xy_cnt_msb_wrapped;
+
     reg [15:0] recomb_msb_dout_carry_0;
     reg [15:0] recomb_msb_dout_carry_1;
     
@@ -606,11 +768,21 @@ module modexpng_part_recombinator
     reg [ 7:0] recomb_msb_cnt_delay_1 = 8'd0;
     reg [ 7:0] recomb_msb_cnt_delay_2 = 8'd0;
 
-    assign fat_bram_xy_bank       = fat_bram_xy_bank_reg;
-    assign fat_bram_xy_addr       = fat_bram_xy_addr_reg;
-    assign fat_bram_x_dout        = fat_bram_x_dout_reg;
-    assign fat_bram_y_dout        = fat_bram_y_dout_reg;
-    assign fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg;
+    reg        recomb_msb_flag_delay_0;
+    reg        recomb_msb_flag_delay_1;
+    reg        recomb_msb_flag_delay_2;
+
+    assign rcmb_fat_bram_xy_bank       = fat_bram_xy_bank_reg;
+    assign rcmb_fat_bram_xy_addr       = fat_bram_xy_addr_reg;
+    assign rcmb_fat_bram_x_dout        = fat_bram_x_dout_reg;
+    assign rcmb_fat_bram_y_dout        = fat_bram_y_dout_reg;
+    assign rcmb_fat_bram_xy_dout_valid = fat_bram_xy_dout_valid_reg;
+
+    assign rcmb_slim_bram_xy_bank       = slim_bram_xy_bank_reg;
+    assign rcmb_slim_bram_xy_addr       = slim_bram_xy_addr_reg;
+    assign rcmb_slim_bram_x_dout        = slim_bram_x_dout_reg;
+    assign rcmb_slim_bram_y_dout        = slim_bram_y_dout_reg;
+    assign rcmb_slim_bram_xy_dout_valid = slim_bram_xy_dout_valid_reg;
     
     reg rdy_reg = 1'b1;
     reg rdy_adv = 1'b1;
@@ -629,7 +801,9 @@ module modexpng_part_recombinator
     task advance_recomb_msb_dout_delay;
         input [15:0] dout;
         input [ 7:0] cnt;
+        input        flag;
         begin
+            //
             recomb_msb_dout_delay_0 <= dout;
             recomb_msb_dout_delay_1 <= recomb_msb_dout_delay_0;
             recomb_msb_dout_delay_2 <= recomb_msb_dout_delay_1;
@@ -637,6 +811,11 @@ module modexpng_part_recombinator
             recomb_msb_cnt_delay_0 <= cnt;
             recomb_msb_cnt_delay_1 <= recomb_msb_cnt_delay_0;
             recomb_msb_cnt_delay_2 <= recomb_msb_cnt_delay_1;
+            //
+            recomb_msb_flag_delay_0 <= flag;
+            recomb_msb_flag_delay_1 <= recomb_msb_flag_delay_0;
+            recomb_msb_flag_delay_2 <= recomb_msb_flag_delay_1;
+            //
         end
     endtask
          
@@ -659,10 +838,24 @@ module modexpng_part_recombinator
             fat_bram_xy_addr_reg       <= addr;
             fat_bram_x_dout_reg        <= dout_x;
             fat_bram_y_dout_reg        <= dout_y;
-            fat_bram_xy_dout_valid_reg <= 1'b1;
+            fat_bram_xy_dout_valid_reg <= valid;
         end
     endtask
     
+    task _update_slim_bram_regs;  // low-level helper: loads all slim BRAM output registers in one call
+        input [ 2:0] bank;    // NOTE(review): 3 bits wide here, while the BANK_SLIM_* constants are declared [1:0] -- confirm intended width
+        input [ 7:0] addr;
+        input [17:0] dout_x;
+        input [17:0] dout_y;
+        input        valid;   // value for the registered data-valid flag
+        begin
+            slim_bram_xy_bank_reg       <= bank;    // nonblocking: new values take effect after the current clock edge
+            slim_bram_xy_addr_reg       <= addr;
+            slim_bram_x_dout_reg        <= dout_x;
+            slim_bram_y_dout_reg        <= dout_y;
+            slim_bram_xy_dout_valid_reg <= valid;   // this register drives the rcmb_slim_bram_xy_dout_valid output
+        end
+    endtask
             
     task set_fat_bram_regs;
         input [ 2:0] bank;
@@ -674,44 +867,73 @@ module modexpng_part_recombinator
         end
     endtask
     
+    task set_slim_bram_regs;  // load the slim BRAM output registers and mark the data as valid
+        input [ 2:0] bank;
+        input [ 7:0] addr;
+        input [17:0] dout_x;
+        input [17:0] dout_y;
+        begin
+            _update_slim_bram_regs(bank, addr, dout_x, dout_y, 1'b1);  // valid flag forced to 1
+        end
+    endtask
+    
     task clear_fat_bram_regs;
         begin
             _update_fat_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
         end
     endtask
+
+    task clear_slim_bram_regs;  // deassert the slim BRAM valid flag; bank, address and data are set to X
+        begin
+            _update_slim_bram_regs(3'bXXX, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);  // only the valid flag (0) carries a defined value
+        end
+    endtask
     
-    task _set_fat_bram_cnt_lsb;
+    task _set_bram_cnt_lsb;
         input [7:0] cnt;
+        input       wrapped;
         begin
-            fat_bram_xy_cnt_lsb <= cnt;
+            bram_xy_cnt_lsb <= cnt;
+            bram_xy_cnt_lsb_wrapped <= wrapped;
         end
     endtask
-    task _set_fat_bram_cnt_msb;
+    
+    task _set_bram_cnt_msb;
         input [7:0] cnt;
+        input       wrapped;
         begin
-            fat_bram_xy_cnt_msb <= cnt;
+            bram_xy_cnt_msb <= cnt;
+            bram_xy_cnt_msb_wrapped <= wrapped;
         end
     endtask    
 
-    task inc_fat_bram_cnt_lsb;
+    task inc_bram_cnt_lsb;
         begin
-            _set_fat_bram_cnt_lsb(fat_bram_xy_cnt_lsb + 1'b1);
+            if (bram_xy_cnt_lsb == index_last)
+                _set_bram_cnt_lsb(8'd0, 1'b1);
+            else
+                _set_bram_cnt_lsb(bram_xy_cnt_lsb + 1'b1, bram_xy_cnt_lsb_wrapped);
         end
     endtask
-    task inc_fat_bram_cnt_msb;
+    
+    task inc_bram_cnt_msb;
         begin
-            _set_fat_bram_cnt_msb(fat_bram_xy_cnt_msb + 1'b1);
+            if (bram_xy_cnt_msb == index_last)
+                _set_bram_cnt_msb(8'd0, 1'b1);
+            else
+                _set_bram_cnt_msb(bram_xy_cnt_msb + 1'b1, bram_xy_cnt_msb_wrapped);
         end
     endtask
     
-    task clr_fat_bram_cnt_lsb;
+    task clr_bram_cnt_lsb;
         begin
-            _set_fat_bram_cnt_lsb(8'd0);
+            _set_bram_cnt_lsb(8'd0, 1'b0);
         end
     endtask
-    task clr_fat_bram_cnt_msb;
+    
+    task clr_bram_cnt_msb;
         begin
-            _set_fat_bram_cnt_msb(8'd0);
+            _set_bram_cnt_msb(8'd0, 1'b0);
         end
     endtask
     
@@ -724,51 +946,53 @@ module modexpng_part_recombinator
     always @(posedge clk)
         //
         if (ena_x & ena_y) begin
-            clr_fat_bram_cnt_lsb();
-            clr_fat_bram_cnt_msb();
+            clr_bram_cnt_lsb();
+            clr_bram_cnt_msb();
         end else begin  // if not ready???
             //
             case (rcmb_mode)
                 2'd1: recombine_square();
                 2'd2: recombine_triangle();
+                2'd3: recombine_rectangle();
             endcase
             //
         end
            
     task recombine_square;
+        //
         begin
             //
             case (rcmb_xy_dout_valid)
                 //
-                2'b01: inc_fat_bram_cnt_lsb(); 
-                2'b10: inc_fat_bram_cnt_msb();
+                2'b01: inc_bram_cnt_lsb(); 
+                2'b10: inc_bram_cnt_msb();
                 2'b11: begin
-                    if (fat_bram_xy_cnt_lsb == index_last) clr_fat_bram_cnt_lsb();
-                    else                                   inc_fat_bram_cnt_lsb();
-                    inc_fat_bram_cnt_msb();
+                    inc_bram_cnt_lsb();
+                    inc_bram_cnt_msb();
                 end
                 //
             endcase            
             //
             case (rcmb_xy_dout_valid)
                 //
-                2'b00:  if (recomb_msb_cnt_delay_2 > 8'd0)      set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
-                        else                                    clear_fat_bram_regs();
-                2'b01:                                          set_fat_bram_regs(BANK_FAT_ABL,        fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); 
-                2'b10:  if (fat_bram_xy_cnt_msb < 8'd2)         clear_fat_bram_regs();                        
-                        else                                    set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});                        
-                2'b11:  if (fat_bram_xy_cnt_lsb < index_last)   set_fat_bram_regs(BANK_FAT_ABH, fat_bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}}); 
-                        else                                    set_fat_bram_regs(BANK_FAT_ABL, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+                2'b00:  if (recomb_msb_flag_delay_2)  set_fat_bram_regs(BANK_FAT_ABH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
+                        else                                clear_fat_bram_regs();
+                  2'b01:                                      set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); 
+                  2'b10:  if (bram_xy_cnt_msb < 8'd2)         clear_fat_bram_regs();                        
+                        else                                set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});                        
+                2'b11:  if (bram_xy_cnt_lsb_wrapped)   set_fat_bram_regs(BANK_FAT_ABH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}}); 
+                        else                                set_fat_bram_regs(BANK_FAT_ABL, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+                default:    clear_fat_bram_regs();  // DEBUG!!!
                 //
             endcase            
             //
             case (rcmb_xy_dout_valid)
                 //
-                2'b00:  if (recomb_msb_cnt_delay_2 > 8'd0)      advance_recomb_msb_dout_delay(16'hXXXX, 8'd0);
-                2'b10:  if (fat_bram_xy_cnt_msb < 8'd2)         shift_recomb_msb_dout_carry(recomb_msb_dout);
-                //
-                2'b11:  begin                                   advance_recomb_msb_dout_delay(recomb_msb_dout, fat_bram_xy_cnt_msb);
-                        if (fat_bram_xy_cnt_lsb < index_last)   shift_recomb_msb_dout_carry({16{1'bX}});
+                2'b00:  if (recomb_msb_flag_delay_2)  advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
+                2'b10:  if (bram_xy_cnt_msb < 8'd2)         shift_recomb_msb_dout_carry(recomb_msb_dout);
+//                //
+                2'b11:  begin                          advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1);
+                        if (bram_xy_cnt_lsb_wrapped)   shift_recomb_msb_dout_carry({16{1'bX}});
                         end
                 //
             endcase
@@ -779,50 +1003,111 @@ module modexpng_part_recombinator
     
     
     task recombine_triangle;
+        //
         begin
             //
             case (rcmb_xy_dout_valid)
                 //
-                2'b01: begin                                  inc_fat_bram_cnt_lsb();
-                       if (fat_bram_xy_cnt_lsb == index_last) inc_fat_bram_cnt_msb();
-                end 
-                //
+                2'b01: inc_bram_cnt_lsb(); 
+               //
             endcase            
             //
             case (rcmb_xy_dout_valid)
                 //
-                2'b00:  clear_fat_bram_regs();
-                2'b01:  if (fat_bram_xy_cnt_msb == 8'd0) set_fat_bram_regs(BANK_FAT_Q,     fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); 
-                        else                             set_fat_bram_regs(BANK_FAT_Q_EXT, fat_bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+                2'b00:  clear_slim_bram_regs();
+                2'b01:  if (!bram_xy_cnt_lsb_wrapped) set_slim_bram_regs(BANK_SLIM_Q,   bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); 
+                        else                         set_slim_bram_regs(BANK_SLIM_EXT, 8'd1, {2'b00, recomb_lsb_dout}, {18{1'bX}});
+                2'b10:  clear_slim_bram_regs();
+                2'b11:  clear_slim_bram_regs();
                 //
             endcase
             //        
         end
+        //
     endtask
 
+
+    task recombine_rectangle;
+        // Per-clock bookkeeping for rcmb_mode == 2'd3 (rectangle stage); called from the clocked rcmb_mode dispatcher.
+        begin
+            // Step 1: advance the word counters; 2'b01 bumps the LSB counter, 2'b10 the MSB counter, 2'b11 both.
+            case (rcmb_xy_dout_valid)
+                // (counters are written with nonblocking assignments, so the cases below still read pre-increment values)
+                2'b01: inc_bram_cnt_lsb(); 
+                2'b10: inc_bram_cnt_msb();
+                2'b11: begin
+                    inc_bram_cnt_lsb();
+                    inc_bram_cnt_msb();
+                end
+                //
+            endcase
+            // Step 2: choose what is loaded into the fat BRAM output registers this cycle.
+            case (rcmb_xy_dout_valid)
+                // 2'b00: emit the oldest delayed MSB word to BANK_FAT_MH if its flag is set, else clear; 2'b01: LSB word -> BANK_FAT_ML.
+                2'b00:  if (recomb_msb_flag_delay_2)  set_fat_bram_regs(BANK_FAT_MH, recomb_msb_cnt_delay_2, {2'b00, recomb_msb_dout_delay_2}, {18{1'bX}});
+                        else                                clear_fat_bram_regs();
+                2'b01:                                      set_fat_bram_regs(BANK_FAT_ML, bram_xy_cnt_lsb, {2'b00, recomb_lsb_dout}, {18{1'bX}}); 
+                2'b10:  if (!bram_xy_cnt_msb_wrapped) begin 
+                            if (bram_xy_cnt_msb < 8'd2)         clear_fat_bram_regs();                        
+                            else                                set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_msb, {2'b00, recomb_msb_dout}, {18{1'bX}});
+                        end else
+                                                                set_fat_bram_regs(BANK_FAT_EXT, 8'd0, {2'b00, recomb_msb_dout}, {18{1'bX}});
+                            
+                2'b11:  set_fat_bram_regs(BANK_FAT_MH, bram_xy_cnt_lsb, {1'b0, {1'b0, recomb_lsb_dout} + {1'b0, recomb_msb_dout_carry_1}}, {18{1'bX}}); 
+                // 2'b10 above: MSB words 0..1 before the wrap are not emitted, later ones go to BANK_FAT_MH, post-wrap word goes to BANK_FAT_EXT address 0; 2'b11: LSB word plus recomb_msb_dout_carry_1 goes to BANK_FAT_MH.
+            endcase            
+            // Step 3: maintain the MSB delay line (dout/cnt/flag) and the carry registers.
+            case (rcmb_xy_dout_valid)
+                // 2'b00: push a flag = 0 entry whenever the oldest delay stage is flagged; 2'b10: see below.
+                2'b00:  if (recomb_msb_flag_delay_2)  advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
+                2'b10:  begin 
+                            if ((bram_xy_cnt_msb < 8'd2) && !bram_xy_cnt_msb_wrapped)         shift_recomb_msb_dout_carry(recomb_msb_dout);
+                            if (bram_xy_cnt_msb_wrapped) advance_recomb_msb_dout_delay(16'hXXXX, 8'd0, 1'b0);
+                        end
+                // 2'b11: push the current MSB word with its counter and flag = 1, then call shift_recomb_msb_dout_carry with an X argument.
+                2'b11:  begin  advance_recomb_msb_dout_delay(recomb_msb_dout, bram_xy_cnt_msb, 1'b1);
+                                   shift_recomb_msb_dout_carry({16{1'bX}});
+                        end
+//                //
+            endcase
+            //
+        end
+        //
+    endtask
     
     
     always @(posedge clk)
         //
         if (ena_x & ena_y) begin
             rdy_adv <= 1'b0;
-        end else begin
+        end else if (!rdy_reg) begin
             //
-            case ({recomb_x_msb_dout_valid, recomb_x_lsb_dout_valid})
+            case (rcmb_mode)
+                //
+                2'd1:   case (rcmb_xy_dout_valid)
+                            //
+                            2'b00: begin
+                                //
+                                if (recomb_msb_flag_delay_2) begin
+                                    //
+                                    rdy_adv <= ~recomb_msb_flag_delay_1;
+                                    //
+                                end
+                                //
+                            end
+                        endcase
+                //
+                2'd2:   case (rcmb_xy_dout_valid)
+                            //
+                            2'b01: rdy_adv <= bram_xy_cnt_lsb_wrapped;                                //
+                            //
+                        endcase
                 //
-                2'b00: begin
-                    //
-                    if (recomb_msb_cnt_delay_2 > 8'd0) begin
-                        //
-                        rdy_adv <= recomb_msb_cnt_delay_1 == 8'd0;
-                        //
-                    end
-                    //
-                end
             endcase
             //        
         end
 
 
     
+        // TODO: add ready (rdy_adv) handling for rcmb_mode == 2'd3 (rectangle mode)
 endmodule



More information about the Commits mailing list