[Cryptech-Commits] [core/math/modexpng] 88/92: New partial product recombination algorithm.

git at cryptech.is git at cryptech.is
Sat Mar 14 18:20:07 UTC 2020


This is an automated email from the git hooks/post-receive script.

paul at psgd.org pushed a commit to branch master
in repository core/math/modexpng.

commit 77d11487d8987e13403f426537dc9bc59141f3f3
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Mon Feb 3 22:49:25 2020 +0300

    New partial product recombination algorithm.
---
 rtl/modexpng_recombinator_block.v | 219 ++++++++++++++++++++++----------------
 rtl/modexpng_recombinator_cell.v  | 188 +++++++++++++++++++++++++-------
 2 files changed, 278 insertions(+), 129 deletions(-)

diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
index cc89db0..e3cb50f 100644
--- a/rtl/modexpng_recombinator_block.v
+++ b/rtl/modexpng_recombinator_block.v
@@ -153,7 +153,7 @@ module modexpng_recombinator_block
     // index - latch
     reg [MAC_INDEX_W-1:0] xy_index_latch_lsb;
     
-    // purge - index
+    // purge - latch
     reg       xy_purge_latch_lsb = 1'b0;
     reg       xy_purge_latch_msb = 1'b0;
 
@@ -496,21 +496,25 @@ module modexpng_recombinator_block
     
     reg         rcmb_xy_lsb_ce = 1'b0;
     reg         rcmb_xy_lsb_ce_aux = 1'b0;
+    reg         rcmb_xy_lsb_ce_aux_dly = 1'b0;
     reg  [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000;
-    wire        rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0];
+    wire        rcmb_xy_lsb_ce_combined     = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0];
+    wire        rcmb_xy_lsb_ce_combined_ext = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0] | rcmb_xy_lsb_ce_aux_dly;
     reg         rcmb_xy_lsb_clr;
     wire        rcmb_xy_lsb_cry = !xy_valid_latch_lsb && rcmb_xy_lsb_ce_purge[1];
 
-    reg  [ MAC_W     -1:0] rcmb_x_lsb_din;
-    reg  [ MAC_W     -1:0] rcmb_y_lsb_din;
-    wire [WORD_W     -1:0] rcmb_x_lsb_dout;
-    wire [WORD_W     -1:0] rcmb_y_lsb_dout;
-    wire [WORD_EXT_W -2:0] rcmb_x_lsb_dout_ext;
-    wire [WORD_EXT_W -2:0] rcmb_y_lsb_dout_ext;    
+    reg  [ MAC_W -1:0] rcmb_x_lsb_din;
+    reg  [ MAC_W -1:0] rcmb_y_lsb_din;
+    wire [WORD_W -1:0] rcmb_x_lsb_dout;
+    wire [WORD_W -1:0] rcmb_y_lsb_dout;
+    wire [WORD_W   :0] rcmb_x_lsb_doutw;
+    wire [WORD_W   :0] rcmb_y_lsb_doutw;
 
     reg         rcmb_xy_msb_ce = 1'b0;
     reg  [ 1:0] rcmb_xy_msb_ce_purge = 2'b00;
-    wire        rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0];
+    reg         rcmb_xy_msb_ce_purge0_rectangle_dly = 1'b0;
+    wire        rcmb_xy_msb_ce_combined     = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0];
+    wire        rcmb_xy_msb_ce_combined_ext = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0] | rcmb_xy_msb_ce_purge0_rectangle_dly;
     reg         rcmb_xy_msb_clr;
     
     reg  [ MAC_W -1:0] rcmb_x_msb_din;
@@ -518,42 +522,45 @@ module modexpng_recombinator_block
     wire [WORD_W -1:0] rcmb_x_msb_dout;
     wire [WORD_W -1:0] rcmb_y_msb_dout;
     
-    modexpng_recombinator_cell recomb_x_lsb
+    always @(posedge clk) rcmb_xy_lsb_ce_aux_dly <= rcmb_xy_lsb_ce_aux;
+    always @(posedge clk) rcmb_xy_msb_ce_purge0_rectangle_dly <= rcmb_mode == RCMB_MODE_RECTANGLE ? rcmb_xy_msb_ce_purge[0] : 1'b0;
+    
+    modexpng_recombinator_cell recomb_x_lsb_new
     (
-        .clk      (clk),
-        .ce       (rcmb_xy_lsb_ce_combined),
-        .clr      (rcmb_xy_lsb_clr),
-        .din      (rcmb_x_lsb_din),
-        .dout     (rcmb_x_lsb_dout),
-        .dout_ext (rcmb_x_lsb_dout_ext)
+        .clk   (clk),
+        .ce    (rcmb_xy_lsb_ce_combined_ext),
+        .clr   (rcmb_xy_lsb_clr),
+        .din   (rcmb_x_lsb_din),
+        .dout  (rcmb_x_lsb_dout),
+        .doutw (rcmb_x_lsb_doutw)
     );
-    modexpng_recombinator_cell recomb_y_lsb
+    modexpng_recombinator_cell recomb_y_lsb_new
     (
-        .clk      (clk),
-        .ce       (rcmb_xy_lsb_ce_combined),
-        .clr      (rcmb_xy_lsb_clr),
-        .din      (rcmb_y_lsb_din),
-        .dout     (rcmb_y_lsb_dout),
-        .dout_ext (rcmb_y_lsb_dout_ext)
+        .clk   (clk),
+        .ce    (rcmb_xy_lsb_ce_combined_ext),
+        .clr   (rcmb_xy_lsb_clr),
+        .din   (rcmb_y_lsb_din),
+        .dout  (rcmb_y_lsb_dout),
+        .doutw (rcmb_y_lsb_doutw)
     );
 
-    modexpng_recombinator_cell recomb_x_msb
+    modexpng_recombinator_cell recomb_x_msb_new
     (
-        .clk      (clk),
-        .ce       (rcmb_xy_msb_ce_combined),
-        .clr      (rcmb_xy_msb_clr),
-        .din      (rcmb_x_msb_din),
-        .dout     (rcmb_x_msb_dout),
-        .dout_ext ()
+        .clk   (clk),
+        .ce    (rcmb_xy_msb_ce_combined_ext),
+        .clr   (rcmb_xy_msb_clr),
+        .din   (rcmb_x_msb_din),
+        .dout  (rcmb_x_msb_dout),
+        .doutw ()
     );
-    modexpng_recombinator_cell recomb_y_msb
+    modexpng_recombinator_cell recomb_y_msb_new
     (
-        .clk      (clk),
-        .ce       (rcmb_xy_msb_ce_combined),
-        .clr      (rcmb_xy_msb_clr),
-        .din      (rcmb_y_msb_din),
-        .dout     (rcmb_y_msb_dout),
-        .dout_ext ()
+        .clk   (clk),
+        .ce    (rcmb_xy_msb_ce_combined_ext),
+        .clr   (rcmb_xy_msb_clr),
+        .din   (rcmb_y_msb_din),
+        .dout  (rcmb_y_msb_dout),
+        .doutw ()
     );
 
     always @(posedge clk) begin
@@ -704,7 +711,6 @@ module modexpng_recombinator_block
             xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[NUM_MULTS-1:1]};
         end
         //
-        //
         for (i=1; i<6; i=i+1) begin
             xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
             xy_aux_lsb_adv   [i] <= xy_aux_lsb_adv   [i+1];
@@ -753,17 +759,52 @@ module modexpng_recombinator_block
         end
 
 
-    reg rcmb_xy_lsb_ce_combined_dly = 1'b0;
-    reg rcmb_xy_msb_ce_combined_dly = 1'b0;
+    reg rcmb_xy_lsb_ce_combined_dly1 = 1'b0;
+    reg rcmb_xy_msb_ce_combined_dly1 = 1'b0;
+    
+    reg rcmb_xy_lsb_ce_combined_dly2 = 1'b0;
+    reg rcmb_xy_msb_ce_combined_dly2 = 1'b0;
+
+    reg rcmb_xy_lsb_ce_combined_dly3 = 1'b0;
+    reg rcmb_xy_msb_ce_combined_dly3 = 1'b0;
+
+    reg rcmb_xy_lsb_ce_combined_dly4 = 1'b0;
+    reg rcmb_xy_msb_ce_combined_dly4 = 1'b0;
+
+    reg rcmb_xy_lsb_ce_combined_dly5 = 1'b0;
+    reg rcmb_xy_msb_ce_combined_dly5 = 1'b0;
+
+    reg rcmb_xy_lsb_ce_combined_dly6 = 1'b0;
+    reg rcmb_xy_msb_ce_combined_dly6 = 1'b0;
 
     always @(posedge clk or negedge rst_n)
         //
         if (!rst_n) begin
-            rcmb_xy_lsb_ce_combined_dly <= 1'b0;
-            rcmb_xy_msb_ce_combined_dly <= 1'b0;        
+            rcmb_xy_lsb_ce_combined_dly1 <= 1'b0;
+            rcmb_xy_msb_ce_combined_dly1 <= 1'b0;
+            rcmb_xy_lsb_ce_combined_dly2 <= 1'b0;
+            rcmb_xy_msb_ce_combined_dly2 <= 1'b0;        
+            rcmb_xy_lsb_ce_combined_dly3 <= 1'b0;
+            rcmb_xy_msb_ce_combined_dly3 <= 1'b0;        
+            rcmb_xy_lsb_ce_combined_dly4 <= 1'b0;
+            rcmb_xy_msb_ce_combined_dly4 <= 1'b0;        
+            rcmb_xy_lsb_ce_combined_dly5 <= 1'b0;
+            rcmb_xy_msb_ce_combined_dly5 <= 1'b0;        
+            rcmb_xy_lsb_ce_combined_dly6 <= 1'b0;
+            rcmb_xy_msb_ce_combined_dly6 <= 1'b0;        
         end else begin
-            rcmb_xy_lsb_ce_combined_dly <= rcmb_xy_lsb_ce_combined;
-            rcmb_xy_msb_ce_combined_dly <= rcmb_xy_msb_ce_combined;
+            rcmb_xy_lsb_ce_combined_dly1 <= rcmb_xy_lsb_ce_combined;
+            rcmb_xy_msb_ce_combined_dly1 <= rcmb_xy_msb_ce_combined;
+            rcmb_xy_lsb_ce_combined_dly2 <= rcmb_xy_lsb_ce_combined_dly1;
+            rcmb_xy_msb_ce_combined_dly2 <= rcmb_xy_msb_ce_combined_dly1;
+            rcmb_xy_lsb_ce_combined_dly3 <= rcmb_xy_lsb_ce_combined_dly2;
+            rcmb_xy_msb_ce_combined_dly3 <= rcmb_xy_msb_ce_combined_dly2;
+            rcmb_xy_lsb_ce_combined_dly4 <= rcmb_xy_lsb_ce_combined_dly3;
+            rcmb_xy_msb_ce_combined_dly4 <= rcmb_xy_msb_ce_combined_dly3;
+            rcmb_xy_lsb_ce_combined_dly5 <= rcmb_xy_lsb_ce_combined_dly4;
+            rcmb_xy_msb_ce_combined_dly5 <= rcmb_xy_msb_ce_combined_dly4;
+            rcmb_xy_lsb_ce_combined_dly6 <= rcmb_xy_lsb_ce_combined_dly5;
+            rcmb_xy_msb_ce_combined_dly6 <= rcmb_xy_msb_ce_combined_dly5;
         end        
 
     reg rcmb_xy_lsb_valid = 1'b0;
@@ -775,8 +816,8 @@ module modexpng_recombinator_block
             rcmb_xy_lsb_valid <= 1'b0;
             rcmb_xy_msb_valid <= 1'b0;        
         end else begin
-            rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly;
-            rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly;
+            rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly6;
+            rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly6;
         end        
 
 
@@ -811,17 +852,17 @@ module modexpng_recombinator_block
     reg cnt_lsb_wrapped;
     reg cnt_msb_wrapped;
     
-    reg [31:0] rcmb_xy_msb_delay_0;
-    reg [31:0] rcmb_xy_msb_delay_1;
-    reg [31:0] rcmb_xy_msb_delay_2;
+    reg [WORD_W-1:0] rcmb_xy_msb_dly_0_x, rcmb_xy_msb_dly_0_y;
+    reg [WORD_W-1:0] rcmb_xy_msb_dly_1_x, rcmb_xy_msb_dly_1_y;
+    reg [WORD_W-1:0] rcmb_xy_msb_dly_2_x, rcmb_xy_msb_dly_2_y;
     
-    reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_0 = OP_ADDR_ZERO;
-    reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_1 = OP_ADDR_ZERO;
-    reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_2 = OP_ADDR_ZERO;
+    reg [OP_ADDR_W -1:0] rcmb_msb_cnt_dly_0 = OP_ADDR_ZERO;
+    reg [OP_ADDR_W -1:0] rcmb_msb_cnt_dly_1 = OP_ADDR_ZERO;
+    reg [OP_ADDR_W -1:0] rcmb_msb_cnt_dly_2 = OP_ADDR_ZERO;
 
-    reg rcmb_msb_flag_delay_0 = 1'b0;
-    reg rcmb_msb_flag_delay_1 = 1'b0;
-    reg rcmb_msb_flag_delay_2 = 1'b0;
+    reg rcmb_msb_flag_dly_0 = 1'b0;
+    reg rcmb_msb_flag_dly_1 = 1'b0;
+    reg rcmb_msb_flag_dly_2 = 1'b0;
     
     
     //
@@ -870,24 +911,24 @@ module modexpng_recombinator_block
         input                  flag;
         begin
             //
-            rcmb_xy_msb_delay_0 <= {dout_y, dout_x};
-            rcmb_xy_msb_delay_1 <= rcmb_xy_msb_delay_0;
-            rcmb_xy_msb_delay_2 <= rcmb_xy_msb_delay_1;
+            {rcmb_xy_msb_dly_0_x, rcmb_xy_msb_dly_0_y} <= {dout_x, dout_y};
+            {rcmb_xy_msb_dly_1_x, rcmb_xy_msb_dly_1_y} <= {rcmb_xy_msb_dly_0_x, rcmb_xy_msb_dly_0_y};
+            {rcmb_xy_msb_dly_2_x, rcmb_xy_msb_dly_2_y} <= {rcmb_xy_msb_dly_1_x, rcmb_xy_msb_dly_1_y};
             //
-            rcmb_msb_cnt_delay_0 <= cnt;
-            rcmb_msb_cnt_delay_1 <= rcmb_msb_cnt_delay_0;
-            rcmb_msb_cnt_delay_2 <= rcmb_msb_cnt_delay_1;
+            rcmb_msb_cnt_dly_0 <= cnt;
+            rcmb_msb_cnt_dly_1 <= rcmb_msb_cnt_dly_0;
+            rcmb_msb_cnt_dly_2 <= rcmb_msb_cnt_dly_1;
             //
-            rcmb_msb_flag_delay_0 <= flag;
-            rcmb_msb_flag_delay_1 <= rcmb_msb_flag_delay_0;
-            rcmb_msb_flag_delay_2 <= rcmb_msb_flag_delay_1;
+            rcmb_msb_flag_dly_0 <= flag;
+            rcmb_msb_flag_dly_1 <= rcmb_msb_flag_dly_0;
+            rcmb_msb_flag_dly_2 <= rcmb_msb_flag_dly_1;
             //
         end
     endtask
          
     task _update_rcmb_msb_carry;
-        input [WORD_W -1:0] dout_x;
-        input [WORD_W -1:0] dout_y;
+        input [WORD_W-1:0] dout_x;
+        input [WORD_W-1:0] dout_y;
         begin
             rcmb_x_msb_carry_0 <= dout_x;
             rcmb_y_msb_carry_0 <= dout_y;
@@ -1031,8 +1072,8 @@ module modexpng_recombinator_block
     always @(posedge clk)
         //
         if (ena) begin
-            clr_cnt_lsb();
-            clr_cnt_msb();
+            clr_cnt_lsb;
+            clr_cnt_msb;
         end else if (!rdy)
             //
             case (rcmb_mode)
@@ -1040,22 +1081,22 @@ module modexpng_recombinator_block
                 RCMB_MODE_TRIANGLE:  recombine_triangle();
                 RCMB_MODE_RECTANGLE: recombine_rectangle();
             endcase
-           
-           
+
+    
     //
     // Padding
     //
     wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_pad = {CARRY_ZERO, rcmb_x_lsb_dout};
     wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_pad = {CARRY_ZERO, rcmb_y_lsb_dout};
     
-    wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_ext_pad = {1'b0, rcmb_x_lsb_dout_ext};
-    wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_ext_pad = {1'b0, rcmb_y_lsb_dout_ext};
+    wire [WORD_EXT_W-1:0] rcmb_x_lsb_doutw_pad = {1'b0, rcmb_x_lsb_doutw};
+    wire [WORD_EXT_W-1:0] rcmb_y_lsb_doutw_pad = {1'b0, rcmb_y_lsb_doutw};
 
     wire [WORD_EXT_W-1:0] rcmb_x_msb_dout_pad = {CARRY_ZERO, rcmb_x_msb_dout};
     wire [WORD_EXT_W-1:0] rcmb_y_msb_dout_pad = {CARRY_ZERO, rcmb_y_msb_dout};
 
-    wire [WORD_EXT_W-1:0] rcmb_x_msb_delay_2_pad = {CARRY_ZERO, rcmb_xy_msb_delay_2[15: 0]};
-    wire [WORD_EXT_W-1:0] rcmb_y_msb_delay_2_pad = {CARRY_ZERO, rcmb_xy_msb_delay_2[31:16]};
+    wire [WORD_EXT_W-1:0] rcmb_xy_msb_dly_2_x_pad = {CARRY_ZERO, rcmb_xy_msb_dly_2_x};
+    wire [WORD_EXT_W-1:0] rcmb_xy_msb_dly_2_y_pad = {CARRY_ZERO, rcmb_xy_msb_dly_2_y};
     
     
     //
@@ -1079,25 +1120,25 @@ module modexpng_recombinator_block
             //
             case (rcmb_xy_valid)
                 //
-                2'b00: if (rcmb_msb_flag_delay_2)       set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
+                2'b00: if (rcmb_msb_flag_dly_2)         set_wide(BANK_WIDE_H, rcmb_msb_cnt_dly_2, rcmb_xy_msb_dly_2_x_pad, rcmb_xy_msb_dly_2_y_pad);
                        else                             clear_wide;
                 //
-                2'b01:                                  set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+                2'b01:                                  set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad,  rcmb_y_lsb_dout_pad);
                 //
                 2'b10: if (cnt_msb < OP_ADDR_TWO)       clear_wide;                        
-                       else                             set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
+                       else                             set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad,  rcmb_y_msb_dout_pad);
                 //
-                2'b11: if (!cnt_lsb_wrapped)            set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
+                2'b11: if (!cnt_lsb_wrapped)            set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad,  rcmb_y_lsb_dout_pad);
                        else begin
-                           if (cnt_lsb == OP_ADDR_ZERO) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
-                           else                         set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_ext_pad, rcmb_y_lsb_dout_ext_pad);
+                           if (cnt_lsb == OP_ADDR_ZERO) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_pad,  rcmb_y_lsb_dout_pad);
+                           else                         set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_doutw_pad, rcmb_y_lsb_doutw_pad);
                        end
                 //
             endcase            
             //
             case (rcmb_xy_valid)
                 //
-                2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
+                2'b00: if (rcmb_msb_flag_dly_2)   advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
                 //
                 2'b01: if (rcmb_xy_lsb_cry)       pop_rcmb_msb_carry;
                 //
@@ -1120,7 +1161,7 @@ module modexpng_recombinator_block
         begin
             //
             case (rcmb_xy_valid)
-                2'b01: inc_cnt_lsb(); 
+                2'b01: inc_cnt_lsb; 
             endcase            
             //
             case (rcmb_xy_valid)
@@ -1156,7 +1197,7 @@ module modexpng_recombinator_block
             //
             case (rcmb_xy_valid)
                 //
-                2'b00:  if (rcmb_msb_flag_delay_2)  set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
+                2'b00:  if (rcmb_msb_flag_dly_2)    set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_dly_2, rcmb_xy_msb_dly_2_x_pad, rcmb_xy_msb_dly_2_y_pad);
                         else                        clear_rdct;
                 //
                 2'b01:                              set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); 
@@ -1166,14 +1207,14 @@ module modexpng_recombinator_block
                        else                         set_rdct(BANK_RCMB_MH,  cnt_msb,      rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
                        end else                     set_rdct(BANK_RCMB_EXT, OP_ADDR_ZERO, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
                 //          
-                2'b11: if (cnt_lsb == OP_ADDR_ZERO) set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_pad,     rcmb_y_lsb_dout_pad);
-                       else                         set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_ext_pad, rcmb_y_lsb_dout_ext_pad);
+                2'b11: if (cnt_lsb == OP_ADDR_ZERO) set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_pad,  rcmb_y_lsb_dout_pad);
+                       else                         set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_doutw_pad, rcmb_y_lsb_doutw_pad);
                 //
             endcase            
             //
             case (rcmb_xy_valid)
                 //
-                2'b00: if (rcmb_msb_flag_delay_2)                       advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
+                2'b00: if (rcmb_msb_flag_dly_2)                         advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0);
                 //
                 2'b01: if (rcmb_xy_lsb_cry)                             pop_rcmb_msb_carry;
                 //
@@ -1200,9 +1241,9 @@ module modexpng_recombinator_block
         else if (!rdy_reg)
             //
             case (rcmb_mode)
-                RCMB_MODE_SQUARE:    case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_delay_2) rdy_adv <= ~rcmb_msb_flag_delay_1; endcase
-                RCMB_MODE_TRIANGLE:  case (rcmb_xy_valid) 2'b01:                            rdy_adv <= cnt_lsb_wrapped;        endcase
-                RCMB_MODE_RECTANGLE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_delay_2) rdy_adv <= ~rcmb_msb_flag_delay_1; endcase
+                RCMB_MODE_SQUARE:    case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_dly_2) rdy_adv <= ~rcmb_msb_flag_dly_1; endcase
+                RCMB_MODE_TRIANGLE:  case (rcmb_xy_valid) 2'b01:                          rdy_adv <= cnt_lsb_wrapped;      endcase
+                RCMB_MODE_RECTANGLE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_dly_2) rdy_adv <= ~rcmb_msb_flag_dly_1; endcase
             endcase
 
 
diff --git a/rtl/modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v
index 45fc68c..0c9ab00 100644
--- a/rtl/modexpng_recombinator_cell.v
+++ b/rtl/modexpng_recombinator_cell.v
@@ -34,7 +34,7 @@ module modexpng_recombinator_cell
 (
     clk,
     ce, clr,
-    din, dout, dout_ext
+    din, dout, doutw
 );
 
 
@@ -54,63 +54,172 @@ module modexpng_recombinator_cell
     input                clr;
     input  [ MAC_W -1:0] din;
     output [WORD_W -1:0] dout;
-    output [WORD_W   :0] dout_ext;
+    output [WORD_W   :0] doutw;
     
     
     //
     // din <=> {z[13:0], y[15:0], x[15:0]}
     //
-    wire [WORD_W -3:0] din_z = din[3 * WORD_W -3 : 2 * WORD_W]; // [45:32]
-    wire [WORD_W -1:0] din_y = din[2 * WORD_W -1 :     WORD_W]; // [31:16]
-    wire [WORD_W -1:0] din_x = din[    WORD_W -1 :          0]; // [15: 0]
-    
+    wire [WORD_W -1:0] din_z = {2'b00, din[3 * WORD_W -3 : 2 * WORD_W]}; // [47:46][45:32]
+    wire [WORD_W -1:0] din_y = {       din[2 * WORD_W -1 :     WORD_W]}; // [31:16]
+    wire [WORD_W -1:0] din_x = {       din[    WORD_W -1 :          0]}; // [15: 0]
+
     
     //
-    // Delayed Clock Enable
+    // Delayed Clock Enables
+    //
+    reg ce_dly1 = 1'b0, ce_dly2 = 1'b0, ce_dly3 = 1'b0, ce_dly4 = 1'b0, ce_dly5 = 1'b0, ce_dly6 = 1'b0; 
+    always @(posedge clk) {ce_dly1, ce_dly2, ce_dly3, ce_dly4, ce_dly5, ce_dly6} <= {ce, ce_dly1, ce_dly2, ce_dly3, ce_dly4, ce_dly5};
+
+
+    //
+    // Delayed Clear
     //
-    reg ce_dly = 1'b0;
-    always @(posedge clk) ce_dly <= ce;
+    reg clr_dly1, clr_dly2, clr_dly3, clr_dly4;
+    always @(posedge clk) {clr_dly1, clr_dly2, clr_dly3, clr_dly4} <= {clr, clr_dly1, clr_dly2, clr_dly3};
     
+
+    //
+    // Phase Flip-Flop
+    //
+    reg phase_ff, phase_ff_dly1, phase_ff_dly2, phase_ff_dly3, phase_ff_dly4, phase_ff_dly5;
+    always @(posedge clk)
+        if      (ce)  phase_ff <= ~phase_ff;
+        else if (clr) phase_ff <= 1'b0;
+
+    always @(posedge clk)
+        {phase_ff_dly1, phase_ff_dly2, phase_ff_dly3, phase_ff_dly4, phase_ff_dly5} <= {phase_ff, phase_ff_dly1, phase_ff_dly2, phase_ff_dly3, phase_ff_dly4};
+        
     
     //
-    // DSP Slice Buses
+    // Shift Registers
     //
-    wire [DSP48E1_A_W-1:0] a_int;
-    wire [DSP48E1_B_W-1:0] b_int;
-    wire [DSP48E1_C_W-1:0] c_int;
-    wire [DSP48E1_P_W-1:0] p_int;
+    reg [WORD_W-1:0] din_x_dly1;
+    reg [WORD_W-1:0] din_y_dly1;
+    reg [WORD_W-1:0] din_z_dly1;
+    reg [WORD_W-1:0] din_z_dly2;
 
-    assign {a_int, b_int} = {{(DSP48E1_C_W-(2*WORD_W+1)){1'b0}},     din_z, din_y[WORD_W-1], din_y};
-    assign {c_int}        = {{(DSP48E1_C_W-(2*WORD_W+1)){1'b0}}, WORD_ZERO, din_x[WORD_W-1], din_x};
+    always @(posedge clk) begin
+        //
+        if      (ce)  {din_x_dly1, din_y_dly1, din_z_dly1} <= {din_x,     din_y,     din_z};
+        else if (clr) {din_x_dly1, din_y_dly1, din_z_dly1} <= {WORD_ZERO, WORD_ZERO, WORD_ZERO};
+        //
+        if      (ce)  {din_z_dly2} <= {din_z_dly1};
+        else if (clr) {din_z_dly2} <= {WORD_ZERO};
+        //
+    end
     
+
+    //
+    // DSP Input Registers
     //
-    // Combinational OPMODE Switch
+    reg [2 * WORD_W-1:0] master_ab_reg;
+    reg [2 * WORD_W-1:0] master_c_reg;
+
+    reg [    WORD_W+1:0] slave_ab_reg;
+    reg [    WORD_W+1:0] slave_ab_next_reg;
+
+
+    //
+    // DSP Cascade Bus
+    //
+    wire [DSP48E1_P_W-1:0] master_slave_p_int;
+
+
+    //
+    // DSP Output Buses
     //
-    reg [DSP48E1_OPMODE_W-1:0] opmode;
+    wire [DSP48E1_P_W-1:0] master_p_int;
+    wire [DSP48E1_P_W-1:0] slave_p_int;
+
     
-    always @(clr)
-        //
-        case (clr)
-            1'b1: opmode = DSP48E1_OPMODE_Z0_YC_X0;
-            1'b0: opmode = DSP48E1_OPMODE_ZP17_YC_XAB;
-        endcase
+    //
+    // DSP Input Mapping
+    //
+    wire [DSP48E1_C_W-1:0] master_ab_int = {{(DSP48E1_C_W - 2 * WORD_W){1'b0}}, master_ab_reg};
+    wire [DSP48E1_C_W-1:0] master_c_int  = {{(DSP48E1_C_W - 2 * WORD_W){1'b0}}, master_c_reg};
+
+    wire [DSP48E1_C_W-1:0] slave_ab_int = {{(DSP48E1_C_W - (WORD_W+3)){1'b0}}, slave_ab_reg[WORD_W+1:WORD_W], 1'b1, slave_ab_reg[WORD_W-1:0]};
+    wire [DSP48E1_C_W-1:0] slave_c_int  = {DSP48E1_C_W{1'b0}};
 
 
     //
-    // DSP Slice Instance
+    // Master DSP Input Logic
     //
-    `MODEXPNG_DSP_SLICE_ADDSUB #(.AB_REG(2)) dsp_inst
+    always @(posedge clk)
+        //
+        if (ce) begin
+            master_ab_reg <= !phase_ff ? {din_y,      din_y_dly1} : {din_x,    din_x_dly1}; 
+            master_c_reg  <= !phase_ff ? {din_z_dly1, din_z_dly2} : {WORD_DNC, WORD_DNC};
+        end else begin
+            master_ab_reg <= {WORD_DNC, WORD_DNC};
+            master_c_reg  <= {WORD_DNC, WORD_DNC};
+        end
+    
+    
+    //
+    // Slave DSP Input Logic
+    //
+    always @(posedge clk) begin
+        //
+        slave_ab_reg      <= {(WORD_W+2){1'bX}};
+        slave_ab_next_reg <= {(WORD_W+2){1'bX}};
+        //
+        if (ce_dly3 && phase_ff_dly3) slave_ab_next_reg <= {master_p_int[2*WORD_W+1:WORD_W]};
+        //
+        if (ce_dly3 && phase_ff_dly3) slave_ab_reg <= {2'b00, master_p_int[WORD_W-1:0]};
+        if (ce_dly4 && phase_ff_dly4) slave_ab_reg <= slave_ab_next_reg; 
+        //
+    end
+    
+    
+    //
+    // OPMODE Logic
+    //
+    reg [DSP48E1_OPMODE_W-1:0] master_opmode;
+    reg [DSP48E1_OPMODE_W-1:0] slave_opmode;
+    
+    always @(posedge clk) begin
+        //
+        if (ce) master_opmode <= !phase_ff ? DSP48E1_OPMODE_Z0_YC_XAB : DSP48E1_OPMODE_ZP_Y0_XAB;
+        else    master_opmode <= DSP48E1_OPMODE_DNC;
+        //
+        if (ce_dly4) slave_opmode <= clr_dly4 ? DSP48E1_OPMODE_Z0_Y0_XAB : DSP48E1_OPMODE_ZP17_Y0_XAB;
+        else         slave_opmode <= DSP48E1_OPMODE_DNC;
+        //
+    end
+        
+    
+    //
+    // DSP Slice Instances
+    //
+    `MODEXPNG_DSP_SLICE_ADDSUB dsp_master_inst
+    (
+        .clk            (clk),
+        .ce_abc         (ce_dly1),
+        .ce_p           (ce_dly2),
+        .ce_ctrl        (ce_dly1),
+        .ab             (master_ab_int),
+        .c              (master_c_int),
+        .p              (master_p_int),
+        .op_mode        (master_opmode),
+        .alu_mode       (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN),
+        .carry_in_sel   (DSP48E1_CARRYINSEL_CARRYIN),
+        .casc_p_in      (),
+        .casc_p_out     (),
+        .carry_out      ()
+    );
+    
+    `MODEXPNG_DSP_SLICE_ADDSUB dsp_slave_inst
     (
         .clk            (clk),
-        .ce_ab1         (ce),
-        .ce_ab2         (ce_dly),
-        .ce_c           (ce),
-        .ce_p           (ce_dly),
-        .ce_ctrl        (ce),
-        .ab             ({a_int, b_int}),
-        .c              (c_int),
-        .p              (p_int),
-        .op_mode        (opmode),
+        .ce_abc         (ce_dly5),
+        .ce_p           (ce_dly6),
+        .ce_ctrl        (ce_dly5),
+        .ab             (slave_ab_int),
+        .c              (slave_c_int),
+        .p              (slave_p_int),
+        .op_mode        (slave_opmode),
         .alu_mode       (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN),
         .carry_in_sel   (DSP48E1_CARRYINSEL_CARRYIN),
         .casc_p_in      (),
@@ -120,10 +229,9 @@ module modexpng_recombinator_cell
 
 
     //
-    // Output Mapping
-    //
-    assign dout     = {p_int[WORD_W-1:0]};
-    assign dout_ext = {p_int[WORD_W+1], dout};
-    
+    // Output Register
+    //    
+    assign dout  = {slave_p_int[WORD_W-1:0]};
+    assign doutw = {slave_p_int[WORD_W+1], dout}; 
     
 endmodule



More information about the Commits mailing list