[Cryptech-Commits] [user/shatov/modexpng] branch master updated: Small change to the reductor module to try to get past 180 MHz. Previously BRAM outputs were going directly into a LUT-based ternary adder which was causing timing problems. Added a layer of flip-flops, so instead of BRAM -> LUT -> FF we have BRAM -> FF -> LUT -> FF. This increases core latency by (number_of_supporting_modular_multiplications + number_of_exponent_bits) ticks.

git at cryptech.is git at cryptech.is
Wed Nov 20 11:39:56 UTC 2019


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexpng.

The following commit(s) were added to refs/heads/master by this push:
     new 157d5de  Small change to the reductor module to try to get past 180 MHz. Previously BRAM outputs were going directly into a LUT-based ternary adder which was causing timing problems. Added a layer of flip-flops, so instead of BRAM -> LUT -> FF we have BRAM -> FF -> LUT -> FF. This increases core latency by (number_of_supporting_modular_multiplications + number_of_exponent_bits) ticks.
157d5de is described below

commit 157d5dedd90fede9ea392e2aeda6562d839a30e1
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Wed Nov 20 14:34:36 2019 +0300

    Small change to the reductor module to try to get past 180 MHz. Previously BRAM
    outputs were going directly into a LUT-based ternary adder which was causing
    timing problems. Added a layer of flip-flops, so instead of BRAM -> LUT -> FF
    we have BRAM -> FF -> LUT -> FF. This increases core latency by
    (number_of_supporting_modular_multiplications + number_of_exponent_bits) ticks.
---
 rtl/modexpng_reductor.v | 168 ++++++++++++++++++++++++------------------------
 1 file changed, 83 insertions(+), 85 deletions(-)

diff --git a/rtl/modexpng_reductor.v b/rtl/modexpng_reductor.v
index dd9cfd9..7404eba 100644
--- a/rtl/modexpng_reductor.v
+++ b/rtl/modexpng_reductor.v
@@ -174,116 +174,114 @@ module modexpng_reductor
 
 
     //
-    // Pipeline (Delay Match)
+    // Pipeline rd_wide_*
     //
-    reg                    rcmb_xy_valid_dly1 = 1'b0;
-    reg                    rcmb_xy_valid_dly2 = 1'b0;
-    reg                    rcmb_xy_valid_dly3 = 1'b0;
+    reg [WORD_EXT_W -1:0] rd_wide_x_din_aux_pipe;
+    reg [WORD_EXT_W -1:0] rd_wide_y_din_aux_pipe;
 
-    reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1;
-    reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2;
-    reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3;
+    always @(posedge clk)
+        //
+        {rd_wide_x_din_aux_pipe, rd_wide_y_din_aux_pipe} <=
+        {rd_wide_x_din_aux,      rd_wide_y_din_aux     } ;
 
-    reg [  OP_ADDR_W -1:0] rcmb_xy_addr_dly1;
-    reg [  OP_ADDR_W -1:0] rcmb_xy_addr_dly2;
-    reg [  OP_ADDR_W -1:0] rcmb_xy_addr_dly3;
 
-    reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly1;
-    reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly2;
-    reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly3;
 
-    reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly1;
-    reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly2;
-    reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly3;
+    //
+    // Delay rcmb_final_* to match rd_wide_*
+    //
+    reg                    rcmb_xy_valid_dly1_x = 1'b0;
+    reg                    rcmb_xy_valid_dly2_x = 1'b0;
+    reg                    rcmb_xy_valid_dly3_x = 1'b0;
+    reg                    rcmb_xy_valid_dly4_x = 1'b0;
+
+    reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly1_x;
+    reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly2_x;
+    reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly3_x;
+    reg [BANK_ADDR_W -1:0] rcmb_xy_bank_dly4_x;
+
+    reg [  OP_ADDR_W -1:0] rcmb_xy_addr_dly1_x;
+    reg [  OP_ADDR_W -1:0] rcmb_xy_addr_dly2_x;
+    reg [  OP_ADDR_W -1:0] rcmb_xy_addr_dly3_x;
+    reg [  OP_ADDR_W -1:0] rcmb_xy_addr_dly4_x;
+
+    reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly1_x;
+    reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly2_x;
+    reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly3_x;
+    reg [ WORD_EXT_W -1:0] rcmb_x_dout_dly4_x;
+
+    reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly1_x;
+    reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly2_x;
+    reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly3_x;
+    reg [ WORD_EXT_W -1:0] rcmb_y_dout_dly4_x;
     
     always @(posedge clk or negedge rst_n)
         //
-        if (!rst_n) begin
-            rcmb_xy_valid_dly1 <= 1'b0;
-            rcmb_xy_valid_dly2 <= 1'b0;
-            rcmb_xy_valid_dly3 <= 1'b0;
-        end else begin
-            rcmb_xy_valid_dly1 <= rcmb_final_xy_valid;
-            rcmb_xy_valid_dly2 <= rcmb_xy_valid_dly1;
-            rcmb_xy_valid_dly3 <= rcmb_xy_valid_dly2;        
-        end
+        if (!rst_n) {rcmb_xy_valid_dly4_x, rcmb_xy_valid_dly3_x, rcmb_xy_valid_dly2_x, rcmb_xy_valid_dly1_x} <= 4'b0000;      
+        else        {rcmb_xy_valid_dly4_x, rcmb_xy_valid_dly3_x, rcmb_xy_valid_dly2_x, rcmb_xy_valid_dly1_x} <= 
+                    {rcmb_xy_valid_dly3_x, rcmb_xy_valid_dly2_x, rcmb_xy_valid_dly1_x, rcmb_final_xy_valid } ;
     
     always @(posedge clk) begin
         //
-        if (rcmb_final_xy_valid) begin
-            rcmb_xy_bank_dly1 <= rcmb_final_xy_bank;
-            rcmb_xy_addr_dly1 <= rcmb_final_xy_addr;
-            rcmb_x_dout_dly1  <= rcmb_final_x_din;
-            rcmb_y_dout_dly1  <= rcmb_final_y_din;
-        end
-        //
-        if (rcmb_xy_valid_dly1) begin
-            rcmb_xy_bank_dly2 <= rcmb_xy_bank_dly1;
-            rcmb_xy_addr_dly2 <= rcmb_xy_addr_dly1;
-            rcmb_x_dout_dly2  <= rcmb_x_dout_dly1;
-            rcmb_y_dout_dly2  <= rcmb_y_dout_dly1;
-        end
-        //
-        if (rcmb_xy_valid_dly2) begin
-            rcmb_xy_bank_dly3 <= rcmb_xy_bank_dly2;
-            rcmb_xy_addr_dly3 <= rcmb_xy_addr_dly2;
-            rcmb_x_dout_dly3  <= rcmb_x_dout_dly2;
-            rcmb_y_dout_dly3  <= rcmb_y_dout_dly2;
-        end
+        if (rcmb_final_xy_valid)  {rcmb_xy_bank_dly1_x, rcmb_xy_addr_dly1_x, rcmb_x_dout_dly1_x, rcmb_y_dout_dly1_x} <=
+                                  {rcmb_final_xy_bank,  rcmb_final_xy_addr,  rcmb_final_x_din,   rcmb_final_y_din  } ;   
+        if (rcmb_xy_valid_dly1_x) {rcmb_xy_bank_dly2_x, rcmb_xy_addr_dly2_x, rcmb_x_dout_dly2_x, rcmb_y_dout_dly2_x} <=
+                                  {rcmb_xy_bank_dly1_x, rcmb_xy_addr_dly1_x, rcmb_x_dout_dly1_x, rcmb_y_dout_dly1_x} ;
+        if (rcmb_xy_valid_dly2_x) {rcmb_xy_bank_dly3_x, rcmb_xy_addr_dly3_x, rcmb_x_dout_dly3_x, rcmb_y_dout_dly3_x} <=
+                                  {rcmb_xy_bank_dly2_x, rcmb_xy_addr_dly2_x, rcmb_x_dout_dly2_x, rcmb_y_dout_dly2_x} ;
+        if (rcmb_xy_valid_dly3_x) {rcmb_xy_bank_dly4_x, rcmb_xy_addr_dly4_x, rcmb_x_dout_dly4_x, rcmb_y_dout_dly4_x} <=
+                                  {rcmb_xy_bank_dly3_x, rcmb_xy_addr_dly3_x, rcmb_x_dout_dly3_x, rcmb_y_dout_dly3_x} ;
         //
     end
       
       
     //
+    // LSB Carry Logic
     //
-    //
-    reg [   CARRY_W -1:0] rcmb_x_lsb_carry;
-    reg [    WORD_W -1:0] rcmb_x_lsb_dummy;
-    reg [WORD_EXT_W -1:0] rcmb_x_lsb_dout;
-
-    reg [   CARRY_W -1:0] rcmb_y_lsb_carry;
-    reg [    WORD_W -1:0] rcmb_y_lsb_dummy;
-    reg [WORD_EXT_W -1:0] rcmb_y_lsb_dout;
+    reg  [   CARRY_W -1:0] rcmb_x_lsb_carry;
+    reg  [   CARRY_W -1:0] rcmb_y_lsb_carry;
+    reg  [    WORD_W -1:0] rcmb_x_lsb_dummy;
+    reg  [    WORD_W -1:0] rcmb_y_lsb_dummy;
+    wire [WORD_EXT_W -1:0] rcmb_x_lsb_carry_ext = {WORD_ZERO, rcmb_x_lsb_carry};
+    wire [WORD_EXT_W -1:0] rcmb_y_lsb_carry_ext = {WORD_ZERO, rcmb_y_lsb_carry};
+    
+    task calc_rcmb_xy_lsb_carry;
+        begin
+            {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly4_x + rd_wide_x_din_aux_pipe + rcmb_x_lsb_carry_ext;
+            {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly4_x + rd_wide_y_din_aux_pipe + rcmb_y_lsb_carry_ext;
+        end
+    endtask
     
 
     //
-    // Carry Computation
+    // LSB Carry Computation
     //
     always @(posedge clk)
         //
         if (ena) begin
+            //
             rcmb_x_lsb_carry <= CARRY_ZERO;
             rcmb_y_lsb_carry <= CARRY_ZERO;
-        end else if (rcmb_xy_valid_dly3)
             //
-            case (rcmb_xy_bank_dly3)    
-        
-                BANK_RCMB_ML: begin
-                    {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry;
-                    {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry;
-                end
-                    
-                BANK_RCMB_MH:
-                    if (rcmb_xy_addr_dly3 == OP_ADDR_ZERO) begin
-                        {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_din_aux + rcmb_x_lsb_carry;
-                        {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_din_aux + rcmb_y_lsb_carry;
-                    end
-                    
+        end else if (rcmb_xy_valid_dly4_x)
+            //
+            case (rcmb_xy_bank_dly4_x)
+                BANK_RCMB_ML:                                          calc_rcmb_xy_lsb_carry;  
+                BANK_RCMB_MH: if (rcmb_xy_addr_dly4_x == OP_ADDR_ZERO) calc_rcmb_xy_lsb_carry;  
             endcase
 
     
     //
+    // MSB Sum Logic
     //
-    //
-    wire [WORD_EXT_W -1:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_din_aux;
-    wire [WORD_EXT_W -1:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_din_aux;
+    wire [WORD_EXT_W -1:0] sum_rdct_x = rcmb_x_dout_dly4_x + rd_wide_x_din_aux_pipe;
+    wire [WORD_EXT_W -1:0] sum_rdct_y = rcmb_y_dout_dly4_x + rd_wide_y_din_aux_pipe;
     
-    wire [WORD_EXT_W -1:0] sum_rdct_x_carry = sum_rdct_x + {WORD_ZERO, rcmb_x_lsb_carry};
-    wire [WORD_EXT_W -1:0] sum_rdct_y_carry = sum_rdct_y + {WORD_ZERO, rcmb_y_lsb_carry};
+    wire [WORD_EXT_W -1:0] sum_rdct_x_carry = sum_rdct_x + rcmb_x_lsb_carry_ext;
+    wire [WORD_EXT_W -1:0] sum_rdct_y_carry = sum_rdct_y + rcmb_y_lsb_carry_ext;
     
     
     //
-    //
+    // MSB Sum Computation
     //
     always @(posedge clk or negedge rst_n)
         //
@@ -295,22 +293,22 @@ module modexpng_reductor
             clear_rdct_wide;
             clear_rdct_narrow;
             //
-            if (rcmb_xy_valid_dly3)
+            if (rcmb_xy_valid_dly4_x)
                 //
-                case (rcmb_xy_bank_dly3)
+                case (rcmb_xy_bank_dly4_x)
                                     
                     BANK_RCMB_MH:
-                        if (rcmb_xy_addr_dly3 == OP_ADDR_ONE) begin
+                        if (rcmb_xy_addr_dly4_x == OP_ADDR_ONE) begin
                             set_rdct_wide  (sel_wide_out,   OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry);
                             set_rdct_narrow(sel_narrow_out, OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry);
-                        end else if (rcmb_xy_addr_dly3 > OP_ADDR_ONE) begin
-                            set_rdct_wide  (sel_wide_out,   rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);
-                            set_rdct_narrow(sel_narrow_out, rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);
+                        end else if (rcmb_xy_addr_dly4_x > OP_ADDR_ONE) begin
+                            set_rdct_wide  (sel_wide_out,   rcmb_xy_addr_dly4_x - 1'b1, sum_rdct_x, sum_rdct_y);
+                            set_rdct_narrow(sel_narrow_out, rcmb_xy_addr_dly4_x - 1'b1, sum_rdct_x, sum_rdct_y);
                         end
                             
                     BANK_RCMB_EXT: begin
-                        set_rdct_wide  (sel_wide_out,   word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
-                        set_rdct_narrow(sel_narrow_out, word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);
+                        set_rdct_wide  (sel_wide_out,   word_index_last, rcmb_x_dout_dly4_x, rcmb_y_dout_dly4_x);
+                        set_rdct_narrow(sel_narrow_out, word_index_last, rcmb_x_dout_dly4_x, rcmb_y_dout_dly4_x);
                     end
 
                 endcase
@@ -335,10 +333,10 @@ module modexpng_reductor
     
     always @(posedge clk or negedge rst_n)
         //
-        if (!rst_n)                                                                 busy_next <= 1'b0;
+        if (!rst_n)                                                                     busy_next <= 1'b0;
         else begin
-            if (rdy && ena)                                                         busy_next <= 1'b1;
-            if (!rdy && rcmb_xy_valid_dly3 && (rcmb_xy_bank_dly3 == BANK_RCMB_EXT)) busy_next <= 1'b0;    
+            if (rdy && ena)                                                             busy_next <= 1'b1;
+            if (!rdy && rcmb_xy_valid_dly4_x && (rcmb_xy_bank_dly4_x == BANK_RCMB_EXT)) busy_next <= 1'b0;    
         end
 
 

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the Commits mailing list