[Cryptech-Commits] [core/math/modexp] 04/04: (1) Collapsed the sm and sa adder states. This improves performance for modexp by 3%. (2) Updated montprod testbench to not use the now removed states. (3) Minor code cleanup to make it easier to work with for further improvements.

git at cryptech.is git at cryptech.is
Wed Jun 17 19:02:39 UTC 2015


This is an automated email from the git hooks/post-receive script.

joachim at secworks.se pushed a commit to branch perfopt
in repository core/math/modexp.

commit 46e2c534e7197b57f40ebf3e60bf4da5a0da7d3f
Author: Joachim Strömbergson <joachim at secworks.se>
Date:   Wed Jun 17 21:02:24 2015 +0200

    (1) Collapsed the sm and sa adder states. This improves performance for modexp by 3%. (2) Updated montprod testbench to not use the now removed states. (3) Minor code cleanup to make it easier to work with for further improvements.
---
 src/rtl/montprod.v   | 165 +++++++++++++++++++++++----------------------------
 src/tb/tb_montprod.v |   4 +-
 2 files changed, 75 insertions(+), 94 deletions(-)

diff --git a/src/rtl/montprod.v b/src/rtl/montprod.v
index 7c7bf33..9748b09 100644
--- a/src/rtl/montprod.v
+++ b/src/rtl/montprod.v
@@ -70,14 +70,12 @@ module montprod(
   localparam CTRL_WAIT           = 4'h2;
   localparam CTRL_LOOP_ITER      = 4'h3;
   localparam CTRL_LOOP_BQ        = 4'h4;
-  localparam CTRL_L_CALC_SM      = 4'h5;
-  localparam CTRL_L_STALLPIPE_SM = 4'h6;
-  localparam CTRL_L_CALC_SA      = 4'h7;
-  localparam CTRL_L_STALLPIPE_SA = 4'h8;
-  localparam CTRL_L_CALC_SDIV2   = 4'h9;
-  localparam CTRL_L_STALLPIPE_D2 = 4'hA;
-  localparam CTRL_L_STALLPIPE_ES = 4'hB;
-  localparam CTRL_EMIT_S         = 4'hC;
+  localparam CTRL_L_CALC_SA      = 4'h5;
+  localparam CTRL_L_STALLPIPE_SA = 4'h6;
+  localparam CTRL_L_CALC_SDIV2   = 4'h7;
+  localparam CTRL_L_STALLPIPE_D2 = 4'h8;
+  localparam CTRL_L_STALLPIPE_ES = 4'h9;
+  localparam CTRL_EMIT_S         = 4'ha;
 
   localparam SMUX_0              = 2'h0;
   localparam SMUX_ADD_SM         = 2'h1;
@@ -117,7 +115,7 @@ module montprod(
   reg           q_reg;
   reg           b_new;
   reg           b_reg;
-  reg           update_bq;
+  reg           bq_we;
 
   reg [12 : 0] loop_ctr_reg;
   reg [12 : 0] loop_ctr_new;
@@ -125,9 +123,11 @@ module montprod(
   reg          loop_ctr_set;
   reg          loop_ctr_dec;
 
-  reg [07 : 0] B_word_index; //loop counter as a word index
-  reg [04 : 0] B_bit_index; //loop counter as a bit index
-  reg [04 : 0] B_bit_index_reg; //loop counter as a bit index
+  reg [07 : 0] b_word_index; //loop counter as a word index
+
+  reg [04 : 0] b_bit_index_reg;
+  reg [04 : 0] b_bit_index_new;
+  reg [04 : 0] b_bit_index_we;
 
   reg [07 : 0] word_index_reg; //register of what word is being read
   reg [07 : 0] word_index_new; //calculation of what word to be read
@@ -142,8 +142,10 @@ module montprod(
   reg          shr_carry_in_reg;
   reg          shr_carry_in_new;
 
-  reg          reset_word_index_LSW;
-  reg          reset_word_index_MSW;
+  reg          reset_word_index_lsw;
+  reg          reset_word_index_msw;
+
+  reg [31 : 0] sa_adder_data_in;
 
 
   //----------------------------------------------------------------
@@ -155,10 +157,10 @@ module montprod(
   wire [31 : 0] add_result_sm;
   wire          add_carry_out_sm;
 
+  reg           shr_data_in;
   wire          shr_carry_out;
-  wire [31 : 0] shr_adiv2;
+  wire [31 : 0] shr_data_out;
 
-  reg           set_B_bit_index;
 
 
   //----------------------------------------------------------------
@@ -198,7 +200,7 @@ module montprod(
 
 
   adder32 s_adder_sa(
-                     .a(s_mem_read_data),
+                     .a(sa_adder_data_in),
                      .b(opa_data),
                      .carry_in(add_carry_in_sa_reg),
                      .sum(add_result_sa),
@@ -208,7 +210,7 @@ module montprod(
   shr32 shifter(
                 .a(s_mem_read_data),
                 .carry_in(shr_carry_in_reg),
-                .adiv2(shr_adiv2),
+                .adiv2(shr_data_out),
                 .carry_out(shr_carry_out)
                );
 
@@ -225,7 +227,7 @@ module montprod(
       if (!reset_n)
         begin
           ready_reg           <= 1'b0;
-          loop_ctr_reg    <= 13'h0;
+          loop_ctr_reg        <= 13'h0;
           word_index_reg      <= 8'h0;
           word_index_prev_reg <= 8'h0;
           add_carry_in_sa_reg <= 1'b0;
@@ -236,13 +238,14 @@ module montprod(
           s_mux_reg           <= SMUX_0;
           s_mem_we_reg        <= 1'b0;
           s_mem_wr_addr_reg   <= 8'h0;
-          B_bit_index_reg     <= 5'h0;
+          b_bit_index_reg     <= 5'h0;
           montprod_ctrl_reg   <= CTRL_IDLE;
         end
       else
         begin
-          s_mem_wr_addr_reg <= s_mem_addr;
-          s_mem_we_reg      <= s_mem_we_new;
+          s_mem_wr_addr_reg   <= s_mem_addr;
+          s_mem_we_reg        <= s_mem_we_new;
+          s_mux_reg           <= s_mux_new;
 
           word_index_reg      <= word_index_new;
           word_index_prev_reg <= word_index_reg;
@@ -251,14 +254,14 @@ module montprod(
           add_carry_in_sa_reg <= add_carry_in_sa_new;
           add_carry_in_sm_reg <= add_carry_in_sm_new;
 
-          B_bit_index_reg <= B_bit_index;
+          if (b_bit_index_we)
+            b_bit_index_reg <= b_bit_index_new;
 
-          if (update_bq)
+          if (bq_we)
             begin
               b_reg <= b_new;
               q_reg <= q_new;
             end
-          s_mux_reg <= s_mux_new;
 
           if (ready_we)
             ready_reg <= ready_new;
@@ -279,7 +282,7 @@ module montprod(
   //----------------------------------------------------------------
   always @*
     begin : prodcalc
-      opb_addr_reg = B_word_index;
+      opb_addr_reg = b_word_index;
       opm_addr_reg = word_index_reg;
 
       result_addr_reg  = word_index_prev_reg;
@@ -309,10 +312,10 @@ module montprod(
       endcase
 
 
-      if (reset_word_index_LSW == 1'b1)
+      if (reset_word_index_lsw == 1'b1)
         word_index_new = length_m1;
 
-      else if (reset_word_index_MSW == 1'b1)
+      else if (reset_word_index_msw == 1'b1)
         word_index_new = 8'h0;
 
       else if (montprod_ctrl_reg == CTRL_L_CALC_SDIV2)
@@ -331,6 +334,7 @@ module montprod(
       shr_carry_in_new = 1'b0;
       s_mux_new        = SMUX_0;
       s_mem_we_new     = 1'b0;
+      sa_adder_data_in = 32'h0;
 
       case (montprod_ctrl_reg)
         CTRL_INIT_S:
@@ -339,18 +343,11 @@ module montprod(
             s_mem_we_new = 1'b1;
           end
 
-        CTRL_L_CALC_SM:
-          begin
-            //s = (s + q*M + b*A) >>> 1;, if(q==1) S+= M. Takes (1..length) cycles.
-            s_mux_new    = SMUX_ADD_SM;
-            s_mem_we_new = q_reg;
-          end
-
         CTRL_L_CALC_SA:
           begin
             //s = (s + q*M + b*A) >>> 1;, if(b==1) S+= A. Takes (1..length) cycles.
             s_mux_new    = SMUX_ADD_SA;
-            s_mem_we_new = b_reg;
+            s_mem_we_new = b_reg | q_reg;
           end
 
         CTRL_L_CALC_SDIV2:
@@ -373,21 +370,26 @@ module montprod(
         SMUX_0:
           s_mem_new = 32'h0;
 
-        SMUX_ADD_SM:
-          begin
-            s_mem_new           = add_result_sm;
-            add_carry_in_sm_new = add_carry_out_sm;
-          end
-
         SMUX_ADD_SA:
           begin
-            s_mem_new           = add_result_sa;
+            if (q_reg)
+                sa_adder_data_in = add_result_sm;
+            else
+              sa_adder_data_in = s_mem_read_data;
+
+            if (b_reg)
+              s_mem_new = add_result_sa;
+            else
+              s_mem_new = add_result_sm;
+
             add_carry_in_sa_new = add_carry_out_sa;
+            add_carry_in_sm_new = add_carry_out_sm;
           end
 
         SMUX_SHR:
           begin
-            s_mem_new        = shr_adiv2;
+            shr_data_in      = s_mem_read_data;
+            s_mem_new        = shr_data_out;
             shr_carry_in_new = shr_carry_out;
           end
 
@@ -400,12 +402,14 @@ module montprod(
 
   //----------------------------------------------------------------
   // bq
-  // b: bit of B
+  //
+  // Extract b and q bits.
+  // b: current bit of B used.
   // q = (s - b * A) & 1
   //----------------------------------------------------------------
   always @*
    begin : bq
-     b_new = opb_data[B_bit_index_reg];
+     b_new = opb_data[b_bit_index_reg];
      q_new = s_mem_read_data[0] ^ (opa_data[0] & b_new);
    end // bq
 
@@ -417,11 +421,12 @@ module montprod(
   //----------------------------------------------------------------
   always @*
    begin : loop_ctr
-     loop_ctr_new = loop_ctr_reg;
-     loop_ctr_we  = 1'b0;
+     loop_ctr_new     = 13'h0;
+     loop_ctr_we      = 1'b0;
      length_m1        = length - 1'b1;
-     B_bit_index      = B_bit_index_reg;
-     B_word_index     = loop_ctr_reg[12:5];
+
+     b_bit_index_new  = 5'h1f - loop_ctr_reg[4:0];
+     b_word_index     = loop_ctr_reg[12:5];
 
      if (loop_ctr_set)
          begin
@@ -434,11 +439,6 @@ module montprod(
            loop_ctr_new = loop_ctr_reg - 1'b1;
            loop_ctr_we  = 1'b1;
        end
-
-     if (set_B_bit_index)
-       begin
-           B_bit_index  = 5'h1f - loop_ctr_reg[4:0];
-       end
    end
 
 
@@ -449,18 +449,18 @@ module montprod(
   //----------------------------------------------------------------
   always @*
     begin : montprod_ctrl
-      ready_new            = 1'b0;
-      ready_we             = 1'b0;
-      montprod_ctrl_new    = CTRL_IDLE;
-      montprod_ctrl_we     = 1'b0;
+      ready_new         = 1'b0;
+      ready_we          = 1'b0;
+      montprod_ctrl_new = CTRL_IDLE;
+      montprod_ctrl_we  = 1'b0;
 
-      loop_ctr_set         = 1'b0;
-      loop_ctr_dec         = 1'b0;
-      set_B_bit_index      = 1'b0;
-      update_bq            = 1'b0;
+      loop_ctr_set      = 1'b0;
+      loop_ctr_dec      = 1'b0;
+      b_bit_index_we    = 1'b0;
+      bq_we             = 1'b0;
 
-      reset_word_index_LSW = 1'b0;
-      reset_word_index_MSW = 1'b0;
+      reset_word_index_lsw = 1'b0;
+      reset_word_index_msw = 1'b0;
 
 
       case (montprod_ctrl_reg)
@@ -472,7 +472,7 @@ module montprod(
                 ready_we  = 1'b1;
                 montprod_ctrl_new = CTRL_INIT_S;
                 montprod_ctrl_we = 1'b1;
-                reset_word_index_LSW = 1'b1;
+                reset_word_index_lsw = 1'b1;
               end
             else
               begin
@@ -501,42 +501,25 @@ module montprod(
         // Also abort loop if done.
         CTRL_LOOP_ITER:
           begin
-            set_B_bit_index      = 1'b1;
-            reset_word_index_LSW = 1'b1;
+            b_bit_index_we       = 1'b1;
+            reset_word_index_lsw = 1'b1;
             montprod_ctrl_new    = CTRL_LOOP_BQ;
             montprod_ctrl_we     = 1'b1;
           end
 
         CTRL_LOOP_BQ:
           begin
-            reset_word_index_LSW = 1'b1;
-            update_bq            = 1'b1;
-            montprod_ctrl_new    = CTRL_L_CALC_SM;
-            montprod_ctrl_we     = 1'b1;
-          end
-
-        CTRL_L_CALC_SM:
-          begin
-            if (word_index_reg == 8'h0)
-              begin
-                reset_word_index_LSW = 1'b1;
-                montprod_ctrl_we     = 1'b1;
-                montprod_ctrl_new    = CTRL_L_STALLPIPE_SM;
-              end
-          end
-
-        CTRL_L_STALLPIPE_SM:
-          begin
+            reset_word_index_lsw = 1'b1;
+            bq_we                = 1'b1;
             montprod_ctrl_new    = CTRL_L_CALC_SA;
             montprod_ctrl_we     = 1'b1;
-            reset_word_index_LSW = 1'b1;
           end
 
         CTRL_L_CALC_SA:
           begin
             if (word_index_reg == 8'h0)
               begin
-                reset_word_index_LSW = 1'b1;
+                reset_word_index_lsw = 1'b1;
                 montprod_ctrl_new    = CTRL_L_STALLPIPE_SA;
                 montprod_ctrl_we     = 1'b1;
               end
@@ -546,7 +529,7 @@ module montprod(
           begin
             montprod_ctrl_new    = CTRL_L_CALC_SDIV2;
             montprod_ctrl_we     = 1'b1;
-            reset_word_index_MSW = 1'b1;
+            reset_word_index_msw = 1'b1;
           end
 
         CTRL_L_CALC_SDIV2:
@@ -560,10 +543,10 @@ module montprod(
 
         CTRL_L_STALLPIPE_D2:
           begin
-            loop_ctr_dec     = 1'b1;
+            loop_ctr_dec         = 1'b1;
             montprod_ctrl_new    = CTRL_LOOP_ITER;
             montprod_ctrl_we     = 1'b1;
-            reset_word_index_LSW = 1'b1;
+            reset_word_index_lsw = 1'b1;
 
             if (loop_ctr_reg == 0)
               begin
diff --git a/src/tb/tb_montprod.v b/src/tb/tb_montprod.v
index bf9333e..850f8d9 100644
--- a/src/tb/tb_montprod.v
+++ b/src/tb/tb_montprod.v
@@ -226,7 +226,7 @@ module tb_montprod();
       if (SHOW_BQ_DEBUG)
         begin
           if (dut.montprod_ctrl_reg == dut.CTRL_L_CALC_SM)
-            $display("====================> B: %x Q: %x B_bit_index_reg: %x <=====================", dut.b_reg, dut.q_reg, dut.B_bit_index_reg);
+            $display("====================> B: %x Q: %x b_bit_index_reg: %x <=====================", dut.b_reg, dut.q_reg, dut.b_bit_index_reg);
         end
     end
 
@@ -247,8 +247,6 @@ module tb_montprod();
               $display("FSM: LOOP_ITER");
             dut.CTRL_LOOP_BQ:
               $display("FSM: LOOP_BQ");
-            dut.CTRL_L_CALC_SM:
-              $display("FSM: LOOP_CALC_SM");
             dut.CTRL_L_CALC_SA:
               $display("FSM: LOOP_CALC_SA");
             dut.CTRL_L_STALLPIPE_SA:



More information about the Commits mailing list