[Cryptech-Commits] [core/pkey/ed25519] 01/04: Finished porting modular multiplier.

git at cryptech.is git at cryptech.is
Mon Oct 15 13:18:08 UTC 2018


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository core/pkey/ed25519.

commit 469110c8636aff88bde5a94f7a1596b88126547b
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Mon Oct 15 15:47:47 2018 +0300

    Finished porting modular multiplier.
---
 bench/tb_modular_multiplier.v                      |  55 +--
 .../ed25519_modular_multiplier.v                   | 387 ++++++++++++++++-----
 2 files changed, 331 insertions(+), 111 deletions(-)

diff --git a/bench/tb_modular_multiplier.v b/bench/tb_modular_multiplier.v
index c3de537..46e8790 100644
--- a/bench/tb_modular_multiplier.v
+++ b/bench/tb_modular_multiplier.v
@@ -48,6 +48,9 @@ module tb_modular_multiplier;
         //
     localparam A1 = 256'h216936d3_cd6e53fe_c0a4e231_fdd6dc5c_692cc760_9525a7b2_c9562d60_8f25d51a;   // GX
     localparam B1 = 256'h66666666_66666666_66666666_66666666_66666666_66666666_66666666_66666658;   // GY
+    localparam C1 = 256'h67875f0f_d78b7665_66ea4e8e_64abe37d_20f09f80_775152f5_6dde8ab3_a5b7dda3;   // GT
+
+    localparam F  = 256'hFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF;   // FFF...F
 
 
         //
@@ -163,7 +166,9 @@ module tb_modular_multiplier;
         
             /* run tests */
         $display("1. A1 * A1 = ...");
-        test_modular_multiplier(A1, A1);
+        test_modular_multiplier(A1   , B1);
+        test_modular_multiplier(A1+B1, C1+C1);
+        test_modular_multiplier(F,     F);
         
             /* print result */
         if (ok) $display("tb_modular_multiplier: SUCCESS");
@@ -188,11 +193,16 @@ module tb_modular_multiplier;
         reg     [255:0] b_shreg;
         reg     [255:0] p_shreg;
         reg             p_ok;
-        
+        reg     [511:0] ab;
+        reg     [255:0] p_ref;
         integer         w;
 
         begin
         
+                /* calculate reference value: a*b mod (2^256 - 38), i.e. mod 2*(2^255 - 19) */
+            ab = {{256{1'b0}}, a} * {{256{1'b0}}, b};
+            p_ref = ab % {{31{8'hFF}}, 8'hDA};
+        
                 /* initialize result */
             p_ok = 0;
 
@@ -241,28 +251,29 @@ module tb_modular_multiplier;
                 /* wait for operation to complete */
             while (!rdy) #`CLOCK_PERIOD;
             
-//                /* read result */
-//            for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
-//                
-//                    /* set address */
-//                tb_qxy_addr = w[WORD_COUNTER_WIDTH-1:0];
-//                
-//                    /* wait for 1 clock tick */
-//                #10;
-//                
-//                    /* store data word */
-//                qx_shreg = {tb_qx_data, qx_shreg[255:32]};
-//                qy_shreg = {tb_qy_data, qy_shreg[255:32]};
-//
-//            end
-//            
-//                /* compare */
-//            q_ok =  (qx_shreg == qx) &&
-//                        (qy_shreg == qy);
-//
+                /* read result */
+            for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+                
+                    /* set address */
+                tb_p_addr = w[WORD_COUNTER_WIDTH-1:0];
+                
+                    /* wait for 1 clock tick */
+                #`CLOCK_PERIOD;
+                
+                    /* store data word */
+                p_shreg = {tb_p_data, p_shreg[255:32]};
+
+            end
+            
+                /* compare */
+            p_ok = (p_shreg === p_ref);
+
                 /* display results */
             if (p_ok) $display("test_modular_multiplier(): CORRECT RESULT");
-            else      $display("test_modular_multiplier(): WRONG RESULT");
+            else begin
+                $display("test_modular_multiplier(): WRONG RESULT");
+                $display("XOR: %x", p_shreg ^ p_ref);
+            end
             
                 /* update global flag */
             ok = ok & p_ok;
diff --git a/rtl/modular_multiplier/ed25519_modular_multiplier.v b/rtl/modular_multiplier/ed25519_modular_multiplier.v
index 56c8537..012091a 100644
--- a/rtl/modular_multiplier/ed25519_modular_multiplier.v
+++ b/rtl/modular_multiplier/ed25519_modular_multiplier.v
@@ -120,11 +120,74 @@ module ed25519_modular_multiplier
     //
     // FSM
     //
-    localparam FSM_SHREG_WIDTH =    1 * OPERAND_NUM_WORDS + 1 +
-                                    2 * OPERAND_NUM_WORDS + 1 +
-                                    2 * OPERAND_NUM_WORDS + 2 +
-                                    0 * OPERAND_NUM_WORDS + 2 +
-                                    0 * OPERAND_NUM_WORDS + 1;
+
+    
+    localparam integer PHASE_INCREMENT_INDEX_A_OFFSET           =   0;
+    localparam integer PHASE_INCREMENT_INDEX_A_DURATION         =   OPERAND_NUM_WORDS;
+    
+    localparam integer PHASE_DECREMENT_INDEX_B_OFFSET           =   PHASE_INCREMENT_INDEX_A_DURATION;
+    localparam integer PHASE_DECREMENT_INDEX_B_DURATION         =   OPERAND_NUM_WORDS * 2;
+
+    localparam integer PHASE_STORE_MSB_SI_OFFSET                =   PHASE_DECREMENT_INDEX_B_OFFSET + 2;
+    localparam integer PHASE_STORE_MSB_SI_DURATION              =   OPERAND_NUM_WORDS * 2 - 1;
+
+    localparam integer PHASE_STORE_LSB_SI_OFFSET                =   PHASE_STORE_MSB_SI_OFFSET +
+                                                                    PHASE_STORE_MSB_SI_DURATION;
+    localparam integer PHASE_STORE_LSB_SI_DURATION              =   1;
+    
+    localparam integer PHASE_SHIFT_SI_OFFSET                    =   PHASE_STORE_LSB_SI_OFFSET + 1;
+    localparam integer PHASE_SHIFT_SI_DURATION                  =   OPERAND_NUM_WORDS * 2 - 1;
+
+    localparam integer PHASE_MASK_SUM_CW1_OFFSET                =   PHASE_SHIFT_SI_OFFSET + 1;
+    localparam integer PHASE_MASK_SUM_CW1_DURATION              =   1;    
+    
+    localparam integer PHASE_STORE_LSB_C_OFFSET                 =   PHASE_MASK_SUM_CW1_OFFSET + 1;
+    localparam integer PHASE_STORE_LSB_C_DURATION               =   OPERAND_NUM_WORDS;
+
+    localparam integer PHASE_STORE_MSB_C_OFFSET                 =   PHASE_STORE_LSB_C_OFFSET +
+                                                                    PHASE_STORE_LSB_C_DURATION;
+    localparam integer PHASE_STORE_MSB_C_DURATION               =   OPERAND_NUM_WORDS;
+
+    localparam integer PHASE_MASK_B_R3_OFFSET                   =   PHASE_STORE_MSB_C_OFFSET + 3;
+    localparam integer PHASE_MASK_B_R3_DURATION                 =   1;
+
+    localparam integer PHASE_CALCULATE_CARRY_MSB_S1_OFFSET      =   PHASE_STORE_MSB_C_OFFSET +
+                                                                    PHASE_STORE_MSB_C_DURATION + 4;
+    localparam integer PHASE_CALCULATE_CARRY_MSB_S1_DURATION    =   1;
+    
+    localparam integer PHASE_STORE_LSB_S1_OFFSET                =   PHASE_STORE_MSB_C_OFFSET + 4;
+    localparam integer PHASE_STORE_LSB_S1_DURATION              =   OPERAND_NUM_WORDS;
+    
+    localparam integer PHASE_SHIFT_S1_OFFSET                    =   PHASE_STORE_LSB_S1_OFFSET +
+                                                                    PHASE_STORE_LSB_S1_DURATION + 1;
+    localparam integer PHASE_SHIFT_S1_DURATION                  =   OPERAND_NUM_WORDS;
+
+    localparam integer PHASE_CHANGE_LSB_B_P_OFFSET              =   PHASE_SHIFT_S1_OFFSET;
+    localparam integer PHASE_CHANGE_LSB_B_P_DURATION            =   1;
+
+    localparam integer PHASE_SELECT_S2_OR_PN_OFFSET             =   PHASE_SHIFT_S1_OFFSET +
+                                                                    PHASE_SHIFT_S1_DURATION + 1;
+    localparam integer PHASE_SELECT_S2_OR_PN_DURATION           =   1;
+
+    localparam integer PHASE_UPDATE_P_DOUT_OFFSET               =   PHASE_SHIFT_S1_OFFSET +
+                                                                    PHASE_SHIFT_S1_DURATION + 2;
+    localparam integer PHASE_UPDATE_P_DOUT_DURATION             =   OPERAND_NUM_WORDS;
+
+    
+    
+    localparam integer FSM_SHREG_WIDTH =    PHASE_INCREMENT_INDEX_A_DURATION +
+                                            PHASE_DECREMENT_INDEX_B_DURATION +
+                                            1 +
+                                            PHASE_STORE_LSB_SI_DURATION +
+                                            PHASE_SHIFT_SI_DURATION +
+                                            -1 +
+                                            PHASE_STORE_LSB_S1_DURATION +
+                                            PHASE_CALCULATE_CARRY_MSB_S1_DURATION +
+                                            PHASE_SHIFT_S1_DURATION +
+                                            1 +
+                                            PHASE_SELECT_S2_OR_PN_DURATION +
+                                            PHASE_UPDATE_P_DOUT_DURATION +
+                                            2;
 
     localparam [FSM_SHREG_WIDTH-1:0] FSM_SHREG_INIT = {{(FSM_SHREG_WIDTH-1){1'b0}}, 1'b1};
 
@@ -132,39 +195,66 @@ module ed25519_modular_multiplier
 
     assign rdy = fsm_shreg[0];
 
-    wire [1*OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a   = fsm_shreg[FSM_SHREG_WIDTH-(0*OPERAND_NUM_WORDS+1):FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+0)];
-    wire [1*OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a  = fsm_shreg[FSM_SHREG_WIDTH-(0*OPERAND_NUM_WORDS+2):FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+1)];
-    wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_store_part_b  = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+0):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+0)];
-    wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_dec_index_b   = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+0):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+0)];
-    wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_enable_mac_ab = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+1):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+1)];
-    wire [2*OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb  = fsm_shreg[FSM_SHREG_WIDTH-(1*OPERAND_NUM_WORDS+2):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+1)];
-    wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb  = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+2):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+2)];
-    wire [2*OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si      = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+3):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+1)];
-    wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum  = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+4):FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+4)];
-    wire [2*OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word  = fsm_shreg[FSM_SHREG_WIDTH-(3*OPERAND_NUM_WORDS+5):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+4)];
-    wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start  = fsm_shreg[FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+5):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+5)];
-    wire [0*OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_wait   = fsm_shreg[FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+6):FSM_SHREG_WIDTH-(5*OPERAND_NUM_WORDS+6)];
-
-    wire inc_index_a   = |fsm_shreg_inc_index_a;
-    wire store_word_a  = |fsm_shreg_store_word_a;
-    wire store_part_b  = |fsm_shreg_store_part_b;
-    wire dec_index_b   = |fsm_shreg_dec_index_b;
-    wire enable_mac_ab = |fsm_shreg_enable_mac_ab;    
-    wire store_si_msb  = |fsm_shreg_store_si_msb;
-    wire store_si_lsb  = |fsm_shreg_store_si_lsb;
-    wire shift_si      = |fsm_shreg_shift_si;
-    wire mask_cw1_sum  = |fsm_shreg_mask_cw1_sum;
-    wire store_c_word  = |fsm_shreg_store_c_word;
-    wire reduce_start  = |fsm_shreg_reduce_start;
-    wire reduce_wait   = |fsm_shreg_reduce_wait;
+    
+    
+    
+    
+    
+    
+    wire [PHASE_INCREMENT_INDEX_A_DURATION     -1:0] fsm_shreg_increment_index_a      = fsm_shreg[FSM_SHREG_WIDTH - PHASE_INCREMENT_INDEX_A_OFFSET      - 1 -: PHASE_INCREMENT_INDEX_A_DURATION];
+    wire [PHASE_DECREMENT_INDEX_B_DURATION     -1:0] fsm_shreg_decrement_index_b      = fsm_shreg[FSM_SHREG_WIDTH - PHASE_DECREMENT_INDEX_B_OFFSET      - 1 -: PHASE_DECREMENT_INDEX_B_DURATION];
+    wire [PHASE_STORE_MSB_SI_DURATION          -1:0] fsm_shreg_store_msb_si           = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_MSB_SI_OFFSET           - 1 -: PHASE_STORE_MSB_SI_DURATION];
+    wire [PHASE_STORE_LSB_SI_DURATION          -1:0] fsm_shreg_store_lsb_si           = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_LSB_SI_OFFSET           - 1 -: PHASE_STORE_LSB_SI_DURATION];
+    wire [PHASE_SHIFT_SI_DURATION              -1:0] fsm_shreg_shift_si               = fsm_shreg[FSM_SHREG_WIDTH - PHASE_SHIFT_SI_OFFSET               - 1 -: PHASE_SHIFT_SI_DURATION];
+    wire [PHASE_MASK_SUM_CW1_DURATION          -1:0] fsm_shreg_mask_sum_cw1           = fsm_shreg[FSM_SHREG_WIDTH - PHASE_MASK_SUM_CW1_OFFSET           - 1 -: PHASE_MASK_SUM_CW1_DURATION];
+    wire [PHASE_STORE_LSB_C_DURATION           -1:0] fsm_shreg_store_lsb_c            = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_LSB_C_OFFSET            - 1 -: PHASE_STORE_LSB_C_DURATION];
+    wire [PHASE_STORE_MSB_C_DURATION           -1:0] fsm_shreg_store_msb_c            = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_MSB_C_OFFSET            - 1 -: PHASE_STORE_MSB_C_DURATION];
+    wire [PHASE_MASK_B_R3_DURATION             -1:0] fsm_shreg_mask_b_r3              = fsm_shreg[FSM_SHREG_WIDTH - PHASE_MASK_B_R3_OFFSET              - 1 -: PHASE_MASK_B_R3_DURATION];
+    wire [PHASE_CALCULATE_CARRY_MSB_S1_DURATION-1:0] fsm_shreg_calculate_carry_msb_s1 = fsm_shreg[FSM_SHREG_WIDTH - PHASE_CALCULATE_CARRY_MSB_S1_OFFSET - 1 -: PHASE_CALCULATE_CARRY_MSB_S1_DURATION];
+    wire [PHASE_STORE_LSB_S1_DURATION          -1:0] fsm_shreg_store_lsb_s1           = fsm_shreg[FSM_SHREG_WIDTH - PHASE_STORE_LSB_S1_OFFSET           - 1 -: PHASE_STORE_LSB_S1_DURATION];
+    wire [PHASE_SHIFT_S1_DURATION              -1:0] fsm_shreg_shift_s1               = fsm_shreg[FSM_SHREG_WIDTH - PHASE_SHIFT_S1_OFFSET               - 1 -: PHASE_SHIFT_S1_DURATION];
+    wire [PHASE_CHANGE_LSB_B_P_DURATION        -1:0] fsm_shreg_change_lsb_b_p         = fsm_shreg[FSM_SHREG_WIDTH - PHASE_CHANGE_LSB_B_P_OFFSET         - 1 -: PHASE_CHANGE_LSB_B_P_DURATION];
+    wire [PHASE_SELECT_S2_OR_PN_DURATION       -1:0] fsm_shreg_select_s2_or_pn        = fsm_shreg[FSM_SHREG_WIDTH - PHASE_SELECT_S2_OR_PN_OFFSET        - 1 -: PHASE_SELECT_S2_OR_PN_DURATION];
+    wire [PHASE_UPDATE_P_DOUT_DURATION         -1:0] fsm_shreg_update_p_dout          = fsm_shreg[FSM_SHREG_WIDTH - PHASE_UPDATE_P_DOUT_OFFSET          - 1 -: PHASE_UPDATE_P_DOUT_DURATION];
+
+    wire flag_increment_index_a      = |fsm_shreg_increment_index_a;
+    wire flag_decrement_index_b      = |fsm_shreg_decrement_index_b;
+    wire flag_store_msb_si           = |fsm_shreg_store_msb_si;
+    wire flag_store_lsb_si           = |fsm_shreg_store_lsb_si;
+    wire flag_shift_si               = |fsm_shreg_shift_si;
+    wire flag_mask_sum_cw1           = |fsm_shreg_mask_sum_cw1;
+    wire flag_store_lsb_c            = |fsm_shreg_store_lsb_c;
+    wire flag_store_msb_c            = |fsm_shreg_store_msb_c;
+    wire flag_mask_b_r3              = |fsm_shreg_mask_b_r3;
+    wire flag_calculate_carry_msb_s1 = |fsm_shreg_calculate_carry_msb_s1;
+    wire flag_store_lsb_s1           = |fsm_shreg_store_lsb_s1;
+    wire flag_shift_s1               = |fsm_shreg_shift_s1;
+    wire flag_change_lsb_b_p         = |fsm_shreg_change_lsb_b_p;
+    wire flag_select_s2_or_pn        = |fsm_shreg_select_s2_or_pn;
+    wire flag_update_p_dout          = |fsm_shreg_update_p_dout;
+    
+    reg flag_store_word_a   = 0;
+    reg flag_enable_mac_ab  = 0;
+    reg flag_delay_msb_c    = 0;
+    reg flag_mask_a_s2      = 0;
+    reg flag_mask_b_out_p   = 0;
+    reg flag_store_s2       = 0;
+    reg flag_store_pn       = 0;
+    
+    always @(posedge clk) begin
+        flag_store_word_a   <= flag_increment_index_a;
+        flag_enable_mac_ab  <= flag_decrement_index_b;
+        flag_delay_msb_c    <= flag_store_msb_c;
+        flag_mask_a_s2      <= flag_calculate_carry_msb_s1;
+        flag_mask_b_out_p   <= flag_change_lsb_b_p;
+        flag_store_s2       <= flag_shift_s1;
+        flag_store_pn       <= flag_store_s2;
+    end    
 
 
     //
     // FSM Logic
-    //
-    wire reduce_done;
-    wire fsm_freeze = reduce_wait && !reduce_done;
-    
+    //    
     always @(posedge clk or negedge rst_n)
         //
         if (rst_n == 1'b0)
@@ -173,8 +263,8 @@ module ed25519_modular_multiplier
         //
         else begin
             //
-            if (rdy)                fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
-            else if (!fsm_freeze)   fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+            if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+            else     fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
         end
 
 
@@ -184,7 +274,7 @@ module ed25519_modular_multiplier
     always @(posedge clk)
         //
         if (rdy)              index_a <= WORD_INDEX_ZERO;
-        else if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
+        else if (flag_increment_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
 
 
     //
@@ -193,7 +283,7 @@ module ed25519_modular_multiplier
     always @(posedge clk)
         //
         if (rdy)                                index_b <= WORD_INDEX_LAST;
-        else if (dec_index_b && !index_b_ff)    index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
+        else if (flag_decrement_index_b && !index_b_ff)    index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
 
         
     //
@@ -201,10 +291,6 @@ module ed25519_modular_multiplier
     //
     reg [255:0] buf_a_wide;
 
-    always @(posedge clk)
-        //
-        if (store_word_a)       buf_a_wide <= {buf_a_wide[16+:256-3*16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256-2*16+:16]};
-        else if (enable_mac_ab) buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
     
 
     //
@@ -220,7 +306,7 @@ module ed25519_modular_multiplier
 
     always @(posedge clk)
         //
-        if (dec_index_b)    index_b_ff <= ~index_b_ff;
+        if (flag_decrement_index_b)    index_b_ff <= ~index_b_ff;
         else                index_b_ff <= 1'b0;
         
 
@@ -231,7 +317,7 @@ module ed25519_modular_multiplier
     
     always @(posedge clk)
         //
-        if (store_part_b) buf_b_narrow <= !index_b_ff ? b_din[31:16] : b_din[15:0];
+        if (flag_decrement_index_b) buf_b_narrow <= !index_b_ff ? b_din[31:16] : b_din[15:0];
 
 
     //
@@ -241,7 +327,7 @@ module ed25519_modular_multiplier
 
     always @(posedge clk)
         //
-        if (!enable_mac_ab) mac_clear <= {16{1'b1}};
+        if (!flag_enable_mac_ab) mac_clear <= {16{1'b1}};
         else begin
             if (mac_clear[0])       mac_clear <= 16'b0000000000000010;
             else if (mac_clear[15]) mac_clear <= 16'b1111111111111111;
@@ -263,7 +349,7 @@ module ed25519_modular_multiplier
             `ED25519_MAC16_PRIMITIVE mac16_inst
             (
                 .clk    (clk),
-                .ce     (enable_mac_ab),
+                .ce     (flag_enable_mac_ab),
 
                 .clr    (mac_clear[i]),
 
@@ -283,7 +369,6 @@ module ed25519_modular_multiplier
     reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb;
     reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb;
 
-
     wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new;
     wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new;
     
@@ -301,22 +386,25 @@ module ed25519_modular_multiplier
 
     always @(posedge clk)
         //
-        if (shift_si) begin
+        if (flag_shift_si) begin
             si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]};
             si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]};
         end else begin
-            if (store_si_msb)   si_msb <= si_msb_new;
-            if (store_si_lsb)   si_lsb <= si_lsb_new;
+            if (flag_store_msb_si)   si_msb <= si_msb_new;
+            if (flag_store_lsb_si)   si_lsb <= si_lsb_new;
         end
 
 
     //
     // Accumulators
     //
-    wire    [46: 0]    add47_cw0_s;
-    wire    [46: 0]    add47_cw1_s;
-
+    wire [46:0] add47_cw0_s;
+    wire [46:0] add47_cw1_s;
+    wire [14:0] add47_cw1_s_masked = flag_mask_sum_cw1 ? {15{1'b0}} : add47_cw1_s[32+:15];
+    
+    wire [46:0] add47_r3_b_masked = {{32{1'b0}}, flag_mask_b_r3 ? {15{1'b0}} : add47_r3_s[46:32]};
 
+    
     //
     // cw0, cw1
     //
@@ -325,7 +413,7 @@ module ed25519_modular_multiplier
 
     always @(posedge clk)
         //
-        if (shift_si) si_prev_dly <= si_lsb[93:63];
+        if (flag_shift_si) si_prev_dly <= si_lsb[93:63];
         else          si_prev_dly <= {31{1'b0}};
 
     always @(posedge clk)
@@ -336,7 +424,7 @@ module ed25519_modular_multiplier
     wire [46:0] add47_cw0_b = {{16{1'b0}}, si_prev_dly};
 
     wire [46:0] add47_cw1_a = add47_cw0_s;
-    wire [46:0] add47_cw1_b = {{15{1'b0}}, si_next_dly, 1'b0, mask_cw1_sum ? {15{1'b0}} : add47_cw1_s[32+:15]};
+    wire [46:0] add47_cw1_b = {{15{1'b0}}, si_next_dly, 1'b0, add47_cw1_s_masked};
 
     `ED25519_ADD47_PRIMITIVE add47_cw0_inst
     (
@@ -353,60 +441,181 @@ module ed25519_modular_multiplier
         .b      (add47_cw1_b),
         .s      (add47_cw1_s)
     );
-
+    
     
     //
     // Full-Size Product
     //
-    reg  [WORD_COUNTER_WIDTH:0] bram_c_addr;
+    wire [31:0] c_word_lower = add47_cw1_s[31:0];
+
+     
+    wire [46:0] add47_r0_s;
+    wire [46:0] add47_r1_s;
+    wire [46:0] add47_r2_s;
+    wire [46:0] add47_r3_s;
+    
+    reg [255:0] c_lsb_s1_shreg;
+    reg [ 31:0] c_msb_latch;
+            
     
-    wire [WORD_COUNTER_WIDTH:0] reduce_c_addr;
-    wire [                31:0] reduce_c_word;
+        
 
+        
     always @(posedge clk)
         //
-        if (store_c_word)   bram_c_addr <= bram_c_addr + 1'b1;
-        else                bram_c_addr <= {(2*WORD_COUNTER_WIDTH){1'b0}};
-
-    bram_1rw_1ro_readfirst #
+        if (flag_store_msb_c) c_msb_latch <= c_word_lower;
+        else             c_msb_latch <= {32{1'b0}};
+    
+        
+    reg [4:0] c_msb_latch_upper_dly;
+    reg [31:0] c_lsb_shreg_lower_dly;
+    
+    always @(posedge clk)
+        //
+        if (flag_delay_msb_c) c_msb_latch_upper_dly <= c_msb_latch[31:27];
+        else             c_msb_latch_upper_dly <= {5{1'b0}};
+            
+    
+    always @(posedge clk)
+        //
+        if (flag_store_msb_c) c_lsb_shreg_lower_dly <= c_lsb_s1_shreg[31:0];
+        else             c_lsb_shreg_lower_dly <= {32{1'b0}};
+    
+    
+    
+    reg [11:0] carry_msb_s1;
+    
+    always @(posedge clk)
+        //
+        if (flag_calculate_carry_msb_s1) carry_msb_s1 <= {{6{1'b0}}, 6'd38} * {{6{1'b0}}, add47_r3_s[5:0]};
+    
+    
+    wire [46:0] add47_s2_a_masked = {{32{1'b0}}, flag_mask_a_s2 ? {3'b000, carry_msb_s1} : add47_s2_s[46:32]};
+    
+    `ED25519_ADD47_PRIMITIVE add47_r0
     (
-        .MEM_WIDTH(32),
-        .MEM_ADDR_BITS(WORD_COUNTER_WIDTH + 1)
-    )
-    bram_c_inst
+        .clk    (clk),
+        .a      ({{15{1'b0}}, c_msb_latch[30:0], c_msb_latch_upper_dly[4]}),
+        .b      ({{15{1'b0}}, c_msb_latch[29:0], c_msb_latch_upper_dly[4:3]}),
+        .s      (add47_r0_s)
+    );
+    `ED25519_ADD47_PRIMITIVE add47_r1
     (
         .clk    (clk),
+        .a      ({{15{1'b0}}, c_msb_latch[26:0], c_msb_latch_upper_dly[4:0]}),
+        .b      ({{15{1'b0}}, c_lsb_shreg_lower_dly}),
+        .s      (add47_r1_s)
+    );
+    `ED25519_ADD47_PRIMITIVE add47_r2
+    (
+        .clk    (clk),
+        .a      (add47_r0_s),
+        .b      (add47_r1_s),
+        .s      (add47_r2_s)
+    );
+    `ED25519_ADD47_PRIMITIVE add47_r3
+    (
+        .clk    (clk),
+        .a      (add47_r2_s),
+        .b      (add47_r3_b_masked),
+        .s      (add47_r3_s)
+    );
+    
+          
+    
+    wire [46:0] add47_s2_s;
+    `ED25519_ADD47_PRIMITIVE add47_s2
+    (
+        .clk    (clk),
+        .a      (add47_s2_a_masked),
+        .b      ({{15{1'b0}}, c_lsb_s1_shreg[31:0]}),
+        .s      (add47_s2_s)
+    );
+    
+    
+    reg sub32_b_bit;
+    
+    wire [31:0] sub32_b = {{26{1'b1}},  // ...*11*1*
+        sub32_b_bit, {2{1'b1}}, sub32_b_bit, 1'b1, sub32_b_bit};
+    
+    always @(posedge clk)
+        //
+        if (!fsm_shreg_change_lsb_b_p) sub32_b_bit <= 1'b1;
+        else                           sub32_b_bit <= 1'b0;
+    
+    wire [31:0] sub32_pn_d;
+    wire        sub32_b_in;
+    wire        sub32_b_out;
 
-        .a_addr (bram_c_addr),
-        .a_wr   (store_c_word),
-        .a_in   (add47_cw1_s[31:0]),
-        .a_out  (),
-
-        .b_addr (reduce_c_addr),
-        .b_out  (reduce_c_word)
+    assign sub32_b_in = sub32_b_out & !flag_mask_b_out_p;
+    
+    `ED25519_SUB32_PRIMITIVE sub32_pn
+    (
+        .clk    (clk),
+        .a      (add47_s2_s[31:0]),
+        .b      (sub32_b),
+        .d      (sub32_pn_d),
+        .b_in   (sub32_b_in),
+        .b_out  (sub32_b_out)
     );
+    
 
+    wire [31:0] add47_r3_s_lower = add47_r3_s[31:0];
+    
 
-    //
-    // Reduction Stage
-    //
-    ed25519_modular_reductor reductor_inst
-    (
-        .clk        (clk),
-        .rst_n      (rst_n),
+    always @(posedge clk)
+        //
+        if (flag_store_word_a)       buf_a_wide <= {buf_a_wide[16+:256-3*16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256-2*16+:16]};
+        else if (flag_enable_mac_ab) buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
+        else if (flag_store_s2)      buf_a_wide <= {add47_s2_s[31:0], buf_a_wide[255:32]};
+        else if (flag_update_p_dout)  buf_a_wide <= {{32{1'bX}}, buf_a_wide[255:32]};
 
-        .ena        (reduce_start),
-        .rdy        (reduce_done),
+        
+    always @(posedge clk)
+        //
+        if      (flag_store_lsb_c)                  c_lsb_s1_shreg <= {c_word_lower,     c_lsb_s1_shreg[255:32]};
+        else if (flag_store_lsb_s1)                 c_lsb_s1_shreg <= {add47_r3_s_lower, c_lsb_s1_shreg[255:32]};
+        else if (flag_store_pn)                     c_lsb_s1_shreg <= {sub32_pn_d,       c_lsb_s1_shreg[255:32]};
+        else if (flag_store_msb_c || flag_shift_s1) c_lsb_s1_shreg <= {{32{1'b0}},       c_lsb_s1_shreg[255:32]};
+        else if (flag_update_p_dout)                 c_lsb_s1_shreg <= {{32{1'b0}},       c_lsb_s1_shreg[255:32]};
 
-        .x_addr     (reduce_c_addr),
-        .y_addr     (p_addr),
-        .y_wren     (p_wren),
 
-        .x_din      (reduce_c_word),
-        .y_dout     (p_dout)
-    );
+    reg sel_pn; // 0: output in S2, 1: output in PN
+    
+    always @(posedge clk)
+        //
+        if (flag_select_s2_or_pn) sel_pn <= sub32_b_out & add47_s2_s[0];
+        
+        
+    reg [31:0] p_dout_reg;
+    
+    assign p_dout = p_dout_reg;
+    
+    always @(posedge clk)
+        //
+        if (flag_update_p_dout) p_dout_reg <= sel_pn ? c_lsb_s1_shreg[31:0] : buf_a_wide[31:0];
+        else                    p_dout_reg <= {32{1'bX}};
+        
 
+    reg p_wren_reg = 0;
+    
+    assign p_wren = p_wren_reg;
+    
+    always @(posedge clk)
+        //
+        p_wren_reg <= flag_update_p_dout;
 
+        
+    reg [WORD_COUNTER_WIDTH-1:0] p_addr_reg;
+        
+    assign p_addr = p_addr_reg;
+        
+    always @(posedge clk)
+        //
+        if (p_wren_reg) p_addr_reg <= WORD_INDEX_NEXT_OR_ZERO(p_addr_reg);
+        else            p_addr_reg <= WORD_INDEX_ZERO;
+    
+        
 endmodule
 
 



More information about the Commits mailing list