[Cryptech-Commits] [user/shatov/modexpng] 02/13: Added "MERGE_LH" micro-operation. To be able to do Garner's formula we need regular (not modular) multiplication. We're doing this by telling the modular multiplier to stop after the "square" step, which computes A*B. The problem is that the multiplier stores the lower part of the product in the internal bank L and the upper part in the internal bank H, but we need to be able to do operations on the product as a whole. MERGE_LH combines the two halves of the product into one bank.

git at cryptech.is git at cryptech.is
Wed Oct 23 16:22:01 UTC 2019


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexpng.

commit 3213b3ef3c1d40dfa416b6be409cfa3d15af0930
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Mon Oct 21 12:46:22 2019 +0300

    Added "MERGE_LH" micro-operation. To be able to do Garner's formula we need
    regular (not modular) multiplication. We're doing this by telling the modular
    multiplier to stop after the "square" step, which computes A*B. The problem is
    that the multiplier stores the lower part of the product in the internal bank L
    and the upper part in the internal bank H, but we need to be able to do
    operations on the product as a whole. MERGE_LH combines the two halves of
    the product into one bank.
---
 rtl/modexpng_core_top.v       | 41 ++++++++++++++++++++++++++++++--
 rtl/modexpng_general_worker.v | 55 ++++++++++++++++++++++++++++++++++++++++---
 rtl/modexpng_microcode.vh     |  4 ++++
 rtl/modexpng_mmm_dual.v       | 13 ++++++----
 rtl/modexpng_uop_rom.v        | 11 +++++++++
 5 files changed, 115 insertions(+), 9 deletions(-)

diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
index dea7f0a..4c1f065 100644
--- a/rtl/modexpng_core_top.v
+++ b/rtl/modexpng_core_top.v
@@ -83,13 +83,15 @@ module modexpng_core_top
     wire uop_opcode_is_ladder = (uop_data_opcode == UOP_OPCODE_LADDER_INIT         ) ||
                                 (uop_data_opcode == UOP_OPCODE_LADDER_STEP         ) ;
     wire uop_opcode_is_mmm    = (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY    ) ||
-                                (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) ;
+                                (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) ||
+                                (uop_data_opcode == UOP_OPCODE_REGULAR_MULTIPLY    ) ;
     wire uop_opcode_is_wrk    = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES   ) ||
                                 (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X        ) ||
                                 (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) ||
                                 (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y    ) ||
                                 (uop_data_opcode == UOP_OPCODE_CROSS_LADDERS_X2Y   ) ||
-                                (uop_data_opcode == UOP_OPCODE_MODULAR_SUBTRACT    ) ;
+                                (uop_data_opcode == UOP_OPCODE_MODULAR_SUBTRACT    ) ||
+                                (uop_data_opcode == UOP_OPCODE_MERGE_LH            ) ;
                                 
     wire uop_loop_now;
     
@@ -716,6 +718,9 @@ module modexpng_core_top
     reg                     mmm_only_reduce_x;
     reg                     mmm_only_reduce_y;
     
+    reg                     mmm_just_multiply_x;
+    reg                     mmm_just_multiply_y;
+    
     wire                    rdct_ena_x;
     wire                    rdct_ena_y;
     wire                    rdct_rdy_x;
@@ -734,6 +739,7 @@ module modexpng_core_top
         .word_index_last_minus1     (mmm_word_index_last_minus1_x),
         .force_unity_b              (mmm_force_unity_b_x),
         .only_reduce                (mmm_only_reduce_x),
+        .just_multiply              (mmm_just_multiply_x),
         
         .sel_wide_in                (mmm_sel_wide_in_x),
         .sel_narrow_in              (mmm_sel_narrow_in_x),
@@ -790,6 +796,7 @@ module modexpng_core_top
         .word_index_last_minus1     (mmm_word_index_last_minus1_y),
         .force_unity_b              (mmm_force_unity_b_y),
         .only_reduce                (mmm_only_reduce_y),
+        .just_multiply              (mmm_just_multiply_y),
         
         .sel_wide_in                (mmm_sel_wide_in_y),
         .sel_narrow_in              (mmm_sel_narrow_in_y),
@@ -1088,6 +1095,7 @@ module modexpng_core_top
                         UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= {io_mgr_ladder_p, io_mgr_ladder_q};
                     endcase
                     //
+                    {mmm_just_multiply_x,   mmm_just_multiply_y  } <= {2{1'b0}};
                     {mmm_only_reduce_x,     mmm_only_reduce_y    } <= {2{1'b0}};
                     {mmm_force_unity_b_x,   mmm_force_unity_b_y  } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}};
                     {mmm_sel_wide_in_x,     mmm_sel_wide_in_y    } <= {2{uop_data_sel_wide_in      }};
@@ -1110,6 +1118,20 @@ module modexpng_core_top
                     //
                 end
                 //
+                UOP_OPCODE_REGULAR_MULTIPLY: begin
+                    //
+                    {mmm_ladder_mode_x,     mmm_ladder_mode_y   }  <= {2{1'b1}};
+                    //
+                    {mmm_just_multiply_x,   mmm_just_multiply_y  } <= {2{1'b1}};
+                    {mmm_only_reduce_x,     mmm_only_reduce_y    } <= {2{1'b0}};
+                    {mmm_force_unity_b_x,   mmm_force_unity_b_y  } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}};
+                    {mmm_sel_wide_in_x,     mmm_sel_wide_in_y    } <= {2{uop_data_sel_wide_in      }};
+                    {mmm_sel_narrow_in_x,   mmm_sel_narrow_in_y  } <= {2{uop_data_sel_narrow_in    }};
+                    {rdct_sel_wide_out_x,   rdct_sel_wide_out_y  } <= {2{uop_data_sel_wide_out     }}; 
+                    {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out   }};
+                    //
+                end
+                //
                 UOP_OPCODE_PROPAGATE_CARRIES: begin
                     wrk_sel_narrow_in   <= uop_data_sel_narrow_in;
                     wrk_sel_narrow_out  <= uop_data_sel_narrow_out;
@@ -1121,6 +1143,10 @@ module modexpng_core_top
                     wrk_sel_narrow_out <= uop_data_sel_narrow_out;
                 end
                 //
+                UOP_OPCODE_MERGE_LH: begin
+                    wrk_sel_narrow_out <= uop_data_sel_narrow_out;
+                end
+                //
                 UOP_OPCODE_COPY_CRT_Y2X,
                 UOP_OPCODE_COPY_LADDERS_X2Y,
                 UOP_OPCODE_CROSS_LADDERS_X2Y: begin
@@ -1181,10 +1207,21 @@ module modexpng_core_top
                     {rdct_word_index_last_x,       rdct_word_index_last_y      } <= {2{word_index_last_pq       }};
                 end
                 //
+                UOP_OPCODE_REGULAR_MULTIPLY: begin
+                    {mmm_word_index_last_x,        mmm_word_index_last_y       } <= {2{word_index_last_pq       }};  
+                    {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{word_index_last_pq_minus1}};
+                    {rdct_word_index_last_x,       rdct_word_index_last_y      } <= {2{word_index_last_pq       }};
+                end
+                //
                 UOP_OPCODE_MODULAR_SUBTRACT: begin
                     wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
                 end
                 //
+                UOP_OPCODE_MERGE_LH: begin
+                    wrk_word_index_last      <= word_index_last_n;
+                    wrk_word_index_last_half <= word_index_last_pq;
+                end
+                //
                 UOP_OPCODE_LADDER_INIT: begin
                     io_mgr_word_index_last <= OP_ADDR_LADDER_LAST;
                     io_mgr_ladder_steps    <= crt_mode ? bit_index_last_pq : bit_index_last_n;
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index 74c939b..d82a120 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -334,6 +334,10 @@ module modexpng_general_worker
                             //
                         end
                         //
+                        UOP_OPCODE_MERGE_LH:
+                            //
+                            enable_wide_xy_rd_en;
+                            //
                     endcase
                 //
             endcase
@@ -424,7 +428,8 @@ module modexpng_general_worker
                     //
                     case (opcode)
                         //
-                        UOP_OPCODE_PROPAGATE_CARRIES:
+                        UOP_OPCODE_PROPAGATE_CARRIES,
+                        UOP_OPCODE_MERGE_LH:
                             //
                             enable_narrow_xy_wr_en;
                             //
@@ -738,6 +743,13 @@ module modexpng_general_worker
                                          wrk_rd_narrow_x_din_y,
                                          wrk_rd_narrow_y_din_y);
                     //
+                    UOP_OPCODE_MERGE_LH:
+                        //
+                        update_narrow_dout(wrk_rd_wide_x_din_x,
+                                           wrk_rd_wide_y_din_x,
+                                           wrk_rd_wide_x_din_y,
+                                           wrk_rd_wide_y_din_y);
+                    //
                 endcase
             //
         endcase
@@ -819,6 +831,8 @@ module modexpng_general_worker
     reg [OP_ADDR_W -1:0] rd_wide_xy_addr_xy_next;
     reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_xy_next;
 
+    reg rd_wide_xy_addr_xy_next_last_seen;
+
     wire rd_wide_xy_addr_xy_next_is_last = rd_wide_xy_addr_xy_next == word_index_last_half;
     wire rd_narrow_xy_addr_xy_next_is_last = rd_narrow_xy_addr_xy_next == word_index_last;
     
@@ -874,6 +888,22 @@ module modexpng_general_worker
         rd_narrow_xy_addr_xy_next <= !rd_narrow_xy_addr_xy_next_is_last ? rd_narrow_xy_addr_xy_next + 1'b1 : OP_ADDR_ZERO;
     endtask 
     
+    always @(posedge clk)
+        //
+        case (wrk_fsm_state_next_one_pass)
+            //
+            WRK_FSM_STATE_LATENCY_PRE1:
+                //
+                rd_wide_xy_addr_xy_next_last_seen <= 1'b0;
+            //
+            WRK_FSM_STATE_LATENCY_PRE2,
+            WRK_FSM_STATE_BUSY:
+                //
+                if (!rd_wide_xy_addr_xy_next_last_seen)
+                    rd_wide_xy_addr_xy_next_last_seen <= rd_wide_xy_addr_xy_next_is_last;
+            //
+        endcase
+
     always @(posedge clk) begin
         //
         update_rd_wide_bank_addr  (BANK_DNC, OP_ADDR_DNC);
@@ -897,6 +927,11 @@ module modexpng_general_worker
                         //
                     end
                     //
+                    UOP_OPCODE_MERGE_LH: begin
+                        update_rd_wide_bank_addr  (BANK_WIDE_L,   OP_ADDR_ZERO); update_rd_wide_addr_next  (OP_ADDR_ONE);
+                        update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
+                    end
+                    //
                 endcase
                 //
             WRK_FSM_STATE_LATENCY_PRE2,
@@ -920,6 +955,15 @@ module modexpng_general_worker
                         //
                     end
                     //
+                    UOP_OPCODE_MERGE_LH: begin
+                        //
+                        if (!rd_wide_xy_addr_xy_next_last_seen) update_rd_wide_bank_addr  (BANK_WIDE_L,   rd_wide_xy_addr_xy_next  ); 
+                        else                                    update_rd_wide_bank_addr  (BANK_WIDE_H,   rd_wide_xy_addr_xy_next  );
+                                                                                                                                      advance_rd_wide_addr_next  ;
+                                                                update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
+                        //
+                    end
+                    //
                 endcase
             //
         endcase
@@ -1060,6 +1104,9 @@ module modexpng_general_worker
                     UOP_OPCODE_MODULAR_REDUCE_INIT:
                         update_wr_wide_bank_addr(uop_modular_reduce_init_bank_x, uop_modular_reduce_init_bank_y, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_y_dly2);                    
                     //
+                    UOP_OPCODE_MERGE_LH:
+                        update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
+                    //
                 endcase
                 //
         endcase
@@ -1121,7 +1168,8 @@ module modexpng_general_worker
             UOP_OPCODE_PROPAGATE_CARRIES,
             UOP_OPCODE_OUTPUT_FROM_NARROW,
             UOP_OPCODE_COPY_CRT_Y2X,
-            UOP_OPCODE_MODULAR_REDUCE_INIT: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+            UOP_OPCODE_MODULAR_REDUCE_INIT,
+            UOP_OPCODE_MERGE_LH:            wrk_fsm_state <= wrk_fsm_state_next_one_pass;
             UOP_OPCODE_COPY_LADDERS_X2Y,
             UOP_OPCODE_CROSS_LADDERS_X2Y:   wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
             UOP_OPCODE_MODULAR_SUBTRACT:    wrk_fsm_state <= wrk_fsm_state_next_two_pass;
@@ -1148,7 +1196,8 @@ module modexpng_general_worker
             UOP_OPCODE_PROPAGATE_CARRIES,
             UOP_OPCODE_OUTPUT_FROM_NARROW,
             UOP_OPCODE_COPY_CRT_Y2X,
-            UOP_OPCODE_MODULAR_REDUCE_INIT:
+            UOP_OPCODE_MODULAR_REDUCE_INIT,
+            UOP_OPCODE_MERGE_LH:
                 //
                 case (wrk_fsm_state)
                     WRK_FSM_STATE_BUSY:
diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh
index 3493e26..47cdeb2 100644
--- a/rtl/modexpng_microcode.vh
+++ b/rtl/modexpng_microcode.vh
@@ -78,6 +78,10 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 5'd12;
  * source and destination WIDE are don't care
  */
 
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MERGE_LH = 5'd13;
+
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_REGULAR_MULTIPLY = 5'd14;
+
 localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_INIT = 5'd16;
 localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_STEP = 5'd17;
 /* CRT is don't care
diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v
index 6e52a97..13a8773 100644
--- a/rtl/modexpng_mmm_dual.v
+++ b/rtl/modexpng_mmm_dual.v
@@ -9,6 +9,7 @@ module modexpng_mmm_dual
     word_index_last_minus1,
     force_unity_b,
     only_reduce,
+    just_multiply,
     
     sel_wide_in, sel_narrow_in,
     
@@ -74,6 +75,7 @@ module modexpng_mmm_dual
     input [7:0] word_index_last_minus1;
     input       force_unity_b;
     input only_reduce;
+    input just_multiply;
         
     input [BANK_ADDR_W-1:0] sel_wide_in;
     input [BANK_ADDR_W-1:0] sel_narrow_in;
@@ -127,6 +129,7 @@ module modexpng_mmm_dual
     wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
     wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
     wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;
+    wire [FSM_STATE_WIDTH-1:0] fsm_state_after_square_holdoff;
 
     
     //
@@ -911,7 +914,9 @@ module modexpng_mmm_dual
     assign fsm_state_after_idle = !only_reduce ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_INIT;
     assign fsm_state_after_mult_square    = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF   : FSM_STATE_MULT_SQUARE_COL_N_INIT;
     assign fsm_state_after_mult_triangle  = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
-    assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; 
+    assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
+    assign fsm_state_after_square_holdoff = just_multiply ? FSM_STATE_STOP : FSM_STATE_MULT_TRIANGLE_COL_0_INIT;
+     
 
     always @* begin
         //
@@ -928,7 +933,7 @@ module modexpng_mmm_dual
             FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next =                         FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
             FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square    : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
             
-            FSM_STATE_MULT_SQUARE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT : FSM_STATE_MULT_SQUARE_HOLDOFF;
+            FSM_STATE_MULT_SQUARE_HOLDOFF:    fsm_state_next =                         rcmb_rdy ? fsm_state_after_square_holdoff : FSM_STATE_MULT_SQUARE_HOLDOFF;
 
             FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
             FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next =                         FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
@@ -952,9 +957,9 @@ module modexpng_mmm_dual
             
             FSM_STATE_WAIT_REDUCTOR:             fsm_state_next =                         rdct_rdy ? FSM_STATE_STOP : FSM_STATE_WAIT_REDUCTOR;
             
-            FSM_STATE_STOP:                 fsm_state_next =                         FSM_STATE_IDLE                   ;
+            FSM_STATE_STOP:                     fsm_state_next =                         FSM_STATE_IDLE                   ;
             
-            default:                          fsm_state_next =                         FSM_STATE_IDLE                   ;
+            default:                             fsm_state_next =                         FSM_STATE_IDLE                   ;
 
         endcase
         //
diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v
index adc657a..61501f9 100644
--- a/rtl/modexpng_uop_rom.v
+++ b/rtl/modexpng_uop_rom.v
@@ -78,6 +78,17 @@ module modexpng_uop_rom
                                                                                                                                                                                           //
             6'd43:   data <= {UOP_OPCODE_MODULAR_SUBTRACT,    UOP_CRT_DNC, UOP_NPQ_PQ,  UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_D,      BANK_WIDE_C,    BANK_NARROW_C    }; //
                                                                                                                                                                                           //
+            6'd44:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,    UOP_CRT_DNC, UOP_NPQ_PQ,  UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_C, BANK_NARROW_E,      BANK_WIDE_C,    BANK_NARROW_C    }; //
+            6'd45:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,    UOP_CRT_DNC, UOP_NPQ_PQ,  UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_C, BANK_NARROW_A,      BANK_WIDE_C,    BANK_NARROW_C    }; //
+                                                                                                                                                                                          //            
+            6'd46:   data <= {UOP_OPCODE_INPUT_TO_WIDE,       UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q,        BANK_WIDE_E,    BANK_DNC         }; //
+                                                                                                                                                                                          //            
+            6'd47:   data <= {UOP_OPCODE_INPUT_TO_NARROW,     UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q,        BANK_DNC,       BANK_NARROW_E    }; //
+                                                                                                                                                                                          //
+            6'd48:   data <= {UOP_OPCODE_REGULAR_MULTIPLY,    UOP_CRT_DNC, UOP_NPQ_PQ,  UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_E, BANK_NARROW_C,      BANK_DNC,       BANK_DNC         }; //
+                                                                                                                                                                                          //
+            6'd49:   data <= {UOP_OPCODE_MERGE_LH,            UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_DNC,           BANK_DNC,       BANK_NARROW_A    }; //
+                                                                                                                                                                                          //
             default: data <= {UOP_OPCODE_STOP,                UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL                                                   }; //                                            
         endcase
 



More information about the Commits mailing list