[Cryptech-Commits] [core/math/modexpng] 36/92: Added "MERGE_LH" micro-operation. To be able to do Garner's formula we need regular (not modular) multiplication. We're doing this by telling the modular multiplier to stop after the "square" step, which computes A*B. The problem is that the multiplier stores the lower part of the product in the internal bank L and the upper part in the internal bank H, but we need to be able to do operations on the product as a whole. MERGE_LH that combines the two halves of the product into one bank.
git at cryptech.is
git at cryptech.is
Sat Mar 14 18:19:15 UTC 2020
This is an automated email from the git hooks/post-receive script.
paul at psgd.org pushed a commit to branch master
in repository core/math/modexpng.
commit 3213b3ef3c1d40dfa416b6be409cfa3d15af0930
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Mon Oct 21 12:46:22 2019 +0300
Added "MERGE_LH" micro-operation. To be able to do Garner's formula we need
regular (not modular) multiplication. We're doing this by telling the modular
multiplier to stop after the "square" step, which computes A*B. The problem is
that the multiplier stores the lower part of the product in the internal bank L
and the upper part in the internal bank H, but we need to be able to do
operations on the product as a whole. MERGE_LH that combines the two halves of
the product into one bank.
---
rtl/modexpng_core_top.v | 41 ++++++++++++++++++++++++++++++--
rtl/modexpng_general_worker.v | 55 ++++++++++++++++++++++++++++++++++++++++---
rtl/modexpng_microcode.vh | 4 ++++
rtl/modexpng_mmm_dual.v | 13 ++++++----
rtl/modexpng_uop_rom.v | 11 +++++++++
5 files changed, 115 insertions(+), 9 deletions(-)
diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
index dea7f0a..4c1f065 100644
--- a/rtl/modexpng_core_top.v
+++ b/rtl/modexpng_core_top.v
@@ -83,13 +83,15 @@ module modexpng_core_top
wire uop_opcode_is_ladder = (uop_data_opcode == UOP_OPCODE_LADDER_INIT ) ||
(uop_data_opcode == UOP_OPCODE_LADDER_STEP ) ;
wire uop_opcode_is_mmm = (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ) ||
- (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) ;
+ (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) ||
+ (uop_data_opcode == UOP_OPCODE_REGULAR_MULTIPLY ) ;
wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) ||
(uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) ||
(uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) ||
(uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) ||
(uop_data_opcode == UOP_OPCODE_CROSS_LADDERS_X2Y ) ||
- (uop_data_opcode == UOP_OPCODE_MODULAR_SUBTRACT ) ;
+ (uop_data_opcode == UOP_OPCODE_MODULAR_SUBTRACT ) ||
+ (uop_data_opcode == UOP_OPCODE_MERGE_LH ) ;
wire uop_loop_now;
@@ -716,6 +718,9 @@ module modexpng_core_top
reg mmm_only_reduce_x;
reg mmm_only_reduce_y;
+ reg mmm_just_multiply_x;
+ reg mmm_just_multiply_y;
+
wire rdct_ena_x;
wire rdct_ena_y;
wire rdct_rdy_x;
@@ -734,6 +739,7 @@ module modexpng_core_top
.word_index_last_minus1 (mmm_word_index_last_minus1_x),
.force_unity_b (mmm_force_unity_b_x),
.only_reduce (mmm_only_reduce_x),
+ .just_multiply (mmm_just_multiply_x),
.sel_wide_in (mmm_sel_wide_in_x),
.sel_narrow_in (mmm_sel_narrow_in_x),
@@ -790,6 +796,7 @@ module modexpng_core_top
.word_index_last_minus1 (mmm_word_index_last_minus1_y),
.force_unity_b (mmm_force_unity_b_y),
.only_reduce (mmm_only_reduce_y),
+ .just_multiply (mmm_just_multiply_y),
.sel_wide_in (mmm_sel_wide_in_y),
.sel_narrow_in (mmm_sel_narrow_in_y),
@@ -1088,6 +1095,7 @@ module modexpng_core_top
UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= {io_mgr_ladder_p, io_mgr_ladder_q};
endcase
//
+ {mmm_just_multiply_x, mmm_just_multiply_y } <= {2{1'b0}};
{mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b0}};
{mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}};
{mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{uop_data_sel_wide_in }};
@@ -1110,6 +1118,20 @@ module modexpng_core_top
//
end
//
+ UOP_OPCODE_REGULAR_MULTIPLY: begin
+ //
+ {mmm_ladder_mode_x, mmm_ladder_mode_y } <= {2{1'b1}};
+ //
+ {mmm_just_multiply_x, mmm_just_multiply_y } <= {2{1'b1}};
+ {mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b0}};
+ {mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}};
+ {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{uop_data_sel_wide_in }};
+ {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{uop_data_sel_narrow_in }};
+ {rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }};
+ {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out }};
+ //
+ end
+ //
UOP_OPCODE_PROPAGATE_CARRIES: begin
wrk_sel_narrow_in <= uop_data_sel_narrow_in;
wrk_sel_narrow_out <= uop_data_sel_narrow_out;
@@ -1121,6 +1143,10 @@ module modexpng_core_top
wrk_sel_narrow_out <= uop_data_sel_narrow_out;
end
//
+ UOP_OPCODE_MERGE_LH: begin
+ wrk_sel_narrow_out <= uop_data_sel_narrow_out;
+ end
+ //
UOP_OPCODE_COPY_CRT_Y2X,
UOP_OPCODE_COPY_LADDERS_X2Y,
UOP_OPCODE_CROSS_LADDERS_X2Y: begin
@@ -1181,10 +1207,21 @@ module modexpng_core_top
{rdct_word_index_last_x, rdct_word_index_last_y } <= {2{word_index_last_pq }};
end
//
+ UOP_OPCODE_REGULAR_MULTIPLY: begin
+ {mmm_word_index_last_x, mmm_word_index_last_y } <= {2{word_index_last_pq }};
+ {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{word_index_last_pq_minus1}};
+ {rdct_word_index_last_x, rdct_word_index_last_y } <= {2{word_index_last_pq }};
+ end
+ //
UOP_OPCODE_MODULAR_SUBTRACT: begin
wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
end
//
+ UOP_OPCODE_MERGE_LH: begin
+ wrk_word_index_last <= word_index_last_n;
+ wrk_word_index_last_half <= word_index_last_pq;
+ end
+ //
UOP_OPCODE_LADDER_INIT: begin
io_mgr_word_index_last <= OP_ADDR_LADDER_LAST;
io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n;
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index 74c939b..d82a120 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -334,6 +334,10 @@ module modexpng_general_worker
//
end
//
+ UOP_OPCODE_MERGE_LH:
+ //
+ enable_wide_xy_rd_en;
+ //
endcase
//
endcase
@@ -424,7 +428,8 @@ module modexpng_general_worker
//
case (opcode)
//
- UOP_OPCODE_PROPAGATE_CARRIES:
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_MERGE_LH:
//
enable_narrow_xy_wr_en;
//
@@ -738,6 +743,13 @@ module modexpng_general_worker
wrk_rd_narrow_x_din_y,
wrk_rd_narrow_y_din_y);
//
+ UOP_OPCODE_MERGE_LH:
+ //
+ update_narrow_dout(wrk_rd_wide_x_din_x,
+ wrk_rd_wide_y_din_x,
+ wrk_rd_wide_x_din_y,
+ wrk_rd_wide_y_din_y);
+ //
endcase
//
endcase
@@ -819,6 +831,8 @@ module modexpng_general_worker
reg [OP_ADDR_W -1:0] rd_wide_xy_addr_xy_next;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_xy_next;
+ reg rd_wide_xy_addr_xy_next_last_seen;
+
wire rd_wide_xy_addr_xy_next_is_last = rd_wide_xy_addr_xy_next == word_index_last_half;
wire rd_narrow_xy_addr_xy_next_is_last = rd_narrow_xy_addr_xy_next == word_index_last;
@@ -874,6 +888,22 @@ module modexpng_general_worker
rd_narrow_xy_addr_xy_next <= !rd_narrow_xy_addr_xy_next_is_last ? rd_narrow_xy_addr_xy_next + 1'b1 : OP_ADDR_ZERO;
endtask
+ always @(posedge clk)
+ //
+ case (wrk_fsm_state_next_one_pass)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1:
+ //
+ rd_wide_xy_addr_xy_next_last_seen <= 1'b0;
+ //
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_BUSY:
+ //
+ if (!rd_wide_xy_addr_xy_next_last_seen)
+ rd_wide_xy_addr_xy_next_last_seen <= rd_wide_xy_addr_xy_next_is_last;
+ //
+ endcase
+
always @(posedge clk) begin
//
update_rd_wide_bank_addr (BANK_DNC, OP_ADDR_DNC);
@@ -897,6 +927,11 @@ module modexpng_general_worker
//
end
//
+ UOP_OPCODE_MERGE_LH: begin
+ update_rd_wide_bank_addr (BANK_WIDE_L, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
+ update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
+ end
+ //
endcase
//
WRK_FSM_STATE_LATENCY_PRE2,
@@ -920,6 +955,15 @@ module modexpng_general_worker
//
end
//
+ UOP_OPCODE_MERGE_LH: begin
+ //
+ if (!rd_wide_xy_addr_xy_next_last_seen) update_rd_wide_bank_addr (BANK_WIDE_L, rd_wide_xy_addr_xy_next );
+ else update_rd_wide_bank_addr (BANK_WIDE_H, rd_wide_xy_addr_xy_next );
+ advance_rd_wide_addr_next ;
+ update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
+ //
+ end
+ //
endcase
//
endcase
@@ -1060,6 +1104,9 @@ module modexpng_general_worker
UOP_OPCODE_MODULAR_REDUCE_INIT:
update_wr_wide_bank_addr(uop_modular_reduce_init_bank_x, uop_modular_reduce_init_bank_y, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_y_dly2);
//
+ UOP_OPCODE_MERGE_LH:
+ update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
+ //
endcase
//
endcase
@@ -1121,7 +1168,8 @@ module modexpng_general_worker
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
UOP_OPCODE_COPY_CRT_Y2X,
- UOP_OPCODE_MODULAR_REDUCE_INIT: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+ UOP_OPCODE_MODULAR_REDUCE_INIT,
+ UOP_OPCODE_MERGE_LH: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
UOP_OPCODE_COPY_LADDERS_X2Y,
UOP_OPCODE_CROSS_LADDERS_X2Y: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
UOP_OPCODE_MODULAR_SUBTRACT: wrk_fsm_state <= wrk_fsm_state_next_two_pass;
@@ -1148,7 +1196,8 @@ module modexpng_general_worker
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
UOP_OPCODE_COPY_CRT_Y2X,
- UOP_OPCODE_MODULAR_REDUCE_INIT:
+ UOP_OPCODE_MODULAR_REDUCE_INIT,
+ UOP_OPCODE_MERGE_LH:
//
case (wrk_fsm_state)
WRK_FSM_STATE_BUSY:
diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh
index 3493e26..47cdeb2 100644
--- a/rtl/modexpng_microcode.vh
+++ b/rtl/modexpng_microcode.vh
@@ -78,6 +78,10 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 5'd12;
* source and destination WIDE are don't care
*/
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MERGE_LH = 5'd13;
+
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_REGULAR_MULTIPLY = 5'd14;
+
localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_INIT = 5'd16;
localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_STEP = 5'd17;
/* CRT is don't care
diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v
index 6e52a97..13a8773 100644
--- a/rtl/modexpng_mmm_dual.v
+++ b/rtl/modexpng_mmm_dual.v
@@ -9,6 +9,7 @@ module modexpng_mmm_dual
word_index_last_minus1,
force_unity_b,
only_reduce,
+ just_multiply,
sel_wide_in, sel_narrow_in,
@@ -74,6 +75,7 @@ module modexpng_mmm_dual
input [7:0] word_index_last_minus1;
input force_unity_b;
input only_reduce;
+ input just_multiply;
input [BANK_ADDR_W-1:0] sel_wide_in;
input [BANK_ADDR_W-1:0] sel_narrow_in;
@@ -127,6 +129,7 @@ module modexpng_mmm_dual
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;
+ wire [FSM_STATE_WIDTH-1:0] fsm_state_after_square_holdoff;
//
@@ -911,7 +914,9 @@ module modexpng_mmm_dual
assign fsm_state_after_idle = !only_reduce ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_INIT;
assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
- assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
+ assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
+ assign fsm_state_after_square_holdoff = just_multiply ? FSM_STATE_STOP : FSM_STATE_MULT_TRIANGLE_COL_0_INIT;
+
always @* begin
//
@@ -928,7 +933,7 @@ module modexpng_mmm_dual
FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
- FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT : FSM_STATE_MULT_SQUARE_HOLDOFF;
+ FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? fsm_state_after_square_holdoff : FSM_STATE_MULT_SQUARE_HOLDOFF;
FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
@@ -952,9 +957,9 @@ module modexpng_mmm_dual
FSM_STATE_WAIT_REDUCTOR: fsm_state_next = rdct_rdy ? FSM_STATE_STOP : FSM_STATE_WAIT_REDUCTOR;
- FSM_STATE_STOP: fsm_state_next = FSM_STATE_IDLE ;
+ FSM_STATE_STOP: fsm_state_next = FSM_STATE_IDLE ;
- default: fsm_state_next = FSM_STATE_IDLE ;
+ default: fsm_state_next = FSM_STATE_IDLE ;
endcase
//
diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v
index adc657a..61501f9 100644
--- a/rtl/modexpng_uop_rom.v
+++ b/rtl/modexpng_uop_rom.v
@@ -78,6 +78,17 @@ module modexpng_uop_rom
//
6'd43: data <= {UOP_OPCODE_MODULAR_SUBTRACT, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_WIDE_C, BANK_NARROW_C }; //
//
+ 6'd44: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_C, BANK_NARROW_E, BANK_WIDE_C, BANK_NARROW_C }; //
+ 6'd45: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_C, BANK_NARROW_A, BANK_WIDE_C, BANK_NARROW_C }; //
+ //
+ 6'd46: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_E, BANK_DNC }; //
+ //
+ 6'd47: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_DNC, BANK_NARROW_E }; //
+ //
+ 6'd48: data <= {UOP_OPCODE_REGULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_C, BANK_DNC, BANK_DNC }; //
+ //
+ 6'd49: data <= {UOP_OPCODE_MERGE_LH, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_DNC, BANK_NARROW_A }; //
+ //
default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; //
endcase
More information about the Commits
mailing list