[Cryptech-Commits] [core/math/modexpa7] branch systolic updated: Systolic multiplier simplified a bit: * passes testbench tests again * this time synthesizes fine (without major issues)
git at cryptech.is
git at cryptech.is
Thu Jul 13 18:47:33 UTC 2017
This is an automated email from the git hooks/post-receive script.
meisterpaul1 at yandex.ru pushed a commit to branch systolic
in repository core/math/modexpa7.
The following commit(s) were added to refs/heads/systolic by this push:
new 72a67f0 Systolic multiplier simplified a bit: * passes testbench tests again * this time synthesizes fine (without major issues)
72a67f0 is described below
commit 72a67f04a21ba4006c7b5bf38e01a3aa6592740f
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Thu Jul 13 21:38:53 2017 +0300
Systolic multiplier simplified a bit:
* passes testbench tests again
* this time synthesizes fine (without major issues)
List of things that need polishing in the future:
* The parallelized operand loader can be reduced by a factor of 3
to store only one operand at a time: it currently stores
B, N_COEFF and N. After B is consumed, it can be overwritten
with AB, and N_COEFF can be loaded sequentially the same way
A is loaded. After that, the loader can be filled with Q while
N is loaded sequentially.
* It turns out the QN block memory is not needed at all. After we obtain
the next word of QN, we immediately calculate SN. After that, QN
can be discarded; there is no need to store it.
* Currently there are two wide memories, T and PE_C_OUT. XST throws
weird warnings about multi-port RAM before finally deciding
to implement them using flip-flops. Those memories should be turned
into FIFOs to simplify the design and not confuse XST.
---
src/rtl/modexpa7_systolic_multiplier.v | 239 ++++++++++++++++++++++++++++++++-
src/tb/tb_systolic_multiplier.v | 1 -
2 files changed, 234 insertions(+), 6 deletions(-)
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v
index 56e7be3..513b5aa 100644
--- a/src/rtl/modexpa7_systolic_multiplier.v
+++ b/src/rtl/modexpa7_systolic_multiplier.v
@@ -118,6 +118,10 @@ module modexpa7_systolic_multiplier #
localparam [ 7: 0] FSM_STATE_MULT_Q_N_RELOAD = 8'h63;
localparam [ 7: 0] FSM_STATE_MULT_Q_N_FINAL = 8'h64;
+ localparam [ 7: 0] FSM_STATE_SAVE_START = 8'h71;
+ localparam [ 7: 0] FSM_STATE_SAVE_WRITE = 8'h72;
+ localparam [ 7: 0] FSM_STATE_SAVE_FINAL = 8'h73;
+
localparam [ 7: 0] FSM_STATE_STOP = 8'hFF;
//
@@ -271,7 +275,7 @@ module modexpa7_systolic_multiplier #
//
// Loader currently stores B, N_COEFF and N, it can be coded another way
- // to initially stire B, then AB, then Q. Some memory can be saved thay way.
+ // to initially store B, then AB, then Q. Some memory can be saved that way.
// Maybe later...
//
@@ -324,6 +328,9 @@ module modexpa7_systolic_multiplier #
reg [OPERAND_ADDR_WIDTH :0] ab_addr_ext;
reg [OPERAND_ADDR_WIDTH-1:0] q_addr;
reg [OPERAND_ADDR_WIDTH :0] qn_addr_ext;
+ reg [OPERAND_ADDR_WIDTH-1:0] s_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] sn_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] r_addr;
/* handy increment values */
wire [OPERAND_ADDR_WIDTH-1:0] a_addr_next = a_addr + 1'b1;
@@ -333,6 +340,9 @@ module modexpa7_systolic_multiplier #
wire [OPERAND_ADDR_WIDTH :0] ab_addr_ext_next = ab_addr_ext + 1'b1;
wire [OPERAND_ADDR_WIDTH-1:0] q_addr_next = q_addr + 1'b1;
wire [OPERAND_ADDR_WIDTH :0] qn_addr_ext_next = qn_addr_ext + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] s_addr_next = s_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] sn_addr_next = sn_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] r_addr_next = r_addr + 1'b1;
/* handy stop flags */
wire a_addr_done = (a_addr == bram_addr_last) ? 1'b1 : 1'b0;
@@ -342,6 +352,9 @@ module modexpa7_systolic_multiplier #
wire ab_addr_ext_done = (ab_addr_ext == bram_addr_ext_last) ? 1'b1 : 1'b0;
wire q_addr_done = (q_addr == bram_addr_last) ? 1'b1 : 1'b0;
wire qn_addr_ext_done = (qn_addr_ext == bram_addr_ext_last) ? 1'b1 : 1'b0;
+ wire s_addr_done = (s_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire sn_addr_done = (sn_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0;
/* delayed B address */
reg [OPERAND_ADDR_WIDTH-1:0] b_addr_dly;
@@ -358,9 +371,16 @@ module modexpa7_systolic_multiplier #
assign b_bram_addr = b_addr;
assign n_coeff_bram_addr = n_coeff_addr;
assign n_bram_addr = n_addr;
+ assign r_bram_addr = r_addr;
//
+ // Flag
+ //
+ reg flag_select_s;
+
+
+ //
// Memory Address Control Logic
//
always @(posedge clk) begin
@@ -375,6 +395,20 @@ module modexpa7_systolic_multiplier #
FSM_STATE_LOAD_N_SHIFT: n_addr <= n_addr_next;
endcase
//
+ case (fsm_state)
+ FSM_STATE_MULT_Q_N_RELOAD:
+ if (qn_addr_ext == {1'b0, bram_addr_last})
+ n_addr <= bram_addr_zero;
+ else if (qn_addr_ext > {1'b0, bram_addr_last})
+ n_addr <= n_addr_next;
+
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_SAVE_START: r_addr <= bram_addr_zero;
+ FSM_STATE_SAVE_WRITE: r_addr <= r_addr_next;
+ endcase
+ //
case (fsm_next_state)
FSM_STATE_MULT_A_B_START: a_addr <= bram_addr_zero;
FSM_STATE_MULT_A_B_RELOAD: a_addr <= !a_addr_done ? a_addr_next : a_addr;
@@ -391,16 +425,28 @@ module modexpa7_systolic_multiplier #
reg [31: 0] ab_data_in;
reg [31: 0] q_data_in;
reg [31: 0] qn_data_in;
+ wire [31: 0] s_data_in;
+ wire [31: 0] sn_data_in;
+ reg [31: 0] r_data_in;
/* memory outputs */
wire [31: 0] ab_data_out;
wire [31: 0] q_data_out;
wire [31: 0] qn_data_out;
+ wire [31: 0] s_data_out;
+ wire [31: 0] sn_data_out;
/* write enables */
reg ab_wren;
reg q_wren;
reg qn_wren;
+ reg s_wren;
+ reg sn_wren;
+ reg r_wren;
+
+ /* map */
+ assign r_bram_in = r_data_in;
+ assign r_bram_wr = r_wren;
bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH+1))
bram_ab (.clk(clk), .a_addr(ab_addr_ext), .a_wr(ab_wren), .a_in(ab_data_in), .a_out(ab_data_out));
@@ -411,6 +457,12 @@ module modexpa7_systolic_multiplier #
bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH+1))
bram_qn (.clk(clk), .a_addr(qn_addr_ext), .a_wr(qn_wren), .a_in(qn_data_in), .a_out(qn_data_out));
+ bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_s (.clk(clk), .a_addr(s_addr), .a_wr(s_wren), .a_in(s_data_in), .a_out(s_data_out));
+
+ bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_sn (.clk(clk), .a_addr(sn_addr), .a_wr(sn_wren), .a_in(sn_data_in), .a_out(sn_data_out));
+
//
// Wide Operand Loader
@@ -646,13 +698,46 @@ module modexpa7_systolic_multiplier #
case (fsm_state)
FSM_STATE_MULT_A_B_START: ab_addr_ext <= bram_addr_ext_zero;
FSM_STATE_MULT_AB_N_COEFF_START: q_addr <= bram_addr_zero;
- FSM_STATE_MULT_Q_N_START: qn_addr_ext <= bram_addr_ext_zero;
+ FSM_STATE_MULT_Q_N_START: begin qn_addr_ext <= bram_addr_ext_zero;
+ ab_addr_ext <= bram_addr_ext_zero;
+ end
FSM_STATE_MULT_A_B_RELOAD: ab_addr_ext <= ab_addr_ext_next;
FSM_STATE_MULT_AB_N_COEFF_RELOAD: q_addr <= q_addr_next;
- FSM_STATE_MULT_Q_N_RELOAD: qn_addr_ext <= qn_addr_ext_next;
+ FSM_STATE_MULT_Q_N_RELOAD: begin qn_addr_ext <= qn_addr_ext_next;
+ ab_addr_ext <= ab_addr_ext_next;
+ end
+ endcase
+ //
+ case (fsm_state)
+
+ FSM_STATE_MULT_Q_N_RELOAD: begin
+ if (qn_addr_ext == {1'b0, bram_addr_last}) begin
+ s_addr <= bram_addr_zero;
+ sn_addr <= bram_addr_zero;
+ end
+
+ if ((qn_addr_ext > {1'b0, bram_addr_last}) && (qn_addr_ext < bram_addr_ext_last)) begin
+ s_addr <= s_addr_next;
+ sn_addr <= sn_addr_next;
+ end
+
+ if (qn_addr_ext == bram_addr_ext_last) begin
+ s_addr <= bram_addr_zero;
+ sn_addr <= bram_addr_zero;
+ end
+
+ end
+
+ FSM_STATE_MULT_Q_N_FINAL,
+ FSM_STATE_SAVE_START,
+ FSM_STATE_SAVE_WRITE: begin
+ s_addr <= !s_addr_done ? s_addr_next : s_addr;
+ sn_addr <= !sn_addr_done ? sn_addr_next : sn_addr;
+ end
endcase
+
//
case (fsm_next_state)
FSM_STATE_MULT_AB_N_COEFF_START: ab_addr_ext <= bram_addr_ext_zero;
@@ -692,7 +777,12 @@ module modexpa7_systolic_multiplier #
qn_wren <= 1'b0;
qn_data_in <= 32'hXXXXXXXX;
end
-
+ //
+ case (fsm_state)
+ FSM_STATE_SAVE_START: r_wren <= 1'b1;
+ FSM_STATE_SAVE_WRITE: r_wren <= ~r_addr_done;
+ default: r_wren <= 1'b0;
+ endcase
//
end
@@ -816,6 +906,140 @@ module modexpa7_systolic_multiplier #
//
end
+
+ //
+ // Adder
+ //
+ /*
+ * This adder is used to calculate S = AB + QN.
+ *
+ */
+ reg add1_ce; // clock enable
+ reg [31: 0] add1_s; // sum output
+ wire add1_c_in; // carry input
+ wire [31: 0] add1_a; // A-input
+ reg [31: 0] add1_b; // B-input
+ reg add1_c_in_mask; // flag to not carry anything into the very first word
+ reg add1_c_out; // carry output
+
+ /* add masking into carry feedback chain */
+ assign add1_c_in = add1_c_out & ~add1_c_in_mask;
+
+ /* mask carry for the very first word of N */
+ //always @(posedge clk) add1_c_in_mask <= (fsm_next_state == FSM_STATE_INIT_2) ? 1'b1 : 1'b0;
+
+ always @(posedge clk)
+ //
+ if (add1_ce)
+ //
+ {add1_c_out, add1_s} <= {{1{1'b0}}, add1_a} + {{1{1'b0}}, add1_b} + {{32{1'b0}}, add1_c_in};
+
+ assign add1_a = qn_data_in;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
+ add1_b <= shreg_done_latency_dly ? ab_data_out : 32'hXXXXXXXX;
+ else
+ add1_b <= 32'hXXXXXXXX;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
+ add1_c_in_mask <= (shreg_done_latency_dly && (ab_addr_ext == bram_addr_ext_zero)) ? 1'b1 : 1'b0;
+ else
+ add1_c_in_mask <= 1'b0;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
+ add1_ce <= shreg_done_latency_dly;
+ else
+ add1_ce <= 1'b0;
+
+
+ assign s_data_in = add1_s;
+ assign sn_data_in = sub1_d;
+
+ always @(posedge clk) begin
+ //
+ s_wren <= add1_ce;
+ sn_wren <= sub1_ce;
+ end
+
+
+
+ //
+ // Subtractor
+ //
+ /*
+ * This subtractor is used to calculate SN = S - N.
+ *
+ */
+ reg sub1_ce; // clock enable
+ reg [31: 0] sub1_d; // difference output
+ wire sub1_b_in; // borrow input
+ wire [31: 0] sub1_a; // A-input
+ reg [31: 0] sub1_b; // B-input
+ reg sub1_b_in_mask; // flag to not borrow anything from the very first word
+ reg sub1_b_out; // borrow output
+
+ /* add masking into borrow feedback chain */
+ assign sub1_b_in = sub1_b_out & ~sub1_b_in_mask;
+
+ always @(posedge clk)
+ //
+ if (sub1_ce)
+ //
+ {sub1_b_out, sub1_d} <= {{1{1'b0}}, sub1_a} - {{1{1'b0}}, sub1_b} - {{32{1'b0}}, sub1_b_in};
+
+ assign sub1_a = add1_s;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
+ sub1_b <= add1_ce ? n_bram_out : 32'hXXXXXXXX;
+ else
+ sub1_b <= 32'hXXXXXXXX;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
+ sub1_b_in_mask <= (add1_ce && ((qn_addr_ext - 1'b1) == {1'b0, bram_addr_last})) ? 1'b1 : 1'b0;
+ else
+ sub1_b_in_mask <= 1'b0;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
+ sub1_ce <= add1_ce && (qn_addr_ext > {1'b0, q_addr});
+ else
+ sub1_ce <= 1'b0;
+
+
+ assign s_data_in = add1_s;
+
+ always @(posedge clk)
+ //
+ s_wren <= add1_ce;
+
+
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_FINAL)
+ flag_select_s <= sub1_b_out & ~add1_c_out;
+
+
+ always @(posedge clk)
+ //
+ case (fsm_state)
+ FSM_STATE_SAVE_START,
+ FSM_STATE_SAVE_WRITE:
+ r_data_in <= flag_select_s ? s_data_out : sn_data_out;
+ endcase
+
+
//
// FSM Process
@@ -878,7 +1102,12 @@ module modexpa7_systolic_multiplier #
else fsm_next_state = FSM_STATE_MULT_Q_N_CRUNCH;
FSM_STATE_MULT_Q_N_RELOAD: if (qn_addr_ext_done) fsm_next_state = FSM_STATE_MULT_Q_N_FINAL;
else fsm_next_state = FSM_STATE_MULT_Q_N_CRUNCH;
- FSM_STATE_MULT_Q_N_FINAL: fsm_next_state = FSM_STATE_STOP;
+ FSM_STATE_MULT_Q_N_FINAL: fsm_next_state = FSM_STATE_SAVE_START;
+ //
+ FSM_STATE_SAVE_START: fsm_next_state = FSM_STATE_SAVE_WRITE;
+ FSM_STATE_SAVE_WRITE: if (r_addr_done) fsm_next_state = FSM_STATE_SAVE_FINAL;
+ else fsm_next_state = FSM_STATE_SAVE_WRITE;
+ FSM_STATE_SAVE_FINAL: fsm_next_state = FSM_STATE_STOP;
//
FSM_STATE_STOP: fsm_next_state = FSM_STATE_IDLE;
diff --git a/src/tb/tb_systolic_multiplier.v b/src/tb/tb_systolic_multiplier.v
index 9df492e..33d1e01 100644
--- a/src/tb/tb_systolic_multiplier.v
+++ b/src/tb/tb_systolic_multiplier.v
@@ -273,7 +273,6 @@ module tb_systolic_multiplier;
b = ab_modulo; // prepare for next round
- #1000000;
end
// final step, display results
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
More information about the Commits
mailing list