[Cryptech-Commits] [core/math/modexpa7] 01/03: CRT mode seems to work. Finally.

git at cryptech.is git at cryptech.is
Fri Aug 11 00:09:27 UTC 2017


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch systolic_crt
in repository core/math/modexpa7.

commit d6092c84f08118c093142b7d8c6f9a332774400f
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Fri Aug 11 02:37:40 2017 +0300

    CRT mode seems to work. Finally.
    
    Strangely enough non-CRT mode continues to work fine(!). One does not simply
    add a feature without breaking something else. Very suspicious...
---
 src/rtl/modexpa7_exponentiator.v             | 183 ++++++++++++++++++---------
 src/rtl/modexpa7_systolic_multiplier_array.v |   5 +-
 src/rtl/modexpa7_top.v                       |   6 +-
 src/rtl/modexpa7_wrapper.v                   |   2 +
 src/tb/tb_exponentiator.v                    | 136 +++++++++++++++++++-
 5 files changed, 261 insertions(+), 71 deletions(-)

diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v
index 93c8047..c50a881 100644
--- a/src/rtl/modexpa7_exponentiator.v
+++ b/src/rtl/modexpa7_exponentiator.v
@@ -456,109 +456,134 @@ module modexpa7_exponentiator #
 		// d_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_CALC_1:	d_addr <= bit_cnt[OPERAND_ADDR_WIDTH+4:5];
+			FSM_STATE_EXP_CALC_1:		d_addr <= bit_cnt[OPERAND_ADDR_WIDTH+4:5];
 		endcase
 		//
 		// f_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_MUL_INIT_1: 	f_addr <= bram_addr_zero;
+			FSM_STATE_MUL_INIT_1: 		f_addr <= bram_addr_zero;
 			FSM_STATE_MUL_INIT_2,
 			FSM_STATE_MUL_INIT_3,
-			FSM_STATE_MUL_INIT_4:	f_addr <= !f_addr_done ? f_addr_next : f_addr;
+			FSM_STATE_MUL_INIT_4:		f_addr <= !f_addr_done ? f_addr_next : f_addr;
+			//
+			FSM_STATE_CRT_INIT_B_1: 	f_addr <= bram_addr_zero;
+			FSM_STATE_CRT_INIT_B_2,
+			FSM_STATE_CRT_INIT_B_3,
+			FSM_STATE_CRT_INIT_B_4:		f_addr <= !f_addr_done ? f_addr_next : f_addr;
+			//
 		endcase
 		//
 		// r_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_SAVE_3:	r_addr <= bram_addr_zero;
-			FSM_STATE_EXP_SAVE_4:	r_addr <= r_addr_next;
+			FSM_STATE_EXP_SAVE_3:		r_addr <= bram_addr_zero;
+			FSM_STATE_EXP_SAVE_4:		r_addr <= r_addr_next;
 		endcase
 		//
 		// p_addr_wr
 		//
 		case (fsm_next_state)
 			//
-			FSM_STATE_MUL_INIT_3:	p_addr_wr <= bram_addr_zero;
-			FSM_STATE_MUL_INIT_4:	p_addr_wr <= p_addr_wr_next;
+			FSM_STATE_MUL_INIT_3:		p_addr_wr <= bram_addr_zero;
+			FSM_STATE_MUL_INIT_4:		p_addr_wr <= p_addr_wr_next;
+			//
+			FSM_STATE_CRT_INIT_B_3:		p_addr_wr <= bram_addr_zero;
+			FSM_STATE_CRT_INIT_B_4:		p_addr_wr <= p_addr_wr_next;
 			//
-			FSM_STATE_EXP_INIT_3:	p_addr_wr <= bram_addr_zero;
-			FSM_STATE_EXP_INIT_4:	p_addr_wr <= p_addr_wr_next;
+			FSM_STATE_EXP_INIT_3:		p_addr_wr <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_4:		p_addr_wr <= p_addr_wr_next;
 			//
-			FSM_STATE_EXP_FILL_3:	p_addr_wr <= bram_addr_zero;
-			FSM_STATE_EXP_FILL_4:	p_addr_wr <= p_addr_wr_next;
+			FSM_STATE_EXP_FILL_3:		p_addr_wr <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_4:		p_addr_wr <= p_addr_wr_next;
 		endcase
 		//
 		// t0_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_LOAD_3:	t0_addr <= bram_addr_zero;
-			FSM_STATE_EXP_LOAD_4:	t0_addr <= t0_addr_next;
+			FSM_STATE_EXP_LOAD_3:		t0_addr <= bram_addr_zero;
+			FSM_STATE_EXP_LOAD_4:		t0_addr <= t0_addr_next;
 			//
-			FSM_STATE_EXP_FILL_1:	t0_addr <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_1:		t0_addr <= bram_addr_zero;
 			FSM_STATE_EXP_FILL_2,
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:	t0_addr <= !t0_addr_done ? t0_addr_next : t0_addr;
+			FSM_STATE_EXP_FILL_4:		t0_addr <= !t0_addr_done ? t0_addr_next : t0_addr;
 		endcase		
 		//
 		// t1_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_INIT_3:	t1_addr <= bram_addr_zero;
-			FSM_STATE_EXP_INIT_4:	t1_addr <= t1_addr_next;
+			FSM_STATE_EXP_INIT_3:		t1_addr <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_4:		t1_addr <= t1_addr_next;
 			//
-			FSM_STATE_EXP_LOAD_1:	t1_addr <= bram_addr_zero;
+			FSM_STATE_EXP_LOAD_1:		t1_addr <= bram_addr_zero;
 			FSM_STATE_EXP_LOAD_2,
 			FSM_STATE_EXP_LOAD_3,
-			FSM_STATE_EXP_LOAD_4:	t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
+			FSM_STATE_EXP_LOAD_4:		t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
 			//
-			FSM_STATE_EXP_FILL_3:	t1_addr <= bram_addr_zero;
-			FSM_STATE_EXP_FILL_4:	t1_addr <= t1_addr_next;
+			FSM_STATE_EXP_FILL_3:		t1_addr <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_4:		t1_addr <= t1_addr_next;
 			//
-			FSM_STATE_EXP_SAVE_1:	t1_addr <= bram_addr_zero;
+			FSM_STATE_EXP_SAVE_1:		t1_addr <= bram_addr_zero;
 			FSM_STATE_EXP_SAVE_2,
 			FSM_STATE_EXP_SAVE_3,
-			FSM_STATE_EXP_SAVE_4:	t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
+			FSM_STATE_EXP_SAVE_4:		t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
 		endcase
 		//
 		// t2_addr_wr
 		//
 		case (fsm_next_state)
 			//
-			FSM_STATE_MUL_INIT_3:	t2_addr_wr <= bram_addr_zero;
-			FSM_STATE_MUL_INIT_4:	t2_addr_wr <= t2_addr_wr_next;
+			FSM_STATE_MUL_INIT_3:		t2_addr_wr <= bram_addr_zero;
+			FSM_STATE_MUL_INIT_4:		t2_addr_wr <= t2_addr_wr_next;
 			//
-			FSM_STATE_CRT_INIT_A_3:	t2_addr_wr <= bram_addr_zero;
-			FSM_STATE_CRT_INIT_A_4:	t2_addr_wr <= t2_addr_wr_next;
+			FSM_STATE_CRT_INIT_A_3:		t2_addr_wr <= bram_addr_zero;
+			FSM_STATE_CRT_INIT_A_4:		t2_addr_wr <= t2_addr_wr_next;
 			//
-			FSM_STATE_EXP_INIT_3:	t2_addr_wr <= bram_addr_zero;
-			FSM_STATE_EXP_INIT_4:	t2_addr_wr <= t2_addr_wr_next;
+			FSM_STATE_CRT_INIT_B_3:		t2_addr_wr <= bram_addr_zero;
+			FSM_STATE_CRT_INIT_B_4:		t2_addr_wr <= t2_addr_wr_next;
 			//
-			FSM_STATE_EXP_FILL_3:	t2_addr_wr <= bram_addr_zero;
-			FSM_STATE_EXP_FILL_4:	t2_addr_wr <= t2_addr_wr_next;
+			FSM_STATE_CRT_INIT_C_3:		t2_addr_wr <= bram_addr_zero;
+			FSM_STATE_CRT_INIT_C_4:		t2_addr_wr <= t2_addr_wr_next;
+			//
+			FSM_STATE_EXP_INIT_3:		t2_addr_wr <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_4:		t2_addr_wr <= t2_addr_wr_next;
+			//
+			FSM_STATE_EXP_FILL_3:		t2_addr_wr <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_4:		t2_addr_wr <= t2_addr_wr_next;
 		endcase		
 		//
 		// pp_addr_rd
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_FILL_1:	pp_addr_rd <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_1:		pp_addr_rd <= bram_addr_zero;
 			FSM_STATE_EXP_FILL_2,
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:	pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd;
+			FSM_STATE_EXP_FILL_4:		pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd;
 		endcase
 		//
 		// tp_addr_rd
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_INIT_1: 	tp_addr_rd <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_1: 		tp_addr_rd <= bram_addr_zero;
 			FSM_STATE_EXP_INIT_2,
 			FSM_STATE_EXP_INIT_3,
-			FSM_STATE_EXP_INIT_4:	tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
+			FSM_STATE_EXP_INIT_4:		tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
+			//
+			FSM_STATE_CRT_INIT_B_1: 	tp_addr_rd <= bram_addr_zero;
+			FSM_STATE_CRT_INIT_B_2,
+			FSM_STATE_CRT_INIT_B_3,
+			FSM_STATE_CRT_INIT_B_4:		tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
 			//
-			FSM_STATE_EXP_FILL_1:	tp_addr_rd <= bram_addr_zero;
+			FSM_STATE_CRT_INIT_C_1: 	tp_addr_rd <= bram_addr_zero;
+			FSM_STATE_CRT_INIT_C_2,
+			FSM_STATE_CRT_INIT_C_3,
+			FSM_STATE_CRT_INIT_C_4:		tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
+			//
+			FSM_STATE_EXP_FILL_1:		tp_addr_rd <= bram_addr_zero;
 			FSM_STATE_EXP_FILL_2,
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:	tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
+			FSM_STATE_EXP_FILL_4:		tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
 		endcase
 		//
 	end
@@ -573,8 +598,8 @@ module modexpa7_exponentiator #
 		//
 		case (fsm_next_state)
 			FSM_STATE_EXP_SAVE_3,
-			FSM_STATE_EXP_SAVE_4:	r_wren <= 1'b1;
-			default:						r_wren <= 1'b0;
+			FSM_STATE_EXP_SAVE_4:		r_wren <= 1'b1;
+			default:							r_wren <= 1'b0;
 		endcase
 		//
 		// p_wren
@@ -582,19 +607,21 @@ module modexpa7_exponentiator #
 		case (fsm_next_state)
 			FSM_STATE_MUL_INIT_3,
 			FSM_STATE_MUL_INIT_4,
+			FSM_STATE_CRT_INIT_B_3,
+			FSM_STATE_CRT_INIT_B_4,
 			FSM_STATE_EXP_INIT_3,
 			FSM_STATE_EXP_INIT_4,
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:	p_wren <= 1'b1;
-			default:						p_wren <= 1'b0;
+			FSM_STATE_EXP_FILL_4:		p_wren <= 1'b1;
+			default:							p_wren <= 1'b0;
 		endcase
 		//
 		// t0_wren
 		//
 		case (fsm_next_state)
 			FSM_STATE_EXP_LOAD_3,		
-			FSM_STATE_EXP_LOAD_4:	t0_wren <= 1'b1;
-			default:						t0_wren <= 1'b0;
+			FSM_STATE_EXP_LOAD_4:		t0_wren <= 1'b1;
+			default:							t0_wren <= 1'b0;
 		endcase
 		//
 		// t1_wren
@@ -603,8 +630,8 @@ module modexpa7_exponentiator #
 			FSM_STATE_EXP_INIT_3,		
 			FSM_STATE_EXP_INIT_4,
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:	t1_wren <= 1'b1;
-			default:						t1_wren <= 1'b0;
+			FSM_STATE_EXP_FILL_4:		t1_wren <= 1'b1;
+			default:							t1_wren <= 1'b0;
 		endcase
 		//
 		// t2_wren
@@ -614,11 +641,15 @@ module modexpa7_exponentiator #
 			FSM_STATE_MUL_INIT_4,
 			FSM_STATE_CRT_INIT_A_3,
 			FSM_STATE_CRT_INIT_A_4,
+			FSM_STATE_CRT_INIT_B_3,
+			FSM_STATE_CRT_INIT_B_4,
+			FSM_STATE_CRT_INIT_C_3,
+			FSM_STATE_CRT_INIT_C_4,
 			FSM_STATE_EXP_INIT_3,		
 			FSM_STATE_EXP_INIT_4,
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:	t2_wren <= 1'b1;
-			default:						t2_wren <= 1'b0;
+			FSM_STATE_EXP_FILL_4:		t2_wren <= 1'b1;
+			default:							t2_wren <= 1'b0;
 		endcase
 		//
 	end
@@ -633,8 +664,8 @@ module modexpa7_exponentiator #
 		//
 		case (fsm_next_state)
 			FSM_STATE_EXP_SAVE_3,
-			FSM_STATE_EXP_SAVE_4:	r_data_in	<= t1_data_out;
-			default:						r_data_in	<= 32'dX;
+			FSM_STATE_EXP_SAVE_4:		r_data_in	<= t1_data_out;
+			default:							r_data_in	<= 32'dX;
 		endcase		
 		//
 		// p_data_in
@@ -642,34 +673,37 @@ module modexpa7_exponentiator #
 		case (fsm_next_state)
 			//
 			FSM_STATE_MUL_INIT_3,
-			FSM_STATE_MUL_INIT_4:	p_data_in	<= f_bram_out;
+			FSM_STATE_MUL_INIT_4:		p_data_in	<= f_bram_out;
+			//
+			FSM_STATE_CRT_INIT_B_3,
+			FSM_STATE_CRT_INIT_B_4:		p_data_in	<= f_bram_out;
 			//
 			FSM_STATE_EXP_INIT_3,
-			FSM_STATE_EXP_INIT_4:	p_data_in	<= tp_data_out;
+			FSM_STATE_EXP_INIT_4:		p_data_in	<= tp_data_out;
 			//
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:	p_data_in	<= pp_data_out;
+			FSM_STATE_EXP_FILL_4:		p_data_in	<= pp_data_out;
 			//
-			default:						p_data_in	<= 32'dX;
+			default:							p_data_in	<= 32'dX;
 		endcase
 		//
 		// t0_data_in
 		//
 		case (fsm_next_state)
 			FSM_STATE_EXP_LOAD_3,
-			FSM_STATE_EXP_LOAD_4:	t0_data_in <= t1_data_out;
-			default:						t0_data_in <= 32'dX;
+			FSM_STATE_EXP_LOAD_4:		t0_data_in <= t1_data_out;
+			default:							t0_data_in <= 32'dX;
 		endcase		
 		//
 		// t1_data_in
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_INIT_3:	t1_data_in <= 32'd1;
-			FSM_STATE_EXP_INIT_4:	t1_data_in <= 32'd0;
+			FSM_STATE_EXP_INIT_3:		t1_data_in <= 32'd1;
+			FSM_STATE_EXP_INIT_4:		t1_data_in <= 32'd0;
 			//
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:	t1_data_in <= flag_update_r ? tp_data_out : t0_data_out;
-			default:						t1_data_in <= 32'dX;
+			FSM_STATE_EXP_FILL_4:		t1_data_in <= flag_update_r ? tp_data_out : t0_data_out;
+			default:							t1_data_in <= 32'dX;
 		endcase		
 		//
 		// t2_data_in
@@ -681,7 +715,12 @@ module modexpa7_exponentiator #
 			//
 			FSM_STATE_CRT_INIT_A_3,
 			FSM_STATE_CRT_INIT_A_4:		t2_data_in <= m_bram_out;
-
+			//
+			FSM_STATE_CRT_INIT_B_3,
+			FSM_STATE_CRT_INIT_B_4:		t2_data_in <= tp_data_out;
+			//
+			FSM_STATE_CRT_INIT_C_3,
+			FSM_STATE_CRT_INIT_C_4:		t2_data_in <= tp_data_out;
 			//
 			FSM_STATE_EXP_INIT_3:		t2_data_in <= 32'd1;
 			FSM_STATE_EXP_INIT_4:		t2_data_in <= 32'd0;
@@ -828,7 +867,29 @@ module modexpa7_exponentiator #
 			FSM_STATE_CRT_CALC_A_1:										fsm_next_state = FSM_STATE_CRT_CALC_A_2;
 			FSM_STATE_CRT_CALC_A_2:	if (mul_rdy_tp)				fsm_next_state = FSM_STATE_CRT_CALC_A_3;
 											else								fsm_next_state = FSM_STATE_CRT_CALC_A_2;
-			FSM_STATE_CRT_CALC_A_3:										fsm_next_state = FSM_STATE_EXP_INIT_1;
+			FSM_STATE_CRT_CALC_A_3:										fsm_next_state = FSM_STATE_CRT_INIT_B_1;
+			//
+			FSM_STATE_CRT_INIT_B_1:										fsm_next_state = FSM_STATE_CRT_INIT_B_2;
+			FSM_STATE_CRT_INIT_B_2:										fsm_next_state = FSM_STATE_CRT_INIT_B_3;
+			FSM_STATE_CRT_INIT_B_3:										fsm_next_state = FSM_STATE_CRT_INIT_B_4;
+			FSM_STATE_CRT_INIT_B_4:	if (t2_addr_wr_done)			fsm_next_state = FSM_STATE_CRT_CALC_B_1;
+											else								fsm_next_state = FSM_STATE_CRT_INIT_B_4;
+			//
+			FSM_STATE_CRT_CALC_B_1:										fsm_next_state = FSM_STATE_CRT_CALC_B_2;
+			FSM_STATE_CRT_CALC_B_2:	if (mul_rdy_tp)				fsm_next_state = FSM_STATE_CRT_CALC_B_3;
+											else								fsm_next_state = FSM_STATE_CRT_CALC_B_2;
+			FSM_STATE_CRT_CALC_B_3:										fsm_next_state = FSM_STATE_CRT_INIT_C_1;
+			//
+			FSM_STATE_CRT_INIT_C_1:										fsm_next_state = FSM_STATE_CRT_INIT_C_2;
+			FSM_STATE_CRT_INIT_C_2:										fsm_next_state = FSM_STATE_CRT_INIT_C_3;
+			FSM_STATE_CRT_INIT_C_3:										fsm_next_state = FSM_STATE_CRT_INIT_C_4;
+			FSM_STATE_CRT_INIT_C_4:	if (t2_addr_wr_done)			fsm_next_state = FSM_STATE_CRT_CALC_C_1;
+											else								fsm_next_state = FSM_STATE_CRT_INIT_C_4;
+			//
+			FSM_STATE_CRT_CALC_C_1:										fsm_next_state = FSM_STATE_CRT_CALC_C_2;
+			FSM_STATE_CRT_CALC_C_2:	if (mul_rdy_tp)				fsm_next_state = FSM_STATE_CRT_CALC_C_3;
+											else								fsm_next_state = FSM_STATE_CRT_CALC_C_2;
+			FSM_STATE_CRT_CALC_C_3:										fsm_next_state = FSM_STATE_EXP_INIT_1;
 			//
 			//
 			FSM_STATE_EXP_IDLE:		if (ena_trig)					fsm_next_state = crt ?
diff --git a/src/rtl/modexpa7_systolic_multiplier_array.v b/src/rtl/modexpa7_systolic_multiplier_array.v
index 3280010..9b2cd94 100644
--- a/src/rtl/modexpa7_systolic_multiplier_array.v
+++ b/src/rtl/modexpa7_systolic_multiplier_array.v
@@ -443,7 +443,7 @@ module modexpa7_systolic_multiplier_array #
 	always @(posedge clk)
 		//
 		if ((fsm_state == FSM_STATE_MULT_CRUNCH) && shreg_done_latency_dly)
-			p_data_in <= pe_p[0];
+			p_data_in <= crt ? pe_a_wide[31:0] : pe_p[0];
 
 		/*
 		 * Block Memory Address Control
@@ -457,7 +457,8 @@ module modexpa7_systolic_multiplier_array #
 		//
 		case (fsm_next_state)
 			FSM_STATE_MULT_START:	a_addr <= bram_addr_zero;
-			FSM_STATE_MULT_RELOAD:	crt ? //a_addr <= !a_addr_done ? a_addr_next : a_addr;
+			FSM_STATE_MULT_RELOAD:	if (crt)		a_addr <= !a_addr_done_crt ? a_addr_next : a_addr;
+											else			a_addr <= !a_addr_done     ? a_addr_next : a_addr;
 		endcase
 		//
 	end
diff --git a/src/rtl/modexpa7_top.v b/src/rtl/modexpa7_top.v
index 0c4eabe..ad101dd 100644
--- a/src/rtl/modexpa7_top.v
+++ b/src/rtl/modexpa7_top.v
@@ -35,7 +35,7 @@
 module modexpa7_top #
 	(
 		parameter OPERAND_ADDR_WIDTH		= 7,
-		parameter SYSTOLIC_ARRAY_POWER	= 4
+		parameter SYSTOLIC_ARRAY_POWER	= 1
 	)
 	(
 		input											clk,
@@ -47,6 +47,8 @@ module modexpa7_top #
 		input											next,
 		output										valid,
 
+		input											crt_mode,
+
 		input		[OPERAND_ADDR_WIDTH-1:0]	modulus_num_words,
 		input		[OPERAND_ADDR_WIDTH+4:0]	exponent_num_bits,
 
@@ -370,6 +372,8 @@ module modexpa7_top #
 		.ena						(exponent_ena),
 		.rdy						(exponent_rdy),
 		
+		.crt						(crt_mode),
+		
 		.m_bram_addr			(core_m_addr),
 		.d_bram_addr			(core_d_addr),
 		.f_bram_addr			(core_f_addr_rd),
diff --git a/src/rtl/modexpa7_wrapper.v b/src/rtl/modexpa7_wrapper.v
index 090ea8d..a4e2319 100644
--- a/src/rtl/modexpa7_wrapper.v
+++ b/src/rtl/modexpa7_wrapper.v
@@ -135,6 +135,8 @@ module modexpa7_wrapper #
 		
 		.next						(reg_control[CONTROL_NEXT_BIT]),
 		.valid					(reg_status[STATUS_VALID_BIT]),
+
+		.crt_mode				(reg_mode[MODE_CRT_BIT]),
 
 		.modulus_num_words	(modulus_num_words_core),
 		.exponent_num_bits	(exponent_num_bits_core),
diff --git a/src/tb/tb_exponentiator.v b/src/tb/tb_exponentiator.v
index 440fedc..de801ac 100644
--- a/src/tb/tb_exponentiator.v
+++ b/src/tb/tb_exponentiator.v
@@ -212,12 +212,15 @@ module tb_exponentiator;
 		#100;
 
 			// test "honest" exponentiation
-//		test_exponent_384(M_384, D_384, FACTOR_384, N_384, N_COEFF_384, S_384);
-//		test_exponent_512(M_512, D_512, FACTOR_512, N_512, N_COEFF_512, S_512);
+		test_exponent_384(M_384, D_384, FACTOR_384, N_384, N_COEFF_384, S_384);
+		test_exponent_512(M_512, D_512, FACTOR_512, N_512, N_COEFF_512, S_512);
 
 			// test crt mode
-		test_exponent_192(M_384, DP_192, FACTOR_P_192, P_192, P_COEFF_192, MP_192);
-		//test_exponent_192(M_384, DQ_192, FACTOR_Q_192, Q_192, Q_COEFF_192, MQ_192);
+		test_exponent_192_crt(M_384, DP_192, FACTOR_P_192, P_192, P_COEFF_192, MP_192);
+		test_exponent_192_crt(M_384, DQ_192, FACTOR_Q_192, Q_192, Q_COEFF_192, MQ_192);
+
+		test_exponent_256_crt(M_512, DP_256, FACTOR_P_256, P_256, P_COEFF_256, MP_256);
+		test_exponent_256_crt(M_512, DQ_256, FACTOR_Q_256, Q_256, Q_COEFF_256, MQ_256);
 		
 	end
       
@@ -313,7 +316,7 @@ module tb_exponentiator;
 		//
 	endtask
 
-	task test_exponent_192;
+	task test_exponent_192_crt;
 		//
 		input	[383:0] m;
 		input	[191:0] d;
@@ -355,6 +358,50 @@ module tb_exponentiator;
 			//
 		end
 		//
+	endtask
+	
+	task test_exponent_256_crt;
+		// Runs one exponentiation with crt = 1 and 256-bit d/f/n/n_coeff, then compares the result with s.
+		input	[511:0] m;
+		input	[255:0] d;
+		input [255:0] f;
+		input	[255:0] n;
+		input	[255:0] n_coeff;
+		input	[255:0] s;
+		reg   [255:0] r;
+		// NOTE(review): i is declared but not referenced anywhere in this task body.
+		integer i;
+		//
+		begin
+			// operand sizes; presumably encoded as (count - 1), i.e. 8 words / 256 bits -- TODO confirm against the core
+			n_num_words = 4'd7;								// set number of words
+			d_num_bits = 9'd255;								// set number of bits
+			//
+			crt = 1;												// enable crt mode (not cleared again by this task)
+			//
+			write_memory_256(m, d, f, n, n_coeff);		// fill memory
+			
+			ena = 1;												// start operation
+			#10;													// keep ena high for 10 time units
+			ena = 0;												// clear flag
+			
+			while (!rdy) #10;									// poll rdy every 10 time units until the operation completes
+			read_memory_256(r);								// get result from memory
+						
+			$display("    calculated: %x", r);			// display result
+			$display("    expected:   %x", s);			// display reference value
+							
+				// check calculated value (=== is case equality, so X/Z bits in r count as a mismatch)
+			if (r === s) begin
+				$display("        OK");
+				$display("SUCCESS: Test passed.");
+			end else begin
+				$display("        ERROR");
+				$display("FAILURE: Test not passed.");
+			end
+			//
+		end
+		//
 	endtask
 
 	//
@@ -516,6 +563,59 @@ module tb_exponentiator;
 	endtask
 
 
+		//
+		// write_memory_256
+		// Loads m (512 bits) and d, f, n, n_coeff (256 bits each) into the testbench memories, one 32-bit word per step.
+	task write_memory_256;
+		//
+		input	[511:0] m;
+		input	[255:0] d;
+		input	[255:0] f;
+		input	[255:0] n;
+		input	[255:0] n_coeff;
+		reg	[511:0] m_shreg;
+		reg	[255:0] f_shreg;
+		reg	[255:0] d_shreg;
+		reg	[255:0] n_shreg;
+		reg	[255:0] n_coeff_shreg;
+		//
+		begin
+			//
+			tb_mdfn_wren	= 1;			// start filling memories
+			m_shreg			= m;			// preload shift register
+			d_shreg			= d;			// preload shift register
+			f_shreg			= f;			// preload shift register
+			n_shreg			= n;			// preload shift register
+			n_coeff_shreg	= n_coeff;	// preload shift register
+			// w is declared outside this task. After 8 shifts the 256-bit registers hold only X, so later words of d/f/n/n_coeff are written as X.
+			for (w=0; w<NUM_WORDS_512; w=w+1) begin							// write NUM_WORDS_512 words (presumably 16, sized for m -- confirm)
+				tb_mdfn_addr		= w[3:0];											// set address (low 4 bits of w)
+				tb_m_data			= m_shreg[31:0];									// set data (current lowest word)
+				tb_d_data			= d_shreg[31:0];									// set data (current lowest word)
+				tb_f_data			= f_shreg[31:0];									// set data (current lowest word)
+				tb_n_data			= n_shreg[31:0];									// set data (current lowest word)
+				tb_n_coeff_data	= n_coeff_shreg[31:0];							// set data (current lowest word)
+				m_shreg				= {{32{1'bX}}, m_shreg[511:32]};				// shift right by one word, fill top with X
+				d_shreg				= {{32{1'bX}}, d_shreg[255:32]};				// shift right by one word, fill top with X
+				f_shreg				= {{32{1'bX}}, f_shreg[255:32]};				// shift right by one word, fill top with X
+				n_shreg				= {{32{1'bX}}, n_shreg[255:32]};				// shift right by one word, fill top with X
+				n_coeff_shreg		= {{32{1'bX}}, n_coeff_shreg[255:32]};		// shift right by one word, fill top with X
+				#10;																			// wait for 1 clock tick
+			end
+			// drive X on the write port so stale address/data values cannot go unnoticed
+			tb_mdfn_addr		= {4{1'bX}};	// wipe addresses
+			tb_m_data			= {32{1'bX}};	// wipe data
+			tb_d_data			= {32{1'bX}};	// wipe data
+			tb_f_data			= {32{1'bX}};	// wipe data
+			tb_n_data			= {32{1'bX}};	// wipe data
+			tb_n_coeff_data	= {32{1'bX}};	// wipe data
+			tb_mdfn_wren	= 0;				// stop filling memory
+			//
+		end
+		//
+	endtask
+
+
 	//
 	// read_memory_384
 	//
@@ -584,8 +684,30 @@ module tb_exponentiator;
 			//
 		end
 		//
-	endtask
-
+	endtask
+	
+	//
+	// read_memory_256
+	// Reads the result memory word by word through tb_r_addr / tb_r_data and returns it as one 256-bit value.
+	task read_memory_256;
+		//
+		output	[255:0] r;
+		reg		[255:0] r_shreg;
+		//
+		begin
+			// w is declared outside this task; NUM_WORDS_512/2 is presumably 8 words (256 bits) -- confirm
+			for (w=0; w<NUM_WORDS_512/2; w=w+1) begin		// read result word-by-word
+				tb_r_addr	= w[3:0];							// set address (low 4 bits of w)
+				#10;													// wait for 1 clock tick
+				r_shreg = {tb_r_data, r_shreg[255:32]};	// new word enters at the top, earlier words move down by 32 bits
+			end
+			// r_shreg is never initialized here; it is fully overwritten only if the loop shifts in all 256 bits
+			tb_r_addr = {4{1'bX}};								// wipe address
+			r = r_shreg;											// return
+			//
+		end
+		//
+	endtask
 
 endmodule
 



More information about the Commits mailing list