[Cryptech-Commits] [core/math/modexpa7] 02/03: Added pre-multiplication step. Added 512-bit testbench.

git at cryptech.is git at cryptech.is
Wed Jul 19 20:54:52 UTC 2017


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch systolic
in repository core/math/modexpa7.

commit 7486edd118f6d69c4817040e53240baf24628dd7
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Wed Jul 19 21:09:48 2017 +0300

    Added pre-multiplication step.
    Added 512-bit testbench.
---
 src/rtl/modexpa7_exponentiator.v | 212 ++++++++++++++++++++++++---------------
 src/tb/tb_exponentiator.v        | 151 +++++++++++++++++-----------
 2 files changed, 227 insertions(+), 136 deletions(-)

diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v
index e34a7ab..cda6882 100644
--- a/src/rtl/modexpa7_exponentiator.v
+++ b/src/rtl/modexpa7_exponentiator.v
@@ -60,6 +60,7 @@ module modexpa7_exponentiator #
 		
 		output	[OPERAND_ADDR_WIDTH-1:0]	m_bram_addr,
 		output	[OPERAND_ADDR_WIDTH-1:0]	d_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	f_bram_addr,
 		output	[OPERAND_ADDR_WIDTH-1:0]	n1_bram_addr,
 		output	[OPERAND_ADDR_WIDTH-1:0]	n2_bram_addr,
 		output	[OPERAND_ADDR_WIDTH-1:0]	n_coeff1_bram_addr,
@@ -68,6 +69,7 @@ module modexpa7_exponentiator #
 
 		input		[                32-1:0]	m_bram_out,
 		input		[                32-1:0]	d_bram_out,
+		input		[                32-1:0]	f_bram_out,
 		input		[                32-1:0]	n1_bram_out,
 		input		[                32-1:0]	n2_bram_out,
 		input		[                32-1:0]	n_coeff1_bram_out,
@@ -84,7 +86,7 @@ module modexpa7_exponentiator #
 		//
 		// FSM Declaration
 		//
-	localparam	[ 7: 0]	FSM_STATE_EXP_IDLE		= 8'h00;
+	localparam	[ 7: 0]	FSM_STATE_EXP_IDLE	= 8'h00;
 	//
 	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_1	= 8'hA1;
 	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_2	= 8'hA2;
@@ -121,7 +123,7 @@ module modexpa7_exponentiator #
 	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_2	= 8'h22;
 	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_3	= 8'h23;
 	//
-	localparam	[ 7: 0]	FSM_STATE_EXP_STOP		= 8'hFF;
+	localparam	[ 7: 0]	FSM_STATE_EXP_STOP	= 8'hFF;
 
 
 	/*
@@ -209,7 +211,7 @@ module modexpa7_exponentiator #
 		/* save number of words in a and b when new operation starts */
 	always @(posedge clk)
 		//
-		if (fsm_next_state == FSM_STATE_EXP_INIT_1)
+		if ((fsm_state == FSM_STATE_EXP_IDLE) && ena_trig)
 			{m_num_words_latch, d_num_bits_latch} <= {m_num_words, d_num_bits};
 			
 
@@ -231,6 +233,7 @@ module modexpa7_exponentiator #
 		/* address registers */
 	reg	[OPERAND_ADDR_WIDTH-1:0]	m_addr;
 	reg	[OPERAND_ADDR_WIDTH-1:0]	d_addr;
+	reg	[OPERAND_ADDR_WIDTH-1:0]	f_addr;
 	reg	[OPERAND_ADDR_WIDTH-1:0]	r_addr;
 	reg	[OPERAND_ADDR_WIDTH-1:0]	t0_addr;
 	reg	[OPERAND_ADDR_WIDTH-1:0]	t1_addr;
@@ -248,6 +251,7 @@ module modexpa7_exponentiator #
 		/* handy increment values */
 	wire	[OPERAND_ADDR_WIDTH-1:0]	m_addr_next			= m_addr + 1'b1;
 	wire	[OPERAND_ADDR_WIDTH-1:0]	d_addr_next			= d_addr + 1'b1;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	f_addr_next			= f_addr + 1'b1;
 	wire	[OPERAND_ADDR_WIDTH-1:0]	r_addr_next			= r_addr + 1'b1;
 	wire	[OPERAND_ADDR_WIDTH-1:0]	t0_addr_next		= t0_addr + 1'b1;
 	wire	[OPERAND_ADDR_WIDTH-1:0]	t1_addr_next		= t1_addr + 1'b1;
@@ -259,6 +263,7 @@ module modexpa7_exponentiator #
 		/* handy stop flags */
 	wire	m_addr_done			= (m_addr     == bram_addr_last) ? 1'b1 : 1'b0;
 	wire	d_addr_done			= (d_addr     == bram_addr_last) ? 1'b1 : 1'b0;
+	wire	f_addr_done			= (f_addr     == bram_addr_last) ? 1'b1 : 1'b0;
 	wire	r_addr_done			= (r_addr     == bram_addr_last) ? 1'b1 : 1'b0;
 	wire	t0_addr_done		= (t0_addr    == bram_addr_last) ? 1'b1 : 1'b0;
 	wire	t1_addr_done		= (t1_addr    == bram_addr_last) ? 1'b1 : 1'b0;
@@ -270,6 +275,7 @@ module modexpa7_exponentiator #
 		/* map registers to top-level ports */
 	assign m_bram_addr = m_addr;
 	assign d_bram_addr = d_addr;
+	assign f_bram_addr = f_addr;
 	assign r_bram_addr = r_addr;
 	
 		//
@@ -386,93 +392,115 @@ module modexpa7_exponentiator #
 		// m_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_INIT_1: 	m_addr <= bram_addr_zero;
-			FSM_STATE_EXP_INIT_2,
-			FSM_STATE_EXP_INIT_3,
-			FSM_STATE_EXP_INIT_4:		m_addr <= !m_addr_done ? m_addr_next : m_addr;
+			FSM_STATE_MUL_INIT_1: 	m_addr <= bram_addr_zero;
+			FSM_STATE_MUL_INIT_2,
+			FSM_STATE_MUL_INIT_3,
+			FSM_STATE_MUL_INIT_4:	m_addr <= !m_addr_done ? m_addr_next : m_addr;
 		endcase
 		//
 		// d_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_CALC_1:		d_addr <= bit_cnt[OPERAND_ADDR_WIDTH+4:5];
+			FSM_STATE_EXP_CALC_1:	d_addr <= bit_cnt[OPERAND_ADDR_WIDTH+4:5];
+		endcase
+		//
+		// f_addr
+		//
+		case (fsm_next_state)
+			FSM_STATE_MUL_INIT_1: 	f_addr <= bram_addr_zero;
+			FSM_STATE_MUL_INIT_2,
+			FSM_STATE_MUL_INIT_3,
+			FSM_STATE_MUL_INIT_4:	f_addr <= !f_addr_done ? f_addr_next : f_addr;
 		endcase
 		//
 		// r_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_SAVE_3:		r_addr <= bram_addr_zero;
-			FSM_STATE_EXP_SAVE_4:		r_addr <= r_addr_next;
+			FSM_STATE_EXP_SAVE_3:	r_addr <= bram_addr_zero;
+			FSM_STATE_EXP_SAVE_4:	r_addr <= r_addr_next;
 		endcase
 		//
 		// p_addr_wr
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_INIT_3:		p_addr_wr <= bram_addr_zero;
-			FSM_STATE_EXP_INIT_4:		p_addr_wr <= p_addr_wr_next;
 			//
-			FSM_STATE_EXP_FILL_3:		p_addr_wr <= bram_addr_zero;
-			FSM_STATE_EXP_FILL_4:		p_addr_wr <= p_addr_wr_next;
+			FSM_STATE_MUL_INIT_3:	p_addr_wr <= bram_addr_zero;
+			FSM_STATE_MUL_INIT_4:	p_addr_wr <= p_addr_wr_next;
+			//
+			FSM_STATE_EXP_INIT_3:	p_addr_wr <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_4:	p_addr_wr <= p_addr_wr_next;
+			//
+			FSM_STATE_EXP_FILL_3:	p_addr_wr <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_4:	p_addr_wr <= p_addr_wr_next;
 		endcase
 		//
 		// t0_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_LOAD_3:		t0_addr <= bram_addr_zero;
-			FSM_STATE_EXP_LOAD_4:		t0_addr <= t0_addr_next;
+			FSM_STATE_EXP_LOAD_3:	t0_addr <= bram_addr_zero;
+			FSM_STATE_EXP_LOAD_4:	t0_addr <= t0_addr_next;
 			//
-			FSM_STATE_EXP_FILL_1: 	t0_addr <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_1:	t0_addr <= bram_addr_zero;
 			FSM_STATE_EXP_FILL_2,
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:		t0_addr <= !t0_addr_done ? t0_addr_next : t0_addr;
+			FSM_STATE_EXP_FILL_4:	t0_addr <= !t0_addr_done ? t0_addr_next : t0_addr;
 		endcase		
 		//
 		// t1_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_INIT_3:		t1_addr <= bram_addr_zero;
-			FSM_STATE_EXP_INIT_4:		t1_addr <= t1_addr_next;
+			FSM_STATE_EXP_INIT_3:	t1_addr <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_4:	t1_addr <= t1_addr_next;
 			//
-			FSM_STATE_EXP_LOAD_1: 	t1_addr <= bram_addr_zero;
+			FSM_STATE_EXP_LOAD_1:	t1_addr <= bram_addr_zero;
 			FSM_STATE_EXP_LOAD_2,
 			FSM_STATE_EXP_LOAD_3,
-			FSM_STATE_EXP_LOAD_4:		t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
+			FSM_STATE_EXP_LOAD_4:	t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
 			//
-			FSM_STATE_EXP_FILL_3:		t1_addr <= bram_addr_zero;
-			FSM_STATE_EXP_FILL_4:		t1_addr <= t1_addr_next;
+			FSM_STATE_EXP_FILL_3:	t1_addr <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_4:	t1_addr <= t1_addr_next;
 			//
-			FSM_STATE_EXP_SAVE_1: 	t1_addr <= bram_addr_zero;
+			FSM_STATE_EXP_SAVE_1:	t1_addr <= bram_addr_zero;
 			FSM_STATE_EXP_SAVE_2,
 			FSM_STATE_EXP_SAVE_3,
-			FSM_STATE_EXP_SAVE_4:		t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
+			FSM_STATE_EXP_SAVE_4:	t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
 		endcase
 		//
 		// t2_addr_wr
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_INIT_3:		t2_addr_wr <= bram_addr_zero;
-			FSM_STATE_EXP_INIT_4:		t2_addr_wr <= t2_addr_wr_next;
 			//
-			FSM_STATE_EXP_FILL_3:		t2_addr_wr <= bram_addr_zero;
-			FSM_STATE_EXP_FILL_4:		t2_addr_wr <= t2_addr_wr_next;
+			FSM_STATE_MUL_INIT_3:	t2_addr_wr <= bram_addr_zero;
+			FSM_STATE_MUL_INIT_4:	t2_addr_wr <= t2_addr_wr_next;
+
+			FSM_STATE_EXP_INIT_3:	t2_addr_wr <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_4:	t2_addr_wr <= t2_addr_wr_next;
+			//
+			FSM_STATE_EXP_FILL_3:	t2_addr_wr <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_4:	t2_addr_wr <= t2_addr_wr_next;
 		endcase		
 		//
 		// pp_addr_rd
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_FILL_1: 	pp_addr_rd <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_1:	pp_addr_rd <= bram_addr_zero;
 			FSM_STATE_EXP_FILL_2,
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:		pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd;
+			FSM_STATE_EXP_FILL_4:	pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd;
 		endcase
 		//
 		// tp_addr_rd
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_FILL_1: 	tp_addr_rd <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_1: 	tp_addr_rd <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_2,
+			FSM_STATE_EXP_INIT_3,
+			FSM_STATE_EXP_INIT_4:	tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
+			//
+			FSM_STATE_EXP_FILL_1:	tp_addr_rd <= bram_addr_zero;
 			FSM_STATE_EXP_FILL_2,
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:		tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
+			FSM_STATE_EXP_FILL_4:	tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
 		endcase
 		//
 	end
@@ -487,26 +515,28 @@ module modexpa7_exponentiator #
 		//
 		case (fsm_next_state)
 			FSM_STATE_EXP_SAVE_3,
-			FSM_STATE_EXP_SAVE_4:		r_wren <= 1'b1;
-			default:					r_wren <= 1'b0;
+			FSM_STATE_EXP_SAVE_4:	r_wren <= 1'b1;
+			default:						r_wren <= 1'b0;
 		endcase
 		//
 		// p_wren
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_INIT_3,		
+			FSM_STATE_MUL_INIT_3,
+			FSM_STATE_MUL_INIT_4,
+			FSM_STATE_EXP_INIT_3,
 			FSM_STATE_EXP_INIT_4,
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:		p_wren <= 1'b1;
-			default:					p_wren <= 1'b0;
+			FSM_STATE_EXP_FILL_4:	p_wren <= 1'b1;
+			default:						p_wren <= 1'b0;
 		endcase
 		//
 		// t0_wren
 		//
 		case (fsm_next_state)
 			FSM_STATE_EXP_LOAD_3,		
-			FSM_STATE_EXP_LOAD_4:		t0_wren <= 1'b1;
-			default:					t0_wren <= 1'b0;
+			FSM_STATE_EXP_LOAD_4:	t0_wren <= 1'b1;
+			default:						t0_wren <= 1'b0;
 		endcase
 		//
 		// t1_wren
@@ -515,18 +545,20 @@ module modexpa7_exponentiator #
 			FSM_STATE_EXP_INIT_3,		
 			FSM_STATE_EXP_INIT_4,
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:		t1_wren <= 1'b1;
-			default:					t1_wren <= 1'b0;
+			FSM_STATE_EXP_FILL_4:	t1_wren <= 1'b1;
+			default:						t1_wren <= 1'b0;
 		endcase
 		//
 		// t2_wren
 		//
 		case (fsm_next_state)
+			FSM_STATE_MUL_INIT_3,
+			FSM_STATE_MUL_INIT_4,
 			FSM_STATE_EXP_INIT_3,		
 			FSM_STATE_EXP_INIT_4,
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:		t2_wren <= 1'b1;
-			default:					t2_wren <= 1'b0;
+			FSM_STATE_EXP_FILL_4:	t2_wren <= 1'b1;
+			default:						t2_wren <= 1'b0;
 		endcase
 		//
 	end
@@ -541,50 +573,58 @@ module modexpa7_exponentiator #
 		//
 		case (fsm_next_state)
 			FSM_STATE_EXP_SAVE_3,
-			FSM_STATE_EXP_SAVE_4:		r_data_in	<= t1_data_out;
-			default:					r_data_in	<= 32'dX;
+			FSM_STATE_EXP_SAVE_4:	r_data_in	<= t1_data_out;
+			default:						r_data_in	<= 32'dX;
 		endcase		
 		//
 		// p_data_in
 		//
 		case (fsm_next_state)
+			//
+			FSM_STATE_MUL_INIT_3,
+			FSM_STATE_MUL_INIT_4:	p_data_in	<= f_bram_out;
+			//
 			FSM_STATE_EXP_INIT_3,
-			FSM_STATE_EXP_INIT_4:		p_data_in	<= m_bram_out;
+			FSM_STATE_EXP_INIT_4:	p_data_in	<= tp_data_out;
 			//
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:		p_data_in	<= pp_data_out;
+			FSM_STATE_EXP_FILL_4:	p_data_in	<= pp_data_out;
 			//
-			default:					p_data_in	<= 32'dX;
+			default:						p_data_in	<= 32'dX;
 		endcase
 		//
 		// t0_data_in
 		//
 		case (fsm_next_state)
 			FSM_STATE_EXP_LOAD_3,
-			FSM_STATE_EXP_LOAD_4:		t0_data_in <= t1_data_out;
-			default:					t0_data_in <= 32'dX;
+			FSM_STATE_EXP_LOAD_4:	t0_data_in <= t1_data_out;
+			default:						t0_data_in <= 32'dX;
 		endcase		
 		//
 		// t1_data_in
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_INIT_3:		t1_data_in <= 32'd1;
-			FSM_STATE_EXP_INIT_4:		t1_data_in <= 32'd0;
+			FSM_STATE_EXP_INIT_3:	t1_data_in <= 32'd1;
+			FSM_STATE_EXP_INIT_4:	t1_data_in <= 32'd0;
 			//
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:		t1_data_in <= flag_update_r ? tp_data_out : t0_data_out;
-			default:					t1_data_in <= 32'dX;
+			FSM_STATE_EXP_FILL_4:	t1_data_in <= flag_update_r ? tp_data_out : t0_data_out;
+			default:						t1_data_in <= 32'dX;
 		endcase		
 		//
 		// t2_data_in
 		//
 		case (fsm_next_state)
-			FSM_STATE_EXP_INIT_3:		t2_data_in <= 32'd1;
-			FSM_STATE_EXP_INIT_4:		t2_data_in <= 32'd0;
+			//
+			FSM_STATE_MUL_INIT_3,	
+			FSM_STATE_MUL_INIT_4:	t2_data_in <= m_bram_out;
+			//
+			FSM_STATE_EXP_INIT_3:	t2_data_in <= 32'd1;
+			FSM_STATE_EXP_INIT_4:	t2_data_in <= 32'd0;
 			//
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:		t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
-			default:					t2_data_in <= 32'dX;
+			FSM_STATE_EXP_FILL_4:	t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
+			default:						t2_data_in <= 32'dX;
 		endcase		
 		//
 	end
@@ -661,11 +701,12 @@ module modexpa7_exponentiator #
 	
 	always @(posedge clk)
 		//
-		mul_ena <= (fsm_next_state == FSM_STATE_EXP_CALC_1) ? 1'b1 : 1'b0;
-		
-		
-	
-	
+		case (fsm_next_state)
+			FSM_STATE_MUL_CALC_1,
+			FSM_STATE_EXP_CALC_1:	mul_ena <= 1'b1;
+			default:						mul_ena <= 1'b0;
+		endcase
+			
 
 		//
 		// FSM Process
@@ -685,42 +726,53 @@ module modexpa7_exponentiator #
 		//
 		case (fsm_state)
 			//
-			FSM_STATE_EXP_IDLE:		if (ena_trig)			fsm_next_state = FSM_STATE_EXP_INIT_1;
-										else						fsm_next_state = FSM_STATE_EXP_IDLE;
+			FSM_STATE_MUL_INIT_1:									fsm_next_state = FSM_STATE_MUL_INIT_2;
+			FSM_STATE_MUL_INIT_2:									fsm_next_state = FSM_STATE_MUL_INIT_3;
+			FSM_STATE_MUL_INIT_3:									fsm_next_state = FSM_STATE_MUL_INIT_4;
+			FSM_STATE_MUL_INIT_4:	if (t2_addr_wr_done)		fsm_next_state = FSM_STATE_MUL_CALC_1;
+											else							fsm_next_state = FSM_STATE_MUL_INIT_4;
+			//
+			FSM_STATE_MUL_CALC_1:									fsm_next_state = FSM_STATE_MUL_CALC_2;
+			FSM_STATE_MUL_CALC_2:	if (mul_rdy_tp)			fsm_next_state = FSM_STATE_MUL_CALC_3;
+											else							fsm_next_state = FSM_STATE_MUL_CALC_2;
+			FSM_STATE_MUL_CALC_3:									fsm_next_state = FSM_STATE_EXP_INIT_1;
+			//
+			FSM_STATE_EXP_IDLE:		if (ena_trig)				fsm_next_state = FSM_STATE_MUL_INIT_1;
+											else							fsm_next_state = FSM_STATE_EXP_IDLE;
 			//
 			FSM_STATE_EXP_INIT_1:									fsm_next_state = FSM_STATE_EXP_INIT_2;
 			FSM_STATE_EXP_INIT_2:									fsm_next_state = FSM_STATE_EXP_INIT_3;
 			FSM_STATE_EXP_INIT_3:									fsm_next_state = FSM_STATE_EXP_INIT_4;
-			FSM_STATE_EXP_INIT_4:		if (t1_addr_done)		fsm_next_state = FSM_STATE_EXP_LOAD_1;
-										else						fsm_next_state = FSM_STATE_EXP_INIT_4;
+			FSM_STATE_EXP_INIT_4:	if (t1_addr_done)			fsm_next_state = FSM_STATE_EXP_LOAD_1;
+											else							fsm_next_state = FSM_STATE_EXP_INIT_4;
 			//
 			FSM_STATE_EXP_LOAD_1:									fsm_next_state = FSM_STATE_EXP_LOAD_2;
 			FSM_STATE_EXP_LOAD_2:									fsm_next_state = FSM_STATE_EXP_LOAD_3;
 			FSM_STATE_EXP_LOAD_3:									fsm_next_state = FSM_STATE_EXP_LOAD_4;
-			FSM_STATE_EXP_LOAD_4:		if (t0_addr_done)		fsm_next_state = FSM_STATE_EXP_CALC_1;
-										else						fsm_next_state = FSM_STATE_EXP_LOAD_4;
+			FSM_STATE_EXP_LOAD_4:	if (t0_addr_done)			fsm_next_state = FSM_STATE_EXP_CALC_1;
+											else							fsm_next_state = FSM_STATE_EXP_LOAD_4;
 			//
 			FSM_STATE_EXP_CALC_1:									fsm_next_state = FSM_STATE_EXP_CALC_2;
-			FSM_STATE_EXP_CALC_2:		if (mul_rdy_all)		fsm_next_state = FSM_STATE_EXP_CALC_3;
-										else						fsm_next_state = FSM_STATE_EXP_CALC_2;
+			FSM_STATE_EXP_CALC_2:	if (mul_rdy_all)			fsm_next_state = FSM_STATE_EXP_CALC_3;
+											else							fsm_next_state = FSM_STATE_EXP_CALC_2;
 			FSM_STATE_EXP_CALC_3:									fsm_next_state = FSM_STATE_EXP_FILL_1;
 			//
 			FSM_STATE_EXP_FILL_1:									fsm_next_state = FSM_STATE_EXP_FILL_2;
 			FSM_STATE_EXP_FILL_2:									fsm_next_state = FSM_STATE_EXP_FILL_3;
 			FSM_STATE_EXP_FILL_3:									fsm_next_state = FSM_STATE_EXP_FILL_4;
-			FSM_STATE_EXP_FILL_4:		if (p_addr_wr_done)	fsm_next_state = FSM_STATE_EXP_NEXT;
-										else						fsm_next_state = FSM_STATE_EXP_FILL_4;			
+			FSM_STATE_EXP_FILL_4:	if (p_addr_wr_done)		fsm_next_state = FSM_STATE_EXP_NEXT;
+											else							fsm_next_state = FSM_STATE_EXP_FILL_4;			
 			//
-			FSM_STATE_EXP_NEXT:		if (bit_cnt_done)		fsm_next_state = FSM_STATE_EXP_SAVE_1;
-										else						fsm_next_state = FSM_STATE_EXP_LOAD_1;
+			FSM_STATE_EXP_NEXT:		if (bit_cnt_done)			fsm_next_state = FSM_STATE_EXP_SAVE_1;
+											else							fsm_next_state = FSM_STATE_EXP_LOAD_1;
 			//
 			FSM_STATE_EXP_SAVE_1:									fsm_next_state = FSM_STATE_EXP_SAVE_2;
 			FSM_STATE_EXP_SAVE_2:									fsm_next_state = FSM_STATE_EXP_SAVE_3;
 			FSM_STATE_EXP_SAVE_3:									fsm_next_state = FSM_STATE_EXP_SAVE_4;
-			FSM_STATE_EXP_SAVE_4:		if (r_addr_done)		fsm_next_state = FSM_STATE_EXP_STOP;
-										else						fsm_next_state = FSM_STATE_EXP_SAVE_4;
+			FSM_STATE_EXP_SAVE_4:	if (r_addr_done)			fsm_next_state = FSM_STATE_EXP_STOP;
+											else							fsm_next_state = FSM_STATE_EXP_SAVE_4;
 			//
-			FSM_STATE_EXP_STOP:									fsm_next_state = FSM_STATE_EXP_IDLE;
+			FSM_STATE_EXP_STOP:										fsm_next_state = FSM_STATE_EXP_IDLE;
 			//
 		endcase
 		//
diff --git a/src/tb/tb_exponentiator.v b/src/tb/tb_exponentiator.v
index 3b612c5..c854e65 100644
--- a/src/tb/tb_exponentiator.v
+++ b/src/tb/tb_exponentiator.v
@@ -81,6 +81,7 @@ module tb_exponentiator;
 		//
 	wire	[ 3: 0]	core_m_addr;
 	wire	[ 3: 0]	core_d_addr;
+	wire	[ 3: 0]	core_f_addr;
 	wire	[ 3: 0]	core_n1_addr;
 	wire	[ 3: 0]	core_n2_addr;
 	wire	[ 3: 0]	core_n_coeff1_addr;
@@ -89,6 +90,7 @@ module tb_exponentiator;
 	
 	wire	[31: 0]	core_m_data;
 	wire	[31: 0]	core_d_data;
+	wire	[31: 0]	core_f_data;
 	wire	[31: 0]	core_n1_data;
 	wire	[31: 0]	core_n2_data;
 	wire	[31: 0]	core_n_coeff1_data;
@@ -97,48 +99,54 @@ module tb_exponentiator;
 
 	wire				core_r_wren;
 
-	reg	[ 3: 0]	tb_mdn_addr;
+	reg	[ 3: 0]	tb_mdfn_addr;
 	reg	[ 3: 0]	tb_r_addr;
 
 	reg	[31:0]	tb_m_data;
 	reg	[31:0]	tb_d_data;
+	reg	[31:0]	tb_f_data;
 	reg	[31:0]	tb_n_data;
 	reg	[31:0]	tb_n_coeff_data;
 	wire	[31:0]	tb_r_data;
 	
-	reg				tb_mdn_wren;
+	reg				tb_mdfn_wren;
 	
 		//
 		// BRAMs
 		//
 	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
 	bram_m (.clk(clk),
-		.a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_m_data), .a_out(),
+		.a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_m_data), .a_out(),
 		.b_addr(core_m_addr), .b_out(core_m_data));
 
 	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
 	bram_d (.clk(clk),
-		.a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_d_data), .a_out(),
+		.a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_d_data), .a_out(),
 		.b_addr(core_d_addr), .b_out(core_d_data));
 
 	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+	bram_f (.clk(clk),
+		.a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_f_data), .a_out(),
+		.b_addr(core_f_addr), .b_out(core_f_data));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
 	bram_n1 (.clk(clk),
-		.a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_data), .a_out(),
+		.a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_n_data), .a_out(),
 		.b_addr(core_n1_addr), .b_out(core_n1_data));
 
 	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
 	bram_n2 (.clk(clk),
-		.a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_data), .a_out(),
+		.a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_n_data), .a_out(),
 		.b_addr(core_n2_addr), .b_out(core_n2_data));
 
 	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
 	bram_n_coeff1 (.clk(clk),
-		.a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_coeff_data), .a_out(),
+		.a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_n_coeff_data), .a_out(),
 		.b_addr(core_n_coeff1_addr), .b_out(core_n_coeff1_data));
 
 	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
 	bram_n_coeff2 (.clk(clk),
-		.a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_coeff_data), .a_out(),
+		.a_addr(tb_mdfn_addr), .a_wr(tb_mdfn_wren), .a_in(tb_n_coeff_data), .a_out(),
 		.b_addr(core_n_coeff2_addr), .b_out(core_n_coeff2_data));
 		
 	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
@@ -164,6 +172,7 @@ module tb_exponentiator;
 		
 		.m_bram_addr			(core_m_addr),
 		.d_bram_addr			(core_d_addr),
+		.f_bram_addr			(core_f_addr),
 		.n1_bram_addr			(core_n1_addr),
 		.n2_bram_addr			(core_n2_addr),
 		.n_coeff1_bram_addr	(core_n_coeff1_addr),
@@ -172,6 +181,7 @@ module tb_exponentiator;
 
 		.m_bram_out				(core_m_data), 
 		.d_bram_out				(core_d_data), 
+		.f_bram_out				(core_f_data), 
 		.n1_bram_out			(core_n1_data), 
 		.n2_bram_out			(core_n2_data), 
 		.n_coeff1_bram_out	(core_n_coeff1_data), 
@@ -197,8 +207,8 @@ module tb_exponentiator;
 		rst_n = 1'b1;
 		#100;
 		
-		test_exponent_384(M_FACTOR_384, D_384, N_384, N_COEFF_384, S_384);
-		//test_exponent_512(M_512);
+		test_exponent_384(M_384, D_384, FACTOR_384, N_384, N_COEFF_384, S_384);
+		test_exponent_512(M_512, D_512, FACTOR_512, N_512, N_COEFF_512, S_512);
 		
 	end
       
@@ -211,6 +221,7 @@ module tb_exponentiator;
 		//
 		input	[383:0] m;
 		input	[383:0] d;
+		input [383:0] f;
 		input	[383:0] n;
 		input	[383:0] n_coeff;
 		input	[383:0] s;
@@ -223,7 +234,7 @@ module tb_exponentiator;
 			n_num_words = 4'd11;								// set number of words
 			d_num_bits = 9'd383;								// set number of bits
 			//
-			write_memory_384(m, d, n, n_coeff);			// fill memory
+			write_memory_384(m, d, f, n, n_coeff);		// fill memory
 			
 			ena = 1;												// start operation
 			#10;													//
@@ -247,40 +258,38 @@ module tb_exponentiator;
 		end
 		//
 	endtask
-	/*
-	task test_factor_512;
+
+	task test_exponent_512;
 		//
+		input	[511:0] m;
+		input	[511:0] d;
+		input [511:0] f;
 		input	[511:0] n;
-		reg	[511:0] f;
-		reg	[511:0] factor;
+		input	[511:0] n_coeff;
+		input	[511:0] s;
+		reg   [511:0] r;
+		//
 		integer i;
 		//
 		begin
-			//			
-			calc_factor_512(n, f);	// calculate factor on-the-fly
-			
-				// make sure, that the value matches the one saved in the include file
-			if (f !== FACTOR_512) begin
-				$display("ERROR: Calculated factor value differs from the one in the test vector!");
-				$finish;
-			end
-			
-			
+			//						
 			n_num_words = 4'd15;								// set number of words
-			write_memory_512(n);								// fill memory
+			d_num_bits = 9'd511;								// set number of bits
+			//
+			write_memory_512(m, d, f, n, n_coeff);		// fill memory
 			
 			ena = 1;												// start operation
 			#10;													//
 			ena = 0;												// clear flag
 			
 			while (!rdy) #10;									// wait for operation to complete
-			read_memory_512(factor);						// get result from memory
+			read_memory_512(r);								// get result from memory
 						
-			$display("    calculated: %x", factor);	// display result
-			$display("    expected:   %x", f);			//
+			$display("    calculated: %x", r);			// display result
+			$display("    expected:   %x", s);			//
 							
 				// check calculated value
-			if (f === factor) begin
+			if (r === s) begin
 				$display("        OK");
 				$display("SUCCESS: Test passed.");
 			end else begin
@@ -291,7 +300,7 @@ module tb_exponentiator;
 		end
 		//
 	endtask
-	*/
+
 
 	//
 	// write_memory_384
@@ -300,74 +309,104 @@ module tb_exponentiator;
 		//
 		input	[383:0] m;
 		input	[383:0] d;
+		input	[383:0] f;
 		input	[383:0] n;
 		input	[383:0] n_coeff;
 		reg	[383:0] m_shreg;
+		reg	[383:0] f_shreg;
 		reg	[383:0] d_shreg;
 		reg	[383:0] n_shreg;
 		reg	[383:0] n_coeff_shreg;
 		//
 		begin
 			//
-			tb_mdn_wren		= 1;			// start filling memories
+			tb_mdfn_wren	= 1;			// start filling memories
 			m_shreg			= m;			// preload shift register
 			d_shreg			= d;			// preload shift register
+			f_shreg			= f;			// preload shift register
 			n_shreg			= n;			// preload shift register
 			n_coeff_shreg	= n_coeff;	// preload shift register
 			//
 			for (w=0; w<NUM_WORDS_384; w=w+1) begin							// write all words
-				tb_mdn_addr			= w[3:0];											// set address
+				tb_mdfn_addr		= w[3:0];											// set address
 				tb_m_data			= m_shreg[31:0];									// set data
 				tb_d_data			= d_shreg[31:0];									// set data
+				tb_f_data			= f_shreg[31:0];									// set data
 				tb_n_data			= n_shreg[31:0];									// set data
 				tb_n_coeff_data	= n_coeff_shreg[31:0];							// set data
 				m_shreg				= {{32{1'bX}}, m_shreg[383:32]};				// update shift register
 				d_shreg				= {{32{1'bX}}, d_shreg[383:32]};				// update shift register
+				f_shreg				= {{32{1'bX}}, f_shreg[383:32]};				// update shift register
 				n_shreg				= {{32{1'bX}}, n_shreg[383:32]};				// update shift register
 				n_coeff_shreg		= {{32{1'bX}}, n_coeff_shreg[383:32]};		// update shift register
 				#10;																			// wait for 1 clock tick
 			end
 			//
-			tb_mdn_addr			= {4{1'bX}};	// wipe addresses
+			tb_mdfn_addr		= {4{1'bX}};	// wipe addresses
 			tb_m_data			= {32{1'bX}};	// wipe data
 			tb_d_data			= {32{1'bX}};	// wipe data
+			tb_f_data			= {32{1'bX}};	// wipe data
 			tb_n_data			= {32{1'bX}};	// wipe data
 			tb_n_coeff_data	= {32{1'bX}};	// wipe data
-			tb_mdn_wren	= 0;				// stop filling memory
+			tb_mdfn_wren	= 0;				// stop filling memory
 			//
 		end
 		//
 	endtask
 			
-	/*	
+
 	//
 	// write_memory_512
 	//
 	task write_memory_512;
 		//
+		input	[511:0] m;
+		input	[511:0] d;
+		input	[511:0] f;
 		input	[511:0] n;
+		input	[511:0] n_coeff;
+		reg	[511:0] m_shreg;
+		reg	[511:0] f_shreg;
+		reg	[511:0] d_shreg;
 		reg	[511:0] n_shreg;
+		reg	[511:0] n_coeff_shreg;
 		//
 		begin
 			//
-			tb_n_wren	= 1;	// start filling memories
-			n_shreg		= n;	// preload shift register
+			tb_mdfn_wren	= 1;			// start filling memories
+			m_shreg			= m;			// preload shift register
+			d_shreg			= d;			// preload shift register
+			f_shreg			= f;			// preload shift register
+			n_shreg			= n;			// preload shift register
+			n_coeff_shreg	= n_coeff;	// preload shift register
 			//
-			for (w=0; w<NUM_WORDS_512; w=w+1) begin				// write all words
-				tb_n_addr	= w[3:0];									// set address
-				tb_n_data	= n_shreg[31:0];							// set data
-				n_shreg		= {{32{1'bX}}, n_shreg[511:32]};		// update shift register
-				#10;															// wait for 1 clock tick
+			for (w=0; w<NUM_WORDS_512; w=w+1) begin							// write all words
+				tb_mdfn_addr		= w[3:0];											// set address
+				tb_m_data			= m_shreg[31:0];									// set data
+				tb_d_data			= d_shreg[31:0];									// set data
+				tb_f_data			= f_shreg[31:0];									// set data
+				tb_n_data			= n_shreg[31:0];									// set data
+				tb_n_coeff_data	= n_coeff_shreg[31:0];							// set data
+				m_shreg				= {{32{1'bX}}, m_shreg[511:32]};				// update shift register
+				d_shreg				= {{32{1'bX}}, d_shreg[511:32]};				// update shift register
+				f_shreg				= {{32{1'bX}}, f_shreg[511:32]};				// update shift register
+				n_shreg				= {{32{1'bX}}, n_shreg[511:32]};				// update shift register
+				n_coeff_shreg		= {{32{1'bX}}, n_coeff_shreg[511:32]};		// update shift register
+				#10;																			// wait for 1 clock tick
 			end
 			//
-			tb_n_addr	= {4{1'bX}};	// wipe addresses
-			tb_n_data	= {32{1'bX}};	// wipe data
-			tb_n_wren	= 0;				// stop filling memory
+			tb_mdfn_addr		= {4{1'bX}};	// wipe addresses
+			tb_m_data			= {32{1'bX}};	// wipe data
+			tb_d_data			= {32{1'bX}};	// wipe data
+			tb_f_data			= {32{1'bX}};	// wipe data
+			tb_n_data			= {32{1'bX}};	// wipe data
+			tb_n_coeff_data	= {32{1'bX}};	// wipe data
+			tb_mdfn_wren	= 0;				// stop filling memory
 			//
 		end
 		//
 	endtask
-	*/
+
 
 	//
 	// read_memory_384
@@ -392,30 +431,30 @@ module tb_exponentiator;
 		//
 	endtask
 
-	/*
+
 	//
 	// read_memory_512
 	//
 	task read_memory_512;
 		//
-		output	[511:0] f;
-		reg		[511:0] f_shreg;
+		output	[511:0] r;
+		reg		[511:0] r_shreg;
 		//
 		begin
 			//
 			for (w=0; w<NUM_WORDS_512; w=w+1) begin		// read result word-by-word
-				tb_f_addr	= w[3:0];							// set address
+				tb_r_addr	= w[3:0];							// set address
 				#10;													// wait for 1 clock tick
-				f_shreg = {tb_f_data, f_shreg[511:32]};	// store data word
+				r_shreg = {tb_r_data, r_shreg[511:32]};	// store data word
 			end
 			//
-			tb_f_addr = {4{1'bX}};								// wipe address
-			f = f_shreg;											// return
+			tb_r_addr = {4{1'bX}};								// wipe address
+			r = r_shreg;											// return
 			//
 		end
 		//
-	endtask
-	*/
+	endtask
+
 
 endmodule
 



More information about the Commits mailing list