[Cryptech-Commits] [core/math/modexpa7] 01/03: Finished modular exponentiation module: * works in simulator * passes synthesis without major issues

git at cryptech.is git at cryptech.is
Wed Jul 19 20:54:51 UTC 2017


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch systolic
in repository core/math/modexpa7.

commit 344ed1b7ab2fba06158a28b3c691cf9d9ee8cb75
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Wed Jul 19 15:00:10 2017 +0300

    Finished modular exponentiation module:
     * works in simulator
     * passes synthesis without major issues
    
    Started adding pre-multiplication logic...
---
 src/rtl/modexpa7_exponentiator.v | 500 +++++++++++++++++++++++++--------------
 src/tb/tb_exponentiator.v        |   2 +-
 2 files changed, 327 insertions(+), 175 deletions(-)

diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v
index 1f55cec..e34a7ab 100644
--- a/src/rtl/modexpa7_exponentiator.v
+++ b/src/rtl/modexpa7_exponentiator.v
@@ -57,26 +57,26 @@ module modexpa7_exponentiator #
 
 		input											ena,
 		output										rdy,
-
-		output	[OPERAND_ADDR_WIDTH-1:0]	m_bram_addr,
-		output	[OPERAND_ADDR_WIDTH-1:0]	d_bram_addr,
-		output	[OPERAND_ADDR_WIDTH-1:0]	n1_bram_addr,
-		output	[OPERAND_ADDR_WIDTH-1:0]	n2_bram_addr,
-		output	[OPERAND_ADDR_WIDTH-1:0]	n_coeff1_bram_addr,
-		output	[OPERAND_ADDR_WIDTH-1:0]	n_coeff2_bram_addr,
-		output	[OPERAND_ADDR_WIDTH-1:0]	r_bram_addr,
-
-		input		[                32-1:0]	m_bram_out,
-		input		[                32-1:0]	d_bram_out,
-		input		[                32-1:0]	n1_bram_out,
-		input		[                32-1:0]	n2_bram_out,
-		input		[                32-1:0]	n_coeff1_bram_out,
-		input		[                32-1:0]	n_coeff2_bram_out,
+		
+		output	[OPERAND_ADDR_WIDTH-1:0]	m_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	d_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	n1_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	n2_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	n_coeff1_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	n_coeff2_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	r_bram_addr,
+
+		input		[                32-1:0]	m_bram_out,
+		input		[                32-1:0]	d_bram_out,
+		input		[                32-1:0]	n1_bram_out,
+		input		[                32-1:0]	n2_bram_out,
+		input		[                32-1:0]	n_coeff1_bram_out,
+		input		[                32-1:0]	n_coeff2_bram_out,
 
 		output	[                32-1:0]	r_bram_in,
 		output										r_bram_wr,
 
-		input		[OPERAND_ADDR_WIDTH-1:0]	n_num_words,
+		input		[OPERAND_ADDR_WIDTH-1:0]	m_num_words,
 		input		[OPERAND_ADDR_WIDTH+4:0]	d_num_bits
 	);
 	
@@ -84,35 +84,86 @@ module modexpa7_exponentiator #
 		//
 		// FSM Declaration
 		//
-	localparam	[ 7: 0]	FSM_STATE_IDLE		= 8'h00;
-
-	localparam	[ 7: 0]	FSM_STATE_INIT_1	= 8'hA1;
-	localparam	[ 7: 0]	FSM_STATE_INIT_2	= 8'hA2;
-	localparam	[ 7: 0]	FSM_STATE_INIT_3	= 8'hA3;
-	localparam	[ 7: 0]	FSM_STATE_INIT_4	= 8'hA4;
-
-	localparam	[ 7: 0]	FSM_STATE_LOAD_1	= 8'hB1;
-	localparam	[ 7: 0]	FSM_STATE_LOAD_2	= 8'hB2;
-	localparam	[ 7: 0]	FSM_STATE_LOAD_3	= 8'hB3;
-	localparam	[ 7: 0]	FSM_STATE_LOAD_4	= 8'hB4;
-
-	localparam	[ 7: 0]	FSM_STATE_CALC_1	= 8'hC1;
-	localparam	[ 7: 0]	FSM_STATE_CALC_2	= 8'hC2;
-	localparam	[ 7: 0]	FSM_STATE_CALC_3	= 8'hC3;
-
-	localparam	[ 7: 0]	FSM_STATE_FILL_1	= 8'hD1;
-	localparam	[ 7: 0]	FSM_STATE_FILL_2	= 8'hD2;
-	localparam	[ 7: 0]	FSM_STATE_FILL_3	= 8'hD3;
-	localparam	[ 7: 0]	FSM_STATE_FILL_4	= 8'hD4;
+	localparam	[ 7: 0]	FSM_STATE_EXP_IDLE		= 8'h00;
+	//
+	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_1	= 8'hA1;
+	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_2	= 8'hA2;
+	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_3	= 8'hA3;
+	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_4	= 8'hA4;
+
+	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_1	= 8'hB1;
+	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_2	= 8'hB2;
+	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_3	= 8'hB3;
+	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_4	= 8'hB4;
+
+	localparam	[ 7: 0]	FSM_STATE_EXP_CALC_1	= 8'hC1;
+	localparam	[ 7: 0]	FSM_STATE_EXP_CALC_2	= 8'hC2;
+	localparam	[ 7: 0]	FSM_STATE_EXP_CALC_3	= 8'hC3;
+
+	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_1	= 8'hD1;
+	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_2	= 8'hD2;
+	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_3	= 8'hD3;
+	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_4	= 8'hD4;
+
+	localparam	[ 7: 0]	FSM_STATE_EXP_NEXT	= 8'hE0;
+
+	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_1	= 8'hF1;
+	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_2	= 8'hF2;
+	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_3	= 8'hF3;
+	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_4	= 8'hF4;
+	//
+	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_1	= 8'h11;
+	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_2	= 8'h12;
+	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_3	= 8'h13;
+	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_4	= 8'h14;
+
+	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_1	= 8'h21;
+	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_2	= 8'h22;
+	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_3	= 8'h23;
+	//
+	localparam	[ 7: 0]	FSM_STATE_EXP_STOP		= 8'hFF;
+
+
+	/*
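+	 *  NOTE: the MUL_* states are declared above but no FSM transition enters
+	 *  them yet; for now EXP_INIT fills P1..P3 from M and sets T1 = T2 = 1.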
+	 *  MUL_INIT:	P1 = F
+	 *             P2 = F
+	 *             P3 = F
+	 *             T2 = M
+	 *
+	 *  MUL_CALC:	TP = T2 * P3
+	 *
+	 *  //
+	 *
+	 *  EXP_INIT:	P1 <= TP
+	 *					P2 <= TP
+	 *					P3 <= TP
+	 *					T1 <= 1
+	 *					T2 <= 1
+	 *
+	 *  EXP_LOAD:	T0 <= T1
+	 *
+	 *  EXP_CALC:	PP = P1 * P2
+	 *					TP = T2 * P3
+	 *
+	 *  EXP_FILL:	P1 <= PP
+	 *					P2 <= PP
+	 *					P3 <= PP
+	 *					T1 <= D[i] ? TP : T0
+	 *					T2 <= D[i] ? TP : T0
+	 *
+	 *  EXP_SAVE:	R  <=  T1
+	 *
+	 *  //
+	 *
+	 */
 
-	localparam	[ 7: 0]	FSM_STATE_NEXT		= 8'hE0;
-	
-	localparam	[ 7: 0]	FSM_STATE_STOP		= 8'hFF;
 	
 		//
 		// FSM State / Next State
 		//
-	reg	[ 7: 0]	fsm_state = FSM_STATE_IDLE;
+	reg	[ 7: 0]	fsm_state = FSM_STATE_EXP_IDLE;
 	reg	[ 7: 0]	fsm_next_state;
 
 
@@ -141,10 +192,10 @@ module modexpa7_exponentiator #
 		else begin
 		
 				/* clear flag when operation is started */
-			if (fsm_state == FSM_STATE_IDLE)	rdy_reg <= ~ena_trig;
+			if (fsm_state == FSM_STATE_EXP_IDLE)	rdy_reg <= ~ena_trig;
 			
 				/* set flag after operation is finished */
-			if (fsm_state == FSM_STATE_STOP)	rdy_reg <= 1'b1;			
+			if (fsm_state == FSM_STATE_EXP_STOP)	rdy_reg <= 1'b1;			
 			
 		end
 		
@@ -152,14 +203,14 @@ module modexpa7_exponentiator #
 		//
 		// Parameters Latch
 		//
-	reg	[OPERAND_ADDR_WIDTH-1:0]	n_num_words_latch;
+	reg	[OPERAND_ADDR_WIDTH-1:0]	m_num_words_latch;
 	reg	[OPERAND_ADDR_WIDTH+4:0]	d_num_bits_latch;
 
 		/* save number of words in a and b when new operation starts */
 	always @(posedge clk)
 		//
-		if (fsm_next_state == FSM_STATE_INIT_1)
-			{n_num_words_latch, d_num_bits_latch} <= {n_num_words, d_num_bits};
+		if (fsm_next_state == FSM_STATE_EXP_INIT_1)
+			{m_num_words_latch, d_num_bits_latch} <= {m_num_words, d_num_bits};
 			
 
 		//
@@ -175,7 +226,7 @@ module modexpa7_exponentiator #
 	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_zero		= {{OPERAND_ADDR_WIDTH{1'b0}}};
 	
 		/* the very last addresses */
-	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_last		= {n_num_words_latch};
+	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_last		= {m_num_words_latch};
 
 		/* address registers */
 	reg	[OPERAND_ADDR_WIDTH-1:0]	m_addr;
@@ -226,6 +277,7 @@ module modexpa7_exponentiator #
 		//
 
 		/* memory inputs */
+	reg	[31: 0]	r_data_in;
 	reg	[31: 0]	t0_data_in;
 	reg	[31: 0]	t1_data_in;
 	reg	[31: 0]	t2_data_in;
@@ -244,12 +296,17 @@ module modexpa7_exponentiator #
 	wire	[31: 0]	tp_data_out;
 
 		/* write enables */
+	reg	r_wren;
 	reg	t0_wren;
 	reg	t1_wren;
 	reg	t2_wren;
 	reg	p_wren;
 	wire	pp_wren;
 	wire	tp_wren;
+	
+		/* map */
+	assign r_bram_in = r_data_in;
+	assign r_bram_wr = r_wren;
 
 	bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
 	bram_t0 (.clk(clk), .a_addr(t0_addr), .a_wr(t0_wren), .a_in(t0_data_in), .a_out(t0_data_out));
@@ -257,37 +314,69 @@ module modexpa7_exponentiator #
 	bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
 	bram_t1 (.clk(clk), .a_addr(t1_addr), .a_wr(t1_wren), .a_in(t1_data_in), .a_out(t1_data_out));
 	
-	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
 	bram_t2 (.clk(clk),
-		.a_addr(t2_addr_wr), .a_wr(t2_wren), .a_in(t2_data_in), .a_out(),
-		.b_addr(t2_addr_rd), .b_out(t2_data_out));
+		.a_addr(t2_addr_wr), .a_wr(t2_wren), .a_in(t2_data_in), .a_out(),
+		.b_addr(t2_addr_rd), .b_out(t2_data_out));
 
-	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
 	bram_p1 (.clk(clk),
-		.a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
-		.b_addr(p1_addr_rd), .b_out(p1_data_out));
+		.a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+		.b_addr(p1_addr_rd), .b_out(p1_data_out));
 
-	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
 	bram_p2 (.clk(clk),
-		.a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
-		.b_addr(p2_addr_rd), .b_out(p2_data_out));
+		.a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+		.b_addr(p2_addr_rd), .b_out(p2_data_out));
 
-	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
 	bram_p3 (.clk(clk),
-		.a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
-		.b_addr(p3_addr_rd), .b_out(p3_data_out));
+		.a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+		.b_addr(p3_addr_rd), .b_out(p3_data_out));
 
-	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
 	bram_pp (.clk(clk),
-		.a_addr(pp_addr_wr), .a_wr(pp_wren), .a_in(pp_data_in), .a_out(),
-		.b_addr(pp_addr_rd), .b_out(pp_data_out));
+		.a_addr(pp_addr_wr), .a_wr(pp_wren), .a_in(pp_data_in), .a_out(),
+		.b_addr(pp_addr_rd), .b_out(pp_data_out));
 
-	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
 	bram_tp (.clk(clk),
-		.a_addr(tp_addr_wr), .a_wr(tp_wren), .a_in(tp_data_in), .a_out(),
-		.b_addr(tp_addr_rd), .b_out(tp_data_out));
+		.a_addr(tp_addr_wr), .a_wr(tp_wren), .a_in(tp_data_in), .a_out(),
+		.b_addr(tp_addr_rd), .b_out(tp_data_out));
 
 
+		//
+		// Bit Counter
+		//
+	reg	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt;
+		
+	wire	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt_zero = {{OPERAND_ADDR_WIDTH{1'b0}}, {5{1'b0}}};
+	wire	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt_last = d_num_bits_latch;
+	wire	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt_next = bit_cnt + 1'b1;
+
+		/* handy flag */
+	wire	bit_cnt_done = (bit_cnt == bit_cnt_last) ? 1'b1 : 1'b0;
+	
+	always @(posedge clk)
+		//
+		if (fsm_next_state == FSM_STATE_EXP_LOAD_1)
+			//
+			case (fsm_state)
+				FSM_STATE_EXP_INIT_4: bit_cnt <= bit_cnt_zero;
+				FSM_STATE_EXP_NEXT:   bit_cnt <= !bit_cnt_done ? bit_cnt_next : bit_cnt;
+			endcase
+
+
+		//
+		// Flags
+		//
+	reg	flag_update_r;
+
+	always @(posedge clk)
+		//
+		if (fsm_next_state == FSM_STATE_EXP_CALC_3)
+			flag_update_r <= d_bram_out[bit_cnt[4:0]];
+			
 
 		//
 		// Memory Address Control Logic
@@ -297,54 +386,93 @@ module modexpa7_exponentiator #
 		// m_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_INIT_1: 	m_addr <= bram_addr_zero;
-			FSM_STATE_INIT_2,
-			FSM_STATE_INIT_3,
-			FSM_STATE_INIT_4:		m_addr <= !m_addr_done ? m_addr_next : m_addr;
+			FSM_STATE_EXP_INIT_1: 	m_addr <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_2,
+			FSM_STATE_EXP_INIT_3,
+			FSM_STATE_EXP_INIT_4:		m_addr <= !m_addr_done ? m_addr_next : m_addr;
+		endcase
+		//
+		// d_addr
+		//
+		case (fsm_next_state)
+			FSM_STATE_EXP_CALC_1:		d_addr <= bit_cnt[OPERAND_ADDR_WIDTH+4:5];
+		endcase
+		//
+		// r_addr
+		//
+		case (fsm_next_state)
+			FSM_STATE_EXP_SAVE_3:		r_addr <= bram_addr_zero;
+			FSM_STATE_EXP_SAVE_4:		r_addr <= r_addr_next;
 		endcase
 		//
 		// p_addr_wr
 		//
 		case (fsm_next_state)
-			FSM_STATE_INIT_3,
-			FSM_STATE_FILL_3:		p_addr_wr <= bram_addr_zero;
-			FSM_STATE_INIT_4,
-			FSM_STATE_FILL_4:		p_addr_wr <= p_addr_wr_next;
+			FSM_STATE_EXP_INIT_3:		p_addr_wr <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_4:		p_addr_wr <= p_addr_wr_next;
+			//
+			FSM_STATE_EXP_FILL_3:		p_addr_wr <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_4:		p_addr_wr <= p_addr_wr_next;
 		endcase
 		//
 		// t0_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_LOAD_3:		t0_addr <= bram_addr_zero;
-			FSM_STATE_LOAD_4:		t0_addr <= t0_addr_next;
+			FSM_STATE_EXP_LOAD_3:		t0_addr <= bram_addr_zero;
+			FSM_STATE_EXP_LOAD_4:		t0_addr <= t0_addr_next;
+			//
+			FSM_STATE_EXP_FILL_1: 	t0_addr <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_2,
+			FSM_STATE_EXP_FILL_3,
+			FSM_STATE_EXP_FILL_4:		t0_addr <= !t0_addr_done ? t0_addr_next : t0_addr;
 		endcase		
 		//
 		// t1_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_INIT_3:		t1_addr <= bram_addr_zero;
-			FSM_STATE_INIT_4:		t1_addr <= t1_addr_next;
+			FSM_STATE_EXP_INIT_3:		t1_addr <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_4:		t1_addr <= t1_addr_next;
 			//
-			FSM_STATE_LOAD_1: 	t1_addr <= bram_addr_zero;
-			FSM_STATE_LOAD_2,
-			FSM_STATE_LOAD_3,
-			FSM_STATE_LOAD_4:		t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
+			FSM_STATE_EXP_LOAD_1: 	t1_addr <= bram_addr_zero;
+			FSM_STATE_EXP_LOAD_2,
+			FSM_STATE_EXP_LOAD_3,
+			FSM_STATE_EXP_LOAD_4:		t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
+			//
+			FSM_STATE_EXP_FILL_3:		t1_addr <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_4:		t1_addr <= t1_addr_next;
+			//
+			FSM_STATE_EXP_SAVE_1: 	t1_addr <= bram_addr_zero;
+			FSM_STATE_EXP_SAVE_2,
+			FSM_STATE_EXP_SAVE_3,
+			FSM_STATE_EXP_SAVE_4:		t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
 		endcase
 		//
 		// t2_addr_wr
 		//
 		case (fsm_next_state)
-			FSM_STATE_INIT_3:		t2_addr_wr	<= bram_addr_zero;
-			FSM_STATE_INIT_4:		t2_addr_wr	<= t2_addr_wr_next;
+			FSM_STATE_EXP_INIT_3:		t2_addr_wr <= bram_addr_zero;
+			FSM_STATE_EXP_INIT_4:		t2_addr_wr <= t2_addr_wr_next;
+			//
+			FSM_STATE_EXP_FILL_3:		t2_addr_wr <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_4:		t2_addr_wr <= t2_addr_wr_next;
 		endcase		
 		//
 		// pp_addr_rd
 		//
 		case (fsm_next_state)
-			FSM_STATE_FILL_1: 	pp_addr_rd <= bram_addr_zero;
-			FSM_STATE_FILL_2,
-			FSM_STATE_FILL_3,
-			FSM_STATE_FILL_4:		pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd;
+			FSM_STATE_EXP_FILL_1: 	pp_addr_rd <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_2,
+			FSM_STATE_EXP_FILL_3,
+			FSM_STATE_EXP_FILL_4:		pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd;
+		endcase
+		//
+		// tp_addr_rd
+		//
+		case (fsm_next_state)
+			FSM_STATE_EXP_FILL_1: 	tp_addr_rd <= bram_addr_zero;
+			FSM_STATE_EXP_FILL_2,
+			FSM_STATE_EXP_FILL_3,
+			FSM_STATE_EXP_FILL_4:		tp_addr_rd <= !tp_addr_rd_done ? tp_addr_rd_next : tp_addr_rd;
 		endcase
 		//
 	end
@@ -355,37 +483,49 @@ module modexpa7_exponentiator #
 		//
 	always @(posedge clk) begin
 		//
+		// r_wren
+		//
+		case (fsm_next_state)
+			FSM_STATE_EXP_SAVE_3,
+			FSM_STATE_EXP_SAVE_4:		r_wren <= 1'b1;
+			default:					r_wren <= 1'b0;
+		endcase
+		//
 		// p_wren
 		//
 		case (fsm_next_state)
-			FSM_STATE_INIT_3,		
-			FSM_STATE_INIT_4,
-			FSM_STATE_FILL_3,
-			FSM_STATE_FILL_4:		p_wren <= 1'b1;
+			FSM_STATE_EXP_INIT_3,		
+			FSM_STATE_EXP_INIT_4,
+			FSM_STATE_EXP_FILL_3,
+			FSM_STATE_EXP_FILL_4:		p_wren <= 1'b1;
 			default:					p_wren <= 1'b0;
 		endcase
 		//
 		// t0_wren
 		//
 		case (fsm_next_state)
-			FSM_STATE_LOAD_3,		
-			FSM_STATE_LOAD_4:		t0_wren <= 1'b1;
+			FSM_STATE_EXP_LOAD_3,		
+			FSM_STATE_EXP_LOAD_4:		t0_wren <= 1'b1;
 			default:					t0_wren <= 1'b0;
 		endcase
 		//
 		// t1_wren
 		//
 		case (fsm_next_state)
-			FSM_STATE_INIT_3,		
-			FSM_STATE_INIT_4:		t1_wren <= 1'b1;
+			FSM_STATE_EXP_INIT_3,		
+			FSM_STATE_EXP_INIT_4,
+			FSM_STATE_EXP_FILL_3,
+			FSM_STATE_EXP_FILL_4:		t1_wren <= 1'b1;
 			default:					t1_wren <= 1'b0;
 		endcase
 		//
 		// t2_wren
 		//
 		case (fsm_next_state)
-			FSM_STATE_INIT_3,		
-			FSM_STATE_INIT_4:		t2_wren <= 1'b1;
+			FSM_STATE_EXP_INIT_3,		
+			FSM_STATE_EXP_INIT_4,
+			FSM_STATE_EXP_FILL_3,
+			FSM_STATE_EXP_FILL_4:		t2_wren <= 1'b1;
 			default:					t2_wren <= 1'b0;
 		endcase
 		//
@@ -397,18 +537,22 @@ module modexpa7_exponentiator #
 		//
 	always @(posedge clk) begin
 		//
+		// r_data_in
+		//
 		case (fsm_next_state)
-			FSM_STATE_INIT_3:		{t2_data_in, t1_data_in}	<= {2{32'd1}};
-			FSM_STATE_INIT_4:		{t2_data_in, t1_data_in}	<= {2{32'd0}};
-			default:					{t2_data_in, t1_data_in}	<= {2{32'dX}};
-		endcase
+			FSM_STATE_EXP_SAVE_3,
+			FSM_STATE_EXP_SAVE_4:		r_data_in	<= t1_data_out;
+			default:					r_data_in	<= 32'dX;
+		endcase		
+		//
+		// p_data_in
 		//
 		case (fsm_next_state)
-			FSM_STATE_INIT_3,
-			FSM_STATE_INIT_4:		p_data_in	<= m_bram_out;
+			FSM_STATE_EXP_INIT_3,
+			FSM_STATE_EXP_INIT_4:		p_data_in	<= m_bram_out;
 			//
-			FSM_STATE_FILL_3,
-			FSM_STATE_FILL_4:		p_data_in	<= pp_data_out;
+			FSM_STATE_EXP_FILL_3,
+			FSM_STATE_EXP_FILL_4:		p_data_in	<= pp_data_out;
 			//
 			default:					p_data_in	<= 32'dX;
 		endcase
@@ -416,9 +560,31 @@ module modexpa7_exponentiator #
 		// t0_data_in
 		//
 		case (fsm_next_state)
-			FSM_STATE_LOAD_3,
-			FSM_STATE_LOAD_4:		t0_data_in	<= t1_data_out;
-			default:					t0_data_in	<= 32'dX;
+			FSM_STATE_EXP_LOAD_3,
+			FSM_STATE_EXP_LOAD_4:		t0_data_in <= t1_data_out;
+			default:					t0_data_in <= 32'dX;
+		endcase		
+		//
+		// t1_data_in
+		//
+		case (fsm_next_state)
+			FSM_STATE_EXP_INIT_3:		t1_data_in <= 32'd1;
+			FSM_STATE_EXP_INIT_4:		t1_data_in <= 32'd0;
+			//
+			FSM_STATE_EXP_FILL_3,
+			FSM_STATE_EXP_FILL_4:		t1_data_in <= flag_update_r ? tp_data_out : t0_data_out;
+			default:					t1_data_in <= 32'dX;
+		endcase		
+		//
+		// t2_data_in
+		//
+		case (fsm_next_state)
+			FSM_STATE_EXP_INIT_3:		t2_data_in <= 32'd1;
+			FSM_STATE_EXP_INIT_4:		t2_data_in <= 32'd0;
+			//
+			FSM_STATE_EXP_FILL_3,
+			FSM_STATE_EXP_FILL_4:		t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
+			default:					t2_data_in <= 32'dX;
 		endcase		
 		//
 	end
@@ -445,21 +611,21 @@ module modexpa7_exponentiator #
 		.ena						(mul_ena),
 		.rdy						(mul_rdy_pp),
 
-		.a_bram_addr			(p1_addr_rd),
-		.b_bram_addr			(p2_addr_rd),
-		.n_bram_addr			(n1_bram_addr),
-		.n_coeff_bram_addr	(n_coeff1_bram_addr),
-		.r_bram_addr			(pp_addr_wr),
+		.a_bram_addr			(p1_addr_rd),
+		.b_bram_addr			(p2_addr_rd),
+		.n_bram_addr			(n1_bram_addr),
+		.n_coeff_bram_addr	(n_coeff1_bram_addr),
+		.r_bram_addr			(pp_addr_wr),
 
-		.a_bram_out				(p1_data_out),
-		.b_bram_out				(p2_data_out),
-		.n_bram_out				(n1_bram_out),
-		.n_coeff_bram_out		(n_coeff1_bram_out),
+		.a_bram_out				(p1_data_out),
+		.b_bram_out				(p2_data_out),
+		.n_bram_out				(n1_bram_out),
+		.n_coeff_bram_out		(n_coeff1_bram_out),
 
 		.r_bram_in				(pp_data_in),
 		.r_bram_wr				(pp_wren),
 
-		.ab_num_words			(n_num_words_latch)
+		.ab_num_words			(m_num_words_latch)
 	);
 
 	modexpa7_systolic_multiplier #
@@ -475,50 +641,30 @@ module modexpa7_exponentiator #
 		.ena						(mul_ena),
 		.rdy						(mul_rdy_tp),
 
-		.a_bram_addr			(t2_addr_rd),
-		.b_bram_addr			(p3_addr_rd),
-		.n_bram_addr			(n2_bram_addr),
-		.n_coeff_bram_addr	(n_coeff2_bram_addr),
-		.r_bram_addr			(tp_addr_wr),
+		.a_bram_addr			(t2_addr_rd),
+		.b_bram_addr			(p3_addr_rd),
+		.n_bram_addr			(n2_bram_addr),
+		.n_coeff_bram_addr	(n_coeff2_bram_addr),
+		.r_bram_addr			(tp_addr_wr),
 
-		.a_bram_out				(t2_data_out),
-		.b_bram_out				(p3_data_out),
-		.n_bram_out				(n2_bram_out),
-		.n_coeff_bram_out		(n_coeff2_bram_out),
+		.a_bram_out				(t2_data_out),
+		.b_bram_out				(p3_data_out),
+		.n_bram_out				(n2_bram_out),
+		.n_coeff_bram_out		(n_coeff2_bram_out),
 
 		.r_bram_in				(tp_data_in),
 		.r_bram_wr				(tp_wren),
 
-		.ab_num_words			(n_num_words_latch)
+		.ab_num_words			(m_num_words_latch)
 	);
 	
 	
 	always @(posedge clk)
 		//
-		mul_ena <= (fsm_next_state == FSM_STATE_CALC_1) ? 1'b1 : 1'b0;
+		mul_ena <= (fsm_next_state == FSM_STATE_EXP_CALC_1) ? 1'b1 : 1'b0;
 		
 		
-		//
-		// Bit Counter
-		//
-	reg	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt;
-		
-	wire	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt_zero = {{OPERAND_ADDR_WIDTH{1'b0}}, {5{1'b0}}};
-	wire	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt_last = d_num_bits_latch;
-	wire	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt_next = bit_cnt + 1'b1;
-
-		/* handy flag */
-	wire	bit_cnt_done = (bit_cnt == bit_cnt_last) ? 1'b1 : 1'b0;
 	
-	always @(posedge clk)
-		//
-		if (fsm_next_state == FSM_STATE_LOAD_1)
-			//
-			case (fsm_state)
-				FSM_STATE_INIT_4: bit_cnt <= bit_cnt_zero;
-				FSM_STATE_NEXT:   bit_cnt <= !bit_cnt_done ? bit_cnt_next : bit_cnt;
-			endcase
-
 	
 
 		//
@@ -526,7 +672,7 @@ module modexpa7_exponentiator #
 		//
 	always @(posedge clk or negedge rst_n)
 		//
-		if (rst_n == 1'b0)	fsm_state <= FSM_STATE_IDLE;
+		if (rst_n == 1'b0)	fsm_state <= FSM_STATE_EXP_IDLE;
 		else						fsm_state <= fsm_next_state;
 	
 	
@@ -535,40 +681,46 @@ module modexpa7_exponentiator #
 		//
 	always @* begin
 		//
-		fsm_next_state = FSM_STATE_STOP;
+		fsm_next_state = FSM_STATE_EXP_STOP;
 		//
 		case (fsm_state)
 			//
-			FSM_STATE_IDLE:		if (ena_trig)			fsm_next_state = FSM_STATE_INIT_1;
-										else						fsm_next_state = FSM_STATE_IDLE;
+			FSM_STATE_EXP_IDLE:		if (ena_trig)			fsm_next_state = FSM_STATE_EXP_INIT_1;
+										else						fsm_next_state = FSM_STATE_EXP_IDLE;
+			//
+			FSM_STATE_EXP_INIT_1:									fsm_next_state = FSM_STATE_EXP_INIT_2;
+			FSM_STATE_EXP_INIT_2:									fsm_next_state = FSM_STATE_EXP_INIT_3;
+			FSM_STATE_EXP_INIT_3:									fsm_next_state = FSM_STATE_EXP_INIT_4;
+			FSM_STATE_EXP_INIT_4:		if (t1_addr_done)		fsm_next_state = FSM_STATE_EXP_LOAD_1;
+										else						fsm_next_state = FSM_STATE_EXP_INIT_4;
 			//
-			FSM_STATE_INIT_1:									fsm_next_state = FSM_STATE_INIT_2;
-			FSM_STATE_INIT_2:									fsm_next_state = FSM_STATE_INIT_3;
-			FSM_STATE_INIT_3:									fsm_next_state = FSM_STATE_INIT_4;
-			FSM_STATE_INIT_4:		if (t1_addr_done)		fsm_next_state = FSM_STATE_LOAD_1;
-										else						fsm_next_state = FSM_STATE_INIT_4;
+			FSM_STATE_EXP_LOAD_1:									fsm_next_state = FSM_STATE_EXP_LOAD_2;
+			FSM_STATE_EXP_LOAD_2:									fsm_next_state = FSM_STATE_EXP_LOAD_3;
+			FSM_STATE_EXP_LOAD_3:									fsm_next_state = FSM_STATE_EXP_LOAD_4;
+			FSM_STATE_EXP_LOAD_4:		if (t0_addr_done)		fsm_next_state = FSM_STATE_EXP_CALC_1;
+										else						fsm_next_state = FSM_STATE_EXP_LOAD_4;
 			//
-			FSM_STATE_LOAD_1:									fsm_next_state = FSM_STATE_LOAD_2;
-			FSM_STATE_LOAD_2:									fsm_next_state = FSM_STATE_LOAD_3;
-			FSM_STATE_LOAD_3:									fsm_next_state = FSM_STATE_LOAD_4;
-			FSM_STATE_LOAD_4:		if (t0_addr_done)		fsm_next_state = FSM_STATE_CALC_1;
-										else						fsm_next_state = FSM_STATE_LOAD_4;
+			FSM_STATE_EXP_CALC_1:									fsm_next_state = FSM_STATE_EXP_CALC_2;
+			FSM_STATE_EXP_CALC_2:		if (mul_rdy_all)		fsm_next_state = FSM_STATE_EXP_CALC_3;
+										else						fsm_next_state = FSM_STATE_EXP_CALC_2;
+			FSM_STATE_EXP_CALC_3:									fsm_next_state = FSM_STATE_EXP_FILL_1;
 			//
-			FSM_STATE_CALC_1:									fsm_next_state = FSM_STATE_CALC_2;
-			FSM_STATE_CALC_2:		if (mul_rdy_all)		fsm_next_state = FSM_STATE_CALC_3;
-										else						fsm_next_state = FSM_STATE_CALC_2;
-			FSM_STATE_CALC_3:									fsm_next_state = FSM_STATE_FILL_1;
+			FSM_STATE_EXP_FILL_1:									fsm_next_state = FSM_STATE_EXP_FILL_2;
+			FSM_STATE_EXP_FILL_2:									fsm_next_state = FSM_STATE_EXP_FILL_3;
+			FSM_STATE_EXP_FILL_3:									fsm_next_state = FSM_STATE_EXP_FILL_4;
+			FSM_STATE_EXP_FILL_4:		if (p_addr_wr_done)	fsm_next_state = FSM_STATE_EXP_NEXT;
+										else						fsm_next_state = FSM_STATE_EXP_FILL_4;			
 			//
-			FSM_STATE_FILL_1:									fsm_next_state = FSM_STATE_FILL_2;
-			FSM_STATE_FILL_2:									fsm_next_state = FSM_STATE_FILL_3;
-			FSM_STATE_FILL_3:									fsm_next_state = FSM_STATE_FILL_4;
-			FSM_STATE_FILL_4:		if (p_addr_wr_done)	fsm_next_state = FSM_STATE_NEXT;
-										else						fsm_next_state = FSM_STATE_FILL_4;			
+			FSM_STATE_EXP_NEXT:		if (bit_cnt_done)		fsm_next_state = FSM_STATE_EXP_SAVE_1;
+										else						fsm_next_state = FSM_STATE_EXP_LOAD_1;
 			//
-			FSM_STATE_NEXT:		if (bit_cnt_done)		fsm_next_state = FSM_STATE_STOP;
-										else						fsm_next_state = FSM_STATE_LOAD_1;
+			FSM_STATE_EXP_SAVE_1:									fsm_next_state = FSM_STATE_EXP_SAVE_2;
+			FSM_STATE_EXP_SAVE_2:									fsm_next_state = FSM_STATE_EXP_SAVE_3;
+			FSM_STATE_EXP_SAVE_3:									fsm_next_state = FSM_STATE_EXP_SAVE_4;
+			FSM_STATE_EXP_SAVE_4:		if (r_addr_done)		fsm_next_state = FSM_STATE_EXP_STOP;
+										else						fsm_next_state = FSM_STATE_EXP_SAVE_4;
 			//
-			FSM_STATE_STOP:									fsm_next_state = FSM_STATE_IDLE;
+			FSM_STATE_EXP_STOP:									fsm_next_state = FSM_STATE_EXP_IDLE;
 			//
 		endcase
 		//
diff --git a/src/tb/tb_exponentiator.v b/src/tb/tb_exponentiator.v
index 8ca9444..3b612c5 100644
--- a/src/tb/tb_exponentiator.v
+++ b/src/tb/tb_exponentiator.v
@@ -180,7 +180,7 @@ module tb_exponentiator;
 		.r_bram_in				(core_r_data_in), 
 		.r_bram_wr				(core_r_wren), 
 		
-		.n_num_words			(n_num_words),
+		.m_num_words			(n_num_words),
 		.d_num_bits				(d_num_bits)
 	);
 



More information about the Commits mailing list