[Cryptech-Commits] [core/math/modexpa7] branch systolic updated: Started adding exponentiator module w/ testbench.

git at cryptech.is git at cryptech.is
Mon Jul 17 23:27:27 UTC 2017


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch systolic
in repository core/math/modexpa7.

The following commit(s) were added to refs/heads/systolic by this push:
     new d887154  Started adding exponentiator module w/ testbench.
d887154 is described below

commit d88715489690e1d77558bb2d89adce92ecabdc84
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Tue Jul 18 02:26:18 2017 +0300

    Started adding exponentiator module w/ testbench.
---
 src/rtl/modexpa7_exponentiator.v   | 578 +++++++++++++++++++++++++++++++++++++
 src/tb/modexp_fpga_model_vectors.v |  22 ++
 src/tb/tb_exponentiator.v          | 424 +++++++++++++++++++++++++++
 3 files changed, 1024 insertions(+)

diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v
new file mode 100644
index 0000000..1f55cec
--- /dev/null
+++ b/src/rtl/modexpa7_exponentiator.v
@@ -0,0 +1,578 @@
+//======================================================================
+//
+// modexpa7_exponentiator.v
+// -----------------------------------------------------------------------------
+// Modular Montgomery Exponentiator.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2017, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module modexpa7_exponentiator #
+	(
+			//
+			// This sets the address widths of memory buffers. Internal data
+			// width is 32 bits, so e.g. for 2048-bit operands the buffers must store
+			// 2048 / 32 = 64 words, and these need a 6-bit address bus, because
+			// 2 ** 6 = 64.
+			//
+		parameter	OPERAND_ADDR_WIDTH		= 4,
+		
+			//
+			// Sets the size of the systolic array: 2 ** SYSTOLIC_ARRAY_POWER taps (e.g. 2 -> 4 taps).
+			//
+		parameter	SYSTOLIC_ARRAY_POWER		= 2
+	)
+	(
+		input											clk,
+		input											rst_n,
+
+		input											ena,
+		output										rdy,
+
+		output	[OPERAND_ADDR_WIDTH-1:0]	m_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	d_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	n1_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	n2_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	n_coeff1_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	n_coeff2_bram_addr,
+		output	[OPERAND_ADDR_WIDTH-1:0]	r_bram_addr,
+
+		input		[                32-1:0]	m_bram_out,
+		input		[                32-1:0]	d_bram_out,
+		input		[                32-1:0]	n1_bram_out,
+		input		[                32-1:0]	n2_bram_out,
+		input		[                32-1:0]	n_coeff1_bram_out,
+		input		[                32-1:0]	n_coeff2_bram_out,
+
+		output	[                32-1:0]	r_bram_in,
+		output										r_bram_wr,
+
+		input		[OPERAND_ADDR_WIDTH-1:0]	n_num_words,
+		input		[OPERAND_ADDR_WIDTH+4:0]	d_num_bits
+	);
+	
+	
+		//
+		// FSM Declaration
+		//
+	localparam	[ 7: 0]	FSM_STATE_IDLE		= 8'h00;
+
+	localparam	[ 7: 0]	FSM_STATE_INIT_1	= 8'hA1;
+	localparam	[ 7: 0]	FSM_STATE_INIT_2	= 8'hA2;
+	localparam	[ 7: 0]	FSM_STATE_INIT_3	= 8'hA3;
+	localparam	[ 7: 0]	FSM_STATE_INIT_4	= 8'hA4;
+
+	localparam	[ 7: 0]	FSM_STATE_LOAD_1	= 8'hB1;
+	localparam	[ 7: 0]	FSM_STATE_LOAD_2	= 8'hB2;
+	localparam	[ 7: 0]	FSM_STATE_LOAD_3	= 8'hB3;
+	localparam	[ 7: 0]	FSM_STATE_LOAD_4	= 8'hB4;
+
+	localparam	[ 7: 0]	FSM_STATE_CALC_1	= 8'hC1;
+	localparam	[ 7: 0]	FSM_STATE_CALC_2	= 8'hC2;
+	localparam	[ 7: 0]	FSM_STATE_CALC_3	= 8'hC3;
+
+	localparam	[ 7: 0]	FSM_STATE_FILL_1	= 8'hD1;
+	localparam	[ 7: 0]	FSM_STATE_FILL_2	= 8'hD2;
+	localparam	[ 7: 0]	FSM_STATE_FILL_3	= 8'hD3;
+	localparam	[ 7: 0]	FSM_STATE_FILL_4	= 8'hD4;
+
+	localparam	[ 7: 0]	FSM_STATE_NEXT		= 8'hE0;
+	
+	localparam	[ 7: 0]	FSM_STATE_STOP		= 8'hFF;
+	
+		//
+		// FSM State / Next State
+		//
+	reg	[ 7: 0]	fsm_state = FSM_STATE_IDLE;
+	reg	[ 7: 0]	fsm_next_state;
+
+
+		//
+		// Enable Delay and Trigger
+		//
+   reg ena_dly = 1'b0;
+	
+		/* delay enable by one clock cycle */
+   always @(posedge clk) ena_dly <= ena;
+
+		/* trigger new operation when enable goes high */
+   wire ena_trig = ena && !ena_dly;
+	
+	
+		//
+		// Ready Flag Logic
+		//
+	reg rdy_reg = 1'b1;
+	assign rdy = rdy_reg;
+
+   always @(posedge clk or negedge rst_n)
+		
+			/* reset flag */
+		if (rst_n == 1'b0) rdy_reg <= 1'b1;
+		else begin
+		
+				/* clear flag when operation is started */
+			if (fsm_state == FSM_STATE_IDLE)	rdy_reg <= ~ena_trig;
+			
+				/* set flag after operation is finished */
+			if (fsm_state == FSM_STATE_STOP)	rdy_reg <= 1'b1;			
+			
+		end
+		
+		
+		//
+		// Parameters Latch
+		//
+	reg	[OPERAND_ADDR_WIDTH-1:0]	n_num_words_latch;
+	reg	[OPERAND_ADDR_WIDTH+4:0]	d_num_bits_latch;
+
+		/* latch n_num_words and d_num_bits when a new operation starts */
+	always @(posedge clk)
+		//
+		if (fsm_next_state == FSM_STATE_INIT_1)
+			{n_num_words_latch, d_num_bits_latch} <= {n_num_words, d_num_bits};
+			
+
+		//
+		// Block Memory Addresses
+		//
+		
+		/*
+		 * Explain what every memory does.
+		 *
+		 */
+		
+		/* the very first addresses */
+	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_zero		= {{OPERAND_ADDR_WIDTH{1'b0}}};
+	
+		/* the very last addresses */
+	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_last		= {n_num_words_latch};
+
+		/* address registers */
+	reg	[OPERAND_ADDR_WIDTH-1:0]	m_addr;
+	reg	[OPERAND_ADDR_WIDTH-1:0]	d_addr;
+	reg	[OPERAND_ADDR_WIDTH-1:0]	r_addr;
+	reg	[OPERAND_ADDR_WIDTH-1:0]	t0_addr;
+	reg	[OPERAND_ADDR_WIDTH-1:0]	t1_addr;
+	reg	[OPERAND_ADDR_WIDTH-1:0]	t2_addr_wr;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	t2_addr_rd;
+	reg	[OPERAND_ADDR_WIDTH-1:0]	p_addr_wr;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	p1_addr_rd;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	p2_addr_rd;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	p3_addr_rd;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	pp_addr_wr;
+	reg	[OPERAND_ADDR_WIDTH-1:0]	pp_addr_rd;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	tp_addr_wr;
+	reg	[OPERAND_ADDR_WIDTH-1:0]	tp_addr_rd;
+		
+		/* handy increment values */
+	wire	[OPERAND_ADDR_WIDTH-1:0]	m_addr_next			= m_addr + 1'b1;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	d_addr_next			= d_addr + 1'b1;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	r_addr_next			= r_addr + 1'b1;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	t0_addr_next		= t0_addr + 1'b1;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	t1_addr_next		= t1_addr + 1'b1;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	t2_addr_wr_next	= t2_addr_wr + 1'b1;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	p_addr_wr_next		= p_addr_wr + 1'b1;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	pp_addr_rd_next	= pp_addr_rd + 1'b1;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	tp_addr_rd_next	= tp_addr_rd + 1'b1;
+	
+		/* handy stop flags */
+	wire	m_addr_done			= (m_addr     == bram_addr_last) ? 1'b1 : 1'b0;
+	wire	d_addr_done			= (d_addr     == bram_addr_last) ? 1'b1 : 1'b0;
+	wire	r_addr_done			= (r_addr     == bram_addr_last) ? 1'b1 : 1'b0;
+	wire	t0_addr_done		= (t0_addr    == bram_addr_last) ? 1'b1 : 1'b0;
+	wire	t1_addr_done		= (t1_addr    == bram_addr_last) ? 1'b1 : 1'b0;
+	wire	t2_addr_wr_done	= (t2_addr_wr == bram_addr_last) ? 1'b1 : 1'b0;
+	wire	p_addr_wr_done		= (p_addr_wr  == bram_addr_last) ? 1'b1 : 1'b0;
+	wire	pp_addr_rd_done	= (pp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
+	wire	tp_addr_rd_done	= (tp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
+				
+		/* map registers to top-level ports */
+	assign m_bram_addr = m_addr;
+	assign d_bram_addr = d_addr;
+	assign r_bram_addr = r_addr;
+	
+		//
+		// Internal Memories
+		//
+
+		/* memory inputs */
+	reg	[31: 0]	t0_data_in;
+	reg	[31: 0]	t1_data_in;
+	reg	[31: 0]	t2_data_in;
+	reg	[31: 0]	p_data_in;
+	wire	[31: 0]	pp_data_in;
+	wire	[31: 0]	tp_data_in;
+
+		/* memory outputs */
+	wire	[31: 0]	t0_data_out;
+	wire	[31: 0]	t1_data_out;
+	wire	[31: 0]	t2_data_out;
+	wire	[31: 0]	p1_data_out;
+	wire	[31: 0]	p2_data_out;
+	wire	[31: 0]	p3_data_out;
+	wire	[31: 0]	pp_data_out;
+	wire	[31: 0]	tp_data_out;
+
+		/* write enables */
+	reg	t0_wren;
+	reg	t1_wren;
+	reg	t2_wren;
+	reg	p_wren;
+	wire	pp_wren;
+	wire	tp_wren;
+
+	bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_t0 (.clk(clk), .a_addr(t0_addr), .a_wr(t0_wren), .a_in(t0_data_in), .a_out(t0_data_out));
+
+	bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_t1 (.clk(clk), .a_addr(t1_addr), .a_wr(t1_wren), .a_in(t1_data_in), .a_out(t1_data_out));
+	
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_t2 (.clk(clk),
+		.a_addr(t2_addr_wr), .a_wr(t2_wren), .a_in(t2_data_in), .a_out(),
+		.b_addr(t2_addr_rd), .b_out(t2_data_out));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_p1 (.clk(clk),
+		.a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+		.b_addr(p1_addr_rd), .b_out(p1_data_out));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_p2 (.clk(clk),
+		.a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+		.b_addr(p2_addr_rd), .b_out(p2_data_out));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_p3 (.clk(clk),
+		.a_addr(p_addr_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+		.b_addr(p3_addr_rd), .b_out(p3_data_out));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_pp (.clk(clk),
+		.a_addr(pp_addr_wr), .a_wr(pp_wren), .a_in(pp_data_in), .a_out(),
+		.b_addr(pp_addr_rd), .b_out(pp_data_out));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_tp (.clk(clk),
+		.a_addr(tp_addr_wr), .a_wr(tp_wren), .a_in(tp_data_in), .a_out(),
+		.b_addr(tp_addr_rd), .b_out(tp_data_out));
+
+
+
+		//
+		// Memory Address Control Logic
+		//
+	always @(posedge clk) begin
+		//
+		// m_addr
+		//
+		case (fsm_next_state)
+			FSM_STATE_INIT_1: 	m_addr <= bram_addr_zero;
+			FSM_STATE_INIT_2,
+			FSM_STATE_INIT_3,
+			FSM_STATE_INIT_4:		m_addr <= !m_addr_done ? m_addr_next : m_addr;
+		endcase
+		//
+		// p_addr_wr
+		//
+		case (fsm_next_state)
+			FSM_STATE_INIT_3,
+			FSM_STATE_FILL_3:		p_addr_wr <= bram_addr_zero;
+			FSM_STATE_INIT_4,
+			FSM_STATE_FILL_4:		p_addr_wr <= p_addr_wr_next;
+		endcase
+		//
+		// t0_addr
+		//
+		case (fsm_next_state)
+			FSM_STATE_LOAD_3:		t0_addr <= bram_addr_zero;
+			FSM_STATE_LOAD_4:		t0_addr <= t0_addr_next;
+		endcase		
+		//
+		// t1_addr
+		//
+		case (fsm_next_state)
+			FSM_STATE_INIT_3:		t1_addr <= bram_addr_zero;
+			FSM_STATE_INIT_4:		t1_addr <= t1_addr_next;
+			//
+			FSM_STATE_LOAD_1: 	t1_addr <= bram_addr_zero;
+			FSM_STATE_LOAD_2,
+			FSM_STATE_LOAD_3,
+			FSM_STATE_LOAD_4:		t1_addr <= !t1_addr_done ? t1_addr_next : t1_addr;
+		endcase
+		//
+		// t2_addr_wr
+		//
+		case (fsm_next_state)
+			FSM_STATE_INIT_3:		t2_addr_wr	<= bram_addr_zero;
+			FSM_STATE_INIT_4:		t2_addr_wr	<= t2_addr_wr_next;
+		endcase		
+		//
+		// pp_addr_rd
+		//
+		case (fsm_next_state)
+			FSM_STATE_FILL_1: 	pp_addr_rd <= bram_addr_zero;
+			FSM_STATE_FILL_2,
+			FSM_STATE_FILL_3,
+			FSM_STATE_FILL_4:		pp_addr_rd <= !pp_addr_rd_done ? pp_addr_rd_next : pp_addr_rd;
+		endcase
+		//
+	end
+
+
+		//
+		// Memory Write Enable Logic
+		//
+	always @(posedge clk) begin
+		//
+		// p_wren
+		//
+		case (fsm_next_state)
+			FSM_STATE_INIT_3,		
+			FSM_STATE_INIT_4,
+			FSM_STATE_FILL_3,
+			FSM_STATE_FILL_4:		p_wren <= 1'b1;
+			default:					p_wren <= 1'b0;
+		endcase
+		//
+		// t0_wren
+		//
+		case (fsm_next_state)
+			FSM_STATE_LOAD_3,		
+			FSM_STATE_LOAD_4:		t0_wren <= 1'b1;
+			default:					t0_wren <= 1'b0;
+		endcase
+		//
+		// t1_wren
+		//
+		case (fsm_next_state)
+			FSM_STATE_INIT_3,		
+			FSM_STATE_INIT_4:		t1_wren <= 1'b1;
+			default:					t1_wren <= 1'b0;
+		endcase
+		//
+		// t2_wren
+		//
+		case (fsm_next_state)
+			FSM_STATE_INIT_3,		
+			FSM_STATE_INIT_4:		t2_wren <= 1'b1;
+			default:					t2_wren <= 1'b0;
+		endcase
+		//
+	end
+	
+	
+		//
+		// Memory Input Selector
+		//
+	always @(posedge clk) begin
+		//
+		case (fsm_next_state)
+			FSM_STATE_INIT_3:		{t2_data_in, t1_data_in}	<= {2{32'd1}};
+			FSM_STATE_INIT_4:		{t2_data_in, t1_data_in}	<= {2{32'd0}};
+			default:					{t2_data_in, t1_data_in}	<= {2{32'dX}};
+		endcase
+		//
+		case (fsm_next_state)
+			FSM_STATE_INIT_3,
+			FSM_STATE_INIT_4:		p_data_in	<= m_bram_out;
+			//
+			FSM_STATE_FILL_3,
+			FSM_STATE_FILL_4:		p_data_in	<= pp_data_out;
+			//
+			default:					p_data_in	<= 32'dX;
+		endcase
+		//
+		// t0_data_in
+		//
+		case (fsm_next_state)
+			FSM_STATE_LOAD_3,
+			FSM_STATE_LOAD_4:		t0_data_in	<= t1_data_out;
+			default:					t0_data_in	<= 32'dX;
+		endcase		
+		//
+	end
+	
+	
+		//
+		// Double Multiplier
+		//
+	reg	mul_ena;
+	wire	mul_rdy_pp;
+	wire	mul_rdy_tp;
+	wire	mul_rdy_all = mul_rdy_pp & mul_rdy_tp;
+
+	modexpa7_systolic_multiplier #
+	(
+		.OPERAND_ADDR_WIDTH		(OPERAND_ADDR_WIDTH),
+		.SYSTOLIC_ARRAY_POWER	(SYSTOLIC_ARRAY_POWER)
+	)
+	mul_pp
+	(
+		.clk						(clk),
+		.rst_n					(rst_n),
+
+		.ena						(mul_ena),
+		.rdy						(mul_rdy_pp),
+
+		.a_bram_addr			(p1_addr_rd),
+		.b_bram_addr			(p2_addr_rd),
+		.n_bram_addr			(n1_bram_addr),
+		.n_coeff_bram_addr	(n_coeff1_bram_addr),
+		.r_bram_addr			(pp_addr_wr),
+
+		.a_bram_out				(p1_data_out),
+		.b_bram_out				(p2_data_out),
+		.n_bram_out				(n1_bram_out),
+		.n_coeff_bram_out		(n_coeff1_bram_out),
+
+		.r_bram_in				(pp_data_in),
+		.r_bram_wr				(pp_wren),
+
+		.ab_num_words			(n_num_words_latch)
+	);
+
+	modexpa7_systolic_multiplier #
+	(
+		.OPERAND_ADDR_WIDTH		(OPERAND_ADDR_WIDTH),
+		.SYSTOLIC_ARRAY_POWER	(SYSTOLIC_ARRAY_POWER)
+	)
+	mul_tp
+	(
+		.clk						(clk),
+		.rst_n					(rst_n),
+
+		.ena						(mul_ena),
+		.rdy						(mul_rdy_tp),
+
+		.a_bram_addr			(t2_addr_rd),
+		.b_bram_addr			(p3_addr_rd),
+		.n_bram_addr			(n2_bram_addr),
+		.n_coeff_bram_addr	(n_coeff2_bram_addr),
+		.r_bram_addr			(tp_addr_wr),
+
+		.a_bram_out				(t2_data_out),
+		.b_bram_out				(p3_data_out),
+		.n_bram_out				(n2_bram_out),
+		.n_coeff_bram_out		(n_coeff2_bram_out),
+
+		.r_bram_in				(tp_data_in),
+		.r_bram_wr				(tp_wren),
+
+		.ab_num_words			(n_num_words_latch)
+	);
+	
+	
+	always @(posedge clk)
+		//
+		mul_ena <= (fsm_next_state == FSM_STATE_CALC_1) ? 1'b1 : 1'b0;
+		
+		
+		//
+		// Bit Counter
+		//
+	reg	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt;
+		
+	wire	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt_zero = {{OPERAND_ADDR_WIDTH{1'b0}}, {5{1'b0}}};
+	wire	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt_last = d_num_bits_latch;
+	wire	[OPERAND_ADDR_WIDTH+4:0]	bit_cnt_next = bit_cnt + 1'b1;
+
+		/* handy flag */
+	wire	bit_cnt_done = (bit_cnt == bit_cnt_last) ? 1'b1 : 1'b0;
+	
+	always @(posedge clk)
+		//
+		if (fsm_next_state == FSM_STATE_LOAD_1)
+			//
+			case (fsm_state)
+				FSM_STATE_INIT_4: bit_cnt <= bit_cnt_zero;
+				FSM_STATE_NEXT:   bit_cnt <= !bit_cnt_done ? bit_cnt_next : bit_cnt;
+			endcase
+
+	
+
+		//
+		// FSM Process
+		//
+	always @(posedge clk or negedge rst_n)
+		//
+		if (rst_n == 1'b0)	fsm_state <= FSM_STATE_IDLE;
+		else						fsm_state <= fsm_next_state;
+	
+	
+		//
+		// FSM Transition Logic (combinational next-state decoder)
+		//
+	always @* begin
+		// default: any state value not listed in the case below leads to STOP
+		fsm_next_state = FSM_STATE_STOP;
+		//
+		case (fsm_state)
+			// IDLE: wait for a rising edge on ena
+			FSM_STATE_IDLE:		if (ena_trig)			fsm_next_state = FSM_STATE_INIT_1;
+										else						fsm_next_state = FSM_STATE_IDLE;
+			// INIT: copy M into P1..P3, write 1 into word 0 and 0 into the other words of T1 and T2
+			FSM_STATE_INIT_1:									fsm_next_state = FSM_STATE_INIT_2;
+			FSM_STATE_INIT_2:									fsm_next_state = FSM_STATE_INIT_3;
+			FSM_STATE_INIT_3:									fsm_next_state = FSM_STATE_INIT_4;
+			FSM_STATE_INIT_4:		if (t1_addr_done)		fsm_next_state = FSM_STATE_LOAD_1;
+										else						fsm_next_state = FSM_STATE_INIT_4;
+			// LOAD: copy T1 into T0 until t0_addr reaches the last word
+			FSM_STATE_LOAD_1:									fsm_next_state = FSM_STATE_LOAD_2;
+			FSM_STATE_LOAD_2:									fsm_next_state = FSM_STATE_LOAD_3;
+			FSM_STATE_LOAD_3:									fsm_next_state = FSM_STATE_LOAD_4;
+			FSM_STATE_LOAD_4:		if (t0_addr_done)		fsm_next_state = FSM_STATE_CALC_1;
+										else						fsm_next_state = FSM_STATE_LOAD_4;
+			// CALC: mul_ena is raised for CALC_1 only, then wait in CALC_2 until both multipliers are ready
+			FSM_STATE_CALC_1:									fsm_next_state = FSM_STATE_CALC_2;
+			FSM_STATE_CALC_2:		if (mul_rdy_all)		fsm_next_state = FSM_STATE_CALC_3;
+										else						fsm_next_state = FSM_STATE_CALC_2;
+			FSM_STATE_CALC_3:									fsm_next_state = FSM_STATE_FILL_1;
+			// FILL: copy PP into P1..P3 until p_addr_wr reaches the last word
+			FSM_STATE_FILL_1:									fsm_next_state = FSM_STATE_FILL_2;
+			FSM_STATE_FILL_2:									fsm_next_state = FSM_STATE_FILL_3;
+			FSM_STATE_FILL_3:									fsm_next_state = FSM_STATE_FILL_4;
+			FSM_STATE_FILL_4:		if (p_addr_wr_done)	fsm_next_state = FSM_STATE_NEXT;
+										else						fsm_next_state = FSM_STATE_FILL_4;			
+			// NEXT: stop once bit_cnt equals d_num_bits_latch, otherwise run another LOAD/CALC/FILL round
+			FSM_STATE_NEXT:		if (bit_cnt_done)		fsm_next_state = FSM_STATE_STOP;
+										else						fsm_next_state = FSM_STATE_LOAD_1;
+			// STOP: one-cycle state in which rdy is set again, then back to IDLE
+			FSM_STATE_STOP:									fsm_next_state = FSM_STATE_IDLE;
+			//
+		endcase
+		//
+	end
+			
+
+endmodule
diff --git a/src/tb/modexp_fpga_model_vectors.v b/src/tb/modexp_fpga_model_vectors.v
index 7a2b8e9..d5284c9 100644
--- a/src/tb/modexp_fpga_model_vectors.v
+++ b/src/tb/modexp_fpga_model_vectors.v
@@ -30,6 +30,16 @@ localparam [383:0] M_FACTOR_384 =
 	 32'h91e92683, 32'hc483bb6c, 32'h0ee1571d, 32'h6e28c2f5, 
 	 32'hff5e6b61, 32'h65fb6164, 32'hd3651e5a, 32'h746b8ca0};
 
+localparam [383:0] D_384 =
+	{32'had24a30c, 32'h766d8dc3, 32'he2100b02, 32'h24d1c4b0, 
+	 32'hbb6a6342, 32'h577df9be, 32'h89bb1ec3, 32'hdc3259f0, 
+	 32'h1a343f93, 32'h57a12599, 32'ha328ae2f, 32'hf85ef401};
+
+localparam [383:0] S_384 =
+	{32'h65752d0f, 32'h9a017293, 32'h36bfa115, 32'h4a7a81fc, 
+	 32'ha76b945b, 32'h49a3f645, 32'h76801499, 32'hb98e6a16, 
+	 32'hd2467b6a, 32'h75b7d614, 32'h0fff0fde, 32'hb31d1819};
+
 localparam [511:0] M_512 =
 	{32'h005536b6, 32'h43ea651f, 32'h2fd3c70a, 32'ha83659cb, 
 	 32'hd0c1f47b, 32'ha8033730, 32'h29c6b082, 32'h6db48613, 
@@ -66,3 +76,15 @@ localparam [511:0] M_FACTOR_512 =
 	 32'h663032a3, 32'h70734b62, 32'h2d30c132, 32'hefa75cc6, 
 	 32'h9f18b32a, 32'h97d6ddf8, 32'h2f6df2d0, 32'he9098874};
 
+localparam [511:0] D_512 =
+	{32'hc9686c43, 32'hbbe28d66, 32'h758ef8bc, 32'h9b7828e5, 
+	 32'h2ec2804a, 32'hb76745de, 32'h83fcbba0, 32'h2d9eba78, 
+	 32'h215f4cc2, 32'hf49387b3, 32'h8ed0b9dc, 32'h6c129231, 
+	 32'h944368be, 32'hdbf2db79, 32'h16323c49, 32'h34cdf801};
+
+localparam [511:0] S_512 =
+	{32'hcc2fc6b6, 32'he4849987, 32'h75773499, 32'hcb0792b0, 
+	 32'he79f4600, 32'hb2d739c5, 32'h1a661ac6, 32'hd3bf2db5, 
+	 32'hfd1e029d, 32'hfe887387, 32'h4312635f, 32'hb2b54b8d, 
+	 32'h5d3b379e, 32'h161eaa4f, 32'hedfd932b, 32'h780f0203};
+
diff --git a/src/tb/tb_exponentiator.v b/src/tb/tb_exponentiator.v
new file mode 100644
index 0000000..8ca9444
--- /dev/null
+++ b/src/tb/tb_exponentiator.v
@@ -0,0 +1,424 @@
+//======================================================================
+//
+// tb_exponentiator.v
+// -----------------------------------------------------------------------------
+// Testbench for Montgomery modular exponentiation block.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2017, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module tb_exponentiator;
+
+		//
+		// Test Vectors
+		//
+	`include "modexp_fpga_model_vectors.v";
+	
+		//
+		// Parameters
+		//
+	localparam NUM_WORDS_384 = 384 / 32;
+	localparam NUM_WORDS_512 = 512 / 32;
+		
+		//
+		// Clock (100 MHz)
+		//
+	reg clk = 1'b0;
+	always #5 clk = ~clk;
+			
+		//
+		// Inputs
+		//
+	reg				rst_n;
+	reg				ena;
+	
+	reg	[ 3: 0]	n_num_words;
+	reg	[ 8: 0]	d_num_bits;
+
+		//
+		// Outputs
+		//
+	wire				rdy;
+
+		//
+		// Integers
+		//
+	integer w;
+	
+		//
+		// BRAM Interfaces
+		//
+	wire	[ 3: 0]	core_m_addr;
+	wire	[ 3: 0]	core_d_addr;
+	wire	[ 3: 0]	core_n1_addr;
+	wire	[ 3: 0]	core_n2_addr;
+	wire	[ 3: 0]	core_n_coeff1_addr;
+	wire	[ 3: 0]	core_n_coeff2_addr;
+	wire	[ 3: 0]	core_r_addr;
+	
+	wire	[31: 0]	core_m_data;
+	wire	[31: 0]	core_d_data;
+	wire	[31: 0]	core_n1_data;
+	wire	[31: 0]	core_n2_data;
+	wire	[31: 0]	core_n_coeff1_data;
+	wire	[31: 0]	core_n_coeff2_data;
+	wire	[31: 0]	core_r_data_in;
+
+	wire				core_r_wren;
+
+	reg	[ 3: 0]	tb_mdn_addr;
+	reg	[ 3: 0]	tb_r_addr;
+
+	reg	[31:0]	tb_m_data;
+	reg	[31:0]	tb_d_data;
+	reg	[31:0]	tb_n_data;
+	reg	[31:0]	tb_n_coeff_data;
+	wire	[31:0]	tb_r_data;
+	
+	reg				tb_mdn_wren;
+	
+		//
+		// BRAMs
+		//
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+	bram_m (.clk(clk),
+		.a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_m_data), .a_out(),
+		.b_addr(core_m_addr), .b_out(core_m_data));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+	bram_d (.clk(clk),
+		.a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_d_data), .a_out(),
+		.b_addr(core_d_addr), .b_out(core_d_data));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+	bram_n1 (.clk(clk),
+		.a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_data), .a_out(),
+		.b_addr(core_n1_addr), .b_out(core_n1_data));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+	bram_n2 (.clk(clk),
+		.a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_data), .a_out(),
+		.b_addr(core_n2_addr), .b_out(core_n2_data));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+	bram_n_coeff1 (.clk(clk),
+		.a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_coeff_data), .a_out(),
+		.b_addr(core_n_coeff1_addr), .b_out(core_n_coeff1_data));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+	bram_n_coeff2 (.clk(clk),
+		.a_addr(tb_mdn_addr), .a_wr(tb_mdn_wren), .a_in(tb_n_coeff_data), .a_out(),
+		.b_addr(core_n_coeff2_addr), .b_out(core_n_coeff2_data));
+		
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(4))
+	bram_r (.clk(clk),
+		.a_addr(core_r_addr), .a_wr(core_r_wren), .a_in(core_r_data_in), .a_out(),
+		.b_addr(tb_r_addr), .b_out(tb_r_data));
+
+		//
+		// UUT (each BRAM address/data port pair is wired to its own testbench BRAM)
+		//
+	modexpa7_exponentiator #
+	(
+		.OPERAND_ADDR_WIDTH		(4),	// 32 * (2**4) = 512-bit operands
+		.SYSTOLIC_ARRAY_POWER	(2)	// 2 ** 2 = 4-tap systolic array
+	)
+	uut
+	(
+		.clk						(clk), 
+		.rst_n					(rst_n), 
+		
+		.ena						(ena), 
+		.rdy						(rdy), 
+		
+		.m_bram_addr			(core_m_addr),
+		.d_bram_addr			(core_d_addr),
+		.n1_bram_addr			(core_n1_addr),
+		.n2_bram_addr			(core_n2_addr),
+		.n_coeff1_bram_addr	(core_n_coeff1_addr),
+		.n_coeff2_bram_addr	(core_n_coeff2_addr),
+		.r_bram_addr			(core_r_addr),
+
+		.m_bram_out				(core_m_data), 
+		.d_bram_out				(core_d_data), 
+		.n1_bram_out			(core_n1_data), 
+		.n2_bram_out			(core_n2_data), 
+		.n_coeff1_bram_out	(core_n_coeff1_data), 
+		.n_coeff2_bram_out	(core_n_coeff2_data),	// data of bram_n_coeff2, which is addressed by core_n_coeff2_addr
+		
+		.r_bram_in				(core_r_data_in), 
+		.r_bram_wr				(core_r_wren), 
+		
+		.n_num_words			(n_num_words),
+		.d_num_bits				(d_num_bits)
+	);
+
+
+		//
+		// Script
+		//
+	initial begin
+
+		rst_n = 1'b0;
+		ena = 1'b0;
+		
+		#200;		
+		rst_n = 1'b1;
+		#100;
+		
+		test_exponent_384(M_FACTOR_384, D_384, N_384, N_COEFF_384, S_384);
+		//test_exponent_512(M_512);
+		
+	end
+      
+		
+		//
+		// Test Tasks
+		//
+		
+	task test_exponent_384;
+		// Runs one 384-bit operation on the UUT and compares the value read back from bram_r against s.
+		input	[383:0] m;			// written to bram_m
+		input	[383:0] d;			// written to bram_d
+		input	[383:0] n;			// written to bram_n1 and bram_n2
+		input	[383:0] n_coeff;	// written to bram_n_coeff1 and bram_n_coeff2
+		input	[383:0] s;			// expected result
+		reg   [383:0] r;			// result read back from bram_r
+		//
+		integer i;					// NOTE(review): not used anywhere in this task
+		//
+		begin
+			// the UUT uses both values as "last index" limits (last word address, last bit counter value)
+			n_num_words = 4'd11;								// index of the last word (NUM_WORDS_384 - 1)
+			d_num_bits = 9'd383;								// index of the last bit (384 - 1)
+			//
+			write_memory_384(m, d, n, n_coeff);			// fill memory
+			
+			ena = 1;												// start operation
+			#10;													// hold for one clock period so the UUT samples the rising edge
+			ena = 0;												// clear flag
+			
+			while (!rdy) #10;									// wait for operation to complete
+			read_memory_384(r);								// get result from memory
+						
+			$display("    calculated: %x", r);			// display result
+			$display("    expected:   %x", s);			//
+							
+				// check calculated value (=== also fails the test when r contains X or Z bits)
+			if (r === s) begin
+				$display("        OK");
+				$display("SUCCESS: Test passed.");
+			end else begin
+				$display("        ERROR");
+				$display("FAILURE: Test not passed.");
+			end
+			//
+		end
+		//
+	endtask
+	/*
+	task test_factor_512;
+		//
+		input	[511:0] n;
+		reg	[511:0] f;
+		reg	[511:0] factor;
+		integer i;
+		//
+		begin
+			//			
+			calc_factor_512(n, f);	// calculate factor on-the-fly
+			
+				// make sure, that the value matches the one saved in the include file
+			if (f !== FACTOR_512) begin
+				$display("ERROR: Calculated factor value differs from the one in the test vector!");
+				$finish;
+			end
+			
+			
+			n_num_words = 4'd15;								// set number of words
+			write_memory_512(n);								// fill memory
+			
+			ena = 1;												// start operation
+			#10;													//
+			ena = 0;												// clear flag
+			
+			while (!rdy) #10;									// wait for operation to complete
+			read_memory_512(factor);						// get result from memory
+						
+			$display("    calculated: %x", factor);	// display result
+			$display("    expected:   %x", f);			//
+							
+				// check calculated value
+			if (f === factor) begin
+				$display("        OK");
+				$display("SUCCESS: Test passed.");
+			end else begin
+				$display("        ERROR");
+				$display("FAILURE: Test not passed.");
+			end
+			//
+		end
+		//
+	endtask
+	*/
+
+	//
+	// write_memory_384
+	// Writes m, d, n and n_coeff into the operand BRAMs, one 32-bit word per clock tick, lowest word first.
+	task write_memory_384;
+		//
+		input	[383:0] m;			// goes to bram_m via tb_m_data
+		input	[383:0] d;			// goes to bram_d via tb_d_data
+		input	[383:0] n;			// goes to bram_n1 and bram_n2 via tb_n_data
+		input	[383:0] n_coeff;	// goes to bram_n_coeff1 and bram_n_coeff2 via tb_n_coeff_data
+		reg	[383:0] m_shreg;
+		reg	[383:0] d_shreg;
+		reg	[383:0] n_shreg;
+		reg	[383:0] n_coeff_shreg;
+		//
+		begin
+			// all operand BRAMs share one address and one write enable (tb_mdn_addr, tb_mdn_wren)
+			tb_mdn_wren		= 1;			// start filling memories
+			m_shreg			= m;			// preload shift register
+			d_shreg			= d;			// preload shift register
+			n_shreg			= n;			// preload shift register
+			n_coeff_shreg	= n_coeff;	// preload shift register
+			// w is the module-level loop integer shared by the memory tasks
+			for (w=0; w<NUM_WORDS_384; w=w+1) begin							// write all words
+				tb_mdn_addr			= w[3:0];											// set address
+				tb_m_data			= m_shreg[31:0];									// set data
+				tb_d_data			= d_shreg[31:0];									// set data
+				tb_n_data			= n_shreg[31:0];									// set data
+				tb_n_coeff_data	= n_coeff_shreg[31:0];							// set data
+				m_shreg				= {{32{1'bX}}, m_shreg[383:32]};				// shift right by one word, X is shifted in at the top
+				d_shreg				= {{32{1'bX}}, d_shreg[383:32]};				// shift right by one word, X is shifted in at the top
+				n_shreg				= {{32{1'bX}}, n_shreg[383:32]};				// shift right by one word, X is shifted in at the top
+				n_coeff_shreg		= {{32{1'bX}}, n_coeff_shreg[383:32]};		// shift right by one word, X is shifted in at the top
+				#10;																			// wait for 1 clock tick
+			end
+			// drive X on the write port once the last word has been written
+			tb_mdn_addr			= {4{1'bX}};	// wipe addresses
+			tb_m_data			= {32{1'bX}};	// wipe data
+			tb_d_data			= {32{1'bX}};	// wipe data
+			tb_n_data			= {32{1'bX}};	// wipe data
+			tb_n_coeff_data	= {32{1'bX}};	// wipe data
+			tb_mdn_wren	= 0;				// stop filling memory
+			//
+		end
+		//
+	endtask
+			
+	/*	
+	//
+	// write_memory_512
+	//
+	task write_memory_512;
+		//
+		input	[511:0] n;
+		reg	[511:0] n_shreg;
+		//
+		begin
+			//
+			tb_n_wren	= 1;	// start filling memories
+			n_shreg		= n;	// preload shift register
+			//
+			for (w=0; w<NUM_WORDS_512; w=w+1) begin				// write all words
+				tb_n_addr	= w[3:0];									// set address
+				tb_n_data	= n_shreg[31:0];							// set data
+				n_shreg		= {{32{1'bX}}, n_shreg[511:32]};		// update shift register
+				#10;															// wait for 1 clock tick
+			end
+			//
+			tb_n_addr	= {4{1'bX}};	// wipe addresses
+			tb_n_data	= {32{1'bX}};	// wipe data
+			tb_n_wren	= 0;				// stop filling memory
+			//
+		end
+		//
+	endtask
+	*/
+
+	//
+	// read_memory_384
+	// Reads NUM_WORDS_384 words from bram_r, word 0 first, and returns them in r with word 0 in bits [31:0].
+	task read_memory_384;
+		//
+		output	[383:0] r;			// assembled 384-bit result
+		reg		[383:0] r_shreg;	// each word enters at the top and is shifted down by later words
+		//
+		begin
+			// w is the module-level loop integer shared by the memory tasks
+			for (w=0; w<NUM_WORDS_384; w=w+1) begin		// read result word-by-word
+				tb_r_addr	= w[3:0];							// set address
+				#10;													// wait for 1 clock tick
+				r_shreg = {tb_r_data, r_shreg[383:32]};	// store data word
+			end
+			//
+			tb_r_addr = {4{1'bX}};								// wipe address
+			r = r_shreg;											// return
+			//
+		end
+		//
+	endtask
+
+	/*
+	//
+	// read_memory_512
+	//
+	task read_memory_512;
+		//
+		output	[511:0] f;
+		reg		[511:0] f_shreg;
+		//
+		begin
+			//
+			for (w=0; w<NUM_WORDS_512; w=w+1) begin		// read result word-by-word
+				tb_f_addr	= w[3:0];							// set address
+				#10;													// wait for 1 clock tick
+				f_shreg = {tb_f_data, f_shreg[511:32]};	// store data word
+			end
+			//
+			tb_f_addr = {4{1'bX}};								// wipe address
+			f = f_shreg;											// return
+			//
+		end
+		//
+	endtask
+	*/
+
+endmodule
+
+//======================================================================
+// End of file
+//======================================================================

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the Commits mailing list