[Cryptech-Commits] [core/math/modexpa7] 01/01: Work in progress.

git at cryptech.is git at cryptech.is
Thu Aug 10 22:17:46 UTC 2017


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch systolic_crt
in repository core/math/modexpa7.

commit e48040122cddd4374d5600b24807ef8189f1c0c2
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Fri Aug 11 01:16:48 2017 +0300

    Work in progress.
---
 src/rtl/modexpa7_exponentiator.v             | 344 +++++++++++++++++----------
 src/rtl/modexpa7_systolic_multiplier.v       |  21 +-
 src/rtl/modexpa7_systolic_multiplier_array.v |  11 +-
 src/rtl/pe/modexpa7_primitive_switch.v       |   2 +-
 src/tb/modexp_fpga_model_vectors.v           |  80 +++++++
 src/tb/tb_exponentiator.v                    | 139 ++++++++++-
 6 files changed, 461 insertions(+), 136 deletions(-)

diff --git a/src/rtl/modexpa7_exponentiator.v b/src/rtl/modexpa7_exponentiator.v
index b33360a..93c8047 100644
--- a/src/rtl/modexpa7_exponentiator.v
+++ b/src/rtl/modexpa7_exponentiator.v
@@ -58,6 +58,8 @@ module modexpa7_exponentiator #
 		input											ena,
 		output										rdy,
 		
+		input											crt,
+		
 		output	[OPERAND_ADDR_WIDTH-1:0]	m_bram_addr,
 		output	[OPERAND_ADDR_WIDTH-1:0]	d_bram_addr,
 		output	[OPERAND_ADDR_WIDTH-1:0]	f_bram_addr,
@@ -86,76 +88,120 @@ module modexpa7_exponentiator #
 		//
 		// FSM Declaration
 		//
-	localparam	[ 7: 0]	FSM_STATE_EXP_IDLE	= 8'h00;
+	localparam	[ 7: 0]	FSM_STATE_EXP_IDLE		= 8'h00;
+	//
+	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_1		= 8'hA1;
+	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_2		= 8'hA2;
+	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_3		= 8'hA3;
+	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_4		= 8'hA4;
+
+	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_1		= 8'hB1;
+	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_2		= 8'hB2;
+	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_3		= 8'hB3;
+	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_4		= 8'hB4;
+
+	localparam	[ 7: 0]	FSM_STATE_EXP_CALC_1		= 8'hC1;
+	localparam	[ 7: 0]	FSM_STATE_EXP_CALC_2		= 8'hC2;
+	localparam	[ 7: 0]	FSM_STATE_EXP_CALC_3		= 8'hC3;
+
+	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_1		= 8'hD1;
+	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_2		= 8'hD2;
+	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_3		= 8'hD3;
+	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_4		= 8'hD4;
+
+	localparam	[ 7: 0]	FSM_STATE_EXP_NEXT		= 8'hE0;
+
+	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_1		= 8'hF1;
+	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_2		= 8'hF2;
+	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_3		= 8'hF3;
+	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_4		= 8'hF4;
+	//
+	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_1		= 8'h11;
+	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_2		= 8'h12;
+	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_3		= 8'h13;
+	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_4		= 8'h14;
+
+	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_1		= 8'h21;
+	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_2		= 8'h22;
+	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_3		= 8'h23;
 	//
-	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_1	= 8'hA1;
-	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_2	= 8'hA2;
-	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_3	= 8'hA3;
-	localparam	[ 7: 0]	FSM_STATE_EXP_INIT_4	= 8'hA4;
-
-	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_1	= 8'hB1;
-	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_2	= 8'hB2;
-	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_3	= 8'hB3;
-	localparam	[ 7: 0]	FSM_STATE_EXP_LOAD_4	= 8'hB4;
-
-	localparam	[ 7: 0]	FSM_STATE_EXP_CALC_1	= 8'hC1;
-	localparam	[ 7: 0]	FSM_STATE_EXP_CALC_2	= 8'hC2;
-	localparam	[ 7: 0]	FSM_STATE_EXP_CALC_3	= 8'hC3;
-
-	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_1	= 8'hD1;
-	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_2	= 8'hD2;
-	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_3	= 8'hD3;
-	localparam	[ 7: 0]	FSM_STATE_EXP_FILL_4	= 8'hD4;
-
-	localparam	[ 7: 0]	FSM_STATE_EXP_NEXT	= 8'hE0;
-
-	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_1	= 8'hF1;
-	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_2	= 8'hF2;
-	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_3	= 8'hF3;
-	localparam	[ 7: 0]	FSM_STATE_EXP_SAVE_4	= 8'hF4;
+	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_A_1	= 8'h31;
+	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_A_2	= 8'h32;
+	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_A_3	= 8'h33;
+	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_A_4	= 8'h34;
+
+	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_A_1	= 8'h41;
+	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_A_2	= 8'h42;
+	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_A_3	= 8'h43;
 	//
-	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_1	= 8'h11;
-	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_2	= 8'h12;
-	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_3	= 8'h13;
-	localparam	[ 7: 0]	FSM_STATE_MUL_INIT_4	= 8'h14;
-
-	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_1	= 8'h21;
-	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_2	= 8'h22;
-	localparam	[ 7: 0]	FSM_STATE_MUL_CALC_3	= 8'h23;
+	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_B_1	= 8'h51;
+	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_B_2	= 8'h52;
+	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_B_3	= 8'h53;
+	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_B_4	= 8'h54;
+
+	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_B_1	= 8'h61;
+	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_B_2	= 8'h62;
+	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_B_3	= 8'h63;
 	//
-	localparam	[ 7: 0]	FSM_STATE_EXP_STOP	= 8'hFF;
+	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_C_1	= 8'h71;
+	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_C_2	= 8'h72;
+	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_C_3	= 8'h73;
+	localparam	[ 7: 0]	FSM_STATE_CRT_INIT_C_4	= 8'h74;
+
+	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_C_1	= 8'h81;
+	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_C_2	= 8'h82;
+	localparam	[ 7: 0]	FSM_STATE_CRT_CALC_C_3	= 8'h83;
+	//
+	localparam	[ 7: 0]	FSM_STATE_EXP_STOP		= 8'hFF;
 
 
 	/*
 	 *  //
 	 *
-	 *  MUL_INIT:	P1 = F
-	 *             P2 = F
-	 *             P3 = F
-	 *             T2 = M
+	 *  MUL_INIT:		P1 <= F
+	 *             	P2 <= F
+	 *             	P3 <= F
+	 *            		T2 <= M
 	 *
-	 *  MUL_CALC:	TP = T2 * P3
+	 *  MUL_CALC:		TP = T2 * P3
 	 *
 	 *  //
 	 *
-	 *  EXP_INIT:	P1 <= TP
-	 *					P2 <= TP
-	 *					P3 <= TP
-	 *					T1 <= 1
-	 *					T2 <= 1
+	 *  CRT_INIT_A:	T2 <= M
+	 *
+	 *  CRT_CALC_A:	TP = T2 * P3 ("reduce only")
 	 *
-	 *  EXP_LOAD:	T0 <= T1
+	 *  CRT_INIT_B:  	P1 <= F
+	 *						P2 <= F
+	 *						P3 <= F
+	 *						T2 <= TP
 	 *
-	 *  EXP_CALC:	PP = P1 * P2
-	 *					TP = T2 * P3
+	 *  CRT_CALC_B:	TP = T2 * P3
 	 *
-	 *  EXP_FILL:	P1 <= PP
-	 *					P2 <= PP
-	 *					P3 <= PP
-	 *					T1 <= D[i] ? TP : T0
-	 *					T2 <= D[i] ? TP : T0
+	 *  CRT_INIT_C:  	T2 <= TP
 	 *
-	 *  EXP_SAVE:	R  <=  T1
+	 *  CRT_CALC_C:	TP = T2 * P3
+	 *
+	 *  //
+	 *
+	 *  EXP_INIT:		P1 <= TP
+	 *						P2 <= TP
+	 *						P3 <= TP
+	 *						T1 <= 1
+	 *						T2 <= 1
+	 *
+	 *  EXP_LOAD:		T0 <= T1
+	 *
+	 *  EXP_CALC:		PP = P1 * P2
+	 *						TP = T2 * P3
+	 *
+	 *  EXP_FILL:		P1 <= PP
+	 *						P2 <= PP
+	 *						P3 <= PP
+	 *						T1 <= D[i] ? TP : T0
+	 *						T2 <= D[i] ? TP : T0
+	 *
+	 *  EXP_SAVE:		R  <=  T1
 	 *
 	 *  //
 	 *
@@ -225,10 +271,12 @@ module modexpa7_exponentiator #
 		 */
 		
 		/* the very first addresses */
-	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_zero		= {{OPERAND_ADDR_WIDTH{1'b0}}};
+	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_zero			= {{OPERAND_ADDR_WIDTH{1'b0}}};
 	
 		/* the very last addresses */
-	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_last		= {m_num_words_latch};
+	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_last			= {m_num_words_latch};
+	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_addr_last_crt	=
+		{m_num_words_latch[OPERAND_ADDR_WIDTH-2:0], 1'b1};
 
 		/* address registers */
 	reg	[OPERAND_ADDR_WIDTH-1:0]	m_addr;
@@ -261,16 +309,18 @@ module modexpa7_exponentiator #
 	wire	[OPERAND_ADDR_WIDTH-1:0]	tp_addr_rd_next	= tp_addr_rd + 1'b1;
 	
 		/* handy stop flags */
-	wire	m_addr_done			= (m_addr     == bram_addr_last) ? 1'b1 : 1'b0;
-	wire	d_addr_done			= (d_addr     == bram_addr_last) ? 1'b1 : 1'b0;
-	wire	f_addr_done			= (f_addr     == bram_addr_last) ? 1'b1 : 1'b0;
-	wire	r_addr_done			= (r_addr     == bram_addr_last) ? 1'b1 : 1'b0;
-	wire	t0_addr_done		= (t0_addr    == bram_addr_last) ? 1'b1 : 1'b0;
-	wire	t1_addr_done		= (t1_addr    == bram_addr_last) ? 1'b1 : 1'b0;
-	wire	t2_addr_wr_done	= (t2_addr_wr == bram_addr_last) ? 1'b1 : 1'b0;
-	wire	p_addr_wr_done		= (p_addr_wr  == bram_addr_last) ? 1'b1 : 1'b0;
-	wire	pp_addr_rd_done	= (pp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
-	wire	tp_addr_rd_done	= (tp_addr_rd == bram_addr_last) ? 1'b1 : 1'b0;
+	wire	m_addr_done				= (m_addr     == bram_addr_last)     ? 1'b1 : 1'b0;
+	wire	m_addr_done_crt		= (m_addr     == bram_addr_last_crt) ? 1'b1 : 1'b0;
+	wire	d_addr_done				= (d_addr     == bram_addr_last)     ? 1'b1 : 1'b0;
+	wire	f_addr_done				= (f_addr     == bram_addr_last)     ? 1'b1 : 1'b0;
+	wire	r_addr_done				= (r_addr     == bram_addr_last)     ? 1'b1 : 1'b0;
+	wire	t0_addr_done			= (t0_addr    == bram_addr_last)     ? 1'b1 : 1'b0;
+	wire	t1_addr_done			= (t1_addr    == bram_addr_last)     ? 1'b1 : 1'b0;
+	wire	t2_addr_wr_done		= (t2_addr_wr == bram_addr_last)     ? 1'b1 : 1'b0;
+	wire	t2_addr_wr_done_crt	= (t2_addr_wr == bram_addr_last_crt) ? 1'b1 : 1'b0;
+	wire	p_addr_wr_done			= (p_addr_wr  == bram_addr_last)     ? 1'b1 : 1'b0;
+	wire	pp_addr_rd_done		= (pp_addr_rd == bram_addr_last)     ? 1'b1 : 1'b0;
+	wire	tp_addr_rd_done		= (tp_addr_rd == bram_addr_last)     ? 1'b1 : 1'b0;
 				
 		/* map registers to top-level ports */
 	assign m_bram_addr = m_addr;
@@ -392,10 +442,15 @@ module modexpa7_exponentiator #
 		// m_addr
 		//
 		case (fsm_next_state)
-			FSM_STATE_MUL_INIT_1: 	m_addr <= bram_addr_zero;
+			FSM_STATE_MUL_INIT_1: 		m_addr <= bram_addr_zero;
 			FSM_STATE_MUL_INIT_2,
 			FSM_STATE_MUL_INIT_3,
-			FSM_STATE_MUL_INIT_4:	m_addr <= !m_addr_done ? m_addr_next : m_addr;
+			FSM_STATE_MUL_INIT_4:		m_addr <= !m_addr_done ? m_addr_next : m_addr;
+			//
+			FSM_STATE_CRT_INIT_A_1: 	m_addr <= bram_addr_zero;
+			FSM_STATE_CRT_INIT_A_2,
+			FSM_STATE_CRT_INIT_A_3,
+			FSM_STATE_CRT_INIT_A_4:		m_addr <= !m_addr_done_crt ? m_addr_next : m_addr;
 		endcase
 		//
 		// d_addr
@@ -472,7 +527,10 @@ module modexpa7_exponentiator #
 			//
 			FSM_STATE_MUL_INIT_3:	t2_addr_wr <= bram_addr_zero;
 			FSM_STATE_MUL_INIT_4:	t2_addr_wr <= t2_addr_wr_next;
-
+			//
+			FSM_STATE_CRT_INIT_A_3:	t2_addr_wr <= bram_addr_zero;
+			FSM_STATE_CRT_INIT_A_4:	t2_addr_wr <= t2_addr_wr_next;
+			//
 			FSM_STATE_EXP_INIT_3:	t2_addr_wr <= bram_addr_zero;
 			FSM_STATE_EXP_INIT_4:	t2_addr_wr <= t2_addr_wr_next;
 			//
@@ -554,6 +612,8 @@ module modexpa7_exponentiator #
 		case (fsm_next_state)
 			FSM_STATE_MUL_INIT_3,
 			FSM_STATE_MUL_INIT_4,
+			FSM_STATE_CRT_INIT_A_3,
+			FSM_STATE_CRT_INIT_A_4,
 			FSM_STATE_EXP_INIT_3,		
 			FSM_STATE_EXP_INIT_4,
 			FSM_STATE_EXP_FILL_3,
@@ -616,15 +676,19 @@ module modexpa7_exponentiator #
 		//
 		case (fsm_next_state)
 			//
-			FSM_STATE_MUL_INIT_3,	
-			FSM_STATE_MUL_INIT_4:	t2_data_in <= m_bram_out;
+			FSM_STATE_MUL_INIT_3,
+			FSM_STATE_MUL_INIT_4:		t2_data_in <= m_bram_out;
+			//
+			FSM_STATE_CRT_INIT_A_3,
+			FSM_STATE_CRT_INIT_A_4:		t2_data_in <= m_bram_out;
+
 			//
-			FSM_STATE_EXP_INIT_3:	t2_data_in <= 32'd1;
-			FSM_STATE_EXP_INIT_4:	t2_data_in <= 32'd0;
+			FSM_STATE_EXP_INIT_3:		t2_data_in <= 32'd1;
+			FSM_STATE_EXP_INIT_4:		t2_data_in <= 32'd0;
 			//
 			FSM_STATE_EXP_FILL_3,
-			FSM_STATE_EXP_FILL_4:	t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
-			default:						t2_data_in <= 32'dX;
+			FSM_STATE_EXP_FILL_4:		t2_data_in <= flag_update_r ? tp_data_out : t0_data_out;
+			default:							t2_data_in <= 32'dX;
 		endcase		
 		//
 	end
@@ -634,6 +698,7 @@ module modexpa7_exponentiator #
 		// Double Multiplier
 		//
 	reg	mul_ena;
+	reg	mul_crt;
 	wire	mul_rdy_pp;
 	wire	mul_rdy_tp;
 	wire	mul_rdy_all = mul_rdy_pp & mul_rdy_tp;
@@ -651,6 +716,8 @@ module modexpa7_exponentiator #
 		.ena						(mul_ena),
 		.rdy						(mul_rdy_pp),
 
+		.reduce_only			(1'b0),
+
 		.a_bram_addr			(p1_addr_rd),
 		.b_bram_addr			(p2_addr_rd),
 		.n_bram_addr			(n1_bram_addr),
@@ -681,6 +748,8 @@ module modexpa7_exponentiator #
 		.ena						(mul_ena),
 		.rdy						(mul_rdy_tp),
 
+		.reduce_only			(mul_crt),
+
 		.a_bram_addr			(t2_addr_rd),
 		.b_bram_addr			(p3_addr_rd),
 		.n_bram_addr			(n2_bram_addr),
@@ -703,8 +772,18 @@ module modexpa7_exponentiator #
 		//
 		case (fsm_next_state)
 			FSM_STATE_MUL_CALC_1,
-			FSM_STATE_EXP_CALC_1:	mul_ena <= 1'b1;
-			default:						mul_ena <= 1'b0;
+			FSM_STATE_CRT_CALC_A_1,
+			FSM_STATE_CRT_CALC_B_1,
+			FSM_STATE_CRT_CALC_C_1,
+			FSM_STATE_EXP_CALC_1:		mul_ena <= 1'b1;
+			default:							mul_ena <= 1'b0;
+		endcase
+
+	always @(posedge clk)
+		// mul_crt goes high on the same next-state (CRT_CALC_A_1) that raises mul_ena; it feeds .reduce_only of the TP multiplier
+		case (fsm_next_state)
+			FSM_STATE_CRT_CALC_A_1:		mul_crt <= 1'b1;
+			default:							mul_crt <= 1'b0;
+		endcase
 			
 
@@ -726,53 +805,70 @@ module modexpa7_exponentiator #
 		//
 		case (fsm_state)
 			//
-			FSM_STATE_MUL_INIT_1:									fsm_next_state = FSM_STATE_MUL_INIT_2;
-			FSM_STATE_MUL_INIT_2:									fsm_next_state = FSM_STATE_MUL_INIT_3;
-			FSM_STATE_MUL_INIT_3:									fsm_next_state = FSM_STATE_MUL_INIT_4;
-			FSM_STATE_MUL_INIT_4:	if (t2_addr_wr_done)		fsm_next_state = FSM_STATE_MUL_CALC_1;
-											else							fsm_next_state = FSM_STATE_MUL_INIT_4;
-			//
-			FSM_STATE_MUL_CALC_1:									fsm_next_state = FSM_STATE_MUL_CALC_2;
-			FSM_STATE_MUL_CALC_2:	if (mul_rdy_tp)			fsm_next_state = FSM_STATE_MUL_CALC_3;
-											else							fsm_next_state = FSM_STATE_MUL_CALC_2;
-			FSM_STATE_MUL_CALC_3:									fsm_next_state = FSM_STATE_EXP_INIT_1;
-			//
-			FSM_STATE_EXP_IDLE:		if (ena_trig)				fsm_next_state = FSM_STATE_MUL_INIT_1;
-											else							fsm_next_state = FSM_STATE_EXP_IDLE;
-			//
-			FSM_STATE_EXP_INIT_1:									fsm_next_state = FSM_STATE_EXP_INIT_2;
-			FSM_STATE_EXP_INIT_2:									fsm_next_state = FSM_STATE_EXP_INIT_3;
-			FSM_STATE_EXP_INIT_3:									fsm_next_state = FSM_STATE_EXP_INIT_4;
-			FSM_STATE_EXP_INIT_4:	if (t1_addr_done)			fsm_next_state = FSM_STATE_EXP_LOAD_1;
-											else							fsm_next_state = FSM_STATE_EXP_INIT_4;
-			//
-			FSM_STATE_EXP_LOAD_1:									fsm_next_state = FSM_STATE_EXP_LOAD_2;
-			FSM_STATE_EXP_LOAD_2:									fsm_next_state = FSM_STATE_EXP_LOAD_3;
-			FSM_STATE_EXP_LOAD_3:									fsm_next_state = FSM_STATE_EXP_LOAD_4;
-			FSM_STATE_EXP_LOAD_4:	if (t0_addr_done)			fsm_next_state = FSM_STATE_EXP_CALC_1;
-											else							fsm_next_state = FSM_STATE_EXP_LOAD_4;
-			//
-			FSM_STATE_EXP_CALC_1:									fsm_next_state = FSM_STATE_EXP_CALC_2;
-			FSM_STATE_EXP_CALC_2:	if (mul_rdy_all)			fsm_next_state = FSM_STATE_EXP_CALC_3;
-											else							fsm_next_state = FSM_STATE_EXP_CALC_2;
-			FSM_STATE_EXP_CALC_3:									fsm_next_state = FSM_STATE_EXP_FILL_1;
-			//
-			FSM_STATE_EXP_FILL_1:									fsm_next_state = FSM_STATE_EXP_FILL_2;
-			FSM_STATE_EXP_FILL_2:									fsm_next_state = FSM_STATE_EXP_FILL_3;
-			FSM_STATE_EXP_FILL_3:									fsm_next_state = FSM_STATE_EXP_FILL_4;
-			FSM_STATE_EXP_FILL_4:	if (p_addr_wr_done)		fsm_next_state = FSM_STATE_EXP_NEXT;
-											else							fsm_next_state = FSM_STATE_EXP_FILL_4;			
-			//
-			FSM_STATE_EXP_NEXT:		if (bit_cnt_done)			fsm_next_state = FSM_STATE_EXP_SAVE_1;
-											else							fsm_next_state = FSM_STATE_EXP_LOAD_1;
-			//
-			FSM_STATE_EXP_SAVE_1:									fsm_next_state = FSM_STATE_EXP_SAVE_2;
-			FSM_STATE_EXP_SAVE_2:									fsm_next_state = FSM_STATE_EXP_SAVE_3;
-			FSM_STATE_EXP_SAVE_3:									fsm_next_state = FSM_STATE_EXP_SAVE_4;
-			FSM_STATE_EXP_SAVE_4:	if (r_addr_done)			fsm_next_state = FSM_STATE_EXP_STOP;
-											else							fsm_next_state = FSM_STATE_EXP_SAVE_4;
-			//
-			FSM_STATE_EXP_STOP:										fsm_next_state = FSM_STATE_EXP_IDLE;
+			//
+			FSM_STATE_MUL_INIT_1:										fsm_next_state = FSM_STATE_MUL_INIT_2;
+			FSM_STATE_MUL_INIT_2:										fsm_next_state = FSM_STATE_MUL_INIT_3;
+			FSM_STATE_MUL_INIT_3:										fsm_next_state = FSM_STATE_MUL_INIT_4;
+			FSM_STATE_MUL_INIT_4:	if (t2_addr_wr_done)			fsm_next_state = FSM_STATE_MUL_CALC_1;
+											else								fsm_next_state = FSM_STATE_MUL_INIT_4;
+			//
+			FSM_STATE_MUL_CALC_1:										fsm_next_state = FSM_STATE_MUL_CALC_2;
+			FSM_STATE_MUL_CALC_2:	if (mul_rdy_tp)				fsm_next_state = FSM_STATE_MUL_CALC_3;
+											else								fsm_next_state = FSM_STATE_MUL_CALC_2;
+			FSM_STATE_MUL_CALC_3:										fsm_next_state = FSM_STATE_EXP_INIT_1;
+			//
+			//
+			FSM_STATE_CRT_INIT_A_1:										fsm_next_state = FSM_STATE_CRT_INIT_A_2;
+			FSM_STATE_CRT_INIT_A_2:										fsm_next_state = FSM_STATE_CRT_INIT_A_3;
+			FSM_STATE_CRT_INIT_A_3:										fsm_next_state = FSM_STATE_CRT_INIT_A_4;
+			FSM_STATE_CRT_INIT_A_4:	if (t2_addr_wr_done_crt)	fsm_next_state = FSM_STATE_CRT_CALC_A_1;
+											else								fsm_next_state = FSM_STATE_CRT_INIT_A_4;
+
+			//
+			FSM_STATE_CRT_CALC_A_1:										fsm_next_state = FSM_STATE_CRT_CALC_A_2;
+			FSM_STATE_CRT_CALC_A_2:	if (mul_rdy_tp)				fsm_next_state = FSM_STATE_CRT_CALC_A_3;
+											else								fsm_next_state = FSM_STATE_CRT_CALC_A_2;
+			FSM_STATE_CRT_CALC_A_3:										fsm_next_state = FSM_STATE_EXP_INIT_1;
+			//
+			//
+			FSM_STATE_EXP_IDLE:		if (ena_trig)					fsm_next_state = crt ?
+																					FSM_STATE_CRT_INIT_A_1 : FSM_STATE_MUL_INIT_1;
+											else								fsm_next_state = FSM_STATE_EXP_IDLE;
+			//
+			//
+			FSM_STATE_EXP_INIT_1:										fsm_next_state = FSM_STATE_EXP_INIT_2;
+			FSM_STATE_EXP_INIT_2:										fsm_next_state = FSM_STATE_EXP_INIT_3;
+			FSM_STATE_EXP_INIT_3:										fsm_next_state = FSM_STATE_EXP_INIT_4;
+			FSM_STATE_EXP_INIT_4:	if (t1_addr_done)				fsm_next_state = FSM_STATE_EXP_LOAD_1;
+											else								fsm_next_state = FSM_STATE_EXP_INIT_4;
+			//
+			FSM_STATE_EXP_LOAD_1:										fsm_next_state = FSM_STATE_EXP_LOAD_2;
+			FSM_STATE_EXP_LOAD_2:										fsm_next_state = FSM_STATE_EXP_LOAD_3;
+			FSM_STATE_EXP_LOAD_3:										fsm_next_state = FSM_STATE_EXP_LOAD_4;
+			FSM_STATE_EXP_LOAD_4:	if (t0_addr_done)				fsm_next_state = FSM_STATE_EXP_CALC_1;
+											else								fsm_next_state = FSM_STATE_EXP_LOAD_4;
+			//
+			FSM_STATE_EXP_CALC_1:										fsm_next_state = FSM_STATE_EXP_CALC_2;
+			FSM_STATE_EXP_CALC_2:	if (mul_rdy_all)				fsm_next_state = FSM_STATE_EXP_CALC_3;
+											else								fsm_next_state = FSM_STATE_EXP_CALC_2;
+			FSM_STATE_EXP_CALC_3:										fsm_next_state = FSM_STATE_EXP_FILL_1;
+			//
+			FSM_STATE_EXP_FILL_1:										fsm_next_state = FSM_STATE_EXP_FILL_2;
+			FSM_STATE_EXP_FILL_2:										fsm_next_state = FSM_STATE_EXP_FILL_3;
+			FSM_STATE_EXP_FILL_3:										fsm_next_state = FSM_STATE_EXP_FILL_4;
+			FSM_STATE_EXP_FILL_4:	if (p_addr_wr_done)			fsm_next_state = FSM_STATE_EXP_NEXT;
+											else								fsm_next_state = FSM_STATE_EXP_FILL_4;			
+			//
+			FSM_STATE_EXP_NEXT:		if (bit_cnt_done)				fsm_next_state = FSM_STATE_EXP_SAVE_1;
+											else								fsm_next_state = FSM_STATE_EXP_LOAD_1;
+			//
+			FSM_STATE_EXP_SAVE_1:										fsm_next_state = FSM_STATE_EXP_SAVE_2;
+			FSM_STATE_EXP_SAVE_2:										fsm_next_state = FSM_STATE_EXP_SAVE_3;
+			FSM_STATE_EXP_SAVE_3:										fsm_next_state = FSM_STATE_EXP_SAVE_4;
+			FSM_STATE_EXP_SAVE_4:	if (r_addr_done)				fsm_next_state = FSM_STATE_EXP_STOP;
+											else								fsm_next_state = FSM_STATE_EXP_SAVE_4;
+			//
+			FSM_STATE_EXP_STOP:											fsm_next_state = FSM_STATE_EXP_IDLE;
 			//
 		endcase
 		//
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v
index 7293998..444693d 100644
--- a/src/rtl/modexpa7_systolic_multiplier.v
+++ b/src/rtl/modexpa7_systolic_multiplier.v
@@ -57,6 +57,8 @@ module modexpa7_systolic_multiplier #
 
 		input											ena,
 		output										rdy,
+
+		input											reduce_only,
 
 		output	[OPERAND_ADDR_WIDTH-1:0]	a_bram_addr,
 		output	[OPERAND_ADDR_WIDTH-1:0]	b_bram_addr,
@@ -155,7 +157,8 @@ module modexpa7_systolic_multiplier #
 		 * Parameters Latch
 		 */
 	reg	[OPERAND_ADDR_WIDTH-1:0]	n_num_words_latch;
-	reg	[OPERAND_ADDR_WIDTH  :0]	p_num_words_latch;
+	reg	[OPERAND_ADDR_WIDTH  :0]	p_num_words_latch;
+	reg										reduce_only_latch;
 
 		// save number of words in n when new operation starts
 	always @(posedge clk)
@@ -163,7 +166,12 @@ module modexpa7_systolic_multiplier #
 		if ((fsm_state == FSM_STATE_IDLE) && ena_trig)
 			n_num_words_latch <= n_num_words;
 			
+	always @(posedge clk)
+		//
+		if ((fsm_state == FSM_STATE_IDLE) && ena_trig)
+			reduce_only_latch <= reduce_only;
 			
+		
 		/*
 		 * Multiplication Phase
 		 */
@@ -174,6 +182,7 @@ module modexpa7_systolic_multiplier #
 	
 	reg	[ 1: 0]	mult_phase;
 	
+	wire	mult_phase_ab   = (mult_phase == MULT_PHASE_A_B)   ? 1'b1 : 1'b0;
 	wire	mult_phase_done = (mult_phase == MULT_PHASE_STALL) ? 1'b1 : 1'b0;
 	
    always @(posedge clk)
@@ -296,6 +305,7 @@ module modexpa7_systolic_multiplier #
 	wire	[OPERAND_ADDR_WIDTH  :0]	bram_addr_ext_last = {n_num_words_latch, 1'b1};
 
 		// address registers
+	wire	[OPERAND_ADDR_WIDTH-1:0]	a_addr;
 	reg	[OPERAND_ADDR_WIDTH-1:0]	b_addr;
 	reg	[OPERAND_ADDR_WIDTH-1:0]	n_addr;
 	wire	[OPERAND_ADDR_WIDTH  :0]	p_addr_ext_wr;
@@ -570,8 +580,9 @@ module modexpa7_systolic_multiplier #
 				MULT_PHASE_Q_N:			p_num_words_latch <= {n_num_words_latch, 1'b1};
 			endcase
 			
-	assign n_coeff_bram_addr = a_bram_addr;
-	assign q_addr_rd = a_bram_addr;
+	assign a_bram_addr = a_addr;
+	assign n_coeff_bram_addr = a_addr;
+	assign q_addr_rd = a_addr;
 	
 	reg	[31: 0]	a_data_out;
 	
@@ -597,12 +608,14 @@ module modexpa7_systolic_multiplier #
 		.ena					(pe_array_ena),
 		.rdy					(pe_array_rdy),
 
+		.crt					(reduce_only_latch && mult_phase_ab),
+
 		.loader_addr_rd	(loader_addr_rd),
 		
 		.pe_a_wide			({SYSTOLIC_ARRAY_LENGTH{a_data_out}}),
 		.pe_b_wide			(pe_b_wide),
 		
-		.a_bram_addr		(a_bram_addr),
+		.a_bram_addr		(a_addr),
 		
 		.p_bram_addr		(p_addr_ext_wr),
 		.p_bram_in			(p_data_in),
diff --git a/src/rtl/modexpa7_systolic_multiplier_array.v b/src/rtl/modexpa7_systolic_multiplier_array.v
index 754203d..3280010 100644
--- a/src/rtl/modexpa7_systolic_multiplier_array.v
+++ b/src/rtl/modexpa7_systolic_multiplier_array.v
@@ -48,6 +48,8 @@ module modexpa7_systolic_multiplier_array #
 		input																				ena,
 		output																			rdy,
 
+		input																				crt,
+
 		output	[OPERAND_ADDR_WIDTH - SYSTOLIC_ARRAY_POWER - 1 : 0]	loader_addr_rd,
 
 		input		[         32 * (2 ** SYSTOLIC_ARRAY_POWER) - 1 : 0]	pe_a_wide,
@@ -385,6 +387,8 @@ module modexpa7_systolic_multiplier_array #
 	
 		// the very last address
 	wire	[OPERAND_ADDR_WIDTH - 1 : 0]	bram_addr_last     = n_num_words_latch;
+	wire	[OPERAND_ADDR_WIDTH - 1 : 0]	bram_addr_last_crt =
+		{n_num_words_latch[OPERAND_ADDR_WIDTH-2:0], 1'b1};
 	wire	[OPERAND_ADDR_WIDTH     : 0]	bram_addr_ext_last = p_num_words_latch;
 		
 		// registers
@@ -398,8 +402,9 @@ module modexpa7_systolic_multiplier_array #
 	wire	[OPERAND_ADDR_WIDTH     : 0]	p_addr_next = p_addr + 1'b1;
 	
 		// handy flags
-	wire	a_addr_done = (a_addr == bram_addr_last)     ? 1'b1 : 1'b0;
-	wire	p_addr_done = (p_addr == bram_addr_ext_last) ? 1'b1 : 1'b0;
+	wire	a_addr_done     = (a_addr == bram_addr_last)     ? 1'b1 : 1'b0;
+	wire	a_addr_done_crt = (a_addr == bram_addr_last_crt) ? 1'b1 : 1'b0;
+	wire	p_addr_done     = (p_addr == bram_addr_ext_last) ? 1'b1 : 1'b0;
 	
 		// map top-level ports to internal registers
 	assign a_bram_addr	= a_addr;
@@ -452,7 +457,7 @@ module modexpa7_systolic_multiplier_array #
 		//
 		case (fsm_next_state)
 			FSM_STATE_MULT_START:	a_addr <= bram_addr_zero;
-			FSM_STATE_MULT_RELOAD:	a_addr <= !a_addr_done ? a_addr_next : a_addr;
+			FSM_STATE_MULT_RELOAD:	a_addr <= !(crt ? a_addr_done_crt : a_addr_done) ? a_addr_next : a_addr;	// crt: stop at bram_addr_last_crt instead of bram_addr_last
 		endcase
 		//
 	end
diff --git a/src/rtl/pe/modexpa7_primitive_switch.v b/src/rtl/pe/modexpa7_primitive_switch.v
index fa958ec..17e8264 100644
--- a/src/rtl/pe/modexpa7_primitive_switch.v
+++ b/src/rtl/pe/modexpa7_primitive_switch.v
@@ -1,4 +1,4 @@
-`define USE_VENDOR_PRIMITIVES
+//`define USE_VENDOR_PRIMITIVES
 
 `ifdef USE_VENDOR_PRIMITIVES
 
diff --git a/src/tb/modexp_fpga_model_vectors.v b/src/tb/modexp_fpga_model_vectors.v
index d5284c9..c86f7ba 100644
--- a/src/tb/modexp_fpga_model_vectors.v
+++ b/src/tb/modexp_fpga_model_vectors.v
@@ -40,6 +40,46 @@ localparam [383:0] S_384 =
 	 32'ha76b945b, 32'h49a3f645, 32'h76801499, 32'hb98e6a16, 
 	 32'hd2467b6a, 32'h75b7d614, 32'h0fff0fde, 32'hb31d1819};
 
+localparam [191:0] P_192 =
+	{32'he9ac4cf6, 32'h03b2d80a, 32'h7f1d091e, 32'h49d5f1a0, 
+	 32'hac2ae4ff, 32'hbf9bf375};
+
+localparam [191:0] Q_192 =
+	{32'hc1468f3e, 32'hc6909231, 32'h5a4d74ba, 32'h477b303f, 
+	 32'h4b2e10d1, 32'h1f44e815};
+
+localparam [191:0] P_COEFF_192 =
+	{32'h8ba8d46c, 32'hb4ed830d, 32'hfbb97c6e, 32'h72d150d3, 
+	 32'h72d21392, 32'h70d2fb23};
+
+localparam [191:0] Q_COEFF_192 =
+	{32'hd863905a, 32'hc1541c8a, 32'h25952b0e, 32'ha62b0348, 
+	 32'h837f149f, 32'hd6cc58c3};
+
+localparam [191:0] FACTOR_P_192 =
+	{32'h886bad59, 32'h9bf7a46e, 32'h482ed232, 32'he55164cf, 
+	 32'hcb46a9e8, 32'he9bd888b};
+
+localparam [191:0] FACTOR_Q_192 =
+	{32'h324b776e, 32'h3734d186, 32'h73dc8796, 32'h9e1aba2c, 
+	 32'h4d5df285, 32'he97656b7};
+
+localparam [191:0] DP_192 =
+	{32'h69b6c286, 32'h95fbc613, 32'h51988034, 32'h8cb0d684, 
+	 32'h9aff38e4, 32'h9ef9ddb5};
+
+localparam [191:0] DQ_192 =
+	{32'h1eda82b7, 32'h84bf4377, 32'h39712ff7, 32'h24be179f, 
+	 32'ha302c190, 32'h80ab6159};
+
+localparam [191:0] MP_192 =
+	{32'h9e163bb5, 32'h35e718cb, 32'hcde52b7b, 32'h5db8552b, 
+	 32'h46a300e0, 32'h34f91e6b};
+
+localparam [191:0] MQ_192 =
+	{32'h7b01a724, 32'h90f0d5f9, 32'h9e237ce5, 32'h6d31fd28, 
+	 32'h4ecb9dad, 32'h58bf366a};
+
 localparam [511:0] M_512 =
 	{32'h005536b6, 32'h43ea651f, 32'h2fd3c70a, 32'ha83659cb, 
 	 32'hd0c1f47b, 32'ha8033730, 32'h29c6b082, 32'h6db48613, 
@@ -88,3 +128,43 @@ localparam [511:0] S_512 =
 	 32'hfd1e029d, 32'hfe887387, 32'h4312635f, 32'hb2b54b8d, 
 	 32'h5d3b379e, 32'h161eaa4f, 32'hedfd932b, 32'h780f0203};
 
+localparam [255:0] P_256 =
+	{32'hfedea889, 32'h97cfdb79, 32'hcca87074, 32'he5abcda1, 
+	 32'h3be201c4, 32'hc416fd15, 32'hf2130931, 32'h61ff5937};
+
+localparam [255:0] Q_256 =
+	{32'hf0889147, 32'h5aa60f93, 32'hb9927d86, 32'h8f795c5c, 
+	 32'h8e98dcf2, 32'had3aad74, 32'h9441583a, 32'h967dce41};
+
+localparam [255:0] P_COEFF_256 =
+	{32'h7af63ffc, 32'h428d9408, 32'h86e79fb9, 32'h018dad77, 
+	 32'h4ff704df, 32'h93effb1e, 32'h265d181a, 32'h47ae5379};
+
+localparam [255:0] Q_COEFF_256 =
+	{32'hd27f8aa0, 32'h9f2b9800, 32'h2dfd2392, 32'h4f868b9d, 
+	 32'h0fc51e1d, 32'h022de65b, 32'ha55f9ad1, 32'h0676be3f};
+
+localparam [255:0] FACTOR_P_256 =
+	{32'h1a5f27a1, 32'h8d16b0cb, 32'h8c2751b8, 32'h106a099c, 
+	 32'ha6efbadd, 32'hcb313a5f, 32'hf530eeb6, 32'hbbc7d8f5};
+
+localparam [255:0] FACTOR_Q_256 =
+	{32'h6794987c, 32'h932203a6, 32'h8c5b1e68, 32'h18d458e6, 
+	 32'h6737f12a, 32'h664d4187, 32'hc4ec03ba, 32'h4bd3d0c2};
+
+localparam [255:0] DP_256 =
+	{32'h2504d437, 32'hfffbe9e5, 32'hfc0aef22, 32'h9b8563bd, 
+	 32'haa83fe3b, 32'hc53b8d91, 32'h15731c5f, 32'hb6db2eeb};
+
+localparam [255:0] DQ_256 =
+	{32'hd3265fba, 32'h2eb65638, 32'h4d106ec7, 32'h000dfe69, 
+	 32'h75f87505, 32'h47d299d0, 32'h1c115cdd, 32'h599ca8c1};
+
+localparam [255:0] MP_256 =
+	{32'h23359955, 32'hcad299b6, 32'h049bb248, 32'h3828b6a5, 
+	 32'h74c85825, 32'h7dd8e109, 32'h07edbda9, 32'h4980c2c9};
+
+localparam [255:0] MQ_256 =
+	{32'h8578120b, 32'h91f4ca9e, 32'h371d3e70, 32'h0005bb89, 
+	 32'hd31ed864, 32'h477bd9cf, 32'h65a1f03b, 32'h606d3bc8};
+
diff --git a/src/tb/tb_exponentiator.v b/src/tb/tb_exponentiator.v
index 16be0a5..440fedc 100644
--- a/src/tb/tb_exponentiator.v
+++ b/src/tb/tb_exponentiator.v
@@ -63,6 +63,8 @@ module tb_exponentiator;
 	reg				rst_n;
 	reg				ena;
 	
+	reg				crt;
+	
 	reg	[ 3: 0]	n_num_words;
 	reg	[ 8: 0]	d_num_bits;
 
@@ -170,6 +172,8 @@ module tb_exponentiator;
 		.ena						(ena), 
 		.rdy						(rdy), 
 		
+		.crt						(crt),
+		
 		.m_bram_addr			(core_m_addr),
 		.d_bram_addr			(core_d_addr),
 		.f_bram_addr			(core_f_addr),
@@ -206,9 +210,14 @@ module tb_exponentiator;
 		#200;		
 		rst_n = 1'b1;
 		#100;
-		
-		test_exponent_384(M_384, D_384, FACTOR_384, N_384, N_COEFF_384, S_384);
-		test_exponent_512(M_512, D_512, FACTOR_512, N_512, N_COEFF_512, S_512);
+
+			// test "honest" exponentiation
+//		test_exponent_384(M_384, D_384, FACTOR_384, N_384, N_COEFF_384, S_384);
+//		test_exponent_512(M_512, D_512, FACTOR_512, N_512, N_COEFF_512, S_512);
+
+			// test crt mode
+		test_exponent_192(M_384, DP_192, FACTOR_P_192, P_192, P_COEFF_192, MP_192);
+		//test_exponent_192(M_384, DQ_192, FACTOR_Q_192, Q_192, Q_COEFF_192, MQ_192);
 		
 	end
       
@@ -216,7 +225,6 @@ module tb_exponentiator;
 		//
 		// Test Tasks
 		//
-		
 	task test_exponent_384;
 		//
 		input	[383:0] m;
@@ -234,6 +242,8 @@ module tb_exponentiator;
 			n_num_words = 4'd11;								// set number of words
 			d_num_bits = 9'd383;								// set number of bits
 			//
+			crt = 0;												// disable crt mode
+			//
 			write_memory_384(m, d, f, n, n_coeff);		// fill memory
 			
 			ena = 1;												// start operation
@@ -276,6 +286,8 @@ module tb_exponentiator;
 			n_num_words = 4'd15;								// set number of words
 			d_num_bits = 9'd511;								// set number of bits
 			//
+			crt = 0;												// disable crt mode
+			//
 			write_memory_512(m, d, f, n, n_coeff);		// fill memory
 			
 			ena = 1;												// start operation
@@ -301,6 +313,49 @@ module tb_exponentiator;
 		//
 	endtask
 
+	task test_exponent_192;
+		// CRT-mode test: fills the memories, pulses ena, waits for rdy, reads the result and compares it with s
+		input	[383:0] m;
+		input	[191:0] d;
+		input [191:0] f;
+		input	[191:0] n;
+		input	[191:0] n_coeff;
+		input	[191:0] s;
+		reg   [191:0] r;
+		// m is 384 bits wide while every other operand is 192 bits wide; r receives the value read back
+		integer i;
+		// NOTE(review): i is declared but never used in this task
+		begin
+			// size settings follow the other tests: value = count - 1 (5 -> 6 words, 191 -> 192 bits)
+			n_num_words = 4'd5;								// set number of words
+			d_num_bits = 9'd191;								// set number of bits
+			//
+			crt = 1;												// enable crt mode
+			//
+			write_memory_192(m, d, f, n, n_coeff);		// fill memory
+			
+			ena = 1;												// start operation
+			#10;													// keep ena high for one #10 step
+			ena = 0;												// clear flag
+			
+			while (!rdy) #10;									// wait for operation to complete
+			read_memory_192(r);								// get result from memory
+						
+			$display("    calculated: %x", r);			// display result
+			$display("    expected:   %x", s);			// display reference value
+							
+				// check calculated value
+			if (r === s) begin
+				$display("        OK");
+				$display("SUCCESS: Test passed.");
+			end else begin
+				$display("        ERROR");
+				$display("FAILURE: Test not passed.");
+			end
+			//
+		end
+		//
+	endtask
 
 	//
 	// write_memory_384
@@ -409,6 +464,59 @@ module tb_exponentiator;
 
 
 	//
+	// write_memory_192
+	// Drives the shared write port (tb_mdfn_addr / tb_mdfn_wren) to load m, d, f, n and n_coeff word by word.
+	task write_memory_192;
+		// m is 384 bits wide; d, f, n and n_coeff are 192 bits wide
+		input	[383:0] m;
+		input	[191:0] d;
+		input	[191:0] f;
+		input	[191:0] n;
+		input	[191:0] n_coeff;
+		reg	[383:0] m_shreg;
+		reg	[191:0] f_shreg;
+		reg	[191:0] d_shreg;
+		reg	[191:0] n_shreg;
+		reg	[191:0] n_coeff_shreg;
+		// one shift register per operand; bits [31:0] always hold the next word to write
+		begin
+			//
+			tb_mdfn_wren	= 1;			// start filling memories
+			m_shreg			= m;			// preload shift register
+			d_shreg			= d;			// preload shift register
+			f_shreg			= f;			// preload shift register
+			n_shreg			= n;			// preload shift register
+			n_coeff_shreg	= n_coeff;	// preload shift register
+			// loop count is NUM_WORDS_384 (presumably 12) to cover m; the 192-bit registers supply X once their 6 words are shifted out
+			for (w=0; w<NUM_WORDS_384; w=w+1) begin							// write all words
+				tb_mdfn_addr		= w[3:0];											// set address
+				tb_m_data			= m_shreg[31:0];									// set data
+				tb_d_data			= d_shreg[31:0];									// set data
+				tb_f_data			= f_shreg[31:0];									// set data
+				tb_n_data			= n_shreg[31:0];									// set data
+				tb_n_coeff_data	= n_coeff_shreg[31:0];							// set data
+				m_shreg				= {{32{1'bX}}, m_shreg[383:32]};				// update shift register
+				d_shreg				= {{32{1'bX}}, d_shreg[191:32]};				// update shift register
+				f_shreg				= {{32{1'bX}}, f_shreg[191:32]};				// update shift register
+				n_shreg				= {{32{1'bX}}, n_shreg[191:32]};				// update shift register
+				n_coeff_shreg		= {{32{1'bX}}, n_coeff_shreg[191:32]};		// update shift register
+				#10;																			// wait for 1 clock tick
+			end
+			// leave the write port in an undefined state so stale values cannot be reused silently
+			tb_mdfn_addr		= {4{1'bX}};	// wipe addresses
+			tb_m_data			= {32{1'bX}};	// wipe data
+			tb_d_data			= {32{1'bX}};	// wipe data
+			tb_f_data			= {32{1'bX}};	// wipe data
+			tb_n_data			= {32{1'bX}};	// wipe data
+			tb_n_coeff_data	= {32{1'bX}};	// wipe data
+			tb_mdfn_wren	= 0;				// stop filling memory
+			//
+		end
+		//
+	endtask
+
+
+	//
 	// read_memory_384
 	//
 	task read_memory_384;
@@ -455,6 +563,29 @@ module tb_exponentiator;
 		//
 	endtask
 
+	//
+	// read_memory_192
+	// Reads the result memory through tb_r_addr / tb_r_data and returns the words assembled into one 192-bit value.
+	task read_memory_192;
+		// r: assembled result; r_shreg: shift register that collects the words as they are read
+		output	[191:0] r;
+		reg		[191:0] r_shreg;
+		//
+		begin
+			// each new word enters at the top of r_shreg and older words move down, so after six reads address 0 sits in bits [31:0]
+			for (w=0; w<NUM_WORDS_384/2; w=w+1) begin		// read result word-by-word
+				tb_r_addr	= w[3:0];							// set address
+				#10;													// wait for 1 clock tick
+				r_shreg = {tb_r_data, r_shreg[191:32]};	// store data word
+			end
+			//
+			tb_r_addr = {4{1'bX}};								// wipe address
+			r = r_shreg;											// return
+			//
+		end
+		//
+	endtask
+
 
 endmodule
 



More information about the Commits mailing list