[Cryptech-Commits] [core/math/modexpa7] 04/06: Updated STM32 demo program to show how to use the precomputation block.

git at cryptech.is git at cryptech.is
Sun Sep 3 21:36:51 UTC 2017


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch systolic_crt
in repository core/math/modexpa7.

commit 058c54213a307fd360df1486f5d369d04b3a84d9
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Mon Sep 4 00:14:09 2017 +0300

    Updated STM32 demo program to show how to use the precomputation block.
---
 src/rtl/modexpa7_top.v             |  40 +++++--
 src/stm32/modexpa7_driver_sample.c | 236 ++++++++++++++++++++++++++-----------
 src/tb/tb_wrapper.v                | 131 +++++++++++++++++++-
 3 files changed, 322 insertions(+), 85 deletions(-)

diff --git a/src/rtl/modexpa7_top.v b/src/rtl/modexpa7_top.v
index 7723b88..ea3d2c2 100644
--- a/src/rtl/modexpa7_top.v
+++ b/src/rtl/modexpa7_top.v
@@ -109,24 +109,38 @@ module modexpa7_top #
 	reg valid_reg = 1'b0;
 	
 	assign ready = ready_reg;
-	assign valid = valid_reg;
+	assign valid = valid_reg;
+	
+	reg	init_trig_latch;
+	reg	next_trig_latch;
+	
+	always @(posedge clk)
+		//
+		if (fsm_state == FSM_STATE_IDLE)
+			//
+			case ({next_trig, init_trig})
+				2'b00:	{next_trig_latch, init_trig_latch} <= 2'b00;		// do nothing
+				2'b01:	{next_trig_latch, init_trig_latch} <= 2'b01;		// precalculate
+				2'b10:	{next_trig_latch, init_trig_latch} <= 2'b10;		// exponentiate
+				2'b11:	{next_trig_latch, init_trig_latch} <= 2'b01;		// 'init' has priority over 'next'
+			endcase
 
 		// ready flag logic
    always @(posedge clk or negedge rst_n)
 		//
-		if (rst_n == 1'b0)							ready_reg <= 1'b0;	// reset flag to default state
+		if (rst_n == 1'b0)									ready_reg <= 1'b0;	// reset flag to default state
 		else case (fsm_state)
-			FSM_STATE_IDLE:	if (init_trig)		ready_reg <= 1'b0;	// clear flag when operation is started
-			FSM_STATE_STOP:	if (!ready_reg)	ready_reg <= 1'b1;	// set flag after operation is finished
+			FSM_STATE_IDLE:	if (init_trig)				ready_reg <= 1'b0;	// clear flag when operation is started
+			FSM_STATE_STOP:	if (init_trig_latch)		ready_reg <= 1'b1;	// set flag after operation is finished
 		endcase
 
 		// valid flag logic
    always @(posedge clk or negedge rst_n)
 		//
-		if (rst_n == 1'b0)							valid_reg <= 1'b0;	// reset flag to default state
+		if (rst_n == 1'b0)									valid_reg <= 1'b0;	// reset flag to default state
 		else case (fsm_state)
-			FSM_STATE_IDLE:	if (next_trig)		valid_reg <= 1'b0;	// clear flag when operation is started
-			FSM_STATE_STOP:	if (!valid_reg)	valid_reg <= 1'b1;	// set flag after operation is finished
+			FSM_STATE_IDLE:	if (next_trig)				valid_reg <= 1'b0;	// clear flag when operation is started
+			FSM_STATE_STOP:	if (next_trig_latch)		valid_reg <= 1'b1;	// set flag after operation is finished
 		endcase
 
 	
@@ -137,14 +151,20 @@ module modexpa7_top #
 	reg	[OPERAND_ADDR_WIDTH+4:0]	exponent_num_bits_latch;
 
 		// save number of words in modulus when pre-calculation has been triggered,
-		// i.e. user has apparently loaded a new modulus into the core
+		// i.e. user has apparently loaded a new modulus into the core
+		//
+		// we also need to update modulus length when user wants to exponentiate,
+		// because he could have done precomputation for some modulus, then used
+		// a different length modulus and then reverted back to the original modulus
+		// without doing precomputation (dammit, spent whole day chasing this bug :(
 	always @(posedge clk)
 		//
-		if (fsm_next_state == FSM_STATE_PRECALC_START)
+		if ((fsm_next_state == FSM_STATE_PRECALC_START) ||
+			 (fsm_next_state == FSM_STATE_EXPONENT_START))
 			modulus_num_words_latch <= modulus_num_words;
 
 		// save number of bits in exponent when exponentiation has been triggered,
-		// i.e. user has loaded a new message into the core and wants exponentiate
+		// i.e. user has loaded a new message into the core and wants to exponentiate
 	always @(posedge clk)
 		//
 		if (fsm_next_state == FSM_STATE_EXPONENT_START)
diff --git a/src/stm32/modexpa7_driver_sample.c b/src/stm32/modexpa7_driver_sample.c
index 390c949..e1de2bd 100644
--- a/src/stm32/modexpa7_driver_sample.c
+++ b/src/stm32/modexpa7_driver_sample.c
@@ -59,12 +59,19 @@
 #define CORE_ADDR_BUFFER_BITS			(0x13 << 2)
 #define CORE_ADDR_ARRAY_BITS			(0x14 << 2)
 
+		// operand bank size
+#define BANK_LENGTH		0x200		// 0x200 = 512 bytes = 4096 bits
 
 		// locations of operand buffers
-#define CORE_ADDR_BANK_MODULUS		(0x800 + 0 * 0x200)
-#define CORE_ADDR_BANK_MESSAGE		(0x800 + 1 * 0x200)
-#define CORE_ADDR_BANK_EXPONENT		(0x800 + 2 * 0x200)
-#define CORE_ADDR_BANK_RESULT			(0x800 + 3 * 0x200)
+#define CORE_ADDR_BANK_MODULUS		(BANK_LENGTH * (8 + 0))
+#define CORE_ADDR_BANK_MESSAGE		(BANK_LENGTH * (8 + 1))
+#define CORE_ADDR_BANK_EXPONENT		(BANK_LENGTH * (8 + 2))
+#define CORE_ADDR_BANK_RESULT			(BANK_LENGTH * (8 + 3))
+
+#define CORE_ADDR_BANK_MODULUS_COEFF_OUT			(BANK_LENGTH * (8 + 4))
+#define CORE_ADDR_BANK_MODULUS_COEFF_IN				(BANK_LENGTH * (8 + 5))
+#define CORE_ADDR_BANK_MONTGOMERY_FACTOR_OUT	(BANK_LENGTH * (8 + 6))
+#define CORE_ADDR_BANK_MONTGOMERY_FACTOR_IN		(BANK_LENGTH * (8 + 7))
 
 		// bit maps
 #define CORE_CONTROL_BIT_INIT		0x00000001
@@ -75,6 +82,27 @@
 
 #define CORE_MODE_BIT_CRT				0x00000002
 
+		/*
+		 * zero operands
+		 */
+#define Z_384 \
+	{0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+	 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+	 0x00000000, 0x00000000, 0x00000000, 0x00000000}
+
+#define Z_192 \
+	{0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+	 0x00000000, 0x00000000}
+
+#define Z_512 \
+	{0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+	 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+	 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+	 0x00000000, 0x00000000, 0x00000000, 0x00000000}
+
+#define Z_256 \
+	{0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+	 0x00000000, 0x00000000, 0x00000000, 0x00000000}
 
 		/*
 		 * test vectors
@@ -83,11 +111,15 @@ static const uint32_t m_384[]	= M_384;
 static const uint32_t n_384[]	= N_384;
 static const uint32_t d_384[]	= D_384;
 static const uint32_t s_384[]	= S_384;
+static uint32_t n_coeff_384[]	= Z_384;
+static uint32_t factor_384[]	= Z_384;
 
 static const uint32_t m_512[]	= M_512;
 static const uint32_t n_512[]	= N_512;
 static const uint32_t d_512[]	= D_512;
 static const uint32_t s_512[]	= S_512;
+static uint32_t n_coeff_512[]	= Z_512;
+static uint32_t factor_512[]	= Z_512;
 
 static const uint32_t p_192[]		= P_192;
 static const uint32_t q_192[]		= Q_192;
@@ -95,6 +127,10 @@ static const uint32_t dp_192[]	= DP_192;
 static const uint32_t dq_192[]	= DQ_192;
 static const uint32_t mp_192[]	= MP_192;
 static const uint32_t mq_192[]	= MQ_192;
+static uint32_t p_coeff_192[]		= Z_192;
+static uint32_t q_coeff_192[]		= Z_192;
+static uint32_t factor_p_192[]	= Z_192;
+static uint32_t factor_q_192[]	= Z_192;
 
 static const uint32_t p_256[]		= P_256;
 static const uint32_t q_256[]		= Q_256;
@@ -102,7 +138,10 @@ static const uint32_t dp_256[]	= DP_256;
 static const uint32_t dq_256[]	= DQ_256;
 static const uint32_t mp_256[]	= MP_256;
 static const uint32_t mq_256[]	= MQ_256;
-
+static uint32_t p_coeff_256[]		= Z_256;
+static uint32_t q_coeff_256[]		= Z_256;
+static uint32_t factor_p_256[]	= Z_256;
+static uint32_t factor_q_256[]	= Z_256;
 
 
 		/*
@@ -110,16 +149,25 @@ static const uint32_t mq_256[]	= MQ_256;
 		 */
 void toggle_yellow_led(void);
 
-void setup_modexpa7(	const uint32_t *n, size_t l);
+void setup_modexpa7(	const uint32_t *n,
+														uint32_t *coeff,
+														uint32_t *factor,
+														size_t		l);
 
-int test_modexpa7(		const uint32_t *m,
+int test_modexpa7(		const uint32_t *n,
+											const uint32_t *m,
 											const uint32_t *d,
 											const uint32_t *s,
+											const uint32_t *coeff,
+											const uint32_t *factor,
 											      size_t    l);
 
-int test_modexpa7_crt(		const uint32_t *m,
+int test_modexpa7_crt(		const uint32_t *n,
+													const uint32_t *m,
 													const uint32_t *d,
 													const uint32_t *s,
+													const uint32_t *coeff,
+													const uint32_t *factor,
 																size_t    l);
 
 
@@ -148,10 +196,10 @@ int main()
 		fmc_read_32(CORE_ADDR_NAME1,   &core_name1);
 		fmc_read_32(CORE_ADDR_VERSION, &core_version);
 			
-				// must be "mode", "xpa7", "0.20"
+				// must be "mode", "xpa7", "0.25"
 		if (	(core_name0   != 0x6D6F6465) ||
 					(core_name1   != 0x78706137) ||
-					(core_version != 0x302E3230))
+					(core_version != 0x302E3235))
 		{
 				led_off(LED_GREEN);
 				led_on(LED_RED);
@@ -164,61 +212,63 @@ int main()
 	
 			// largest supported operand width, systolic array "power"
 		fmc_read_32(CORE_ADDR_BUFFER_BITS, &core_buffer_bits);
-		fmc_read_32(CORE_ADDR_ARRAY_BITS,  &core_array_bits);		
+		fmc_read_32(CORE_ADDR_ARRAY_BITS,  &core_array_bits);
+
+			//
+			// do pre-computation for all the moduli and store speed-up quantities,
+			// note that each key requires three precomputations: one for the entire
+			// public key and one for each of the two corresponding private key components
+			//
+			// we set the 'init' control bit, wait for `ready' status bit to go high,
+			// then retrieve the calculated values from the corresponding "output" banks
+			//
+			// we turn off the green led and turn on the yellow led during the process to
+			// get an idea of how long it takes
+			//
+	
+		led_off(LED_GREEN);
+		led_on(LED_YELLOW);
+
+			// 384-bit key and 192-bit primes
+		setup_modexpa7(n_384, n_coeff_384, factor_384,   384);
+		setup_modexpa7(p_192, p_coeff_192, factor_p_192, 192);
+		setup_modexpa7(q_192, q_coeff_192, factor_q_192, 192);
+		
+			// 512-bit key and 256-bit primes
+		setup_modexpa7(n_512, n_coeff_512, factor_512,   512);
+		setup_modexpa7(p_256, p_coeff_256, factor_p_256, 256);
+		setup_modexpa7(q_256, q_coeff_256, factor_q_256, 256);
+		
+		led_off(LED_YELLOW);
+		led_on(LED_GREEN);
+
 		
 			// repeat forever
 		while (1)
-		{
-						// New modulus requires precomputation of modulus-dependent
-						// speed-up coefficient, this must be done once per new
-						// modulus, i.e. when we're repeatedly signing with the
-						// same key, we only need to do precomputation once before
-						// starting the very first signing operation.
-			
+		{			
 						// fresh start
 				ok = 1;
-			
-				{		
-								// run precomputation of modulus-dependent factor for the 384-bit modulus
-						setup_modexpa7(n_384, 384);
-					
-								// try signing the message from the 384-bit test vector
-						ok = ok && test_modexpa7(m_384, d_384, s_384, 384);
-				}
-				{				
-								// run precomputation of modulus-dependent factor for the 512-bit modulus
-						setup_modexpa7(n_512, 512);
-					
-								// try signing the message from the 512-bit test vector
-						ok = ok && test_modexpa7(m_512, d_512, s_512, 512);
-				}
 
-				{				
-								// run precomputation of modulus-dependent factor for the first 192-bit part of 384-bit modulus
-						setup_modexpa7(p_192, 192);
-					
+				{
+								// try signing the message with the 384-bit test vector
+						ok = ok && test_modexpa7(n_384, m_384, d_384, s_384, n_coeff_384, factor_384, 384);
+
 								// try signing 384-bit base using 192-bit exponent
-						ok = ok && test_modexpa7_crt(m_384, dp_192, mp_192, 192);
-					
-								// run precomputation of modulus-dependent factor for the second 192-bit part of 384-bit modulus
-						setup_modexpa7(q_192, 192);
+						ok = ok && test_modexpa7_crt(p_192, m_384, dp_192, mp_192, p_coeff_192, factor_p_192, 192);
 					
 								// try signing 384-bit base using 192-bit exponent
-						ok = ok && test_modexpa7_crt(m_384, dq_192, mq_192, 192);
+						ok = ok && test_modexpa7_crt(q_192, m_384, dq_192, mq_192, q_coeff_192, factor_q_192, 192);
 				}
+
+				{
+								// try signing the message with the 512-bit test vector
+						ok = ok && test_modexpa7(n_512, m_512, d_512, s_512, n_coeff_512, factor_512, 512);			
 				
-				{				
-								// run precomputation of modulus-dependent factor for the first 256-bit part of 512-bit modulus
-						setup_modexpa7(p_256, 256);
-					
 								// try signing 512-bit base using 256-bit exponent
-						ok = ok && test_modexpa7_crt(m_512, dp_256, mp_256, 256);
-					
-								// run precomputation of modulus-dependent factor for the second 256-bit part of 512-bit modulus
-						setup_modexpa7(q_256, 256);
+						ok = ok && test_modexpa7_crt(p_256, m_512, dp_256, mp_256, p_coeff_256, factor_p_256, 256);
 					
 								// try signing 512-bit base using 256-bit exponent
-						ok = ok && test_modexpa7_crt(m_512, dq_256, mq_256, 256);
+						ok = ok && test_modexpa7_crt(q_256, m_512, dq_256, mq_256, q_coeff_256, factor_q_256, 256);
 				}
 				
 						// turn on the red led to indicate something went wrong
@@ -234,15 +284,18 @@ int main()
 
 
 		/*
-		 * Load new modulus and do the necessary precomputations.
+		 * Load new modulus and do all the necessary precomputations.
 		 */
 void setup_modexpa7(	const uint32_t *n,
+														uint32_t *coeff,
+														uint32_t *factor,
 										        size_t    l)
 {
 		size_t i, num_words;
 		uint32_t num_bits;
 		uint32_t reg_control, reg_status;
 		uint32_t n_word;
+		uint32_t coeff_word, factor_word;
 		uint32_t dummy_num_cyc;		
 	
 			// determine numbers of 32-bit words
@@ -250,10 +303,9 @@ void setup_modexpa7(	const uint32_t *n,
 	
 			// set modulus width
 		num_bits = l;
-		fmc_write_32(CORE_ADDR_MODULUS_BITS,  &num_bits);
+		fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits);
 	
-			// fill modulus bank (the least significant word
-			// is at the lowest offset)
+			// fill modulus bank (the least significant word is at the lowest offset)
 		for (i=0; i<num_words; i++)
 		{		n_word = n[i];
 				fmc_write_32(CORE_ADDR_BANK_MODULUS  + ((num_words - (i + 1)) * sizeof(uint32_t)), &n_word);
@@ -273,42 +325,70 @@ void setup_modexpa7(	const uint32_t *n,
 				fmc_read_32(CORE_ADDR_STATUS, &reg_status);
 		}
 		while (!(reg_status & CORE_STATUS_BIT_READY));
+		
+				// retrieve the modulus-dependent coefficient and Montgomery factor
+				// from the corresponding core "output" banks and store them for later use
+		for (i=0; i<num_words; i++)
+		{
+				fmc_read_32(CORE_ADDR_BANK_MODULUS_COEFF_OUT + i * sizeof(uint32_t), &coeff_word);	
+				coeff[i] = coeff_word;
+
+				fmc_read_32(CORE_ADDR_BANK_MONTGOMERY_FACTOR_OUT + i * sizeof(uint32_t), &factor_word);
+				factor[i] = factor_word;
+		}
 }
 
 
 		//
 		// Sign the message and compare it against the correct reference value.
 		//
-int test_modexpa7(	const uint32_t *m,
+int test_modexpa7(	const uint32_t *n,	
+										const uint32_t *m,
 										const uint32_t *d,
 										const uint32_t *s,
+										const uint32_t *coeff,
+										const uint32_t *factor,
 										      size_t    l)
 {
 		size_t i, num_words;
 		uint32_t num_bits;
 		uint32_t reg_control, reg_status;
-		uint32_t m_word, d_word, s_word;
+		uint32_t n_word, m_word, d_word, s_word;
+		uint32_t coeff_word, factor_word;
 		uint32_t dummy_num_cyc;		
 		uint32_t mode;
 		
 				// determine numbers of 32-bit words
 		num_words = l >> 5;
 	
-				// set exponent width
+				// set modulus width, exponent width
 		num_bits = l;
-		fmc_write_32(CORE_ADDR_EXPONENT_BITS,  &num_bits);
+		fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits);
+		fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits);
 	
 				// disable CRT mode
 		mode = 0;
 		fmc_write_32(CORE_ADDR_MODE, &mode);
 	
-				// fill message and exponent banks (the least significant
-				// word is at the lowest offset)
+				// fill modulus, message and exponent banks (the least significant
+				// word is at the lowest offset), we also need to fill "input" core
+				// banks with previously pre-calculated and saved modulus-dependent
+				// speed-up coefficient and Montgomery factor
 		for (i=0; i<num_words; i++)
-		{		m_word = m[i];
+		{		
+				n_word = n[i];
+				m_word = m[i];
 				d_word = d[i];
+
+				fmc_write_32(CORE_ADDR_BANK_MODULUS  + ((num_words - (i + 1)) * sizeof(uint32_t)), &n_word);
 				fmc_write_32(CORE_ADDR_BANK_MESSAGE  + ((num_words - (i + 1)) * sizeof(uint32_t)), &m_word);
 				fmc_write_32(CORE_ADDR_BANK_EXPONENT + ((num_words - (i + 1)) * sizeof(uint32_t)), &d_word);
+
+				coeff_word = coeff[i];
+				factor_word = factor[i];
+			
+				fmc_write_32(CORE_ADDR_BANK_MODULUS_COEFF_IN     + i * sizeof(uint32_t), &coeff_word);
+				fmc_write_32(CORE_ADDR_BANK_MONTGOMERY_FACTOR_IN + i * sizeof(uint32_t), &factor_word);
 		}
 
 				// clear 'next' control bit, then set 'next' control bit again
@@ -331,8 +411,7 @@ int test_modexpa7(	const uint32_t *m,
 		{		
 				fmc_read_32(CORE_ADDR_BANK_RESULT + (i * sizeof(uint32_t)), &s_word);
 			
-				if (s_word != s[num_words - (i + 1)])
-					return 0;
+				if (s_word != s[num_words - (i + 1)]) return 0;
 		}
 	
 				// everything went just fine
@@ -340,34 +419,49 @@ int test_modexpa7(	const uint32_t *m,
 }
 
 
-int test_modexpa7_crt(	const uint32_t *m,
+int test_modexpa7_crt(	const uint32_t *n,
+												const uint32_t *m,
 												const uint32_t *d,
 												const uint32_t *s,
+												const uint32_t *coeff,
+												const uint32_t *factor,
 															size_t    l)
 {
 		size_t i, num_words;
 		uint32_t num_bits;
 		uint32_t reg_control, reg_status;
-		uint32_t m_word, d_word, s_word;
+		uint32_t n_word, m_word, d_word, s_word;
+		uint32_t coeff_word, factor_word;
 		uint32_t dummy_num_cyc;		
 		uint32_t mode;
 		
 				// determine numbers of 32-bit words
 		num_words = l >> 5;
 	
-				// set exponent width
+				// set modulus width, exponent width
 		num_bits = l;
-		fmc_write_32(CORE_ADDR_EXPONENT_BITS,  &num_bits);
+		fmc_write_32(CORE_ADDR_MODULUS_BITS, &num_bits);
+		fmc_write_32(CORE_ADDR_EXPONENT_BITS, &num_bits);
 	
 				// enable CRT mode
 		mode = CORE_MODE_BIT_CRT;
 		fmc_write_32(CORE_ADDR_MODE, &mode);
 	
-				// fill exponent bank (the least significant word
-				// is at the lowest offset)
+				// fill modulus and exponent banks (the least significant word is at
+				// the lowest offset), we also need to fill "input" core banks with
+				// previously pre-calculated and saved modulus-dependent speed-up
+				// coefficient and Montgomery factor
 		for (i=0; i<num_words; i++)
-		{		d_word = d[i];
+		{		n_word = n[i];
+				d_word = d[i];
+				fmc_write_32(CORE_ADDR_BANK_MODULUS  + ((num_words - (i + 1)) * sizeof(uint32_t)), &n_word);
 				fmc_write_32(CORE_ADDR_BANK_EXPONENT + ((num_words - (i + 1)) * sizeof(uint32_t)), &d_word);
+			
+				coeff_word = coeff[i];
+				factor_word = factor[i];
+			
+				fmc_write_32(CORE_ADDR_BANK_MODULUS_COEFF_IN     + i * sizeof(uint32_t), &coeff_word);
+				fmc_write_32(CORE_ADDR_BANK_MONTGOMERY_FACTOR_IN + i * sizeof(uint32_t), &factor_word);
 		}
 
 				// fill message bank (the least significant word
diff --git a/src/tb/tb_wrapper.v b/src/tb/tb_wrapper.v
index fae0934..054333e 100644
--- a/src/tb/tb_wrapper.v
+++ b/src/tb/tb_wrapper.v
@@ -2,6 +2,13 @@
 
 module tb_wrapper;
 
+
+		//
+		// Test Vectors
+		//
+	`include "modexp_fpga_model_vectors.v";
+
+
 		/*
 		 * Settings
 		 */
@@ -25,7 +32,7 @@ module tb_wrapper;
 		  */
 	reg											bus_cs;
 	reg											bus_we;
-	reg	[USE_OPERAND_ADDR_WIDTH+2:0]	bus_addr;
+	reg	[USE_OPERAND_ADDR_WIDTH+3:0]	bus_addr;
 	reg	[                    32-1:0]	bus_wr_data;
 	wire	[                    32-1:0]	bus_rd_data;
 
@@ -47,7 +54,10 @@ module tb_wrapper;
 		.read_data	(bus_rd_data)
 	);
 
+	integer i;
 	reg	[31: 0]	tmp;
+	reg	[383:0]	shreg;
+	reg				poll;
 	initial begin
 		//
 		rst_n = 0;
@@ -74,11 +84,95 @@ module tb_wrapper;
 		write_reg('h11, 32'd384);	// MODULUS_BITS
 		read_reg ('h11, tmp);
 		//
+		write_reg('h10, 32'd0);		// MODE
+		read_reg ('h10, tmp);
+		//
+		// pre-calculate 384-bit quantities
+		//
+		shreg = N_384;
+		for (i=0; i<384/32; i=i+1) begin
+			write_bank(3'b000, i[USE_OPERAND_ADDR_WIDTH-1:0], shreg[31:0]);
+			shreg = shreg >> 32;
+		end
+		//
+		write_reg('h08, 32'd0);		// CONTROL.init = 0
+		write_reg('h08, 32'd1);		// CONTROL.init = 1
+		//
+		poll = 1;
+		while (poll) begin
+			#10;
+			read_reg('h09, tmp);		// tmp = STATUS
+			poll = ~tmp[0];			// poll = ~STATUS.ready (keep polling until ready is set)
+		end
+		//
+		// fill banks
+		//
+		for (i=0; i<384/32; i=i+1) begin
+			read_bank(3'b100, i[USE_OPERAND_ADDR_WIDTH-1:0], tmp);
+			write_bank(3'b101, i[USE_OPERAND_ADDR_WIDTH-1:0], tmp);
+			read_bank(3'b110, i[USE_OPERAND_ADDR_WIDTH-1:0], tmp);
+			write_bank(3'b111, i[USE_OPERAND_ADDR_WIDTH-1:0], tmp);
+		end
+		//
+		shreg = M_384;
+		for (i=0; i<384/32; i=i+1) begin
+			write_bank(3'b001, i[USE_OPERAND_ADDR_WIDTH-1:0], shreg[31:0]);
+			shreg = shreg >> 32;
+		end
+		//
+		shreg = D_384;
+		for (i=0; i<384/32; i=i+1) begin
+			write_bank(3'b010, i[USE_OPERAND_ADDR_WIDTH-1:0], shreg[31:0]);
+			shreg = shreg >> 32;
+		end
+		//
+		// wipe
+		//
+		shreg = {384{1'b0}};
+		for (i=0; i<384/32; i=i+1) begin
+			write_bank(3'b000, i[USE_OPERAND_ADDR_WIDTH-1:0], shreg[31:0]);
+			shreg = shreg >> 32;
+		end
+		//
+		write_reg('h08, 32'd0);		// CONTROL.init = 0
+		write_reg('h08, 32'd1);		// CONTROL.init = 1
+		//
+		poll = 1;
+		while (poll) begin
+			#10;
+			read_reg('h09, tmp);		// tmp = STATUS
+			poll = ~tmp[0];			// poll = ~STATUS.ready (keep polling until ready is set)
+		end
+		//
+		// restore
+		//
+		shreg = N_384;
+		for (i=0; i<384/32; i=i+1) begin
+			write_bank(3'b000, i[USE_OPERAND_ADDR_WIDTH-1:0], shreg[31:0]);
+			shreg = shreg >> 32;
+		end
+		//
+		//
+		//
+		write_reg('h08, 32'd0);		// CONTROL.next = 0
+		write_reg('h08, 32'd2);		// CONTROL.next = 1
+		//
+		poll = 1;
+		while (poll) begin
+			#10;
+			read_reg('h09, tmp);		// tmp = STATUS
+			poll = ~tmp[1];			// poll = ~STATUS.valid (keep polling until valid is set)
+		end
+		//
+		for (i=0; i<384/32; i=i+1) begin
+			read_bank(3'b011, i[USE_OPERAND_ADDR_WIDTH-1:0], tmp);
+			shreg = {tmp, shreg[383:32]};
+		end
 		//
 	end
 	
 	task read_reg;
-		input		[USE_OPERAND_ADDR_WIDTH+1:0]	addr;
+		input		[USE_OPERAND_ADDR_WIDTH+2:0]	addr;
 		output	[                    32-1:0]	data;
 		begin
 			bus_cs = 1;
@@ -89,9 +183,23 @@ module tb_wrapper;
 			data = bus_rd_data;
 		end
 	endtask
+	
+	task read_bank;
+		input		[                       2:0]	bank;
+		input		[USE_OPERAND_ADDR_WIDTH-1:0]	addr;
+		output	[                    32-1:0]	data;
+		begin
+			bus_cs = 1;
+			bus_addr = {1'b1, bank, addr};
+			#10;
+			bus_cs = 0;
+			bus_addr = 'bX;
+			data = bus_rd_data;
+		end
+	endtask
 
 	task write_reg;
-		input		[USE_OPERAND_ADDR_WIDTH+1:0]	addr;
+		input		[USE_OPERAND_ADDR_WIDTH+2:0]	addr;
 		input		[                    32-1:0]	data;
 		begin
 			bus_cs = 1;
@@ -104,6 +212,21 @@ module tb_wrapper;
 			bus_addr = 'bX;
 		end
 	endtask
-      
+	
+	task write_bank;
+		input		[                       2:0]	bank;
+		input		[USE_OPERAND_ADDR_WIDTH-1:0]	addr;
+		input		[                    32-1:0]	data;
+		begin
+			bus_cs = 1;
+			bus_we = 1;
+			bus_addr = {1'b1, bank, addr};
+			bus_wr_data = data;
+			#10;
+			bus_cs = 0;
+			bus_we = 0;
+			bus_addr = 'bX;
+		end
+	endtask
 endmodule
 



More information about the Commits mailing list