[Cryptech-Commits] [user/shatov/modexpng] 14/14: Update STM32 demo driver.

git at cryptech.is git at cryptech.is
Tue Feb 11 13:04:36 UTC 2020


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexpng.

commit d35ff846a870c66406b251fac1ce20aafb43af96
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Tue Feb 11 15:58:29 2020 +0300

    Update STM32 demo driver.
---
 stm32/modexpng_driver_sample.c | 134 ++++++++++++++++++++++++++---------------
 1 file changed, 87 insertions(+), 47 deletions(-)

diff --git a/stm32/modexpng_driver_sample.c b/stm32/modexpng_driver_sample.c
index f455b55..d87926a 100644
--- a/stm32/modexpng_driver_sample.c
+++ b/stm32/modexpng_driver_sample.c
@@ -199,15 +199,55 @@ int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run,
 		
 #define sign_using_crt(k,f,m,n,nf,nc,x,y,p,q,pf,pc,qf,qc,dp,dq,qinv,s,xm,ym) \
 			 _sign_handler  (k,1,f,m,n,nf,nc,x,y,p,q,pf,pc,qf,qc,dp,dq,NULL,qinv,s,xm,ym)
+
 		
-		
+// dirty workarounds: local byte-swap macros; _ntohl and _htonl are the same
+// swap, each reverses the order of the four low bytes of n
+// NOTE: n is expanded four times, so pass only side-effect-free expressions
+#define _ntohl(n) (((((unsigned long)(n) & 0xFF))       << 24)| \
+                   ((((unsigned long)(n) & 0xFF00))     << 8) | \
+                   ((((unsigned long)(n) & 0xFF0000))   >> 8) | \
+                   ((((unsigned long)(n) & 0xFF000000)) >> 24))
+
+#define _htonl(n) (((((unsigned long)(n) & 0xFF))       << 24)| \
+                   ((((unsigned long)(n) & 0xFF00))     << 8) | \
+                   ((((unsigned long)(n) & 0xFF0000))   >> 8) | \
+                   ((((unsigned long)(n) & 0xFF000000)) >> 24))
+
+
+//
+// Core Offset
+//									 
+#define MODEXPNG_CORE_NUM 0x26
+
+
+//
+// more dirty workarounds
+//
+static void _fmc_read_32(uint32_t from_addr, uint32_t *to_ptr)	// read the 32-bit word at core offset from_addr into *to_ptr
+{		// the core's window starts (256 << 2) * MODEXPNG_CORE_NUM bytes past FMC_FPGA_BASE_ADDR
+		uint32_t src_addr = FMC_FPGA_BASE_ADDR + (((256 << 2) * MODEXPNG_CORE_NUM + from_addr) & FMC_FPGA_ADDR_MASK);
+		uint32_t t = *((volatile uint32_t *)src_addr);	// volatile: each call must really load (callers poll the status word in a loop)
+		*to_ptr = _ntohl(t);	// byte-swap the raw word before handing it back
+}
+
+static void _fmc_write_32(uint32_t to_addr, uint32_t value)	// write the 32-bit word value at core offset to_addr
+{		// value is byte-swapped first; the core's window starts (256 << 2) * MODEXPNG_CORE_NUM bytes past FMC_FPGA_BASE_ADDR
+		uint32_t t = _htonl(value);
+		uint32_t dst_addr = FMC_FPGA_BASE_ADDR + (((256 << 2) * MODEXPNG_CORE_NUM + to_addr) & FMC_FPGA_ADDR_MASK);
+		*(volatile uint32_t *)dst_addr = t;	// volatile: consecutive stores to one address (control 0, then NEXT) must all be emitted
+}
+
+
 //
 // test routine
 //
 int main()
 {
-		int ok;
-		int first_run;
+		int ok, first_run;
+		long long int iters;
+	
+		ok = sizeof iters;
 	
 		// initialize
 		stm_init();
@@ -224,9 +264,9 @@ int main()
 		uint32_t core_name1;
 		uint32_t core_version;
 	
-		fmc_read_32(CORE_ADDR_NAME0,   &core_name0);
-		fmc_read_32(CORE_ADDR_NAME1,   &core_name1);
-		fmc_read_32(CORE_ADDR_VERSION, &core_version);
+		_fmc_read_32(CORE_ADDR_NAME0,   &core_name0);
+		_fmc_read_32(CORE_ADDR_NAME1,   &core_name1);
+		_fmc_read_32(CORE_ADDR_VERSION, &core_version);
 	
 		// "mode", "xpng"
 		if ((core_name0 != 0x6D6F6465) || (core_name1 != 0x78706E67))
@@ -237,7 +277,7 @@ int main()
 		
 		// check, that reference code works correctly
 		ok = 1;
-		
+		/**/
 		ok = ok && check_montgomery_factor(1024, N_1024, N_FACTOR_1024);
 		ok = ok && check_montgomery_factor( 512, P_1024, P_FACTOR_1024);
 		ok = ok && check_montgomery_factor( 512, Q_1024, Q_FACTOR_1024);
@@ -247,17 +287,17 @@ int main()
 		ok = ok && check_montgomery_factor(4096, N_4096, N_FACTOR_4096);
 		ok = ok && check_montgomery_factor(2048, P_4096, P_FACTOR_4096);
 		ok = ok && check_montgomery_factor(2048, Q_4096, Q_FACTOR_4096);
-
+		/**//**/
 		ok = ok && check_modulus_coeff(1024, N_1024, N_COEFF_1024);
 		ok = ok && check_modulus_coeff( 512, P_1024, P_COEFF_1024);
 		ok = ok && check_modulus_coeff( 512, Q_1024, Q_COEFF_1024);
 		ok = ok && check_modulus_coeff(2048, N_2048, N_COEFF_2048);
 		ok = ok && check_modulus_coeff(1024, P_2048, P_COEFF_2048);
 		ok = ok && check_modulus_coeff(1024, Q_2048, Q_COEFF_2048);
-//	ok = ok && check_modulus_coeff(4096, N_4096, N_COEFF_4096); // SLOW (~20 sec)
+//  ok = ok && check_modulus_coeff(4096, N_4096, N_COEFF_4096); // SLOW (~20 sec)
 		ok = ok && check_modulus_coeff(2048, P_4096, P_COEFF_4096);
 		ok = ok && check_modulus_coeff(2048, Q_4096, Q_COEFF_4096);
-
+		/**/
 		if (!ok)
 		{		led_off(LED_GREEN);
 				led_on(LED_RED);
@@ -265,14 +305,14 @@ int main()
 		}
 		
 		// repeat forever
-		ok = 1, first_run = 1;
+		ok = 1, first_run = 1, iters = 0;
 		while (1)
-		{
+		{	
 				ok = ok && sign_without_crt(1024, first_run,
 						M_1024,  N_1024, N_FACTOR_1024, N_COEFF_1024,
 						X_1024,  Y_1024, D_1024,        S_1024,
 						XM_1024, YM_1024);
-
+				
 				ok = ok && sign_without_crt(2048, first_run,
 						M_2048,  N_2048, N_FACTOR_2048, N_COEFF_2048,
 						X_2048,  Y_2048, D_2048,        S_2048,
@@ -282,14 +322,14 @@ int main()
 						M_4096,  N_4096, N_FACTOR_4096, N_COEFF_4096,
 						X_4096,  Y_4096, D_4096,        S_4096,
 						XM_4096, YM_4096);
-
+			
 				ok = ok && sign_using_crt(1024, first_run,
 						M_1024,        N_1024,       N_FACTOR_1024, N_COEFF_1024,
 						X_1024,        Y_1024,       P_1024,        Q_1024,
 						P_FACTOR_1024, P_COEFF_1024, Q_FACTOR_1024, Q_COEFF_1024,
 						DP_1024,       DQ_1024,      QINV_1024,     S_1024,
 						XM_1024,       YM_1024);
-
+				
 				ok = ok && sign_using_crt(2048, first_run,
 						M_2048,        N_2048,       N_FACTOR_2048, N_COEFF_2048,
 						X_2048,        Y_2048,       P_2048,        Q_2048,
@@ -303,13 +343,13 @@ int main()
 						P_FACTOR_4096, P_COEFF_4096, Q_FACTOR_4096, Q_COEFF_4096,
 						DP_4096,       DQ_4096,      QINV_4096,     S_4096,
 						XM_4096,       YM_4096);
-
+				
 				if (!ok)
 				{		led_off(LED_GREEN);
 						led_on(LED_RED);
 				}
 
-				first_run = 0;
+				first_run = 0, iters++;
 				
 				toggle_yellow_led();
 		}
@@ -395,33 +435,33 @@ int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run,
 		// note, that n_coeff is one word larger, than the modulus, so we need a single
 		// extra write after the word-by-word loop
 		for (i=0, j=num_words-1; i<num_words; i++, j--)
-		{		              fmc_write_32(CORE_ADDR_BANK_M        + i * sizeof(uint32_t), m[j]);
-				              fmc_write_32(CORE_ADDR_BANK_N        + i * sizeof(uint32_t), n[j]);
-				              fmc_write_32(CORE_ADDR_BANK_N_FACTOR + i * sizeof(uint32_t), n_factor[j]);
-				              fmc_write_32(CORE_ADDR_BANK_N_COEFF  + i * sizeof(uint32_t), n_coeff[j+1]);	// mind the +1
-			                fmc_write_32(CORE_ADDR_BANK_X        + i * sizeof(uint32_t), x[j]);
-			                fmc_write_32(CORE_ADDR_BANK_Y        + i * sizeof(uint32_t), y[j]);
-			  if (!use_crt) fmc_write_32(CORE_ADDR_BANK_D        + i * sizeof(uint32_t), d[j]);
-				else					fmc_write_32(CORE_ADDR_BANK_D        + i * sizeof(uint32_t), 0);
+		{		              _fmc_write_32(CORE_ADDR_BANK_M        + i * sizeof(uint32_t), m[j]);
+				              _fmc_write_32(CORE_ADDR_BANK_N        + i * sizeof(uint32_t), n[j]);
+				              _fmc_write_32(CORE_ADDR_BANK_N_FACTOR + i * sizeof(uint32_t), n_factor[j]);
+				              _fmc_write_32(CORE_ADDR_BANK_N_COEFF  + i * sizeof(uint32_t), n_coeff[j+1]);	// mind the +1
+			                _fmc_write_32(CORE_ADDR_BANK_X        + i * sizeof(uint32_t), x[j]);
+			                _fmc_write_32(CORE_ADDR_BANK_Y        + i * sizeof(uint32_t), y[j]);
+			  if (!use_crt) _fmc_write_32(CORE_ADDR_BANK_D        + i * sizeof(uint32_t), d[j]);
+				else					_fmc_write_32(CORE_ADDR_BANK_D        + i * sizeof(uint32_t), 0);
 		}
-		fmc_write_32(CORE_ADDR_BANK_N_COEFF  + i * sizeof(uint32_t), n_coeff[0]);	// j+1 is 0 by now, i is num_words
-		
+		_fmc_write_32(CORE_ADDR_BANK_N_COEFF  + i * sizeof(uint32_t), n_coeff[0]);	// j+1 is 0 by now, i is num_words
+				
 		// also fill in all the input values necessary for CRT mode
 		// again, we need to write a pair of extra words for p_coeff and q_coeff after the loop
 		if (use_crt)
 		{		for (i=0, j=num_words_half-1; i<num_words_half; i++, j--)
-				{		fmc_write_32(CORE_ADDR_BANK_P        + i * sizeof(uint32_t), p[j]);
-						fmc_write_32(CORE_ADDR_BANK_Q        + i * sizeof(uint32_t), q[j]);
-						fmc_write_32(CORE_ADDR_BANK_P_FACTOR + i * sizeof(uint32_t), p_factor[j]);
-						fmc_write_32(CORE_ADDR_BANK_P_COEFF  + i * sizeof(uint32_t), p_coeff[j+1]); // mind the +1!
-						fmc_write_32(CORE_ADDR_BANK_Q_FACTOR + i * sizeof(uint32_t), q_factor[j]);
-						fmc_write_32(CORE_ADDR_BANK_Q_COEFF  + i * sizeof(uint32_t), q_coeff[j+1]); // mind the +1!
-						fmc_write_32(CORE_ADDR_BANK_DP       + i * sizeof(uint32_t), dp[j]);
-						fmc_write_32(CORE_ADDR_BANK_DQ       + i * sizeof(uint32_t), dq[j]);
-						fmc_write_32(CORE_ADDR_BANK_QINV     + i * sizeof(uint32_t), qinv[j]);
+				{		_fmc_write_32(CORE_ADDR_BANK_P        + i * sizeof(uint32_t), p[j]);
+						_fmc_write_32(CORE_ADDR_BANK_Q        + i * sizeof(uint32_t), q[j]);
+						_fmc_write_32(CORE_ADDR_BANK_P_FACTOR + i * sizeof(uint32_t), p_factor[j]);
+						_fmc_write_32(CORE_ADDR_BANK_P_COEFF  + i * sizeof(uint32_t), p_coeff[j+1]); // mind the +1!
+						_fmc_write_32(CORE_ADDR_BANK_Q_FACTOR + i * sizeof(uint32_t), q_factor[j]);
+						_fmc_write_32(CORE_ADDR_BANK_Q_COEFF  + i * sizeof(uint32_t), q_coeff[j+1]); // mind the +1!
+						_fmc_write_32(CORE_ADDR_BANK_DP       + i * sizeof(uint32_t), dp[j]);
+						_fmc_write_32(CORE_ADDR_BANK_DQ       + i * sizeof(uint32_t), dq[j]);
+						_fmc_write_32(CORE_ADDR_BANK_QINV     + i * sizeof(uint32_t), qinv[j]);
 			  }
-				fmc_write_32(CORE_ADDR_BANK_P_COEFF  + i * sizeof(uint32_t), p_coeff[0]);	// j+1 is 0 by now, i is num_words_half
-				fmc_write_32(CORE_ADDR_BANK_Q_COEFF  + i * sizeof(uint32_t), q_coeff[0]);	// j+1 is 0 by now, i is num_words_half
+				_fmc_write_32(CORE_ADDR_BANK_P_COEFF  + i * sizeof(uint32_t), p_coeff[0]);	// j+1 is 0 by now, i is num_words_half
+				_fmc_write_32(CORE_ADDR_BANK_Q_COEFF  + i * sizeof(uint32_t), q_coeff[0]);	// j+1 is 0 by now, i is num_words_half
 		}
 		
 		// set parameters (there's no need to divide key length by two when CRT is enabled,
@@ -430,22 +470,22 @@ int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run,
 		reg_modulus_bits  = key_length;
 		reg_exponent_bits = key_length;
 		
-		fmc_write_32(CORE_ADDR_MODE,          reg_mode);
-		fmc_write_32(CORE_ADDR_MODULUS_BITS,  reg_modulus_bits);
-		fmc_write_32(CORE_ADDR_EXPONENT_BITS, reg_exponent_bits);
+		_fmc_write_32(CORE_ADDR_MODE,          reg_mode);
+		_fmc_write_32(CORE_ADDR_MODULUS_BITS,  reg_modulus_bits);
+		_fmc_write_32(CORE_ADDR_EXPONENT_BITS, reg_exponent_bits);
 	
 		// clear 'next' control bit, then set 'next' control bit again to trigger new operation
 		reg_control = 0;
-		fmc_write_32(CORE_ADDR_CONTROL, reg_control);
+		_fmc_write_32(CORE_ADDR_CONTROL, reg_control);
 		reg_control = CORE_CONTROL_BIT_NEXT;
-		fmc_write_32(CORE_ADDR_CONTROL, reg_control);
+		_fmc_write_32(CORE_ADDR_CONTROL, reg_control);
 
 		// wait for 'ready' status bit to be set, also turn on the blue LED while the
 		// core is busy to allow precise measurement with a scope
 		num_cyc = 0;
 		do
 		{		num_cyc++;
-				fmc_read_32(CORE_ADDR_STATUS, &reg_status);
+				_fmc_read_32(CORE_ADDR_STATUS, &reg_status);
 		}
 		while (!(reg_status & CORE_STATUS_BIT_VALID));
 		
@@ -460,9 +500,9 @@ int _sign_handler(uint32_t key_length, uint32_t use_crt, uint32_t first_run,
 		// always stay the same, so we always verify it
 		uint32_t s_word, xm_word, ym_word;
 		for (i=0, j=num_words-1; i<num_words; i++, j--)
-		{		fmc_read_32(CORE_ADDR_BANK_S  + i * sizeof(uint32_t), &s_word);
-				fmc_read_32(CORE_ADDR_BANK_XM + i * sizeof(uint32_t), &xm_word);
-				fmc_read_32(CORE_ADDR_BANK_YM + i * sizeof(uint32_t), &ym_word);
+		{		_fmc_read_32(CORE_ADDR_BANK_S  + i * sizeof(uint32_t), &s_word);
+				_fmc_read_32(CORE_ADDR_BANK_XM + i * sizeof(uint32_t), &xm_word);
+				_fmc_read_32(CORE_ADDR_BANK_YM + i * sizeof(uint32_t), &ym_word);
 
 				if (s_word != s[j]) return 0;
 				



More information about the Commits mailing list