[Cryptech-Commits] [user/shatov/curve25519_fpga_model] 01/04: Reworked Ed25519-specific microcode, added magic markers to allow automatic parsing.

git at cryptech.is git at cryptech.is
Mon Oct 15 13:17:10 UTC 2018


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/curve25519_fpga_model.

commit 266984f6cb40b79e924eb31a48f055484b5626e2
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Mon Oct 15 16:01:34 2018 +0300

    Reworked Ed25519-specific microcode, added magic markers to allow automatic
    parsing.
---
 ed25519/ed25519_fpga_curve_microcode.cpp | 164 +++++++++++++++++++------------
 1 file changed, 100 insertions(+), 64 deletions(-)

diff --git a/ed25519/ed25519_fpga_curve_microcode.cpp b/ed25519/ed25519_fpga_curve_microcode.cpp
index 25826c5..37bbd1f 100644
--- a/ed25519/ed25519_fpga_curve_microcode.cpp
+++ b/ed25519/ed25519_fpga_curve_microcode.cpp
@@ -139,7 +139,10 @@ void fpga_curve_ed25519_base_scalar_multiply_microcode(const FPGA_BUFFER *K, FPG
 	K_INT.words[FPGA_OPERAND_NUM_WORDS-1] &= 0x3FFFFFFF;
 	K_INT.words[FPGA_OPERAND_NUM_WORDS-1] |= 0x40000000;
 
-		// initialization
+
+	/* BEGIN_MICROCODE: PREPARE */
+
+		// initialize
 	uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_R0_X, BUF_LO, BUF_HI);
 	uop_move(BANK_HI, CONST_ONE,  BANK_LO, CYCLE_R0_Y, BUF_LO, BUF_HI);
 	uop_move(BANK_HI, CONST_ONE,  BANK_LO, CYCLE_R0_Z, BUF_LO, BUF_HI);
@@ -150,6 +153,8 @@ void fpga_curve_ed25519_base_scalar_multiply_microcode(const FPGA_BUFFER *K, FPG
 	uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_R1_Z, BUF_LO, BUF_HI);
 	uop_move(BANK_HI, CONST_G_T, BANK_LO, CYCLE_R1_T, BUF_LO, BUF_HI);
 
+	/* END_MICROCODE */
+
 		// multiply
 	for (word_count=0; word_count<FPGA_OPERAND_NUM_WORDS; word_count++)
 	{
@@ -163,6 +168,8 @@ void fpga_curve_ed25519_base_scalar_multiply_microcode(const FPGA_BUFFER *K, FPG
 			{
 				// U = R0
 				// V = R1
+                    
+				/* BEGIN_MICROCODE: BEFORE_ROUND_K1 */
 
 				uop_move(BANK_LO, CYCLE_R0_X, BANK_HI, CYCLE_U_X, BUF_LO, BUF_HI);
 				uop_move(BANK_LO, CYCLE_R0_Y, BANK_HI, CYCLE_U_Y, BUF_LO, BUF_HI);
@@ -172,12 +179,16 @@ void fpga_curve_ed25519_base_scalar_multiply_microcode(const FPGA_BUFFER *K, FPG
 				uop_move(BANK_LO, CYCLE_R1_Y, BANK_HI, CYCLE_V_Y, BUF_LO, BUF_HI);
 				uop_move(BANK_LO, CYCLE_R1_Z, BANK_HI, CYCLE_V_Z, BUF_LO, BUF_HI);
 				uop_move(BANK_LO, CYCLE_R1_T, BANK_HI, CYCLE_V_T, BUF_LO, BUF_HI);
+
+				/* END_MICROCODE */
 			}
 			else
 			{
 				// U = R1
 				// V = R0
 
+				/* BEGIN_MICROCODE: BEFORE_ROUND_K0 */
+
 				uop_move(BANK_LO, CYCLE_R0_X, BANK_HI, CYCLE_V_X, BUF_LO, BUF_HI);
 				uop_move(BANK_LO, CYCLE_R0_Y, BANK_HI, CYCLE_V_Y, BUF_LO, BUF_HI);
 				uop_move(BANK_LO, CYCLE_R0_Z, BANK_HI, CYCLE_V_Z, BUF_LO, BUF_HI);
@@ -186,111 +197,136 @@ void fpga_curve_ed25519_base_scalar_multiply_microcode(const FPGA_BUFFER *K, FPG
 				uop_move(BANK_LO, CYCLE_R1_Y, BANK_HI, CYCLE_U_Y, BUF_LO, BUF_HI);
 				uop_move(BANK_LO, CYCLE_R1_Z, BANK_HI, CYCLE_U_Z, BUF_LO, BUF_HI);
 				uop_move(BANK_LO, CYCLE_R1_T, BANK_HI, CYCLE_U_T, BUF_LO, BUF_HI);
+
+				/* END_MICROCODE */
 			}
 
-				// S = double(U)
-			uop_calc(MUL, BANK_HI,  CYCLE_U_X, CYCLE_U_X, BANK_LO, PROC_A, BUF_LO, BUF_HI,    MOD_2P);		// fpga_modular_mul(P_X, P_X,  &A, &CURVE25519_2P);
-			uop_calc(MUL, BANK_HI,  CYCLE_U_Y, CYCLE_U_Y, BANK_LO, PROC_B, BUF_LO, BUF_HI,     MOD_2P);		// fpga_modular_mul(P_Y, P_Y,  &B, &CURVE25519_2P);
+			/* BEGIN_MICROCODE: DURING_ROUND */
 
-			uop_calc(MUL, BANK_HI,  CYCLE_U_Z, CYCLE_U_Z, BANK_LO, PROC_I, BUF_LO, BUF_HI,     MOD_2P);		// fpga_modular_mul(P_Z, P_Z,  &I, &CURVE25519_2P);
-			uop_calc(ADD, BANK_LO,  PROC_I,    PROC_I,    BANK_HI, PROC_C, BUF_LO, BUF_HI,     MOD_2P);		// fpga_modular_add( &I,  &I,  &C, &CURVE25519_2P);
-			uop_calc(ADD, BANK_HI,  CYCLE_U_X, CYCLE_U_Y, BANK_LO, PROC_I, BUF_LO, BUF_HI,     MOD_2P);		// fpga_modular_add(P_X, P_Y,  &I, &CURVE25519_2P);
-			uop_calc(MUL, BANK_LO,  PROC_I,    PROC_I,    BANK_HI, PROC_D, BUF_LO, BUF_HI,     MOD_2P);		// fpga_modular_mul( &I,  &I,  &D, &CURVE25519_2P);
+				// S = double(U)
+			uop_calc(MUL, BANK_HI,  CYCLE_U_X, CYCLE_U_X, BANK_LO, PROC_A, BUF_LO, BUF_HI,     MOD_2P);
+			uop_calc(MUL, BANK_HI,  CYCLE_U_Y, CYCLE_U_Y, BANK_LO, PROC_B, BUF_LO, BUF_HI,     MOD_2P); 
+			uop_calc(MUL, BANK_HI,  CYCLE_U_Z, CYCLE_U_Z, BANK_LO, PROC_I, BUF_LO, BUF_HI,     MOD_2P);
+			uop_calc(ADD, BANK_LO,  PROC_I,    PROC_I,    BANK_HI, PROC_C, BUF_LO, BUF_HI,     MOD_2P);
+			uop_calc(ADD, BANK_HI,  CYCLE_U_X, CYCLE_U_Y, BANK_LO, PROC_I, BUF_LO, BUF_HI,     MOD_2P);
+			uop_calc(MUL, BANK_LO,  PROC_I,    PROC_I,    BANK_HI, PROC_D, BUF_LO, BUF_HI,     MOD_2P);
 
-			uop_calc(ADD, BANK_LO,  PROC_A,    PROC_B,    BANK_HI, PROC_H, BUF_LO, BUF_HI,     MOD_2P);		// fpga_modular_add( &A,  &B,  &H, &CURVE25519_2P);
-			uop_calc(SUB, BANK_HI,  PROC_H,    PROC_D,    BANK_LO, PROC_E, BUF_LO, BUF_HI,     MOD_2P);		// fpga_modular_sub( &H,  &D,  &E, &CURVE25519_2P);
-			uop_calc(SUB, BANK_LO,  PROC_A,    PROC_B,    BANK_HI, PROC_G, BUF_LO, BUF_HI,     MOD_2P);		// fpga_modular_sub( &A,  &B,  &G, &CURVE25519_2P);
-			uop_calc(ADD, BANK_HI,  PROC_C,    PROC_G,    BANK_LO, PROC_F, BUF_LO, BUF_HI,     MOD_2P);		// fpga_modular_add( &C,  &G,  &F, &CURVE25519_2P);
+			uop_calc(ADD, BANK_LO,  PROC_A,    PROC_B,    BANK_HI, PROC_H, BUF_LO, BUF_HI,     MOD_2P);
+			uop_calc(SUB, BANK_HI,  PROC_H,    PROC_D,    BANK_LO, PROC_E, BUF_LO, BUF_HI,     MOD_2P);
+			uop_calc(SUB, BANK_LO,  PROC_A,    PROC_B,    BANK_HI, PROC_G, BUF_LO, BUF_HI,     MOD_2P);
+			uop_calc(ADD, BANK_HI,  PROC_C,    PROC_G,    BANK_LO, PROC_F, BUF_LO, BUF_HI,     MOD_2P);
 
-			uop_move2(BANK_HI, PROC_G, PROC_H, BANK_LO, PROC_G, PROC_H, BUF_LO, BUF_HI);
+			uop_move(BANK_HI, PROC_G, BANK_LO, PROC_G, BUF_LO, BUF_HI);
+			uop_move(BANK_HI, PROC_H, BANK_LO, PROC_H, BUF_LO, BUF_HI);
 
-			uop_calc(MUL, BANK_LO,  PROC_E,    PROC_F,    BANK_HI, CYCLE_S_X, BUF_LO, BUF_HI,  MOD_2P);		// fpga_modular_mul( &E,  &F, Q_X, &CURVE25519_2P);
-			uop_calc(MUL, BANK_LO,  PROC_G,    PROC_H,    BANK_HI, CYCLE_S_Y, BUF_LO, BUF_HI,  MOD_2P);		// fpga_modular_mul( &G,  &H, Q_Y, &CURVE25519_2P);
+			uop_calc(MUL, BANK_LO,  PROC_E,    PROC_F,    BANK_HI, CYCLE_S_X, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(MUL, BANK_LO,  PROC_G,    PROC_H,    BANK_HI, CYCLE_S_Y, BUF_LO, BUF_HI,  MOD_2P);
 
-			uop_calc(MUL, BANK_LO,  PROC_E,    PROC_H,    BANK_HI, CYCLE_S_T, BUF_LO, BUF_HI,  MOD_2P);		// fpga_modular_mul( &E,  &H, Q_T, &CURVE25519_2P);
-			uop_calc(MUL, BANK_LO,  PROC_F,    PROC_G,    BANK_HI, CYCLE_S_Z, BUF_LO, BUF_HI,  MOD_2P);		// fpga_modular_mul( &F,  &G, Q_Z, &CURVE25519_2P);
+			uop_calc(MUL, BANK_LO,  PROC_E,    PROC_H,    BANK_HI, CYCLE_S_T, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(MUL, BANK_LO,  PROC_F,    PROC_G,    BANK_HI, CYCLE_S_Z, BUF_LO, BUF_HI,  MOD_2P);
 
 				// T = add(S, V)
-			uop_calc(SUB, BANK_HI, CYCLE_S_Y, CYCLE_S_X, BANK_LO, PROC_I, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_sub(S_Y, S_X,  &I, &CURVE25519_2P);	// I = (qy - qx) % mod
-			uop_calc(ADD, BANK_HI, CYCLE_V_Y, CYCLE_V_X, BANK_LO, PROC_J, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_add(V_Y, V_X,  &J, &CURVE25519_2P);	// J = (py + px) % mod
-			uop_calc(MUL, BANK_LO, PROC_I,    PROC_J,    BANK_HI, PROC_A, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_mul( &I,  &J,  &A, &CURVE25519_2P);	// A = ( I *  J) % mod
-
-			uop_calc(ADD, BANK_HI, CYCLE_S_Y, CYCLE_S_X, BANK_LO, PROC_I, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_add(S_Y, S_X,  &I, &CURVE25519_2P);	// I = (qy + qx) % mod
-			uop_calc(SUB, BANK_HI, CYCLE_V_Y, CYCLE_V_X, BANK_LO, PROC_J, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_sub(V_Y, V_X,  &J, &CURVE25519_2P);	// J = (py - px) % mod
-			uop_calc(MUL, BANK_LO, PROC_I,    PROC_J,    BANK_HI, PROC_B, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_mul( &I,  &J,  &B, &CURVE25519_2P);	// B = ( I *  J) % mod
-
-			uop_calc(MUL, BANK_HI, CYCLE_S_Z, CYCLE_V_T, BANK_LO, PROC_I, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_mul(S_Z, V_T,  &I, &CURVE25519_2P);	// I = (qz * pt) % mod
-			uop_calc(ADD, BANK_LO, PROC_I,    PROC_I,    BANK_HI, PROC_C, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_add( &I,  &I,  &C, &CURVE25519_2P);	// C = ( I +  I) % mod
-			uop_calc(MUL, BANK_HI, CYCLE_S_T, CYCLE_V_Z, BANK_LO, PROC_I, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_mul(S_T, V_Z,  &I, &CURVE25519_2P);	// I = (qt * pz) % mod
-			uop_calc(ADD, BANK_LO, PROC_I,    PROC_I,    BANK_HI, PROC_D, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_add( &I,  &I,  &D, &CURVE25519_2P);	// D = ( I + I) % mod
-
-			uop_calc(ADD, BANK_HI, PROC_C,    PROC_D,    BANK_LO, PROC_E, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_add( &D,  &C,  &E, &CURVE25519_2P);	// E = (D + C) % mod
-			uop_calc(SUB, BANK_HI, PROC_B,    PROC_A,    BANK_LO, PROC_F, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_sub( &B,  &A,  &F, &CURVE25519_2P);	// F = (B - A) % mod
-			uop_calc(ADD, BANK_HI, PROC_B,    PROC_A,    BANK_LO, PROC_G, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_add( &B,  &A,  &G, &CURVE25519_2P);	// G = (B + A) % mod
-			uop_calc(SUB, BANK_HI, PROC_D,    PROC_C,    BANK_LO, PROC_H, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_sub( &D,  &C,  &H, &CURVE25519_2P);	// H = (D - C) % mod
-
-			uop_calc(MUL, BANK_LO, PROC_E,    PROC_F,    BANK_HI, CYCLE_T_X, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_mul( &E,  &F, R_X, &CURVE25519_2P);	// rx = (E * F) % mod
-			uop_calc(MUL, BANK_LO, PROC_G,    PROC_H,    BANK_HI, CYCLE_T_Y, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_mul( &G,  &H, R_Y, &CURVE25519_2P);	// ry = (G * H) % mod
-			uop_calc(MUL, BANK_LO, PROC_E,    PROC_H,    BANK_HI, CYCLE_T_T, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_mul( &E,  &H, R_T, &CURVE25519_2P);	// rt = (E * H) % mod
-			uop_calc(MUL, BANK_LO, PROC_F,    PROC_G,    BANK_HI, CYCLE_T_Z, BUF_LO, BUF_HI,  MOD_2P);	//fpga_modular_mul( &F,  &G, R_Z, &CURVE25519_2P);	// rz = (F * G) % mod	
+			uop_calc(SUB, BANK_HI, CYCLE_S_Y, CYCLE_S_X, BANK_LO, PROC_I, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(ADD, BANK_HI, CYCLE_V_Y, CYCLE_V_X, BANK_LO, PROC_J, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(MUL, BANK_LO, PROC_I,    PROC_J,    BANK_HI, PROC_A, BUF_LO, BUF_HI,  MOD_2P);
+
+			uop_calc(ADD, BANK_HI, CYCLE_S_Y, CYCLE_S_X, BANK_LO, PROC_I, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(SUB, BANK_HI, CYCLE_V_Y, CYCLE_V_X, BANK_LO, PROC_J, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(MUL, BANK_LO, PROC_I,    PROC_J,    BANK_HI, PROC_B, BUF_LO, BUF_HI,  MOD_2P);
+
+			uop_calc(MUL, BANK_HI, CYCLE_S_Z, CYCLE_V_T, BANK_LO, PROC_I, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(ADD, BANK_LO, PROC_I,    PROC_I,    BANK_HI, PROC_C, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(MUL, BANK_HI, CYCLE_S_T, CYCLE_V_Z, BANK_LO, PROC_I, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(ADD, BANK_LO, PROC_I,    PROC_I,    BANK_HI, PROC_D, BUF_LO, BUF_HI,  MOD_2P);
+
+			uop_calc(ADD, BANK_HI, PROC_C,    PROC_D,    BANK_LO, PROC_E, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(SUB, BANK_HI, PROC_B,    PROC_A,    BANK_LO, PROC_F, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(ADD, BANK_HI, PROC_B,    PROC_A,    BANK_LO, PROC_G, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(SUB, BANK_HI, PROC_D,    PROC_C,    BANK_LO, PROC_H, BUF_LO, BUF_HI,  MOD_2P);
+
+			uop_calc(MUL, BANK_LO, PROC_E,    PROC_F,    BANK_HI, CYCLE_T_X, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(MUL, BANK_LO, PROC_G,    PROC_H,    BANK_HI, CYCLE_T_Y, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(MUL, BANK_LO, PROC_E,    PROC_H,    BANK_HI, CYCLE_T_T, BUF_LO, BUF_HI,  MOD_2P);
+			uop_calc(MUL, BANK_LO, PROC_F,    PROC_G,    BANK_HI, CYCLE_T_Z, BUF_LO, BUF_HI,  MOD_2P);
+
+			/* END_MICROCODE */
 			
 			if (k_bit)
 			{
 				// R0 = T
 
-				uop_move2(BANK_HI, CYCLE_T_X, CYCLE_T_Y, BANK_LO, CYCLE_R0_X, CYCLE_R0_Y, BUF_LO, BUF_HI);
-				uop_move2(BANK_HI, CYCLE_T_Z, CYCLE_T_T, BANK_LO, CYCLE_R0_Z, CYCLE_R0_T, BUF_LO, BUF_HI);
+				/* BEGIN_MICROCODE: AFTER_ROUND_K1 */
+
+				uop_move(BANK_HI, CYCLE_T_X, BANK_LO, CYCLE_R0_X, BUF_LO, BUF_HI);
+				uop_move(BANK_HI, CYCLE_T_Y, BANK_LO, CYCLE_R0_Y, BUF_LO, BUF_HI);
+				uop_move(BANK_HI, CYCLE_T_Z, BANK_LO, CYCLE_R0_Z, BUF_LO, BUF_HI);
+				uop_move(BANK_HI, CYCLE_T_T, BANK_LO, CYCLE_R0_T, BUF_LO, BUF_HI);
+
+				/* END_MICROCODE */
 			}
 			else
 			{
 				// R1 = T
 
-				uop_move2(BANK_HI, CYCLE_T_X, CYCLE_T_Y, BANK_LO, CYCLE_R1_X, CYCLE_R1_Y, BUF_LO, BUF_HI);
-				uop_move2(BANK_HI, CYCLE_T_Z, CYCLE_T_T, BANK_LO, CYCLE_R1_Z, CYCLE_R1_T, BUF_LO, BUF_HI);
+				/* BEGIN_MICROCODE: AFTER_ROUND_K0 */
+
+				uop_move(BANK_HI, CYCLE_T_X, BANK_LO, CYCLE_R1_X, BUF_LO, BUF_HI);
+				uop_move(BANK_HI, CYCLE_T_Y, BANK_LO, CYCLE_R1_Y, BUF_LO, BUF_HI);
+				uop_move(BANK_HI, CYCLE_T_Z, BANK_LO, CYCLE_R1_Z, BUF_LO, BUF_HI);
+				uop_move(BANK_HI, CYCLE_T_T, BANK_LO, CYCLE_R1_T, BUF_LO, BUF_HI);
+
+				/* END_MICROCODE */
 			}
 		}
 	}
 
+	/* BEGIN_MICROCODE: BEFORE_INVERSION */
+
 		// inversion expects result to be in LO: T1
-	uop_move2(BANK_LO, CYCLE_R0_Z, CYCLE_R0_Z, BANK_HI, CYCLE_R0_Z, CYCLE_R0_Z, BUF_LO, BUF_HI);	
-	uop_move2(BANK_HI, CYCLE_R0_Z, CYCLE_R0_Z, BANK_LO, INVERT_T_1, INVERT_T_1, BUF_LO, BUF_HI);	
+	uop_move(BANK_LO, CYCLE_R0_Z, BANK_HI, CYCLE_R0_Z, BUF_LO, BUF_HI);
+	uop_move(BANK_HI, CYCLE_R0_Z, BANK_LO, INVERT_T_1, BUF_LO, BUF_HI);
+
+	/* END_MICROCODE */
 
 		// just call piece of microcode
 	fpga_modular_inv_microcode(BUF_LO, BUF_HI);
 
-		// inversion places result in HI: R1
-		// coordinates are in HI: R0_X, R0_Y
+	/* BEGIN_MICROCODE: AFTER_INVERSION */
 
-	uop_move2(BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R1, INVERT_R1, BUF_LO, BUF_HI);
+		// inversion places result in HI: R1
+		// coordinates are in LO: R0_X, R0_Y
+	uop_move(BANK_HI, INVERT_R1, BANK_LO, INVERT_R1, BUF_LO, BUF_HI);
 	uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_R0_X, BANK_HI, CYCLE_R0_X, BUF_LO, BUF_HI, MOD_2P);
 	uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_R0_Y, BANK_HI, CYCLE_R0_Y, BUF_LO, BUF_HI, MOD_2P);
 
+	/* END_MICROCODE */
+
+	/* BEGIN_MICROCODE: FINAL_REDUCTION */
+
 		// finally reduce to just 1*P
-	uop_calc(ADD, BANK_HI, CYCLE_R0_X, CONST_ZERO, BANK_LO, CYCLE_R0_X, BUF_LO, BUF_HI, MOD_1P);	// !!!
-	uop_calc(ADD, BANK_HI, CYCLE_R0_Y, CONST_ZERO, BANK_LO, CYCLE_R0_Y, BUF_LO, BUF_HI, MOD_1P);	// !!!
+	uop_calc(ADD, BANK_HI, CYCLE_R0_X, CONST_ZERO, BANK_LO, CYCLE_R0_X, BUF_LO, BUF_HI, MOD_1P);
+	uop_calc(ADD, BANK_HI, CYCLE_R0_Y, CONST_ZERO, BANK_LO, CYCLE_R0_Y, BUF_LO, BUF_HI, MOD_1P);
 
-		// poke sign bit
+	/* END_MICROCODE */
+
+	// poke sign bit
 	BUF_LO[CYCLE_R0_Y].words[FPGA_OPERAND_NUM_WORDS-1] |=
 		(FPGA_WORD)((BUF_LO[CYCLE_R0_X].words[0] & (FPGA_WORD)1) << 31);
 
-		// store result
-	uop_stor(BUF_LO, BUF_HI, BANK_LO, CYCLE_R0_Y, QY);
+	/* BEGIN_MICROCODE: HANDLE_SIGN */
 
+	uop_move(BANK_LO, CYCLE_R0_X, BANK_HI, CYCLE_R0_X, BUF_LO, BUF_HI);
 
+	/* END_MICROCODE */
 
-	/*
-		// process "sign" of x, see this Cryptography Stack Exchange
-		// answer for more details:
-		//
-		// https://crypto.stackexchange.com/questions/58921/decoding-a-ed25519-key-per-rfc8032
-		//
-		// the short story is that odd values of x are negative, so we
-		// just copy the lsb of x into msb of y
-	R0_Y.words[FPGA_OPERAND_NUM_WORDS-1] |= (R0_X.words[0] & (FPGA_WORD)1) << 31;
+	/* BEGIN_MICROCODE: OUTPUT */
+
+	uop_move(BANK_LO, CYCLE_R0_Y, BANK_HI, CYCLE_R0_Y, BUF_LO, BUF_HI);
+
+	/* END_MICROCODE */
 
 		// store result
-	fpga_multiword_copy(&R0_Y, QY);
-	*/
+	uop_stor(BUF_LO, BUF_HI, BANK_LO, CYCLE_R0_Y, QY);
 }
 
 



More information about the Commits mailing list