[Cryptech-Commits] [user/shatov/x25519_fpga_model] 01/02: Microcode version. Works, but needs cleanup.

git at cryptech.is git at cryptech.is
Mon Jun 4 20:44:33 UTC 2018


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/x25519_fpga_model.

commit 52bda013638cebbb3de74ee4f8c52b80262e477c
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Sun Jun 3 14:32:18 2018 +0300

    Microcode version. Works, but needs cleanup.
---
 x25519_fpga_curve.h             |   13 +-
 x25519_fpga_curve_abstract.cpp  |    2 +-
 x25519_fpga_curve_microcode.cpp | 1031 +++++++++++++++++++++++++++++++++++++++
 x25519_fpga_model.cpp           |    6 +
 4 files changed, 1050 insertions(+), 2 deletions(-)

diff --git a/x25519_fpga_curve.h b/x25519_fpga_curve.h
index be647c5..1d6914c 100644
--- a/x25519_fpga_curve.h
+++ b/x25519_fpga_curve.h
@@ -56,12 +56,23 @@ extern FPGA_BUFFER X25519_G_X;	// the base point
 extern FPGA_BUFFER X25519_A24;	// coefficient (A + 2) / 4
 
 
+//------------------------------------------------------------------------------
+// Implementation switch (define USE_MICROCODE to select the microcode model)
+//------------------------------------------------------------------------------
+#ifdef USE_MICROCODE
+#define fpga_curve_scalar_multiply fpga_curve_scalar_multiply_microcode
+#else
+#define fpga_curve_scalar_multiply fpga_curve_scalar_multiply_abstract
+#endif
+
+
 //------------------------------------------------------------------------------
 // Prototypes
 //------------------------------------------------------------------------------
 void	fpga_curve_init				();
 
-void	fpga_curve_scalar_multiply	(FPGA_BUFFER *P_X, FPGA_BUFFER *K, FPGA_BUFFER *Q_X);
+void	fpga_curve_scalar_multiply_abstract		(FPGA_BUFFER *P_X, FPGA_BUFFER *K, FPGA_BUFFER *Q_X);
+void	fpga_curve_scalar_multiply_microcode	(FPGA_BUFFER *P_X, FPGA_BUFFER *K, FPGA_BUFFER *Q_X);
 
 void	fpga_curve_ladder_step		(FPGA_BUFFER *P_X,
 									 FPGA_BUFFER *R0_X_in,  FPGA_BUFFER *R0_Z_in,
diff --git a/x25519_fpga_curve_abstract.cpp b/x25519_fpga_curve_abstract.cpp
index b182ca3..769759a 100644
--- a/x25519_fpga_curve_abstract.cpp
+++ b/x25519_fpga_curve_abstract.cpp
@@ -82,7 +82,7 @@ void fpga_curve_init()
 // https://eprint.iacr.org/2017/264.pdf
 //
 //------------------------------------------------------------------------------
-void fpga_curve_scalar_multiply(FPGA_BUFFER *PX, FPGA_BUFFER *K, FPGA_BUFFER *QX)
+void fpga_curve_scalar_multiply_abstract(FPGA_BUFFER *PX, FPGA_BUFFER *K, FPGA_BUFFER *QX)
 //------------------------------------------------------------------------------
 {
 	int word_count, bit_count;	// counters
diff --git a/x25519_fpga_curve_microcode.cpp b/x25519_fpga_curve_microcode.cpp
new file mode 100644
index 0000000..7d40d70
--- /dev/null
+++ b/x25519_fpga_curve_microcode.cpp
@@ -0,0 +1,1031 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_curve_microcode.cpp
+// ---------------------------------------------------
+// Elliptic curve arithmetic procedures for Curve25519
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "x25519_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Locals
+//------------------------------------------------------------------------------
+static FPGA_BUFFER LADDER_R0_X;	// NOTE(review): in fpga_curve_scalar_multiply_microcode() the LADDER_*
+static FPGA_BUFFER LADDER_R0_Z;	// and REDUCE_* buffers appear only in commented-out lines; their names
+	// mirror the UOP_SRC_INT_* / UOP_DST_INT_* operands -- check whether later code still needs them
+static FPGA_BUFFER LADDER_R1_X;
+static FPGA_BUFFER LADDER_R1_Z;
+
+static FPGA_BUFFER LADDER_T0_X;
+static FPGA_BUFFER LADDER_T0_Z;
+
+static FPGA_BUFFER LADDER_T1_X;
+static FPGA_BUFFER LADDER_T1_Z;
+
+static FPGA_BUFFER LADDER_S0;
+static FPGA_BUFFER LADDER_S1;
+static FPGA_BUFFER LADDER_D0;
+static FPGA_BUFFER LADDER_D1;
+static FPGA_BUFFER LADDER_QS0;
+static FPGA_BUFFER LADDER_QD0;
+static FPGA_BUFFER LADDER_S0D1;
+static FPGA_BUFFER LADDER_S1D0;
+static FPGA_BUFFER LADDER_TS;
+static FPGA_BUFFER LADDER_TD;
+static FPGA_BUFFER LADDER_QTD;
+static FPGA_BUFFER LADDER_T0;
+static FPGA_BUFFER LADDER_TA;
+static FPGA_BUFFER LADDER_T1;
+
+static FPGA_BUFFER REDUCE_R1;
+static FPGA_BUFFER REDUCE_R2;
+static FPGA_BUFFER REDUCE_T_1;
+static FPGA_BUFFER REDUCE_T_10;
+static FPGA_BUFFER REDUCE_T_1001;
+static FPGA_BUFFER REDUCE_T_1011;
+static FPGA_BUFFER REDUCE_T_X5;
+static FPGA_BUFFER REDUCE_T_X10;
+static FPGA_BUFFER REDUCE_T_X20;
+static FPGA_BUFFER REDUCE_T_X40;
+static FPGA_BUFFER REDUCE_T_X50;
+static FPGA_BUFFER REDUCE_T_X100;
+
+
+//------------------------------------------------------------------------------
+// Error Handle
+//------------------------------------------------------------------------------
+#define uop_fatal(msg)	{(void)printf("%s\n",msg);exit(EXIT_FAILURE);}
+
+
+//------------------------------------------------------------------------------
+// Storage Buffers
+//------------------------------------------------------------------------------
+static FPGA_BUFFER BANK_INT[4][64];	// 4 x 64 operand slots; NOTE(review): presumably [UOP_BANK_INT_A..D][operand id] -- confirm
+static bool bank_flags[4][64];		// one flag per BANK_INT slot; all cleared to false when a scalar multiplication starts
+static FPGA_BUFFER BANK_EXT_X;		// loaded from the PX argument at the start of a scalar multiplication
+static FPGA_BUFFER BANK_EXT_Y;		// NOTE(review): presumably backs UOP_DST_EXT_Y -- not accessed in the code reviewed
+
+//------------------------------------------------------------------------------
+enum UOP_BANK	// bank selector passed with every source/destination operand of a uop_*() call
+//------------------------------------------------------------------------------
+{
+	UOP_BANK_INT_A	= 0,	// internal banks have the explicit values 0..3
+	UOP_BANK_INT_B	= 1,
+	UOP_BANK_INT_C	= 2,
+	UOP_BANK_INT_D	= 3,
+	UOP_BANK_EXT	// external bank, implicitly 4
+};
+
+//------------------------------------------------------------------------------
+enum UOP_SRC_OPERAND	// source operand selectors for the uop_*() calls
+//------------------------------------------------------------------------------
+{
+	UOP_SRC_INT_LADDER_R0_X		=  0,	// 0..33: internal operands, numbered like the matching UOP_DST_INT_* entries
+	UOP_SRC_INT_LADDER_R0_Z		=  1,
+
+	UOP_SRC_INT_LADDER_R1_X		=  2,
+	UOP_SRC_INT_LADDER_R1_Z		=  3,
+
+	UOP_SRC_INT_LADDER_T0_X		=  4,
+	UOP_SRC_INT_LADDER_T0_Z		=  5,
+
+	UOP_SRC_INT_LADDER_T1_X		=  6,
+	UOP_SRC_INT_LADDER_T1_Z		=  7,
+
+	UOP_SRC_INT_LADDER_S0		=  8,
+	UOP_SRC_INT_LADDER_S1		=  9,
+	UOP_SRC_INT_LADDER_D0		= 10,
+	UOP_SRC_INT_LADDER_D1		= 11,
+	UOP_SRC_INT_LADDER_QS0		= 12,
+	UOP_SRC_INT_LADDER_QD0		= 13,
+	UOP_SRC_INT_LADDER_S0D1		= 14,
+	UOP_SRC_INT_LADDER_S1D0		= 15,
+	UOP_SRC_INT_LADDER_TS		= 16,
+	UOP_SRC_INT_LADDER_TD		= 17,
+	UOP_SRC_INT_LADDER_QTD		= 18,
+	UOP_SRC_INT_LADDER_T0		= 19,
+	UOP_SRC_INT_LADDER_TA		= 20,
+	UOP_SRC_INT_LADDER_T1		= 21,
+
+	UOP_SRC_INT_REDUCE_R1		= 22,
+	UOP_SRC_INT_REDUCE_R2		= 23,
+	UOP_SRC_INT_REDUCE_T_1		= 24,
+	UOP_SRC_INT_REDUCE_T_10		= 25,
+	UOP_SRC_INT_REDUCE_T_1001	= 26,
+	UOP_SRC_INT_REDUCE_T_1011	= 27,
+	UOP_SRC_INT_REDUCE_T_X5		= 28,
+	UOP_SRC_INT_REDUCE_T_X10	= 29,
+	UOP_SRC_INT_REDUCE_T_X20	= 30,
+	UOP_SRC_INT_REDUCE_T_X40	= 31,
+	UOP_SRC_INT_REDUCE_T_X50	= 32,
+	UOP_SRC_INT_REDUCE_T_X100	= 33,
+
+	UOP_SRC_EXT_ZERO,	// implicitly 34; the EXT_* sources are paired with UOP_BANK_EXT by the callers
+	UOP_SRC_EXT_ONE,	// implicitly 35
+
+	UOP_SRC_EXT_A24,	// implicitly 36
+
+	UOP_SRC_EXT_X	// implicitly 37
+};
+
+
+//------------------------------------------------------------------------------
+enum UOP_DST_OPERAND	// destination operand selectors for the uop_*() calls
+//------------------------------------------------------------------------------
+{
+	UOP_DST_INT_LADDER_R0_X		=  0,	// 0..33: internal operands, numbered like the matching UOP_SRC_INT_* entries
+	UOP_DST_INT_LADDER_R0_Z		=  1,
+
+	UOP_DST_INT_LADDER_R1_X		=  2,
+	UOP_DST_INT_LADDER_R1_Z		=  3,
+
+	UOP_DST_INT_LADDER_T0_X		=  4,
+	UOP_DST_INT_LADDER_T0_Z		=  5,
+
+	UOP_DST_INT_LADDER_T1_X		=  6,
+	UOP_DST_INT_LADDER_T1_Z		=  7,
+
+	UOP_DST_INT_LADDER_S0		=  8,
+	UOP_DST_INT_LADDER_S1		=  9,
+	UOP_DST_INT_LADDER_D0		= 10,
+	UOP_DST_INT_LADDER_D1		= 11,
+	UOP_DST_INT_LADDER_QS0		= 12,
+	UOP_DST_INT_LADDER_QD0		= 13,
+	UOP_DST_INT_LADDER_S0D1		= 14,
+	UOP_DST_INT_LADDER_S1D0		= 15,
+	UOP_DST_INT_LADDER_TS		= 16,
+	UOP_DST_INT_LADDER_TD		= 17,
+	UOP_DST_INT_LADDER_QTD		= 18,
+	UOP_DST_INT_LADDER_T0		= 19,
+	UOP_DST_INT_LADDER_TA		= 20,
+	UOP_DST_INT_LADDER_T1		= 21,
+
+	UOP_DST_INT_REDUCE_R1		= 22,
+	UOP_DST_INT_REDUCE_R2		= 23,
+	UOP_DST_INT_REDUCE_T_1		= 24,
+	UOP_DST_INT_REDUCE_T_10		= 25,
+	UOP_DST_INT_REDUCE_T_1001	= 26,
+	UOP_DST_INT_REDUCE_T_1011	= 27,
+	UOP_DST_INT_REDUCE_T_X5		= 28,
+	UOP_DST_INT_REDUCE_T_X10	= 29,
+	UOP_DST_INT_REDUCE_T_X20	= 30,
+	UOP_DST_INT_REDUCE_T_X40	= 31,
+	UOP_DST_INT_REDUCE_T_X50	= 32,
+	UOP_DST_INT_REDUCE_T_X100	= 33,
+
+	UOP_DST_EXT_Y,	// implicitly 34; paired with UOP_BANK_EXT by the final move of the scalar multiplication
+
+	UOP_DST_DUMMY	// implicitly 35; NOTE(review): not used in the code reviewed
+};
+
+void dump_fpga_buffer(const char *msg, const FPGA_BUFFER *buf)	// debug: print msg, then all words of buf in hex
+{
+	printf("%s", msg);
+	for (int i=FPGA_OPERAND_NUM_WORDS; i>0; i--)	// highest word first
+		printf("%08x ", buf->words[i-1]);	// i is 1-based: valid indices are 0..NUM_WORDS-1
+	printf("\n");
+}
+
+//------------------------------------------------------------------------------
+// Prototypes (each uop_* takes two source and two destination bank/operand pairs)
+//------------------------------------------------------------------------------
+static void uop_move	(UOP_BANK src_bank_1, UOP_SRC_OPERAND src_operand_1,
+						 UOP_BANK src_bank_2, UOP_SRC_OPERAND src_operand_2,
+						 UOP_BANK dst_bank_1, UOP_DST_OPERAND dst_operand_1,
+						 UOP_BANK dst_bank_2, UOP_DST_OPERAND dst_operand_2);
+	// uop_add / uop_sub also take a modulus: visible callers pass &X25519_2P, or &X25519_1P for the final uop_add
+static void uop_add		(UOP_BANK src_bank_1, UOP_SRC_OPERAND src_operand_1,
+						 UOP_BANK src_bank_2, UOP_SRC_OPERAND src_operand_2,
+						 UOP_BANK dst_bank_1, UOP_DST_OPERAND dst_operand_1,
+						 UOP_BANK dst_bank_2, UOP_DST_OPERAND dst_operand_2,
+						 FPGA_BUFFER *modulus);
+
+static void uop_sub		(UOP_BANK src_bank_1, UOP_SRC_OPERAND src_operand_1,
+						 UOP_BANK src_bank_2, UOP_SRC_OPERAND src_operand_2,
+						 UOP_BANK dst_bank_1, UOP_DST_OPERAND dst_operand_1,
+						 UOP_BANK dst_bank_2, UOP_DST_OPERAND dst_operand_2,
+						 FPGA_BUFFER *modulus);
+
+static void uop_mul		(UOP_BANK src_bank_1, UOP_SRC_OPERAND src_operand_1,
+						 UOP_BANK src_bank_2, UOP_SRC_OPERAND src_operand_2,
+						 UOP_BANK dst_bank_1, UOP_DST_OPERAND dst_operand_1,
+						 UOP_BANK dst_bank_2, UOP_DST_OPERAND dst_operand_2);
+	// bank2buffer() is used to copy a bank slot out into QX; NOTE(review): buffer2bank() presumably does the reverse -- confirm
+static void bank2buffer(UOP_BANK bank, UOP_DST_OPERAND operand, FPGA_BUFFER *buffer);
+static void buffer2bank(FPGA_BUFFER *buffer, UOP_BANK bank, UOP_SRC_OPERAND operand);
+
+
+//------------------------------------------------------------------------------
+//
+// Elliptic curve point scalar multiplication routine (microcode version).
+//
+// This uses the Montgomery ladder to do the multiplication and then
+// converts the result to affine coordinates. PX is the input X coordinate,
+// the bits of K drive the ladder, and the result is stored in QX.
+// The algorithm is based on Algorithm 3 from "How to (pre-)compute a ladder"
+// https://eprint.iacr.org/2017/264.pdf
+//
+//------------------------------------------------------------------------------
+void fpga_curve_scalar_multiply_microcode(FPGA_BUFFER *PX, FPGA_BUFFER *K, FPGA_BUFFER *QX)
+//------------------------------------------------------------------------------
+{
+	int word_count, bit_count, cyc_count;	// counters
+
+	// clear the per-slot flags of all four internal banks
+	int i, j;
+	for (i=0; i<4; i++)
+		for (j=0; j<64; j++)
+			bank_flags[i][j] = false;
+
+
+		// pre: load PX into BANK_EXT_X (presumably what UOP_SRC_EXT_X reads -- confirm)
+	fpga_multiword_copy(PX, &BANK_EXT_X);
+
+		// initialization: R0 = (ONE, ZERO), R1 = (X, ONE); X parts go to bank A, Z parts to bank B
+	
+
+	// fpga_multiword_copy(&X25519_ONE,  &LADDER_R0_X);
+ 	// fpga_multiword_copy(&X25519_ZERO, &LADDER_R0_Z);
+	uop_move	(UOP_BANK_EXT,   UOP_SRC_EXT_ONE,     UOP_BANK_EXT,   UOP_SRC_EXT_ZERO,
+				 UOP_BANK_INT_A, UOP_DST_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_Z);
+
+	// fpga_multiword_copy(PX,           &LADDER_R1_X);
+	// fpga_multiword_copy(&X25519_ONE,  &LADDER_R1_Z);
+	uop_move	(UOP_BANK_EXT,   UOP_SRC_EXT_X,       UOP_BANK_EXT,   UOP_SRC_EXT_ONE,
+				 UOP_BANK_INT_A, UOP_DST_INT_LADDER_R1_X, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R1_Z);
+
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_LADDER_R0_X, &LADDER_R0_X);
+	//bank2buffer(UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_Z, &LADDER_R0_Z);
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_LADDER_R1_X, &LADDER_R1_X);
+	//bank2buffer(UOP_BANK_INT_B, UOP_DST_INT_LADDER_R1_Z, &LADDER_R1_Z);
+
+
+	FPGA_WORD k_word;
+	bool k_bit, s = false;
+	// scan K from the top bit of the highest word down to bit 0 of word 0; s holds the previous bit
+	for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--)
+	{
+		for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
+		{
+			k_word = K->words[word_count - 1] >> (bit_count - 1);
+			k_bit = (k_word & (FPGA_WORD)1) == 1;
+			// T0 = (s == k_bit) ? R0 : R1   (copied into banks C/D)
+			if (s == k_bit)
+				uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z,
+							 UOP_BANK_INT_C, UOP_DST_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T0_Z);
+			else
+				uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R1_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R1_Z,
+							 UOP_BANK_INT_C, UOP_DST_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T0_Z);
+			// T1 = the other one of R0 / R1
+			if (s == k_bit)
+				uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R1_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R1_Z,
+							 UOP_BANK_INT_C, UOP_DST_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T1_Z);
+			else
+				uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z,
+							 UOP_BANK_INT_C, UOP_DST_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T1_Z);
+			// remember this bit for the comparison in the next iteration
+			s = k_bit;
+			// S0 = T0_X + T0_Z
+			uop_add		(UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T0_Z,
+						 UOP_BANK_INT_A, UOP_DST_INT_LADDER_S0,   UOP_BANK_INT_B, UOP_DST_INT_LADDER_S0,
+						 &X25519_2P);
+			// S1 = T1_X + T1_Z
+			uop_add		(UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T1_Z,
+						 UOP_BANK_INT_A, UOP_DST_INT_LADDER_S1,   UOP_BANK_INT_B, UOP_DST_INT_LADDER_S1,
+						 &X25519_2P);
+			// D0 = T0_X - T0_Z
+			uop_sub		(UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T0_Z,
+						 UOP_BANK_INT_A, UOP_DST_INT_LADDER_D0,   UOP_BANK_INT_B, UOP_DST_INT_LADDER_D0,
+						 &X25519_2P);
+			// D1 = T1_X - T1_Z
+			uop_sub		(UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T1_Z,
+						 UOP_BANK_INT_A, UOP_DST_INT_LADDER_D1,   UOP_BANK_INT_B, UOP_DST_INT_LADDER_D1,
+						 &X25519_2P);
+			// QS0 = S0 * S0
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_S0,  UOP_BANK_INT_B, UOP_SRC_INT_LADDER_S0,
+						 UOP_BANK_INT_C, UOP_DST_INT_LADDER_QS0, UOP_BANK_INT_D, UOP_DST_INT_LADDER_QS0);
+			// QD0 = D0 * D0
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_D0,  UOP_BANK_INT_B, UOP_SRC_INT_LADDER_D0,
+						 UOP_BANK_INT_C, UOP_DST_INT_LADDER_QD0, UOP_BANK_INT_D, UOP_DST_INT_LADDER_QD0);
+			// S0D1 = S0 * D1
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_S0,   UOP_BANK_INT_B, UOP_SRC_INT_LADDER_D1,
+						 UOP_BANK_INT_C, UOP_DST_INT_LADDER_S0D1, UOP_BANK_INT_D, UOP_DST_INT_LADDER_S0D1);
+			// S1D0 = S1 * D0
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_S1,   UOP_BANK_INT_B, UOP_SRC_INT_LADDER_D0,
+						 UOP_BANK_INT_C, UOP_DST_INT_LADDER_S1D0, UOP_BANK_INT_D, UOP_DST_INT_LADDER_S1D0);
+			// TS = S1D0 + S0D1
+			uop_add		(UOP_BANK_INT_C, UOP_SRC_INT_LADDER_S1D0, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_S0D1,
+						 UOP_BANK_INT_A, UOP_DST_INT_LADDER_TS,   UOP_BANK_INT_B, UOP_DST_INT_LADDER_TS,
+						 &X25519_2P);
+			// TD = S1D0 - S0D1
+			uop_sub		(UOP_BANK_INT_C, UOP_SRC_INT_LADDER_S1D0, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_S0D1,
+						 UOP_BANK_INT_A, UOP_DST_INT_LADDER_TD,   UOP_BANK_INT_B, UOP_DST_INT_LADDER_TD,
+						 &X25519_2P);
+			// QTD = TD * TD
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_TD,   UOP_BANK_INT_B, UOP_SRC_INT_LADDER_TD,
+						 UOP_BANK_INT_C, UOP_DST_INT_LADDER_QTD,  UOP_BANK_INT_D, UOP_DST_INT_LADDER_QTD);
+			// T0 = QS0 - QD0
+			uop_sub		(UOP_BANK_INT_C, UOP_SRC_INT_LADDER_QS0,  UOP_BANK_INT_D, UOP_SRC_INT_LADDER_QD0,
+						 UOP_BANK_INT_A, UOP_DST_INT_LADDER_T0,   UOP_BANK_INT_B, UOP_DST_INT_LADDER_T0,
+						 &X25519_2P);
+			// TA = T0 * A24
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_T0,  UOP_BANK_EXT,   UOP_SRC_EXT_A24,
+						 UOP_BANK_INT_C, UOP_DST_INT_LADDER_TA,  UOP_BANK_INT_D, UOP_DST_INT_LADDER_TA);
+			// T1 = TA + QD0
+			uop_add		(UOP_BANK_INT_C, UOP_SRC_INT_LADDER_TA,  UOP_BANK_INT_D, UOP_SRC_INT_LADDER_QD0,
+						 UOP_BANK_INT_A, UOP_DST_INT_LADDER_T1,  UOP_BANK_INT_B, UOP_DST_INT_LADDER_T1,
+						 &X25519_2P);
+			// R0_X = QS0 * QD0
+			uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_LADDER_QS0,  UOP_BANK_INT_D, UOP_SRC_INT_LADDER_QD0,
+						 UOP_BANK_INT_A, UOP_DST_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_X);
+			// T0_Z = T0 * T1
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_T0,   UOP_BANK_INT_B, UOP_SRC_INT_LADDER_T1,
+						 UOP_BANK_INT_C, UOP_DST_INT_LADDER_T0_Z, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T0_Z);
+			// T1_X = TS * TS
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_TS,  UOP_BANK_INT_B, UOP_SRC_INT_LADDER_TS,
+						 UOP_BANK_INT_C, UOP_DST_INT_LADDER_T1_X,  UOP_BANK_INT_D, UOP_DST_INT_LADDER_T1_X);
+			// R1_Z = X * QTD   (X is the external input operand)
+			uop_mul		(UOP_BANK_EXT, UOP_SRC_EXT_X, UOP_BANK_INT_C, UOP_SRC_INT_LADDER_QTD,
+						 UOP_BANK_INT_A, UOP_DST_INT_LADDER_R1_Z,  UOP_BANK_INT_B, UOP_DST_INT_LADDER_R1_Z);
+			// R1_X = T1_X (to bank A), R0_Z = T0_Z (to bank B)
+			uop_move	(UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T0_Z,
+						 UOP_BANK_INT_A, UOP_DST_INT_LADDER_R1_X, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_Z);
+		}
+	}
+	
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_LADDER_R0_X, &LADDER_R0_X);
+	//bank2buffer(UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_Z, &LADDER_R0_Z);
+
+		// since the lower three bits of the private key are always 000,
+		// the result is in R0X, R0z
+
+		// conversion to affine coordinates
+	//fpga_multiword_copy(&LADDER_R0_Z, &REDUCE_T_1);
+	// T_1 = R0_Z + 0: the copy is issued as an addition of zero into banks C/D, then mirrored into A/B
+	uop_add		(UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z, UOP_BANK_EXT, UOP_SRC_EXT_ZERO,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_1,
+				 &X25519_2P);
+
+	uop_move	(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_1,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_1, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_1);
+
+	// The chain below raises Z = T_1 to the power 2^255 - 21 using uop_mul in place of the
+	// fpga_modular_mul(..., &X25519_2P) calls kept in the comments; exponents are noted per stage.
+	
+
+	//
+	//fpga_modular_mul(&REDUCE_T_1, &REDUCE_T_1, &REDUCE_T_10, &X25519_2P);
+	// T_10 = Z^2
+	uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_1,  UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_1,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_10, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_10);
+
+
+//	fpga_modular_mul(&REDUCE_T_10, &REDUCE_T_10, &REDUCE_R1, &X25519_2P);
+//	fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+//	fpga_modular_mul(&REDUCE_R2, &REDUCE_T_1, &REDUCE_T_1001, &X25519_2P);
+	// R1 = Z^4, R2 = Z^8, T_1001 = Z^9
+	uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_10, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_10,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1,   UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+	uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+
+	uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2,    UOP_BANK_INT_B,  UOP_SRC_INT_REDUCE_T_1,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_1001, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_1001);
+
+
+	//
+	//fpga_modular_mul(&REDUCE_T_1001, &REDUCE_T_10, &REDUCE_T_1011, &X25519_2P);
+	// T_10 is first copied from banks A/B to C/D, where the multiplication reads it; T_1011 = Z^11
+
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_10, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_10,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_10, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_10);
+
+	uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_1001, UOP_BANK_INT_D,  UOP_SRC_INT_REDUCE_T_10,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_1011, UOP_BANK_INT_B,  UOP_DST_INT_REDUCE_T_1011);
+
+
+
+	//fpga_modular_mul(&REDUCE_T_1011, &REDUCE_T_1011, &REDUCE_R1, &X25519_2P);
+	//fpga_modular_mul(&REDUCE_R1, &REDUCE_T_1001, &REDUCE_T_X5, &X25519_2P);
+	// R1 = Z^22, T_X5 = Z^31 = Z^(2^5 - 1)
+	uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_1011, UOP_BANK_INT_B,  UOP_SRC_INT_REDUCE_T_1011,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D,      UOP_DST_INT_REDUCE_R1);
+
+	uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1,   UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_1001,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X5, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X5);
+
+
+
+
+	//
+	//fpga_multiword_copy(&REDUCE_T_X5, &REDUCE_R1);
+	// R1 = T_X5; the 5 squarings alternate R1 (banks C/D) -> R2 (banks A/B) -> R1 ...; the last one writes R2
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X5, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X5,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+	//bank2buffer(UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, &REDUCE_R1);
+
+	for (cyc_count=0; cyc_count<5; cyc_count++)
+	{	if (!(cyc_count % 2))
+	
+			//fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+
+			uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+						 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+		else		
+			//fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+			
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+						 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+	}
+
+
+	//fpga_modular_mul(&REDUCE_R2, &REDUCE_T_X5, &REDUCE_T_X10, &X25519_2P);
+	// T_X10 = Z^(2^10 - 1)
+	uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2,    UOP_BANK_INT_B,  UOP_SRC_INT_REDUCE_T_X5,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X10, UOP_BANK_INT_D,  UOP_DST_INT_REDUCE_T_X10);
+
+
+
+	//fpga_multiword_copy(&REDUCE_T_X10, &REDUCE_R1);
+	//for (cyc_count=0; cyc_count<10; cyc_count++)
+	//{	if (!(cyc_count % 2))	fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+		//else			fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+	//}
+	//fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X10, &REDUCE_T_X20, &X25519_2P);
+	// T_X10 is in banks C/D, so it reaches R1 via R2 in banks A/B; 10 squarings end in R1; T_X20 = Z^(2^20 - 1)
+	uop_move	(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_X10, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X10,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+	
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+	for (cyc_count=0; cyc_count<10; cyc_count++)
+	{	if (!(cyc_count % 2))
+			uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+						 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+		else		
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+						 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+	}
+	uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_X10, UOP_BANK_INT_D,  UOP_SRC_INT_REDUCE_R1,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X20, UOP_BANK_INT_B,  UOP_DST_INT_REDUCE_T_X20);
+
+
+
+
+	
+	
+	//
+	//fpga_multiword_copy(&REDUCE_T_X20, &REDUCE_R1);
+	//for (cyc_count=0; cyc_count<20; cyc_count++)
+	//{	if (!(cyc_count % 2))	fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+		//else			fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+	//}
+	//fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X20, &REDUCE_T_X40, &X25519_2P);
+
+	// T_X20 is copied to banks C/D both as R1 and under its own name; 20 squarings end in R1; T_X40 = Z^(2^40 - 1)
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X20, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X20,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X20, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X20,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X20, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_X20);
+
+	for (cyc_count=0; cyc_count<20; cyc_count++)
+	{	if (!(cyc_count % 2))
+			uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+						 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+		else		
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+						 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+	}
+	uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_X20, UOP_BANK_INT_D,  UOP_SRC_INT_REDUCE_R1,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X40, UOP_BANK_INT_B,  UOP_DST_INT_REDUCE_T_X40);
+
+	
+	//
+//	fpga_multiword_copy(&REDUCE_T_X40, &REDUCE_R1);
+//	for (cyc_count=0; cyc_count<10; cyc_count++)
+//	{	if (!(cyc_count % 2))	fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+//		else			fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+//	}
+//	fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X10, &REDUCE_T_X50, &X25519_2P);
+	//
+	// 10 squarings of T_X40 end in R1; T_X50 = R1 * T_X10 = Z^(2^50 - 1)
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X40, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X40,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+	// T_X10 is still present in banks C/D from the multiplication that produced it,
+	// so no extra copy is issued before the final multiplication of this stage.
+
+	for (cyc_count=0; cyc_count<10; cyc_count++)
+	{	if (!(cyc_count % 2))
+			uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+						 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+		else		
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+						 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+	}
+	uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D,  UOP_SRC_INT_REDUCE_T_X10,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X50, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X50);
+
+
+
+
+
+
+
+	
+//	fpga_multiword_copy(&REDUCE_T_X50, &REDUCE_R1);
+//	for (cyc_count=0; cyc_count<50; cyc_count++)
+//	{	if (!(cyc_count % 2))	fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+//		else			fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+//	}
+//	fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X50, &REDUCE_T_X100, &X25519_2P);
+	//
+	// T_X50 is copied to banks C/D both as R1 and under its own name; 50 squarings end in R1; T_X100 = Z^(2^100 - 1)
+
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X50, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X50,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X50, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X50,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X50, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_X50);
+
+	for (cyc_count=0; cyc_count<50; cyc_count++)
+	{	if (!(cyc_count % 2))
+			uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+						 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+		else		
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+						 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+	}
+	uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D,  UOP_SRC_INT_REDUCE_T_X50,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X100, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X100);
+
+
+
+
+
+
+
+
+	
+	//fpga_multiword_copy(&REDUCE_T_X100, &REDUCE_R1);
+	//for (cyc_count=0; cyc_count<100; cyc_count++)
+	//{	if (!(cyc_count % 2))	fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+		//else			fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+	//}
+	//
+	//fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X100, &REDUCE_R2, &X25519_2P);
+	// 100 squarings of T_X100 end in R1; R2 = R1 * T_X100 = Z^(2^200 - 1), then copied back to R1
+
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X100, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X100,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1,     UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X100, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X100,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X100, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_X100);
+
+	for (cyc_count=0; cyc_count<100; cyc_count++)
+	{	if (!(cyc_count % 2))
+			uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+						 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+		else		
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+						 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+	}
+
+	uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X100,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+	
+	
+//	for (cyc_count=0; cyc_count<50; cyc_count++)
+//	{	if ((cyc_count % 2))	fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);	// !!! (swapped sides)
+//		else			fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+//	}
+	// unlike the swapped abstract loop above, R1 is squared first here (R2 was just copied to R1); 50 squarings end in R1
+	for (cyc_count=0; cyc_count<50; cyc_count++)
+	{	if (!(cyc_count % 2))
+			uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+						 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+		else		
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+						 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+	}
+
+
+
+	
+	//fpga_modular_mul(&REDUCE_R2, &REDUCE_T_X50, &REDUCE_R1, &X25519_2P);
+	//fpga_multiword_copy(&REDUCE_R1, &REDUCE_R2);
+	// R2 = R1 * T_X50 = Z^(2^250 - 1), then copied back to R1
+	uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X50,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+
+	//for (cyc_count=0; cyc_count<5; cyc_count++)
+	//{	if (!(cyc_count % 2))	fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+	//	else			fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+	//}
+	// 5 more squarings give Z^(2^255 - 32)
+	for (cyc_count=0; cyc_count<5; cyc_count++)
+	{	if (!(cyc_count % 2))
+			uop_mul		(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+						 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+		else		
+			uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+						 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+	}
+
+
+	// the result is in R2 (the last iteration, cyc_count == 4, writes R2)
+
+	//
+	//fpga_modular_mul(&REDUCE_R2, &REDUCE_T_1011, &REDUCE_R1, &X25519_2P);
+	//fpga_modular_mul(&LADDER_R0_X, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+	// R1 = R2 * T_1011 = Z^(2^255 - 21), i.e. Z^(p-2) for p = 2^255 - 19;
+	// R0_X is copied to bank C so that R2 = R1 * R0_X can read it
+	uop_mul		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_1011,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+	uop_move	(UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z,
+				 UOP_BANK_INT_C, UOP_DST_INT_LADDER_R0_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_R0_Z);
+
+	uop_mul		(UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_C, UOP_SRC_INT_LADDER_R0_X,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+
+
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_1,    &REDUCE_T_1);
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_10,   &REDUCE_T_10);
+	//bank2buffer(UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_1001, &REDUCE_T_1001);
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_1011, &REDUCE_T_1011);
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X5,   &REDUCE_T_X5);
+	//bank2buffer(UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X10,  &REDUCE_T_X10);
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X20,  &REDUCE_T_X20);
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X40,  &REDUCE_T_X40);
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X50,  &REDUCE_T_X50);
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X100, &REDUCE_T_X100);
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2,     &REDUCE_R2);
+
+
+	//bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, &REDUCE_R2);
+
+	//
+	//fpga_modular_add(&REDUCE_R2, &X25519_ZERO, QX, &X25519_1P);	// 1P!
+	// the final addition of zero is the only operation here that uses X25519_1P instead of X25519_2P
+	uop_add		(UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_EXT, UOP_SRC_EXT_ZERO,
+				 UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1,
+				 &X25519_1P);
+
+	uop_move	(UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+				 UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_EXT, UOP_DST_EXT_Y);
+
+	// NOTE(review): the move above also targets UOP_DST_EXT_Y, but QX is filled from bank C's REDUCE_R1
+	bank2buffer(UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, QX);
+
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uop_move(): two-operand copy micro-operation.
+//
+// Copies source operand X into destination operand X and source operand Y
+// into destination operand Y (fpga_multiword_copy() takes source first,
+// destination second), then marks every internal-bank destination as
+// written in bank_flags[][].
+//
+// Bank rules enforced here (violations are reported through uop_fatal()):
+//   * the two sources may share a bank only if that bank is UOP_BANK_EXT;
+//   * the two destinations must always be in different banks;
+//   * no source bank may coincide with a destination bank.
+//
+// With UOP_BANK_EXT a source operand must be one of UOP_SRC_EXT_ZERO,
+// UOP_SRC_EXT_ONE, UOP_SRC_EXT_X or UOP_SRC_EXT_A24, and a destination
+// operand must be UOP_DST_EXT_Y. Any other operand is rejected explicitly
+// instead of leaving the corresponding pointer uninitialized.
+//
+//------------------------------------------------------------------------------
+static void uop_move	(UOP_BANK src_bank_x, UOP_SRC_OPERAND src_operand_x,
+						 UOP_BANK src_bank_y, UOP_SRC_OPERAND src_operand_y,
+						 UOP_BANK dst_bank_x, UOP_DST_OPERAND dst_operand_x,
+						 UOP_BANK dst_bank_y, UOP_DST_OPERAND dst_operand_y)
+//------------------------------------------------------------------------------
+{
+	// the condition is symmetric in X and Y, so a single test covers both
+	if ((src_bank_x != UOP_BANK_EXT) && (src_bank_x == src_bank_y)) uop_fatal("ERROR: uop_move(): src_bank_x == src_bank_y!");
+
+	if (dst_bank_x == dst_bank_y) uop_fatal("ERROR: uop_move(): dst_bank_x == dst_bank_y!");
+
+	if (src_bank_x == dst_bank_x) uop_fatal("ERROR: uop_move(): src_bank_x == dst_bank_x!");
+	if (src_bank_x == dst_bank_y) uop_fatal("ERROR: uop_move(): src_bank_x == dst_bank_y!");
+
+	if (src_bank_y == dst_bank_x) uop_fatal("ERROR: uop_move(): src_bank_y == dst_bank_x!");
+	if (src_bank_y == dst_bank_y) uop_fatal("ERROR: uop_move(): src_bank_y == dst_bank_y!");
+
+	FPGA_BUFFER *ptr_src_x, *ptr_dst_x;
+	FPGA_BUFFER *ptr_src_y, *ptr_dst_y;
+
+	// NOTE: the "return" after uop_fatal() in the default branches below only
+	// matters if uop_fatal() ever returns; it keeps the pointers from being
+	// used while still unset.
+
+	// resolve source X
+	if (src_bank_x != UOP_BANK_EXT) ptr_src_x = &BANK_INT[src_bank_x][src_operand_x];
+	else
+	{	switch(src_operand_x)
+		{	case UOP_SRC_EXT_ZERO:	ptr_src_x = &X25519_ZERO; break;
+			case UOP_SRC_EXT_ONE:	ptr_src_x = &X25519_ONE; break;
+			case UOP_SRC_EXT_X:		ptr_src_x = &BANK_EXT_X; break;
+			case UOP_SRC_EXT_A24:	ptr_src_x = &X25519_A24; break;
+			default:				uop_fatal("ERROR: uop_move(): invalid external src_operand_x!"); return;
+		}
+	}
+
+	// resolve source Y
+	if (src_bank_y != UOP_BANK_EXT) ptr_src_y = &BANK_INT[src_bank_y][src_operand_y];
+	else
+	{	switch(src_operand_y)
+		{	case UOP_SRC_EXT_ZERO:	ptr_src_y = &X25519_ZERO; break;
+			case UOP_SRC_EXT_ONE:	ptr_src_y = &X25519_ONE; break;
+			case UOP_SRC_EXT_X:		ptr_src_y = &BANK_EXT_X; break;
+			case UOP_SRC_EXT_A24:	ptr_src_y = &X25519_A24; break;
+			default:				uop_fatal("ERROR: uop_move(): invalid external src_operand_y!"); return;
+		}
+	}
+
+	// resolve destination X
+	if (dst_bank_x != UOP_BANK_EXT) ptr_dst_x = &BANK_INT[dst_bank_x][dst_operand_x];
+	else
+	{	switch(dst_operand_x)
+		{	case UOP_DST_EXT_Y:		ptr_dst_x = &BANK_EXT_Y; break;
+			default:				uop_fatal("ERROR: uop_move(): invalid external dst_operand_x!"); return;
+		}
+	}
+
+	// resolve destination Y
+	if (dst_bank_y != UOP_BANK_EXT) ptr_dst_y = &BANK_INT[dst_bank_y][dst_operand_y];
+	else
+	{	switch(dst_operand_y)
+		{	case UOP_DST_EXT_Y:		ptr_dst_y = &BANK_EXT_Y; break;
+			default:				uop_fatal("ERROR: uop_move(): invalid external dst_operand_y!"); return;
+		}
+	}
+
+	// X -> X, Y -> Y
+	fpga_multiword_copy(ptr_src_x, ptr_dst_x);
+	fpga_multiword_copy(ptr_src_y, ptr_dst_y);
+
+	// remember which internal operands now hold valid data
+	if (dst_bank_x != UOP_BANK_EXT)	bank_flags[dst_bank_x][dst_operand_x] = true;
+	if (dst_bank_y != UOP_BANK_EXT)	bank_flags[dst_bank_y][dst_operand_y] = true;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uop_add(): modular addition micro-operation.
+//
+// Passes source operands X and Y together with the supplied modulus to
+// fpga_modular_add() and stores the single result into BOTH destination
+// operands, then marks every internal-bank destination as written in
+// bank_flags[][].
+//
+// Bank rules enforced here (violations are reported through uop_fatal()):
+//   * the two sources may share a bank only if that bank is UOP_BANK_EXT;
+//   * the two destinations must always be in different banks;
+//   * no source bank may coincide with a destination bank.
+//
+// With UOP_BANK_EXT a source operand must be one of UOP_SRC_EXT_ZERO,
+// UOP_SRC_EXT_ONE, UOP_SRC_EXT_X or UOP_SRC_EXT_A24, and a destination
+// operand must be UOP_DST_EXT_Y. Any other operand is rejected explicitly
+// instead of leaving the corresponding pointer uninitialized.
+//
+//------------------------------------------------------------------------------
+static void uop_add		(UOP_BANK src_bank_x, UOP_SRC_OPERAND src_operand_x,
+						 UOP_BANK src_bank_y, UOP_SRC_OPERAND src_operand_y,
+						 UOP_BANK dst_bank_x, UOP_DST_OPERAND dst_operand_x,
+						 UOP_BANK dst_bank_y, UOP_DST_OPERAND dst_operand_y,
+						 FPGA_BUFFER *modulus)
+//------------------------------------------------------------------------------
+{
+	// the condition is symmetric in X and Y, so a single test covers both
+	if ((src_bank_x != UOP_BANK_EXT) && (src_bank_x == src_bank_y)) uop_fatal("ERROR: uop_add(): src_bank_x == src_bank_y!");
+
+	if (dst_bank_x == dst_bank_y) uop_fatal("ERROR: uop_add(): dst_bank_x == dst_bank_y!");
+
+	if (src_bank_x == dst_bank_x) uop_fatal("ERROR: uop_add(): src_bank_x == dst_bank_x!");
+	if (src_bank_x == dst_bank_y) uop_fatal("ERROR: uop_add(): src_bank_x == dst_bank_y!");
+
+	if (src_bank_y == dst_bank_x) uop_fatal("ERROR: uop_add(): src_bank_y == dst_bank_x!");
+	if (src_bank_y == dst_bank_y) uop_fatal("ERROR: uop_add(): src_bank_y == dst_bank_y!");
+
+	FPGA_BUFFER *ptr_src_x, *ptr_dst_x;
+	FPGA_BUFFER *ptr_src_y, *ptr_dst_y;
+
+	// NOTE: the "return" after uop_fatal() in the default branches below only
+	// matters if uop_fatal() ever returns; it keeps the pointers from being
+	// used while still unset.
+
+	// resolve source X
+	if (src_bank_x != UOP_BANK_EXT) ptr_src_x = &BANK_INT[src_bank_x][src_operand_x];
+	else
+	{	switch(src_operand_x)
+		{	case UOP_SRC_EXT_ZERO:	ptr_src_x = &X25519_ZERO; break;
+			case UOP_SRC_EXT_ONE:	ptr_src_x = &X25519_ONE; break;
+			case UOP_SRC_EXT_X:		ptr_src_x = &BANK_EXT_X; break;
+			case UOP_SRC_EXT_A24:	ptr_src_x = &X25519_A24; break;
+			default:				uop_fatal("ERROR: uop_add(): invalid external src_operand_x!"); return;
+		}
+	}
+
+	// resolve source Y
+	if (src_bank_y != UOP_BANK_EXT) ptr_src_y = &BANK_INT[src_bank_y][src_operand_y];
+	else
+	{	switch(src_operand_y)
+		{	case UOP_SRC_EXT_ZERO:	ptr_src_y = &X25519_ZERO; break;
+			case UOP_SRC_EXT_ONE:	ptr_src_y = &X25519_ONE; break;
+			case UOP_SRC_EXT_X:		ptr_src_y = &BANK_EXT_X; break;
+			case UOP_SRC_EXT_A24:	ptr_src_y = &X25519_A24; break;
+			default:				uop_fatal("ERROR: uop_add(): invalid external src_operand_y!"); return;
+		}
+	}
+
+	// resolve destination X
+	if (dst_bank_x != UOP_BANK_EXT) ptr_dst_x = &BANK_INT[dst_bank_x][dst_operand_x];
+	else
+	{	switch(dst_operand_x)
+		{	case UOP_DST_EXT_Y:		ptr_dst_x = &BANK_EXT_Y; break;
+			default:				uop_fatal("ERROR: uop_add(): invalid external dst_operand_x!"); return;
+		}
+	}
+
+	// resolve destination Y
+	if (dst_bank_y != UOP_BANK_EXT) ptr_dst_y = &BANK_INT[dst_bank_y][dst_operand_y];
+	else
+	{	switch(dst_operand_y)
+		{	case UOP_DST_EXT_Y:		ptr_dst_y = &BANK_EXT_Y; break;
+			default:				uop_fatal("ERROR: uop_add(): invalid external dst_operand_y!"); return;
+		}
+	}
+
+	// compute once into a temporary, then fan the sum out to both destinations
+	FPGA_BUFFER S;
+	fpga_modular_add(ptr_src_x, ptr_src_y, &S, modulus);
+	fpga_multiword_copy(&S, ptr_dst_x);
+	fpga_multiword_copy(&S, ptr_dst_y);
+
+	// remember which internal operands now hold valid data
+	if (dst_bank_x != UOP_BANK_EXT)	bank_flags[dst_bank_x][dst_operand_x] = true;
+	if (dst_bank_y != UOP_BANK_EXT)	bank_flags[dst_bank_y][dst_operand_y] = true;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uop_sub(): modular subtraction micro-operation.
+//
+// Passes source operands X and Y (in that order) together with the supplied
+// modulus to fpga_modular_sub() and stores the single result into BOTH
+// destination operands, then marks every internal-bank destination as
+// written in bank_flags[][].
+//
+// Bank rules enforced here (violations are reported through uop_fatal()):
+//   * the two sources may share a bank only if that bank is UOP_BANK_EXT;
+//   * the two destinations must always be in different banks;
+//   * no source bank may coincide with a destination bank.
+//
+// With UOP_BANK_EXT a source operand must be one of UOP_SRC_EXT_ZERO,
+// UOP_SRC_EXT_ONE, UOP_SRC_EXT_X or UOP_SRC_EXT_A24, and a destination
+// operand must be UOP_DST_EXT_Y. Any other operand is rejected explicitly
+// instead of leaving the corresponding pointer uninitialized.
+//
+//------------------------------------------------------------------------------
+static void uop_sub		(UOP_BANK src_bank_x, UOP_SRC_OPERAND src_operand_x,
+						 UOP_BANK src_bank_y, UOP_SRC_OPERAND src_operand_y,
+						 UOP_BANK dst_bank_x, UOP_DST_OPERAND dst_operand_x,
+						 UOP_BANK dst_bank_y, UOP_DST_OPERAND dst_operand_y,
+						 FPGA_BUFFER *modulus)
+//------------------------------------------------------------------------------
+{
+	// the condition is symmetric in X and Y, so a single test covers both
+	if ((src_bank_x != UOP_BANK_EXT) && (src_bank_x == src_bank_y)) uop_fatal("ERROR: uop_sub(): src_bank_x == src_bank_y!");
+
+	if (dst_bank_x == dst_bank_y) uop_fatal("ERROR: uop_sub(): dst_bank_x == dst_bank_y!");
+
+	if (src_bank_x == dst_bank_x) uop_fatal("ERROR: uop_sub(): src_bank_x == dst_bank_x!");
+	if (src_bank_x == dst_bank_y) uop_fatal("ERROR: uop_sub(): src_bank_x == dst_bank_y!");
+
+	if (src_bank_y == dst_bank_x) uop_fatal("ERROR: uop_sub(): src_bank_y == dst_bank_x!");
+	if (src_bank_y == dst_bank_y) uop_fatal("ERROR: uop_sub(): src_bank_y == dst_bank_y!");
+
+	FPGA_BUFFER *ptr_src_x, *ptr_dst_x;
+	FPGA_BUFFER *ptr_src_y, *ptr_dst_y;
+
+	// NOTE: the "return" after uop_fatal() in the default branches below only
+	// matters if uop_fatal() ever returns; it keeps the pointers from being
+	// used while still unset.
+
+	// resolve source X
+	if (src_bank_x != UOP_BANK_EXT) ptr_src_x = &BANK_INT[src_bank_x][src_operand_x];
+	else
+	{	switch(src_operand_x)
+		{	case UOP_SRC_EXT_ZERO:	ptr_src_x = &X25519_ZERO; break;
+			case UOP_SRC_EXT_ONE:	ptr_src_x = &X25519_ONE; break;
+			case UOP_SRC_EXT_X:		ptr_src_x = &BANK_EXT_X; break;
+			case UOP_SRC_EXT_A24:	ptr_src_x = &X25519_A24; break;
+			default:				uop_fatal("ERROR: uop_sub(): invalid external src_operand_x!"); return;
+		}
+	}
+
+	// resolve source Y
+	if (src_bank_y != UOP_BANK_EXT) ptr_src_y = &BANK_INT[src_bank_y][src_operand_y];
+	else
+	{	switch(src_operand_y)
+		{	case UOP_SRC_EXT_ZERO:	ptr_src_y = &X25519_ZERO; break;
+			case UOP_SRC_EXT_ONE:	ptr_src_y = &X25519_ONE; break;
+			case UOP_SRC_EXT_X:		ptr_src_y = &BANK_EXT_X; break;
+			case UOP_SRC_EXT_A24:	ptr_src_y = &X25519_A24; break;
+			default:				uop_fatal("ERROR: uop_sub(): invalid external src_operand_y!"); return;
+		}
+	}
+
+	// resolve destination X
+	if (dst_bank_x != UOP_BANK_EXT) ptr_dst_x = &BANK_INT[dst_bank_x][dst_operand_x];
+	else
+	{	switch(dst_operand_x)
+		{	case UOP_DST_EXT_Y:		ptr_dst_x = &BANK_EXT_Y; break;
+			default:				uop_fatal("ERROR: uop_sub(): invalid external dst_operand_x!"); return;
+		}
+	}
+
+	// resolve destination Y
+	if (dst_bank_y != UOP_BANK_EXT) ptr_dst_y = &BANK_INT[dst_bank_y][dst_operand_y];
+	else
+	{	switch(dst_operand_y)
+		{	case UOP_DST_EXT_Y:		ptr_dst_y = &BANK_EXT_Y; break;
+			default:				uop_fatal("ERROR: uop_sub(): invalid external dst_operand_y!"); return;
+		}
+	}
+
+	// compute once into a temporary, then fan the difference out to both destinations
+	FPGA_BUFFER D;
+	fpga_modular_sub(ptr_src_x, ptr_src_y, &D, modulus);
+	fpga_multiword_copy(&D, ptr_dst_x);
+	fpga_multiword_copy(&D, ptr_dst_y);
+
+	// remember which internal operands now hold valid data
+	if (dst_bank_x != UOP_BANK_EXT)	bank_flags[dst_bank_x][dst_operand_x] = true;
+	if (dst_bank_y != UOP_BANK_EXT)	bank_flags[dst_bank_y][dst_operand_y] = true;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// uop_mul(): modular multiplication micro-operation.
+//
+// Passes source operands X and Y to fpga_modular_mul() with X25519_2P as the
+// modulus argument and stores the single result into BOTH destination
+// operands, then marks every internal-bank destination as written in
+// bank_flags[][].
+//
+// Bank rules enforced here (violations are reported through uop_fatal()):
+//   * the two sources may share a bank only if that bank is UOP_BANK_EXT;
+//   * the two destinations must always be in different banks;
+//   * no source bank may coincide with a destination bank.
+//
+// With UOP_BANK_EXT a source operand must be one of UOP_SRC_EXT_ZERO,
+// UOP_SRC_EXT_ONE, UOP_SRC_EXT_X or UOP_SRC_EXT_A24, and a destination
+// operand must be UOP_DST_EXT_Y. Any other operand is rejected explicitly
+// instead of leaving the corresponding pointer uninitialized.
+//
+//------------------------------------------------------------------------------
+static void uop_mul		(UOP_BANK src_bank_x, UOP_SRC_OPERAND src_operand_x,
+						 UOP_BANK src_bank_y, UOP_SRC_OPERAND src_operand_y,
+						 UOP_BANK dst_bank_x, UOP_DST_OPERAND dst_operand_x,
+						 UOP_BANK dst_bank_y, UOP_DST_OPERAND dst_operand_y)
+//------------------------------------------------------------------------------
+{
+	// the condition is symmetric in X and Y, so a single test covers both
+	if ((src_bank_x != UOP_BANK_EXT) && (src_bank_x == src_bank_y)) uop_fatal("ERROR: uop_mul(): src_bank_x == src_bank_y!");
+
+	if (dst_bank_x == dst_bank_y) uop_fatal("ERROR: uop_mul(): dst_bank_x == dst_bank_y!");
+
+	if (src_bank_x == dst_bank_x) uop_fatal("ERROR: uop_mul(): src_bank_x == dst_bank_x!");
+	if (src_bank_x == dst_bank_y) uop_fatal("ERROR: uop_mul(): src_bank_x == dst_bank_y!");
+
+	if (src_bank_y == dst_bank_x) uop_fatal("ERROR: uop_mul(): src_bank_y == dst_bank_x!");
+	if (src_bank_y == dst_bank_y) uop_fatal("ERROR: uop_mul(): src_bank_y == dst_bank_y!");
+
+	FPGA_BUFFER *ptr_src_x, *ptr_dst_x;
+	FPGA_BUFFER *ptr_src_y, *ptr_dst_y;
+
+	// NOTE: the "return" after uop_fatal() in the default branches below only
+	// matters if uop_fatal() ever returns; it keeps the pointers from being
+	// used while still unset.
+
+	// resolve source X
+	if (src_bank_x != UOP_BANK_EXT) ptr_src_x = &BANK_INT[src_bank_x][src_operand_x];
+	else
+	{	switch(src_operand_x)
+		{	case UOP_SRC_EXT_ZERO:	ptr_src_x = &X25519_ZERO; break;
+			case UOP_SRC_EXT_ONE:	ptr_src_x = &X25519_ONE; break;
+			case UOP_SRC_EXT_X:		ptr_src_x = &BANK_EXT_X; break;
+			case UOP_SRC_EXT_A24:	ptr_src_x = &X25519_A24; break;
+			default:				uop_fatal("ERROR: uop_mul(): invalid external src_operand_x!"); return;
+		}
+	}
+
+	// resolve source Y
+	if (src_bank_y != UOP_BANK_EXT) ptr_src_y = &BANK_INT[src_bank_y][src_operand_y];
+	else
+	{	switch(src_operand_y)
+		{	case UOP_SRC_EXT_ZERO:	ptr_src_y = &X25519_ZERO; break;
+			case UOP_SRC_EXT_ONE:	ptr_src_y = &X25519_ONE; break;
+			case UOP_SRC_EXT_X:		ptr_src_y = &BANK_EXT_X; break;
+			case UOP_SRC_EXT_A24:	ptr_src_y = &X25519_A24; break;
+			default:				uop_fatal("ERROR: uop_mul(): invalid external src_operand_y!"); return;
+		}
+	}
+
+	// resolve destination X
+	if (dst_bank_x != UOP_BANK_EXT) ptr_dst_x = &BANK_INT[dst_bank_x][dst_operand_x];
+	else
+	{	switch(dst_operand_x)
+		{	case UOP_DST_EXT_Y:		ptr_dst_x = &BANK_EXT_Y; break;
+			default:				uop_fatal("ERROR: uop_mul(): invalid external dst_operand_x!"); return;
+		}
+	}
+
+	// resolve destination Y
+	if (dst_bank_y != UOP_BANK_EXT) ptr_dst_y = &BANK_INT[dst_bank_y][dst_operand_y];
+	else
+	{	switch(dst_operand_y)
+		{	case UOP_DST_EXT_Y:		ptr_dst_y = &BANK_EXT_Y; break;
+			default:				uop_fatal("ERROR: uop_mul(): invalid external dst_operand_y!"); return;
+		}
+	}
+
+	// compute once into a temporary, then fan the product out to both destinations
+	FPGA_BUFFER P;
+	fpga_modular_mul(ptr_src_x, ptr_src_y, &P, &X25519_2P);
+	fpga_multiword_copy(&P, ptr_dst_x);
+	fpga_multiword_copy(&P, ptr_dst_y);
+
+	// remember which internal operands now hold valid data
+	if (dst_bank_x != UOP_BANK_EXT)	bank_flags[dst_bank_x][dst_operand_x] = true;
+	if (dst_bank_y != UOP_BANK_EXT)	bank_flags[dst_bank_y][dst_operand_y] = true;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// bank2buffer(): debug/readout helper.
+//
+// Copies one operand of an internal bank into a caller-supplied buffer.
+// Reports through uop_fatal() when the external bank is named, when the
+// operand is UOP_DST_EXT_Y, or when the operand's bank_flags[][] entry is
+// not set.
+//
+//------------------------------------------------------------------------------
+static void bank2buffer(UOP_BANK bank, UOP_DST_OPERAND operand, FPGA_BUFFER *buffer)
+//------------------------------------------------------------------------------
+{
+	// only internal banks hold readable operands
+	if (bank == UOP_BANK_EXT)
+	{	uop_fatal("ERROR: bank2buffer(): bank == UOP_BANK_EXT!");
+	}
+
+	if (operand == UOP_DST_EXT_Y)
+	{	uop_fatal("ERROR: bank2buffer(): operand == UOP_DST_EXT_Y!");
+	}
+
+	// refuse to read an operand whose flag has not been set
+	if (bank_flags[bank][operand] == false)
+	{	uop_fatal("ERROR: bank2buffer(): !bank_flags[bank][operand]!");
+	}
+
+	FPGA_BUFFER *stored_operand = &BANK_INT[bank][operand];
+	fpga_multiword_copy(stored_operand, buffer);
+}
+
+
+//------------------------------------------------------------------------------
+//
+// buffer2bank(): debug/preload helper.
+//
+// Copies a caller-supplied buffer into one operand of an internal bank.
+// Reports through uop_fatal() when the external bank is named or when the
+// operand is UOP_SRC_EXT_ZERO, UOP_SRC_EXT_ONE or UOP_SRC_EXT_X.
+//
+// NOTE(review): UOP_SRC_EXT_A24 is not rejected here and bank_flags[][] is
+// not updated -- confirm whether either is intentional.
+//
+//------------------------------------------------------------------------------
+static void buffer2bank(FPGA_BUFFER *buffer, UOP_BANK bank, UOP_SRC_OPERAND operand)
+//------------------------------------------------------------------------------
+{
+	// only internal banks can be written
+	if (bank == UOP_BANK_EXT)
+	{	uop_fatal("ERROR: buffer2bank(): bank == UOP_BANK_EXT!");
+	}
+
+	// external-only source operands have no slot in an internal bank
+	switch(operand)
+	{	case UOP_SRC_EXT_ZERO:	uop_fatal("ERROR: buffer2bank(): operand == UOP_SRC_EXT_ZERO!"); break;
+		case UOP_SRC_EXT_ONE:	uop_fatal("ERROR: buffer2bank(): operand == UOP_SRC_EXT_ONE!"); break;
+		case UOP_SRC_EXT_X:		uop_fatal("ERROR: buffer2bank(): operand == UOP_SRC_EXT_X!"); break;
+		default:				break;
+	}
+
+	FPGA_BUFFER *bank_slot = &BANK_INT[bank][operand];
+	fpga_multiword_copy(buffer, bank_slot);
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519_fpga_model.cpp b/x25519_fpga_model.cpp
index 1f2101a..0e06712 100644
--- a/x25519_fpga_model.cpp
+++ b/x25519_fpga_model.cpp
@@ -37,6 +37,12 @@
 //------------------------------------------------------------------------------
 
 
+//------------------------------------------------------------------------------
+// Mode Switch
+//------------------------------------------------------------------------------
+#define USE_MICROCODE
+
+
 //------------------------------------------------------------------------------
 // Headers
 //------------------------------------------------------------------------------



More information about the Commits mailing list