[Cryptech-Commits] [user/shatov/x25519_fpga_model] 01/02: Microcode version. Works, but needs cleanup.
git at cryptech.is
git at cryptech.is
Mon Jun 4 20:44:33 UTC 2018
This is an automated email from the git hooks/post-receive script.
meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/x25519_fpga_model.
commit 52bda013638cebbb3de74ee4f8c52b80262e477c
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Sun Jun 3 14:32:18 2018 +0300
Microcode version. Works, but needs cleanup.
---
x25519_fpga_curve.h | 13 +-
x25519_fpga_curve_abstract.cpp | 2 +-
x25519_fpga_curve_microcode.cpp | 1031 +++++++++++++++++++++++++++++++++++++++
x25519_fpga_model.cpp | 6 +
4 files changed, 1050 insertions(+), 2 deletions(-)
diff --git a/x25519_fpga_curve.h b/x25519_fpga_curve.h
index be647c5..1d6914c 100644
--- a/x25519_fpga_curve.h
+++ b/x25519_fpga_curve.h
@@ -56,12 +56,23 @@ extern FPGA_BUFFER X25519_G_X; // the base point
extern FPGA_BUFFER X25519_A24; // coefficient (A + 2) / 4
+//------------------------------------------------------------------------------
+// Implementation switch: fpga_curve_scalar_multiply maps to the microcode model when USE_MICROCODE is defined, otherwise to the abstract model
+//------------------------------------------------------------------------------
+#ifdef USE_MICROCODE
+#define fpga_curve_scalar_multiply fpga_curve_scalar_multiply_microcode
+#else
+#define fpga_curve_scalar_multiply fpga_curve_scalar_multiply_abstract
+#endif
+
+
//------------------------------------------------------------------------------
// Prototypes
//------------------------------------------------------------------------------
void fpga_curve_init ();
-void fpga_curve_scalar_multiply (FPGA_BUFFER *P_X, FPGA_BUFFER *K, FPGA_BUFFER *Q_X);
+void fpga_curve_scalar_multiply_abstract (FPGA_BUFFER *P_X, FPGA_BUFFER *K, FPGA_BUFFER *Q_X);
+void fpga_curve_scalar_multiply_microcode (FPGA_BUFFER *P_X, FPGA_BUFFER *K, FPGA_BUFFER *Q_X);
void fpga_curve_ladder_step (FPGA_BUFFER *P_X,
FPGA_BUFFER *R0_X_in, FPGA_BUFFER *R0_Z_in,
diff --git a/x25519_fpga_curve_abstract.cpp b/x25519_fpga_curve_abstract.cpp
index b182ca3..769759a 100644
--- a/x25519_fpga_curve_abstract.cpp
+++ b/x25519_fpga_curve_abstract.cpp
@@ -82,7 +82,7 @@ void fpga_curve_init()
// https://eprint.iacr.org/2017/264.pdf
//
//------------------------------------------------------------------------------
-void fpga_curve_scalar_multiply(FPGA_BUFFER *PX, FPGA_BUFFER *K, FPGA_BUFFER *QX)
+void fpga_curve_scalar_multiply_abstract(FPGA_BUFFER *PX, FPGA_BUFFER *K, FPGA_BUFFER *QX)
//------------------------------------------------------------------------------
{
int word_count, bit_count; // counters
diff --git a/x25519_fpga_curve_microcode.cpp b/x25519_fpga_curve_microcode.cpp
new file mode 100644
index 0000000..7d40d70
--- /dev/null
+++ b/x25519_fpga_curve_microcode.cpp
@@ -0,0 +1,1031 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_curve_microcode.cpp
+// ---------------------------------------------------
+// Elliptic curve arithmetic procedures for Curve25519
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "x25519_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Locals: named work buffers. NOTE(review): in the visible microcode routine they appear only in commented-out lines - confirm whether still needed
+//------------------------------------------------------------------------------
+static FPGA_BUFFER LADDER_R0_X;
+static FPGA_BUFFER LADDER_R0_Z;
+
+static FPGA_BUFFER LADDER_R1_X;
+static FPGA_BUFFER LADDER_R1_Z;
+
+static FPGA_BUFFER LADDER_T0_X;
+static FPGA_BUFFER LADDER_T0_Z;
+
+static FPGA_BUFFER LADDER_T1_X;
+static FPGA_BUFFER LADDER_T1_Z;
+
+static FPGA_BUFFER LADDER_S0;
+static FPGA_BUFFER LADDER_S1;
+static FPGA_BUFFER LADDER_D0;
+static FPGA_BUFFER LADDER_D1;
+static FPGA_BUFFER LADDER_QS0;
+static FPGA_BUFFER LADDER_QD0;
+static FPGA_BUFFER LADDER_S0D1;
+static FPGA_BUFFER LADDER_S1D0;
+static FPGA_BUFFER LADDER_TS;
+static FPGA_BUFFER LADDER_TD;
+static FPGA_BUFFER LADDER_QTD;
+static FPGA_BUFFER LADDER_T0;
+static FPGA_BUFFER LADDER_TA;
+static FPGA_BUFFER LADDER_T1;
+
+static FPGA_BUFFER REDUCE_R1;
+static FPGA_BUFFER REDUCE_R2;
+static FPGA_BUFFER REDUCE_T_1;
+static FPGA_BUFFER REDUCE_T_10;
+static FPGA_BUFFER REDUCE_T_1001;
+static FPGA_BUFFER REDUCE_T_1011;
+static FPGA_BUFFER REDUCE_T_X5;
+static FPGA_BUFFER REDUCE_T_X10;
+static FPGA_BUFFER REDUCE_T_X20;
+static FPGA_BUFFER REDUCE_T_X40;
+static FPGA_BUFFER REDUCE_T_X50;
+static FPGA_BUFFER REDUCE_T_X100;
+
+//------------------------------------------------------------------------------
+// Error Handle
+//------------------------------------------------------------------------------
+#define uop_fatal(msg) {(void)printf("%s\n",msg);exit(EXIT_FAILURE);}
+
+
+//------------------------------------------------------------------------------
+// Storage Buffers
+//------------------------------------------------------------------------------
+static FPGA_BUFFER BANK_INT[4][64]; // internal operand storage, indexed [UOP_BANK_INT_A..D][operand number]
+static bool bank_flags[4][64]; // one flag per BANK_INT slot; all cleared at the start of the scalar multiply
+static FPGA_BUFFER BANK_EXT_X; // external input operand: loaded from PX, read via UOP_SRC_EXT_X
+static FPGA_BUFFER BANK_EXT_Y; // NOTE(review): presumably backs UOP_DST_EXT_Y; its use is not visible here
+
+//------------------------------------------------------------------------------
+enum UOP_BANK // selects which operand bank a micro-operation reads from or writes to
+//------------------------------------------------------------------------------
+{
+ UOP_BANK_INT_A = 0, // internal banks: the value is the first index into BANK_INT[4][64]
+ UOP_BANK_INT_B = 1,
+ UOP_BANK_INT_C = 2,
+ UOP_BANK_INT_D = 3,
+ UOP_BANK_EXT // external operands (resolved by operand name, not a BANK_INT index)
+};
+
+//------------------------------------------------------------------------------
+enum UOP_SRC_OPERAND // source operand selector; UOP_SRC_INT_* values are the second index into BANK_INT[bank][operand]
+//------------------------------------------------------------------------------
+{
+ UOP_SRC_INT_LADDER_R0_X = 0,
+ UOP_SRC_INT_LADDER_R0_Z = 1,
+
+ UOP_SRC_INT_LADDER_R1_X = 2,
+ UOP_SRC_INT_LADDER_R1_Z = 3,
+
+ UOP_SRC_INT_LADDER_T0_X = 4,
+ UOP_SRC_INT_LADDER_T0_Z = 5,
+
+ UOP_SRC_INT_LADDER_T1_X = 6,
+ UOP_SRC_INT_LADDER_T1_Z = 7,
+
+ UOP_SRC_INT_LADDER_S0 = 8,
+ UOP_SRC_INT_LADDER_S1 = 9,
+ UOP_SRC_INT_LADDER_D0 = 10,
+ UOP_SRC_INT_LADDER_D1 = 11,
+ UOP_SRC_INT_LADDER_QS0 = 12,
+ UOP_SRC_INT_LADDER_QD0 = 13,
+ UOP_SRC_INT_LADDER_S0D1 = 14,
+ UOP_SRC_INT_LADDER_S1D0 = 15,
+ UOP_SRC_INT_LADDER_TS = 16,
+ UOP_SRC_INT_LADDER_TD = 17,
+ UOP_SRC_INT_LADDER_QTD = 18,
+ UOP_SRC_INT_LADDER_T0 = 19,
+ UOP_SRC_INT_LADDER_TA = 20,
+ UOP_SRC_INT_LADDER_T1 = 21,
+
+ UOP_SRC_INT_REDUCE_R1 = 22,
+ UOP_SRC_INT_REDUCE_R2 = 23,
+ UOP_SRC_INT_REDUCE_T_1 = 24,
+ UOP_SRC_INT_REDUCE_T_10 = 25,
+ UOP_SRC_INT_REDUCE_T_1001 = 26,
+ UOP_SRC_INT_REDUCE_T_1011 = 27,
+ UOP_SRC_INT_REDUCE_T_X5 = 28,
+ UOP_SRC_INT_REDUCE_T_X10 = 29,
+ UOP_SRC_INT_REDUCE_T_X20 = 30,
+ UOP_SRC_INT_REDUCE_T_X40 = 31,
+ UOP_SRC_INT_REDUCE_T_X50 = 32,
+ UOP_SRC_INT_REDUCE_T_X100 = 33,
+
+ UOP_SRC_EXT_ZERO, // external sources (implicit values 34..37); uop_move() maps this one to X25519_ZERO
+ UOP_SRC_EXT_ONE, // mapped to X25519_ONE
+
+ UOP_SRC_EXT_A24, // mapped to X25519_A24
+
+ UOP_SRC_EXT_X // mapped to BANK_EXT_X (the input x-coordinate)
+};
+
+
+//------------------------------------------------------------------------------
+enum UOP_DST_OPERAND // destination operand selector; UOP_DST_INT_* use the same slot numbers as the matching UOP_SRC_INT_* entries
+//------------------------------------------------------------------------------
+{
+ UOP_DST_INT_LADDER_R0_X = 0,
+ UOP_DST_INT_LADDER_R0_Z = 1,
+
+ UOP_DST_INT_LADDER_R1_X = 2,
+ UOP_DST_INT_LADDER_R1_Z = 3,
+
+ UOP_DST_INT_LADDER_T0_X = 4,
+ UOP_DST_INT_LADDER_T0_Z = 5,
+
+ UOP_DST_INT_LADDER_T1_X = 6,
+ UOP_DST_INT_LADDER_T1_Z = 7,
+
+ UOP_DST_INT_LADDER_S0 = 8,
+ UOP_DST_INT_LADDER_S1 = 9,
+ UOP_DST_INT_LADDER_D0 = 10,
+ UOP_DST_INT_LADDER_D1 = 11,
+ UOP_DST_INT_LADDER_QS0 = 12,
+ UOP_DST_INT_LADDER_QD0 = 13,
+ UOP_DST_INT_LADDER_S0D1 = 14,
+ UOP_DST_INT_LADDER_S1D0 = 15,
+ UOP_DST_INT_LADDER_TS = 16,
+ UOP_DST_INT_LADDER_TD = 17,
+ UOP_DST_INT_LADDER_QTD = 18,
+ UOP_DST_INT_LADDER_T0 = 19,
+ UOP_DST_INT_LADDER_TA = 20,
+ UOP_DST_INT_LADDER_T1 = 21,
+
+ UOP_DST_INT_REDUCE_R1 = 22,
+ UOP_DST_INT_REDUCE_R2 = 23,
+ UOP_DST_INT_REDUCE_T_1 = 24,
+ UOP_DST_INT_REDUCE_T_10 = 25,
+ UOP_DST_INT_REDUCE_T_1001 = 26,
+ UOP_DST_INT_REDUCE_T_1011 = 27,
+ UOP_DST_INT_REDUCE_T_X5 = 28,
+ UOP_DST_INT_REDUCE_T_X10 = 29,
+ UOP_DST_INT_REDUCE_T_X20 = 30,
+ UOP_DST_INT_REDUCE_T_X40 = 31,
+ UOP_DST_INT_REDUCE_T_X50 = 32,
+ UOP_DST_INT_REDUCE_T_X100 = 33,
+
+ UOP_DST_EXT_Y, // external destination, used together with UOP_BANK_EXT (implicit value 34)
+
+ UOP_DST_DUMMY // NOTE(review): usage not visible in this part of the file
+};
+// Debug helper: prints msg, then every word of buf as 8-digit hex from the highest index down to words[0], then a newline.
+void dump_fpga_buffer(const char *msg, const FPGA_BUFFER *buf)
+{
+ printf("%s", msg);
+ for (int i=FPGA_OPERAND_NUM_WORDS; i>0; i--)
+ printf("%08x ", buf->words[i-1]); // i-1: indices run 0..FPGA_OPERAND_NUM_WORDS-1, as in the ladder loop's K->words[word_count - 1]
+ printf("\n");
+}
+
+//------------------------------------------------------------------------------
+// Prototypes: each micro-operation takes two (bank, operand) sources and two (bank, operand) destinations
+//------------------------------------------------------------------------------
+static void uop_move (UOP_BANK src_bank_1, UOP_SRC_OPERAND src_operand_1,
+ UOP_BANK src_bank_2, UOP_SRC_OPERAND src_operand_2,
+ UOP_BANK dst_bank_1, UOP_DST_OPERAND dst_operand_1,
+ UOP_BANK dst_bank_2, UOP_DST_OPERAND dst_operand_2);
+// uop_add/uop_sub additionally take the modulus (callers in this file pass &X25519_2P or &X25519_1P)
+static void uop_add (UOP_BANK src_bank_1, UOP_SRC_OPERAND src_operand_1,
+ UOP_BANK src_bank_2, UOP_SRC_OPERAND src_operand_2,
+ UOP_BANK dst_bank_1, UOP_DST_OPERAND dst_operand_1,
+ UOP_BANK dst_bank_2, UOP_DST_OPERAND dst_operand_2,
+ FPGA_BUFFER *modulus);
+
+static void uop_sub (UOP_BANK src_bank_1, UOP_SRC_OPERAND src_operand_1,
+ UOP_BANK src_bank_2, UOP_SRC_OPERAND src_operand_2,
+ UOP_BANK dst_bank_1, UOP_DST_OPERAND dst_operand_1,
+ UOP_BANK dst_bank_2, UOP_DST_OPERAND dst_operand_2,
+ FPGA_BUFFER *modulus);
+// uop_mul takes no modulus argument
+static void uop_mul (UOP_BANK src_bank_1, UOP_SRC_OPERAND src_operand_1,
+ UOP_BANK src_bank_2, UOP_SRC_OPERAND src_operand_2,
+ UOP_BANK dst_bank_1, UOP_DST_OPERAND dst_operand_1,
+ UOP_BANK dst_bank_2, UOP_DST_OPERAND dst_operand_2);
+// NOTE(review): presumably copy one bank slot to/from a standalone FPGA_BUFFER; definitions are not visible here
+static void bank2buffer(UOP_BANK bank, UOP_DST_OPERAND operand, FPGA_BUFFER *buffer);
+static void buffer2bank(FPGA_BUFFER *buffer, UOP_BANK bank, UOP_SRC_OPERAND operand);
+
+
+//------------------------------------------------------------------------------
+// Elliptic curve point scalar multiplication routine (microcode version).
+//
+// Runs the Montgomery ladder over the bits of K (highest-indexed word first,
+// top bit first), expressed as uop_move/uop_add/uop_sub/uop_mul operations on
+// the operand banks, then converts the result to affine coordinates.
+// PX is the input x-coordinate, K is the scalar, QX receives the result.
+//
+// The algorithm is based on Algorithm 3 from "How to (pre-)compute a ladder"
+// https://eprint.iacr.org/2017/264.pdf
+//------------------------------------------------------------------------------
+void fpga_curve_scalar_multiply_microcode(FPGA_BUFFER *PX, FPGA_BUFFER *K, FPGA_BUFFER *QX)
+//------------------------------------------------------------------------------
+{
+ int word_count, bit_count, cyc_count; // counters
+
+
+ int i, j;
+ for (i=0; i<4; i++)
+ for (j=0; j<64; j++)
+ bank_flags[i][j] = false;
+
+
+ // pre: copy the input x-coordinate into the external X operand (read later as UOP_SRC_EXT_X)
+ fpga_multiword_copy(PX, &BANK_EXT_X);
+
+ // initialization: R0 = (ONE, ZERO), R1 = (PX, ONE); X parts go to bank A, Z parts to bank B
+
+
+ // fpga_multiword_copy(&X25519_ONE, &LADDER_R0_X);
+ // fpga_multiword_copy(&X25519_ZERO, &LADDER_R0_Z);
+ uop_move (UOP_BANK_EXT, UOP_SRC_EXT_ONE, UOP_BANK_EXT, UOP_SRC_EXT_ZERO,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_Z);
+
+ // fpga_multiword_copy(PX, &LADDER_R1_X);
+ // fpga_multiword_copy(&X25519_ONE, &LADDER_R1_Z);
+ uop_move (UOP_BANK_EXT, UOP_SRC_EXT_X, UOP_BANK_EXT, UOP_SRC_EXT_ONE,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_R1_X, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R1_Z);
+
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_LADDER_R0_X, &LADDER_R0_X);
+ //bank2buffer(UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_Z, &LADDER_R0_Z);
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_LADDER_R1_X, &LADDER_R1_X);
+ //bank2buffer(UOP_BANK_INT_B, UOP_DST_INT_LADDER_R1_Z, &LADDER_R1_Z);
+
+ // s holds the previous scalar bit; (s == k_bit) picks straight vs swapped copies of R0/R1 into T0/T1
+ FPGA_WORD k_word;
+ bool k_bit, s = false;
+
+ for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--)
+ {
+ for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
+ {
+ k_word = K->words[word_count - 1] >> (bit_count - 1);
+ k_bit = (k_word & (FPGA_WORD)1) == 1;
+
+ if (s == k_bit)
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T0_Z);
+ else
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R1_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R1_Z,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T0_Z);
+
+ if (s == k_bit)
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R1_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R1_Z,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T1_Z);
+ else
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T1_Z);
+
+ s = k_bit;
+
+ uop_add (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T0_Z,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_S0, UOP_BANK_INT_B, UOP_DST_INT_LADDER_S0,
+ &X25519_2P);
+
+ uop_add (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T1_Z,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_S1, UOP_BANK_INT_B, UOP_DST_INT_LADDER_S1,
+ &X25519_2P);
+
+ uop_sub (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T0_Z,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_D0, UOP_BANK_INT_B, UOP_DST_INT_LADDER_D0,
+ &X25519_2P);
+
+ uop_sub (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T1_Z,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_D1, UOP_BANK_INT_B, UOP_DST_INT_LADDER_D1,
+ &X25519_2P);
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_S0, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_S0,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_QS0, UOP_BANK_INT_D, UOP_DST_INT_LADDER_QS0);
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_D0, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_D0,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_QD0, UOP_BANK_INT_D, UOP_DST_INT_LADDER_QD0);
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_S0, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_D1,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_S0D1, UOP_BANK_INT_D, UOP_DST_INT_LADDER_S0D1);
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_S1, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_D0,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_S1D0, UOP_BANK_INT_D, UOP_DST_INT_LADDER_S1D0);
+
+ uop_add (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_S1D0, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_S0D1,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_TS, UOP_BANK_INT_B, UOP_DST_INT_LADDER_TS,
+ &X25519_2P);
+
+ uop_sub (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_S1D0, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_S0D1,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_TD, UOP_BANK_INT_B, UOP_DST_INT_LADDER_TD,
+ &X25519_2P);
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_TD, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_TD,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_QTD, UOP_BANK_INT_D, UOP_DST_INT_LADDER_QTD);
+
+ uop_sub (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_QS0, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_QD0,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_T0, UOP_BANK_INT_B, UOP_DST_INT_LADDER_T0,
+ &X25519_2P);
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_T0, UOP_BANK_EXT, UOP_SRC_EXT_A24,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_TA, UOP_BANK_INT_D, UOP_DST_INT_LADDER_TA);
+
+ uop_add (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_TA, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_QD0,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_T1, UOP_BANK_INT_B, UOP_DST_INT_LADDER_T1,
+ &X25519_2P);
+
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_QS0, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_QD0,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_X);
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_T0, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_T1,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_T0_Z, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T0_Z);
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_TS, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_TS,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T1_X);
+
+ uop_mul (UOP_BANK_EXT, UOP_SRC_EXT_X, UOP_BANK_INT_C, UOP_SRC_INT_LADDER_QTD,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_R1_Z, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R1_Z);
+
+ uop_move (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T0_Z,
+ UOP_BANK_INT_A, UOP_DST_INT_LADDER_R1_X, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_Z);
+ }
+ }
+
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_LADDER_R0_X, &LADDER_R0_X);
+ //bank2buffer(UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_Z, &LADDER_R0_Z);
+
+ // since the lower three bits of the private key are always 000,
+ // the result is in R0X, R0Z
+
+ // conversion to affine coordinates
+ //fpga_multiword_copy(&LADDER_R0_Z, &REDUCE_T_1);
+
+ uop_add (UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z, UOP_BANK_EXT, UOP_SRC_EXT_ZERO,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_1,
+ &X25519_2P);
+
+ uop_move (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_1, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_1);
+
+ //uop_move (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z,
+ // UOP_BANK_INT_C, UOP_DST_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_1);
+
+
+ //
+ //fpga_modular_mul(&REDUCE_T_1, &REDUCE_T_1, &REDUCE_T_10, &X25519_2P);
+ //
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_10, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_10);
+
+
+// fpga_modular_mul(&REDUCE_T_10, &REDUCE_T_10, &REDUCE_R1, &X25519_2P);
+// fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+// fpga_modular_mul(&REDUCE_R2, &REDUCE_T_1, &REDUCE_T_1001, &X25519_2P);
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_10, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_10,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_1,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_1001, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_1001);
+
+
+ //
+ //fpga_modular_mul(&REDUCE_T_1001, &REDUCE_T_10, &REDUCE_T_1011, &X25519_2P);
+ //
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_10, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_10,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_10, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_10);
+
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_1001, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_10,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_1011, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_1011);
+
+
+
+ //fpga_modular_mul(&REDUCE_T_1011, &REDUCE_T_1011, &REDUCE_R1, &X25519_2P);
+ //fpga_modular_mul(&REDUCE_R1, &REDUCE_T_1001, &REDUCE_T_X5, &X25519_2P);
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_1011, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_1011,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_1001,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X5, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X5);
+
+
+
+
+ //
+ //fpga_multiword_copy(&REDUCE_T_X5, &REDUCE_R1);
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X5, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X5,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+ //bank2buffer(UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, &REDUCE_R1);
+ // each loop below multiplies a value by itself repeatedly, alternating R1 (banks C/D) -> R2 (banks A/B) -> R1 ...
+ for (cyc_count=0; cyc_count<5; cyc_count++)
+ { if (!(cyc_count % 2))
+
+ //fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+ else
+ //fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+ }
+
+
+ //fpga_modular_mul(&REDUCE_R2, &REDUCE_T_X5, &REDUCE_T_X10, &X25519_2P);
+ //
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X5,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X10, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_X10);
+
+
+
+ //fpga_multiword_copy(&REDUCE_T_X10, &REDUCE_R1);
+ //for (cyc_count=0; cyc_count<10; cyc_count++)
+ //{ if (!(cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+ //else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+ //}
+ //fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X10, &REDUCE_T_X20, &X25519_2P);
+
+ uop_move (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_X10, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X10,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+ for (cyc_count=0; cyc_count<10; cyc_count++)
+ { if (!(cyc_count % 2))
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+ else
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+ }
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_X10, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X20, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X20);
+
+
+
+
+
+
+ //
+ //fpga_multiword_copy(&REDUCE_T_X20, &REDUCE_R1);
+ //for (cyc_count=0; cyc_count<20; cyc_count++)
+ //{ if (!(cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+ //else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+ //}
+ //fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X20, &REDUCE_T_X40, &X25519_2P);
+
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X20, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X20,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X20, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X20,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X20, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_X20);
+
+ for (cyc_count=0; cyc_count<20; cyc_count++)
+ { if (!(cyc_count % 2))
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+ else
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+ }
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_X20, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X40, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X40);
+
+
+ //
+// fpga_multiword_copy(&REDUCE_T_X40, &REDUCE_R1);
+// for (cyc_count=0; cyc_count<10; cyc_count++)
+// { if (!(cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+// else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+// }
+// fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X10, &REDUCE_T_X50, &X25519_2P);
+ //
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X40, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X40,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+ //uop_move (UOP_BANK_INT_, UOP_SRC_INT_REDUCE_, UOP_BANK_INT_, UOP_SRC_INT_REDUCE_,
+ // UOP_BANK_INT_, UOP_DST_INT_REDUCE_, UOP_BANK_INT_, UOP_DST_INT_REDUCE_);
+
+ for (cyc_count=0; cyc_count<10; cyc_count++)
+ { if (!(cyc_count % 2))
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+ else
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+ }
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X10,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X50, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X50);
+
+
+
+
+
+
+
+
+// fpga_multiword_copy(&REDUCE_T_X50, &REDUCE_R1);
+// for (cyc_count=0; cyc_count<50; cyc_count++)
+// { if (!(cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+// else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+// }
+// fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X50, &REDUCE_T_X100, &X25519_2P);
+ //
+
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X50, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X50,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X50, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X50,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X50, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_X50);
+
+ for (cyc_count=0; cyc_count<50; cyc_count++)
+ { if (!(cyc_count % 2))
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+ else
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+ }
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X50,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X100, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X100);
+
+
+
+
+
+
+
+
+
+ //fpga_multiword_copy(&REDUCE_T_X100, &REDUCE_R1);
+ //for (cyc_count=0; cyc_count<100; cyc_count++)
+ //{ if (!(cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+ //else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+ //}
+ //
+ //fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X100, &REDUCE_R2, &X25519_2P);
+ //
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X100, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X100,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X100, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X100,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X100, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_X100);
+
+ for (cyc_count=0; cyc_count<100; cyc_count++)
+ { if (!(cyc_count % 2))
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+ else
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+ }
+
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X100,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+
+
+// for (cyc_count=0; cyc_count<50; cyc_count++)
+// { if ((cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P); // !!! (swapped sides)
+// else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+// }
+ //
+ for (cyc_count=0; cyc_count<50; cyc_count++)
+ { if (!(cyc_count % 2))
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+ else
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+ }
+
+
+
+
+ //fpga_modular_mul(&REDUCE_R2, &REDUCE_T_X50, &REDUCE_R1, &X25519_2P);
+ //fpga_multiword_copy(&REDUCE_R1, &REDUCE_R2);
+
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X50,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+
+ //for (cyc_count=0; cyc_count<5; cyc_count++)
+ //{ if (!(cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+ // else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
+ //}
+
+ for (cyc_count=0; cyc_count<5; cyc_count++)
+ { if (!(cyc_count % 2))
+ uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+ else
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+ }
+
+
+ // result is in R2 (banks A/B, written by the last of the 5 iterations above)
+
+ //
+ //fpga_modular_mul(&REDUCE_R2, &REDUCE_T_1011, &REDUCE_R1, &X25519_2P);
+ //fpga_modular_mul(&LADDER_R0_X, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
+
+
+ uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_1011,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+
+ uop_move (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z,
+ UOP_BANK_INT_C, UOP_DST_INT_LADDER_R0_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_R0_Z);
+
+ uop_mul (UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_C, UOP_SRC_INT_LADDER_R0_X,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+
+
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_1, &REDUCE_T_1);
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_10, &REDUCE_T_10);
+ //bank2buffer(UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_1001, &REDUCE_T_1001);
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_1011, &REDUCE_T_1011);
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X5, &REDUCE_T_X5);
+ //bank2buffer(UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X10, &REDUCE_T_X10);
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X20, &REDUCE_T_X20);
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X40, &REDUCE_T_X40);
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X50, &REDUCE_T_X50);
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X100, &REDUCE_T_X100);
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, &REDUCE_R2);
+
+
+ //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, &REDUCE_R2);
+
+ //
+ //fpga_modular_add(&REDUCE_R2, &X25519_ZERO, QX, &X25519_1P); // 1P!
+
+ uop_add (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_EXT, UOP_SRC_EXT_ZERO,
+ UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1,
+ &X25519_1P);
+
+ uop_move (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
+ UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_EXT, UOP_DST_EXT_Y);
+
+ // DST_EXT_Y -> QX (NOTE(review): QX is read back from bank C REDUCE_R1 here, not from the EXT_Y destination)
+ bank2buffer(UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, QX);
+
+}
+
+
+//------------------------------------------------------------------------------
+static void uop_move (UOP_BANK src_bank_x, UOP_SRC_OPERAND src_operand_x,
+                      UOP_BANK src_bank_y, UOP_SRC_OPERAND src_operand_y,
+                      UOP_BANK dst_bank_x, UOP_DST_OPERAND dst_operand_x,
+                      UOP_BANK dst_bank_y, UOP_DST_OPERAND dst_operand_y)
+//------------------------------------------------------------------------------
+{
+    // MOVE micro-operation: copies source X to destination X and source Y to
+    // destination Y.
+    //
+    //   src_bank_*, src_operand_* - where each input is read from; for
+    //       UOP_BANK_EXT the operand selects X25519_ZERO, X25519_ONE,
+    //       BANK_EXT_X or X25519_A24, otherwise it indexes BANK_INT
+    //   dst_bank_*, dst_operand_* - where each output is written; for
+    //       UOP_BANK_EXT only UOP_DST_EXT_Y (BANK_EXT_Y) is accepted,
+    //       otherwise the operand indexes BANK_INT
+    //
+    // Every internal destination slot that gets written is flagged in
+    // bank_flags. All violations are reported through uop_fatal().
+
+    // the two sources may share a bank only if that bank is UOP_BANK_EXT
+    // (the comparison is symmetric, so one check covers both directions)
+    if ((src_bank_x != UOP_BANK_EXT) && (src_bank_x == src_bank_y)) uop_fatal("ERROR: uop_move(): src_bank_x == src_bank_y!");
+
+    // the two destinations must always be in different banks
+    if (dst_bank_x == dst_bank_y) uop_fatal("ERROR: uop_move(): dst_bank_x == dst_bank_y!");
+
+    // no bank may be used as source and destination at the same time
+    if (src_bank_x == dst_bank_x) uop_fatal("ERROR: uop_move(): src_bank_x == dst_bank_x!");
+    if (src_bank_x == dst_bank_y) uop_fatal("ERROR: uop_move(): src_bank_x == dst_bank_y!");
+
+    if (src_bank_y == dst_bank_x) uop_fatal("ERROR: uop_move(): src_bank_y == dst_bank_x!");
+    if (src_bank_y == dst_bank_y) uop_fatal("ERROR: uop_move(): src_bank_y == dst_bank_y!");
+
+    FPGA_BUFFER *ptr_src_x, *ptr_dst_x;
+    FPGA_BUFFER *ptr_src_y, *ptr_dst_y;
+
+    // Resolve all four operands to buffers. uop_fatal() is defined elsewhere,
+    // so each unsupported-operand branch also returns explicitly: a pointer
+    // that was never set must not reach the copy below.
+    if (src_bank_x != UOP_BANK_EXT)
+    {
+        ptr_src_x = &BANK_INT[src_bank_x][src_operand_x];
+    }
+    else
+    {
+        switch (src_operand_x)
+        {
+            case UOP_SRC_EXT_ZERO: ptr_src_x = &X25519_ZERO; break;
+            case UOP_SRC_EXT_ONE:  ptr_src_x = &X25519_ONE;  break;
+            case UOP_SRC_EXT_X:    ptr_src_x = &BANK_EXT_X;  break;
+            case UOP_SRC_EXT_A24:  ptr_src_x = &X25519_A24;  break;
+            default:
+                uop_fatal("ERROR: uop_move(): unsupported external src_operand_x!");
+                return;
+        }
+    }
+
+    if (src_bank_y != UOP_BANK_EXT)
+    {
+        ptr_src_y = &BANK_INT[src_bank_y][src_operand_y];
+    }
+    else
+    {
+        switch (src_operand_y)
+        {
+            case UOP_SRC_EXT_ZERO: ptr_src_y = &X25519_ZERO; break;
+            case UOP_SRC_EXT_ONE:  ptr_src_y = &X25519_ONE;  break;
+            case UOP_SRC_EXT_X:    ptr_src_y = &BANK_EXT_X;  break;
+            case UOP_SRC_EXT_A24:  ptr_src_y = &X25519_A24;  break;
+            default:
+                uop_fatal("ERROR: uop_move(): unsupported external src_operand_y!");
+                return;
+        }
+    }
+
+    if (dst_bank_x != UOP_BANK_EXT)
+    {
+        ptr_dst_x = &BANK_INT[dst_bank_x][dst_operand_x];
+    }
+    else
+    {
+        switch (dst_operand_x)
+        {
+            case UOP_DST_EXT_Y: ptr_dst_x = &BANK_EXT_Y; break;
+            default:
+                uop_fatal("ERROR: uop_move(): unsupported external dst_operand_x!");
+                return;
+        }
+    }
+
+    if (dst_bank_y != UOP_BANK_EXT)
+    {
+        ptr_dst_y = &BANK_INT[dst_bank_y][dst_operand_y];
+    }
+    else
+    {
+        switch (dst_operand_y)
+        {
+            case UOP_DST_EXT_Y: ptr_dst_y = &BANK_EXT_Y; break;
+            default:
+                uop_fatal("ERROR: uop_move(): unsupported external dst_operand_y!");
+                return;
+        }
+    }
+
+    // X -> X and Y -> Y; the bank checks above guarantee that no source
+    // bank is also a destination bank
+    fpga_multiword_copy(ptr_src_x, ptr_dst_x);
+    fpga_multiword_copy(ptr_src_y, ptr_dst_y);
+
+    // record which internal slots now hold data (bank2buffer() checks this)
+    if (dst_bank_x != UOP_BANK_EXT) bank_flags[dst_bank_x][dst_operand_x] = true;
+    if (dst_bank_y != UOP_BANK_EXT) bank_flags[dst_bank_y][dst_operand_y] = true;
+}
+
+
+//------------------------------------------------------------------------------
+static void uop_add (UOP_BANK src_bank_x, UOP_SRC_OPERAND src_operand_x,
+                     UOP_BANK src_bank_y, UOP_SRC_OPERAND src_operand_y,
+                     UOP_BANK dst_bank_x, UOP_DST_OPERAND dst_operand_x,
+                     UOP_BANK dst_bank_y, UOP_DST_OPERAND dst_operand_y,
+                     FPGA_BUFFER *modulus)
+//------------------------------------------------------------------------------
+{
+    // ADD micro-operation: passes source X, source Y and `modulus` to
+    // fpga_modular_add() and stores the single result in BOTH destinations.
+    //
+    //   src_bank_*, src_operand_* - where each input is read from; for
+    //       UOP_BANK_EXT the operand selects X25519_ZERO, X25519_ONE,
+    //       BANK_EXT_X or X25519_A24, otherwise it indexes BANK_INT
+    //   dst_bank_*, dst_operand_* - where the result is written; for
+    //       UOP_BANK_EXT only UOP_DST_EXT_Y (BANK_EXT_Y) is accepted,
+    //       otherwise the operand indexes BANK_INT
+    //   modulus - forwarded unchanged to fpga_modular_add()
+    //
+    // Every internal destination slot that gets written is flagged in
+    // bank_flags. All violations are reported through uop_fatal().
+
+    // the two sources may share a bank only if that bank is UOP_BANK_EXT
+    // (the comparison is symmetric, so one check covers both directions)
+    if ((src_bank_x != UOP_BANK_EXT) && (src_bank_x == src_bank_y)) uop_fatal("ERROR: uop_add(): src_bank_x == src_bank_y!");
+
+    // the two destinations must always be in different banks
+    if (dst_bank_x == dst_bank_y) uop_fatal("ERROR: uop_add(): dst_bank_x == dst_bank_y!");
+
+    // no bank may be used as source and destination at the same time
+    if (src_bank_x == dst_bank_x) uop_fatal("ERROR: uop_add(): src_bank_x == dst_bank_x!");
+    if (src_bank_x == dst_bank_y) uop_fatal("ERROR: uop_add(): src_bank_x == dst_bank_y!");
+
+    if (src_bank_y == dst_bank_x) uop_fatal("ERROR: uop_add(): src_bank_y == dst_bank_x!");
+    if (src_bank_y == dst_bank_y) uop_fatal("ERROR: uop_add(): src_bank_y == dst_bank_y!");
+
+    FPGA_BUFFER *ptr_src_x, *ptr_dst_x;
+    FPGA_BUFFER *ptr_src_y, *ptr_dst_y;
+
+    // Resolve all four operands to buffers. uop_fatal() is defined elsewhere,
+    // so each unsupported-operand branch also returns explicitly: a pointer
+    // that was never set must not reach the arithmetic below.
+    if (src_bank_x != UOP_BANK_EXT)
+    {
+        ptr_src_x = &BANK_INT[src_bank_x][src_operand_x];
+    }
+    else
+    {
+        switch (src_operand_x)
+        {
+            case UOP_SRC_EXT_ZERO: ptr_src_x = &X25519_ZERO; break;
+            case UOP_SRC_EXT_ONE:  ptr_src_x = &X25519_ONE;  break;
+            case UOP_SRC_EXT_X:    ptr_src_x = &BANK_EXT_X;  break;
+            case UOP_SRC_EXT_A24:  ptr_src_x = &X25519_A24;  break;
+            default:
+                uop_fatal("ERROR: uop_add(): unsupported external src_operand_x!");
+                return;
+        }
+    }
+
+    if (src_bank_y != UOP_BANK_EXT)
+    {
+        ptr_src_y = &BANK_INT[src_bank_y][src_operand_y];
+    }
+    else
+    {
+        switch (src_operand_y)
+        {
+            case UOP_SRC_EXT_ZERO: ptr_src_y = &X25519_ZERO; break;
+            case UOP_SRC_EXT_ONE:  ptr_src_y = &X25519_ONE;  break;
+            case UOP_SRC_EXT_X:    ptr_src_y = &BANK_EXT_X;  break;
+            case UOP_SRC_EXT_A24:  ptr_src_y = &X25519_A24;  break;
+            default:
+                uop_fatal("ERROR: uop_add(): unsupported external src_operand_y!");
+                return;
+        }
+    }
+
+    if (dst_bank_x != UOP_BANK_EXT)
+    {
+        ptr_dst_x = &BANK_INT[dst_bank_x][dst_operand_x];
+    }
+    else
+    {
+        switch (dst_operand_x)
+        {
+            case UOP_DST_EXT_Y: ptr_dst_x = &BANK_EXT_Y; break;
+            default:
+                uop_fatal("ERROR: uop_add(): unsupported external dst_operand_x!");
+                return;
+        }
+    }
+
+    if (dst_bank_y != UOP_BANK_EXT)
+    {
+        ptr_dst_y = &BANK_INT[dst_bank_y][dst_operand_y];
+    }
+    else
+    {
+        switch (dst_operand_y)
+        {
+            case UOP_DST_EXT_Y: ptr_dst_y = &BANK_EXT_Y; break;
+            default:
+                uop_fatal("ERROR: uop_add(): unsupported external dst_operand_y!");
+                return;
+        }
+    }
+
+    // compute once into a temporary, then fan the result out to both targets
+    FPGA_BUFFER S;
+    fpga_modular_add(ptr_src_x, ptr_src_y, &S, modulus);
+    fpga_multiword_copy(&S, ptr_dst_x);
+    fpga_multiword_copy(&S, ptr_dst_y);
+
+    // record which internal slots now hold data (bank2buffer() checks this)
+    if (dst_bank_x != UOP_BANK_EXT) bank_flags[dst_bank_x][dst_operand_x] = true;
+    if (dst_bank_y != UOP_BANK_EXT) bank_flags[dst_bank_y][dst_operand_y] = true;
+}
+
+
+//------------------------------------------------------------------------------
+static void uop_sub (UOP_BANK src_bank_x, UOP_SRC_OPERAND src_operand_x,
+                     UOP_BANK src_bank_y, UOP_SRC_OPERAND src_operand_y,
+                     UOP_BANK dst_bank_x, UOP_DST_OPERAND dst_operand_x,
+                     UOP_BANK dst_bank_y, UOP_DST_OPERAND dst_operand_y,
+                     FPGA_BUFFER *modulus)
+//------------------------------------------------------------------------------
+{
+    // SUB micro-operation: passes source X, source Y (in that order) and
+    // `modulus` to fpga_modular_sub() and stores the single result in BOTH
+    // destinations.
+    //
+    //   src_bank_*, src_operand_* - where each input is read from; for
+    //       UOP_BANK_EXT the operand selects X25519_ZERO, X25519_ONE,
+    //       BANK_EXT_X or X25519_A24, otherwise it indexes BANK_INT
+    //   dst_bank_*, dst_operand_* - where the result is written; for
+    //       UOP_BANK_EXT only UOP_DST_EXT_Y (BANK_EXT_Y) is accepted,
+    //       otherwise the operand indexes BANK_INT
+    //   modulus - forwarded unchanged to fpga_modular_sub()
+    //
+    // Every internal destination slot that gets written is flagged in
+    // bank_flags. All violations are reported through uop_fatal().
+
+    // the two sources may share a bank only if that bank is UOP_BANK_EXT
+    // (the comparison is symmetric, so one check covers both directions)
+    if ((src_bank_x != UOP_BANK_EXT) && (src_bank_x == src_bank_y)) uop_fatal("ERROR: uop_sub(): src_bank_x == src_bank_y!");
+
+    // the two destinations must always be in different banks
+    if (dst_bank_x == dst_bank_y) uop_fatal("ERROR: uop_sub(): dst_bank_x == dst_bank_y!");
+
+    // no bank may be used as source and destination at the same time
+    if (src_bank_x == dst_bank_x) uop_fatal("ERROR: uop_sub(): src_bank_x == dst_bank_x!");
+    if (src_bank_x == dst_bank_y) uop_fatal("ERROR: uop_sub(): src_bank_x == dst_bank_y!");
+
+    if (src_bank_y == dst_bank_x) uop_fatal("ERROR: uop_sub(): src_bank_y == dst_bank_x!");
+    if (src_bank_y == dst_bank_y) uop_fatal("ERROR: uop_sub(): src_bank_y == dst_bank_y!");
+
+    FPGA_BUFFER *ptr_src_x, *ptr_dst_x;
+    FPGA_BUFFER *ptr_src_y, *ptr_dst_y;
+
+    // Resolve all four operands to buffers. uop_fatal() is defined elsewhere,
+    // so each unsupported-operand branch also returns explicitly: a pointer
+    // that was never set must not reach the arithmetic below.
+    if (src_bank_x != UOP_BANK_EXT)
+    {
+        ptr_src_x = &BANK_INT[src_bank_x][src_operand_x];
+    }
+    else
+    {
+        switch (src_operand_x)
+        {
+            case UOP_SRC_EXT_ZERO: ptr_src_x = &X25519_ZERO; break;
+            case UOP_SRC_EXT_ONE:  ptr_src_x = &X25519_ONE;  break;
+            case UOP_SRC_EXT_X:    ptr_src_x = &BANK_EXT_X;  break;
+            case UOP_SRC_EXT_A24:  ptr_src_x = &X25519_A24;  break;
+            default:
+                uop_fatal("ERROR: uop_sub(): unsupported external src_operand_x!");
+                return;
+        }
+    }
+
+    if (src_bank_y != UOP_BANK_EXT)
+    {
+        ptr_src_y = &BANK_INT[src_bank_y][src_operand_y];
+    }
+    else
+    {
+        switch (src_operand_y)
+        {
+            case UOP_SRC_EXT_ZERO: ptr_src_y = &X25519_ZERO; break;
+            case UOP_SRC_EXT_ONE:  ptr_src_y = &X25519_ONE;  break;
+            case UOP_SRC_EXT_X:    ptr_src_y = &BANK_EXT_X;  break;
+            case UOP_SRC_EXT_A24:  ptr_src_y = &X25519_A24;  break;
+            default:
+                uop_fatal("ERROR: uop_sub(): unsupported external src_operand_y!");
+                return;
+        }
+    }
+
+    if (dst_bank_x != UOP_BANK_EXT)
+    {
+        ptr_dst_x = &BANK_INT[dst_bank_x][dst_operand_x];
+    }
+    else
+    {
+        switch (dst_operand_x)
+        {
+            case UOP_DST_EXT_Y: ptr_dst_x = &BANK_EXT_Y; break;
+            default:
+                uop_fatal("ERROR: uop_sub(): unsupported external dst_operand_x!");
+                return;
+        }
+    }
+
+    if (dst_bank_y != UOP_BANK_EXT)
+    {
+        ptr_dst_y = &BANK_INT[dst_bank_y][dst_operand_y];
+    }
+    else
+    {
+        switch (dst_operand_y)
+        {
+            case UOP_DST_EXT_Y: ptr_dst_y = &BANK_EXT_Y; break;
+            default:
+                uop_fatal("ERROR: uop_sub(): unsupported external dst_operand_y!");
+                return;
+        }
+    }
+
+    // compute once into a temporary, then fan the result out to both targets
+    FPGA_BUFFER D;
+    fpga_modular_sub(ptr_src_x, ptr_src_y, &D, modulus);
+    fpga_multiword_copy(&D, ptr_dst_x);
+    fpga_multiword_copy(&D, ptr_dst_y);
+
+    // record which internal slots now hold data (bank2buffer() checks this)
+    if (dst_bank_x != UOP_BANK_EXT) bank_flags[dst_bank_x][dst_operand_x] = true;
+    if (dst_bank_y != UOP_BANK_EXT) bank_flags[dst_bank_y][dst_operand_y] = true;
+}
+
+
+//------------------------------------------------------------------------------
+static void uop_mul (UOP_BANK src_bank_x, UOP_SRC_OPERAND src_operand_x,
+                     UOP_BANK src_bank_y, UOP_SRC_OPERAND src_operand_y,
+                     UOP_BANK dst_bank_x, UOP_DST_OPERAND dst_operand_x,
+                     UOP_BANK dst_bank_y, UOP_DST_OPERAND dst_operand_y)
+//------------------------------------------------------------------------------
+{
+    // MUL micro-operation: passes source X and source Y to fpga_modular_mul()
+    // together with the fixed modulus argument &X25519_2P and stores the
+    // single result in BOTH destinations.
+    //
+    //   src_bank_*, src_operand_* - where each input is read from; for
+    //       UOP_BANK_EXT the operand selects X25519_ZERO, X25519_ONE,
+    //       BANK_EXT_X or X25519_A24, otherwise it indexes BANK_INT
+    //   dst_bank_*, dst_operand_* - where the result is written; for
+    //       UOP_BANK_EXT only UOP_DST_EXT_Y (BANK_EXT_Y) is accepted,
+    //       otherwise the operand indexes BANK_INT
+    //
+    // Every internal destination slot that gets written is flagged in
+    // bank_flags. All violations are reported through uop_fatal().
+
+    // the two sources may share a bank only if that bank is UOP_BANK_EXT
+    // (the comparison is symmetric, so one check covers both directions)
+    if ((src_bank_x != UOP_BANK_EXT) && (src_bank_x == src_bank_y)) uop_fatal("ERROR: uop_mul(): src_bank_x == src_bank_y!");
+
+    // the two destinations must always be in different banks
+    if (dst_bank_x == dst_bank_y) uop_fatal("ERROR: uop_mul(): dst_bank_x == dst_bank_y!");
+
+    // no bank may be used as source and destination at the same time
+    if (src_bank_x == dst_bank_x) uop_fatal("ERROR: uop_mul(): src_bank_x == dst_bank_x!");
+    if (src_bank_x == dst_bank_y) uop_fatal("ERROR: uop_mul(): src_bank_x == dst_bank_y!");
+
+    if (src_bank_y == dst_bank_x) uop_fatal("ERROR: uop_mul(): src_bank_y == dst_bank_x!");
+    if (src_bank_y == dst_bank_y) uop_fatal("ERROR: uop_mul(): src_bank_y == dst_bank_y!");
+
+    FPGA_BUFFER *ptr_src_x, *ptr_dst_x;
+    FPGA_BUFFER *ptr_src_y, *ptr_dst_y;
+
+    // Resolve all four operands to buffers. uop_fatal() is defined elsewhere,
+    // so each unsupported-operand branch also returns explicitly: a pointer
+    // that was never set must not reach the arithmetic below.
+    if (src_bank_x != UOP_BANK_EXT)
+    {
+        ptr_src_x = &BANK_INT[src_bank_x][src_operand_x];
+    }
+    else
+    {
+        switch (src_operand_x)
+        {
+            case UOP_SRC_EXT_ZERO: ptr_src_x = &X25519_ZERO; break;
+            case UOP_SRC_EXT_ONE:  ptr_src_x = &X25519_ONE;  break;
+            case UOP_SRC_EXT_X:    ptr_src_x = &BANK_EXT_X;  break;
+            case UOP_SRC_EXT_A24:  ptr_src_x = &X25519_A24;  break;
+            default:
+                uop_fatal("ERROR: uop_mul(): unsupported external src_operand_x!");
+                return;
+        }
+    }
+
+    if (src_bank_y != UOP_BANK_EXT)
+    {
+        ptr_src_y = &BANK_INT[src_bank_y][src_operand_y];
+    }
+    else
+    {
+        switch (src_operand_y)
+        {
+            case UOP_SRC_EXT_ZERO: ptr_src_y = &X25519_ZERO; break;
+            case UOP_SRC_EXT_ONE:  ptr_src_y = &X25519_ONE;  break;
+            case UOP_SRC_EXT_X:    ptr_src_y = &BANK_EXT_X;  break;
+            case UOP_SRC_EXT_A24:  ptr_src_y = &X25519_A24;  break;
+            default:
+                uop_fatal("ERROR: uop_mul(): unsupported external src_operand_y!");
+                return;
+        }
+    }
+
+    if (dst_bank_x != UOP_BANK_EXT)
+    {
+        ptr_dst_x = &BANK_INT[dst_bank_x][dst_operand_x];
+    }
+    else
+    {
+        switch (dst_operand_x)
+        {
+            case UOP_DST_EXT_Y: ptr_dst_x = &BANK_EXT_Y; break;
+            default:
+                uop_fatal("ERROR: uop_mul(): unsupported external dst_operand_x!");
+                return;
+        }
+    }
+
+    if (dst_bank_y != UOP_BANK_EXT)
+    {
+        ptr_dst_y = &BANK_INT[dst_bank_y][dst_operand_y];
+    }
+    else
+    {
+        switch (dst_operand_y)
+        {
+            case UOP_DST_EXT_Y: ptr_dst_y = &BANK_EXT_Y; break;
+            default:
+                uop_fatal("ERROR: uop_mul(): unsupported external dst_operand_y!");
+                return;
+        }
+    }
+
+    // compute once into a temporary, then fan the result out to both targets
+    FPGA_BUFFER P;
+    fpga_modular_mul(ptr_src_x, ptr_src_y, &P, &X25519_2P);
+    fpga_multiword_copy(&P, ptr_dst_x);
+    fpga_multiword_copy(&P, ptr_dst_y);
+
+    // record which internal slots now hold data (bank2buffer() checks this)
+    if (dst_bank_x != UOP_BANK_EXT) bank_flags[dst_bank_x][dst_operand_x] = true;
+    if (dst_bank_y != UOP_BANK_EXT) bank_flags[dst_bank_y][dst_operand_y] = true;
+}
+
+
+//------------------------------------------------------------------------------
+static void bank2buffer(UOP_BANK bank, UOP_DST_OPERAND operand, FPGA_BUFFER *buffer)
+//------------------------------------------------------------------------------
+{
+    // Debug/export helper: copies the internal-bank slot selected by
+    // (bank, operand) into the caller's `buffer`.
+    //
+    // Reports through uop_fatal() when the external bank or the external
+    // destination operand is named, or when the slot has not been flagged
+    // in bank_flags.
+
+    if (UOP_BANK_EXT == bank)
+    {
+        uop_fatal("ERROR: bank2buffer(): bank == UOP_BANK_EXT!");
+    }
+
+    if (UOP_DST_EXT_Y == operand)
+    {
+        uop_fatal("ERROR: bank2buffer(): operand == UOP_DST_EXT_Y!");
+    }
+
+    // only slots flagged in bank_flags are allowed to be read out
+    if (bank_flags[bank][operand] == false)
+    {
+        uop_fatal("ERROR: bank2buffer(): !bank_flags[bank][operand]!");
+    }
+
+    FPGA_BUFFER *slot = &BANK_INT[bank][operand];
+    fpga_multiword_copy(slot, buffer);
+}
+
+
+//------------------------------------------------------------------------------
+static void buffer2bank(FPGA_BUFFER *buffer, UOP_BANK bank, UOP_SRC_OPERAND operand)
+//------------------------------------------------------------------------------
+{
+    // Import helper: copies the caller's `buffer` into the internal-bank slot
+    // selected by (bank, operand).
+    //
+    // Only internal banks and internal operands are valid here; the external
+    // bank and every external-only source operand are rejected through
+    // uop_fatal(). UOP_SRC_EXT_A24 is rejected together with the other three
+    // because the uop_*() routines treat all four as external-bank selectors.
+    //
+    // NOTE(review): bank_flags is not updated by this function - confirm
+    // that this is intended.
+
+    if (bank == UOP_BANK_EXT) uop_fatal("ERROR: buffer2bank(): bank == UOP_BANK_EXT!");
+    if (operand == UOP_SRC_EXT_ZERO) uop_fatal("ERROR: buffer2bank(): operand == UOP_SRC_EXT_ZERO!");
+    if (operand == UOP_SRC_EXT_ONE) uop_fatal("ERROR: buffer2bank(): operand == UOP_SRC_EXT_ONE!");
+    if (operand == UOP_SRC_EXT_X) uop_fatal("ERROR: buffer2bank(): operand == UOP_SRC_EXT_X!");
+    if (operand == UOP_SRC_EXT_A24) uop_fatal("ERROR: buffer2bank(): operand == UOP_SRC_EXT_A24!");
+
+    fpga_multiword_copy(buffer, &BANK_INT[bank][operand]);
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519_fpga_model.cpp b/x25519_fpga_model.cpp
index 1f2101a..0e06712 100644
--- a/x25519_fpga_model.cpp
+++ b/x25519_fpga_model.cpp
@@ -37,6 +37,12 @@
//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+// Mode Switch
+//------------------------------------------------------------------------------
+#define USE_MICROCODE
+
+
//------------------------------------------------------------------------------
// Headers
//------------------------------------------------------------------------------
More information about the Commits
mailing list