[Cryptech-Commits] [user/shatov/x25519_fpga_model] 02/02: Cleaned up and optimized microcode.
git at cryptech.is
git at cryptech.is
Mon Jun 4 20:44:34 UTC 2018
This is an automated email from the git hooks/post-receive script.
meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/x25519_fpga_model.
commit 23cc981edb675625484eaff13e440781045a4973
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Mon Jun 4 22:00:57 2018 +0300
Cleaned up and optimized microcode.
I'm pretty sure that the Montgomery ladder is as fast as possible; the overhead
is only one bank swap operation, which is ~0.8%. I suspect that conversion to
affine coordinates might have some potential for improvement: it does 15 swap
operations, but given that it also does 254 multiplications, the overhead is
~0.6%. In his original paper Bernstein estimated conversion to be ~7% of the entire
X25519 computation; in that sense the potential improvement in modular
inversion is negligible.
---
x25519_fpga_curve_microcode.cpp | 1144 +++++++++++----------------------------
1 file changed, 323 insertions(+), 821 deletions(-)
diff --git a/x25519_fpga_curve_microcode.cpp b/x25519_fpga_curve_microcode.cpp
index 7d40d70..11c8cc6 100644
--- a/x25519_fpga_curve_microcode.cpp
+++ b/x25519_fpga_curve_microcode.cpp
@@ -43,55 +43,12 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
+#include <string.h>
#include "x25519_fpga_model.h"
//------------------------------------------------------------------------------
-// Locals
-//------------------------------------------------------------------------------
-static FPGA_BUFFER LADDER_R0_X;
-static FPGA_BUFFER LADDER_R0_Z;
-
-static FPGA_BUFFER LADDER_R1_X;
-static FPGA_BUFFER LADDER_R1_Z;
-
-static FPGA_BUFFER LADDER_T0_X;
-static FPGA_BUFFER LADDER_T0_Z;
-
-static FPGA_BUFFER LADDER_T1_X;
-static FPGA_BUFFER LADDER_T1_Z;
-
-static FPGA_BUFFER LADDER_S0;
-static FPGA_BUFFER LADDER_S1;
-static FPGA_BUFFER LADDER_D0;
-static FPGA_BUFFER LADDER_D1;
-static FPGA_BUFFER LADDER_QS0;
-static FPGA_BUFFER LADDER_QD0;
-static FPGA_BUFFER LADDER_S0D1;
-static FPGA_BUFFER LADDER_S1D0;
-static FPGA_BUFFER LADDER_TS;
-static FPGA_BUFFER LADDER_TD;
-static FPGA_BUFFER LADDER_QTD;
-static FPGA_BUFFER LADDER_T0;
-static FPGA_BUFFER LADDER_TA;
-static FPGA_BUFFER LADDER_T1;
-
-static FPGA_BUFFER REDUCE_R1;
-static FPGA_BUFFER REDUCE_R2;
-static FPGA_BUFFER REDUCE_T_1;
-static FPGA_BUFFER REDUCE_T_10;
-static FPGA_BUFFER REDUCE_T_1001;
-static FPGA_BUFFER REDUCE_T_1011;
-static FPGA_BUFFER REDUCE_T_X5;
-static FPGA_BUFFER REDUCE_T_X10;
-static FPGA_BUFFER REDUCE_T_X20;
-static FPGA_BUFFER REDUCE_T_X40;
-static FPGA_BUFFER REDUCE_T_X50;
-static FPGA_BUFFER REDUCE_T_X100;
-
-
-//------------------------------------------------------------------------------
-// Error Handle
+// Error Handler
//------------------------------------------------------------------------------
#define uop_fatal(msg) {(void)printf("%s\n",msg);exit(EXIT_FAILURE);}
@@ -99,159 +56,113 @@ static FPGA_BUFFER REDUCE_T_X100;
//------------------------------------------------------------------------------
// Storage Buffers
//------------------------------------------------------------------------------
-static FPGA_BUFFER BANK_INT[4][64];
-static bool bank_flags[4][64];
-static FPGA_BUFFER BANK_EXT_X;
-static FPGA_BUFFER BANK_EXT_Y;
+static FPGA_BUFFER BUF_LO[64];
+static FPGA_BUFFER BUF_HI[64];
+
+static bool buf_flag_lo[64];
+static bool buf_flag_hi[64];
+
//------------------------------------------------------------------------------
enum UOP_BANK
//------------------------------------------------------------------------------
{
- UOP_BANK_INT_A = 0,
- UOP_BANK_INT_B = 1,
- UOP_BANK_INT_C = 2,
- UOP_BANK_INT_D = 3,
- UOP_BANK_EXT
+ BANK_LO, BANK_HI
};
+
//------------------------------------------------------------------------------
-enum UOP_SRC_OPERAND
+enum UOP_OPERAND
//------------------------------------------------------------------------------
{
- UOP_SRC_INT_LADDER_R0_X = 0,
- UOP_SRC_INT_LADDER_R0_Z = 1,
-
- UOP_SRC_INT_LADDER_R1_X = 2,
- UOP_SRC_INT_LADDER_R1_Z = 3,
-
- UOP_SRC_INT_LADDER_T0_X = 4,
- UOP_SRC_INT_LADDER_T0_Z = 5,
-
- UOP_SRC_INT_LADDER_T1_X = 6,
- UOP_SRC_INT_LADDER_T1_Z = 7,
-
- UOP_SRC_INT_LADDER_S0 = 8,
- UOP_SRC_INT_LADDER_S1 = 9,
- UOP_SRC_INT_LADDER_D0 = 10,
- UOP_SRC_INT_LADDER_D1 = 11,
- UOP_SRC_INT_LADDER_QS0 = 12,
- UOP_SRC_INT_LADDER_QD0 = 13,
- UOP_SRC_INT_LADDER_S0D1 = 14,
- UOP_SRC_INT_LADDER_S1D0 = 15,
- UOP_SRC_INT_LADDER_TS = 16,
- UOP_SRC_INT_LADDER_TD = 17,
- UOP_SRC_INT_LADDER_QTD = 18,
- UOP_SRC_INT_LADDER_T0 = 19,
- UOP_SRC_INT_LADDER_TA = 20,
- UOP_SRC_INT_LADDER_T1 = 21,
-
- UOP_SRC_INT_REDUCE_R1 = 22,
- UOP_SRC_INT_REDUCE_R2 = 23,
- UOP_SRC_INT_REDUCE_T_1 = 24,
- UOP_SRC_INT_REDUCE_T_10 = 25,
- UOP_SRC_INT_REDUCE_T_1001 = 26,
- UOP_SRC_INT_REDUCE_T_1011 = 27,
- UOP_SRC_INT_REDUCE_T_X5 = 28,
- UOP_SRC_INT_REDUCE_T_X10 = 29,
- UOP_SRC_INT_REDUCE_T_X20 = 30,
- UOP_SRC_INT_REDUCE_T_X40 = 31,
- UOP_SRC_INT_REDUCE_T_X50 = 32,
- UOP_SRC_INT_REDUCE_T_X100 = 33,
-
- UOP_SRC_EXT_ZERO,
- UOP_SRC_EXT_ONE,
-
- UOP_SRC_EXT_A24,
-
- UOP_SRC_EXT_X
+ CONST_ZERO = 0,
+ CONST_ONE = 1,
+ CONST_A24 = 2,
+
+ LADDER_R0_X = 3,
+ LADDER_R0_Z = 4,
+
+ LADDER_R1_X = 5,
+ LADDER_R1_Z = 6,
+
+ LADDER_T0_X = 7,
+ LADDER_T0_Z = 8,
+
+ LADDER_T1_X = 9,
+ LADDER_T1_Z = 10,
+
+ LADDER_S0 = 11,
+ LADDER_S1 = 12,
+
+ LADDER_D0 = 13,
+ LADDER_D1 = 14,
+
+ LADDER_QS0 = 15,
+ LADDER_QD0 = 16,
+
+ LADDER_S0D1 = 17,
+ LADDER_S1D0 = 18,
+
+ LADDER_TS = 19,
+ LADDER_TD = 20,
+
+ LADDER_QTD = 21,
+
+ LADDER_T0 = 22,
+ LADDER_TA = 23,
+ LADDER_T1 = 24,
+
+ LADDER_P_X = 25,
+
+ LADDER_DUMMY = 26,
+
+ REDUCE_R1 = 27,
+ REDUCE_R2 = 28,
+
+ REDUCE_T_1 = 29,
+ REDUCE_T_10 = 30,
+ REDUCE_T_1001 = 31,
+ REDUCE_T_1011 = 32,
+
+ REDUCE_T_X5 = 33,
+ REDUCE_T_X10 = 34,
+ REDUCE_T_X20 = 35,
+ REDUCE_T_X40 = 36,
+ REDUCE_T_X50 = 37,
+ REDUCE_T_X100 = 38
};
//------------------------------------------------------------------------------
-enum UOP_DST_OPERAND
+enum UOP_MODULUS
//------------------------------------------------------------------------------
{
- UOP_DST_INT_LADDER_R0_X = 0,
- UOP_DST_INT_LADDER_R0_Z = 1,
-
- UOP_DST_INT_LADDER_R1_X = 2,
- UOP_DST_INT_LADDER_R1_Z = 3,
-
- UOP_DST_INT_LADDER_T0_X = 4,
- UOP_DST_INT_LADDER_T0_Z = 5,
-
- UOP_DST_INT_LADDER_T1_X = 6,
- UOP_DST_INT_LADDER_T1_Z = 7,
-
- UOP_DST_INT_LADDER_S0 = 8,
- UOP_DST_INT_LADDER_S1 = 9,
- UOP_DST_INT_LADDER_D0 = 10,
- UOP_DST_INT_LADDER_D1 = 11,
- UOP_DST_INT_LADDER_QS0 = 12,
- UOP_DST_INT_LADDER_QD0 = 13,
- UOP_DST_INT_LADDER_S0D1 = 14,
- UOP_DST_INT_LADDER_S1D0 = 15,
- UOP_DST_INT_LADDER_TS = 16,
- UOP_DST_INT_LADDER_TD = 17,
- UOP_DST_INT_LADDER_QTD = 18,
- UOP_DST_INT_LADDER_T0 = 19,
- UOP_DST_INT_LADDER_TA = 20,
- UOP_DST_INT_LADDER_T1 = 21,
-
- UOP_DST_INT_REDUCE_R1 = 22,
- UOP_DST_INT_REDUCE_R2 = 23,
- UOP_DST_INT_REDUCE_T_1 = 24,
- UOP_DST_INT_REDUCE_T_10 = 25,
- UOP_DST_INT_REDUCE_T_1001 = 26,
- UOP_DST_INT_REDUCE_T_1011 = 27,
- UOP_DST_INT_REDUCE_T_X5 = 28,
- UOP_DST_INT_REDUCE_T_X10 = 29,
- UOP_DST_INT_REDUCE_T_X20 = 30,
- UOP_DST_INT_REDUCE_T_X40 = 31,
- UOP_DST_INT_REDUCE_T_X50 = 32,
- UOP_DST_INT_REDUCE_T_X100 = 33,
-
- UOP_DST_EXT_Y,
-
- UOP_DST_DUMMY
+ MOD_1P,
+ MOD_2P
};
-void dump_fpga_buffer(const char *msg, const FPGA_BUFFER *buf)
-{
- printf("%s", msg);
- for (int i=FPGA_OPERAND_NUM_WORDS; i>0; i--)
- printf("%08x ", buf->words[i]);
- printf("\n");
-}
//------------------------------------------------------------------------------
-// Prototypes
+enum UOP_MATH
//------------------------------------------------------------------------------
-static void uop_move (UOP_BANK src_bank_1, UOP_SRC_OPERAND src_operand_1,
- UOP_BANK src_bank_2, UOP_SRC_OPERAND src_operand_2,
- UOP_BANK dst_bank_1, UOP_DST_OPERAND dst_operand_1,
- UOP_BANK dst_bank_2, UOP_DST_OPERAND dst_operand_2);
+{
+ ADD, SUB, MUL
+};
-static void uop_add (UOP_BANK src_bank_1, UOP_SRC_OPERAND src_operand_1,
- UOP_BANK src_bank_2, UOP_SRC_OPERAND src_operand_2,
- UOP_BANK dst_bank_1, UOP_DST_OPERAND dst_operand_1,
- UOP_BANK dst_bank_2, UOP_DST_OPERAND dst_operand_2,
- FPGA_BUFFER *modulus);
-static void uop_sub (UOP_BANK src_bank_1, UOP_SRC_OPERAND src_operand_1,
- UOP_BANK src_bank_2, UOP_SRC_OPERAND src_operand_2,
- UOP_BANK dst_bank_1, UOP_DST_OPERAND dst_operand_1,
- UOP_BANK dst_bank_2, UOP_DST_OPERAND dst_operand_2,
- FPGA_BUFFER *modulus);
+//------------------------------------------------------------------------------
+// Prototypes
+//------------------------------------------------------------------------------
+static void uop_move (UOP_BANK src, UOP_OPERAND s_op1, UOP_OPERAND s_op2,
+ UOP_BANK dst, UOP_OPERAND d_op1, UOP_OPERAND d_op2);
-static void uop_mul (UOP_BANK src_bank_1, UOP_SRC_OPERAND src_operand_1,
- UOP_BANK src_bank_2, UOP_SRC_OPERAND src_operand_2,
- UOP_BANK dst_bank_1, UOP_DST_OPERAND dst_operand_1,
- UOP_BANK dst_bank_2, UOP_DST_OPERAND dst_operand_2);
+static void uop_calc (UOP_MATH math,
+ UOP_BANK src, UOP_OPERAND s_op1, UOP_OPERAND s_op2,
+ UOP_BANK dst, UOP_OPERAND d_op,
+ UOP_MODULUS mod);
-static void bank2buffer(UOP_BANK bank, UOP_DST_OPERAND operand, FPGA_BUFFER *buffer);
-static void buffer2bank(FPGA_BUFFER *buffer, UOP_BANK bank, UOP_SRC_OPERAND operand);
+static void uop_load (FPGA_BUFFER *mem, UOP_BANK dst, UOP_OPERAND d_op);
+static void uop_stor (UOP_BANK src, UOP_OPERAND s_op, FPGA_BUFFER *mem);
//------------------------------------------------------------------------------
@@ -268,761 +179,352 @@ static void buffer2bank(FPGA_BUFFER *buffer, UOP_BANK bank, UOP_SRC_OPERAND oper
void fpga_curve_scalar_multiply_microcode(FPGA_BUFFER *PX, FPGA_BUFFER *K, FPGA_BUFFER *QX)
//------------------------------------------------------------------------------
{
+ bool k_bit, s; // 1-bit values
+ FPGA_WORD k_word; // current word of multiplier
int word_count, bit_count, cyc_count; // counters
+ // reset bank flags
+ (void)memset(buf_flag_lo, 0, sizeof buf_flag_lo);
+ (void)memset(buf_flag_hi, 0, sizeof buf_flag_hi);
- int i, j;
- for (i=0; i<4; i++)
- for (j=0; j<64; j++)
- bank_flags[i][j] = false;
-
-
- // pre
- fpga_multiword_copy(PX, &BANK_EXT_X);
-
- // initialization
-
+ // initialize internal banks
+ fpga_multiword_copy(&X25519_ZERO, &BUF_LO[CONST_ZERO]);
+ fpga_multiword_copy(&X25519_ZERO, &BUF_HI[CONST_ZERO]);
- // fpga_multiword_copy(&X25519_ONE, &LADDER_R0_X);
- // fpga_multiword_copy(&X25519_ZERO, &LADDER_R0_Z);
- uop_move (UOP_BANK_EXT, UOP_SRC_EXT_ONE, UOP_BANK_EXT, UOP_SRC_EXT_ZERO,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_Z);
+ fpga_multiword_copy(&X25519_ONE, &BUF_LO[CONST_ONE]);
+ fpga_multiword_copy(&X25519_ONE, &BUF_HI[CONST_ONE]);
- // fpga_multiword_copy(PX, &LADDER_R1_X);
- // fpga_multiword_copy(&X25519_ONE, &LADDER_R1_Z);
- uop_move (UOP_BANK_EXT, UOP_SRC_EXT_X, UOP_BANK_EXT, UOP_SRC_EXT_ONE,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_R1_X, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R1_Z);
+ fpga_multiword_copy(&X25519_A24, &BUF_LO[CONST_A24]);
+ fpga_multiword_copy(&X25519_A24, &BUF_HI[CONST_A24]);
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_LADDER_R0_X, &LADDER_R0_X);
- //bank2buffer(UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_Z, &LADDER_R0_Z);
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_LADDER_R1_X, &LADDER_R1_X);
- //bank2buffer(UOP_BANK_INT_B, UOP_DST_INT_LADDER_R1_Z, &LADDER_R1_Z);
+ buf_flag_lo[CONST_ZERO] = true;
+ buf_flag_hi[CONST_ZERO] = true;
+ buf_flag_lo[CONST_ONE] = true;
+ buf_flag_hi[CONST_ONE] = true;
+ buf_flag_lo[CONST_A24] = true;
+ buf_flag_hi[CONST_A24] = true;
+ // initialization
+ uop_load(PX, BANK_HI, LADDER_P_X);
+ uop_move(BANK_HI, CONST_ONE, CONST_ZERO, BANK_LO, LADDER_R0_X, LADDER_R0_Z);
+ uop_move(BANK_HI, LADDER_P_X, CONST_ONE, BANK_LO, LADDER_R1_X, LADDER_R1_Z);
- FPGA_WORD k_word;
- bool k_bit, s = false;
-
+ // ladder
+ s = false;
for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--)
{
for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
{
- k_word = K->words[word_count - 1] >> (bit_count - 1);
- k_bit = (k_word & (FPGA_WORD)1) == 1;
+ k_word = K->words[word_count - 1] >> (bit_count - 1); // current word
+ k_bit = (k_word & (FPGA_WORD)1) == 1; // current bit
- if (s == k_bit)
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T0_Z);
- else
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R1_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R1_Z,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T0_Z);
+ // inputs are all in LO: R0_X, R0_Z, R1_X, R1_Z
+ // swap if needed
if (s == k_bit)
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R1_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R1_Z,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T1_Z);
+ { uop_move(BANK_LO, LADDER_R0_X, LADDER_R0_Z, BANK_HI, LADDER_T0_X, LADDER_T0_Z); // HI: T0_X, T0_Z = LO: R0_X, R0_Z
+ uop_move(BANK_LO, LADDER_R1_X, LADDER_R1_Z, BANK_HI, LADDER_T1_X, LADDER_T1_Z); // HI: T1_X, T1_Z = LO: R1_X, R1_Z
+ }
else
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T1_Z);
+ { uop_move(BANK_LO, LADDER_R1_X, LADDER_R1_Z, BANK_HI, LADDER_T0_X, LADDER_T0_Z); // HI: T0_X, T0_Z = LO: R1_X, R1_Z
+ uop_move(BANK_LO, LADDER_R0_X, LADDER_R0_Z, BANK_HI, LADDER_T1_X, LADDER_T1_Z); // HI: T1_X, T1_Z = LO: R0_X, R0_Z
+ }
+ // remember whether we actually did the swap
s = k_bit;
- uop_add (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T0_Z,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_S0, UOP_BANK_INT_B, UOP_DST_INT_LADDER_S0,
- &X25519_2P);
-
- uop_add (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T1_Z,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_S1, UOP_BANK_INT_B, UOP_DST_INT_LADDER_S1,
- &X25519_2P);
+ // run step
+ uop_calc(ADD, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BANK_LO, LADDER_S0, MOD_2P); // LO: S0 = HI: T0_X + T0_Z
+ uop_calc(ADD, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BANK_LO, LADDER_S1, MOD_2P); // LO: S1 = HI: T1_X + T1_Z
+ uop_calc(SUB, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BANK_LO, LADDER_D0, MOD_2P); // LO: D0 = HI: T0_X - T0_Z
+ uop_calc(SUB, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BANK_LO, LADDER_D1, MOD_2P); // LO: D1 = HI: T1_X - T1_Z
- uop_sub (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T0_Z,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_D0, UOP_BANK_INT_B, UOP_DST_INT_LADDER_D0,
- &X25519_2P);
+ uop_calc(MUL, BANK_LO, LADDER_S0, LADDER_S0, BANK_HI, LADDER_QS0, MOD_2P); // HI: QS0 = LO: S0 * S0
+ uop_calc(MUL, BANK_LO, LADDER_D0, LADDER_D0, BANK_HI, LADDER_QD0, MOD_2P); // HI: QD0 = LO: D0 * D0
+ uop_calc(MUL, BANK_LO, LADDER_S0, LADDER_D1, BANK_HI, LADDER_S0D1, MOD_2P); // HI: S0D1 = LO: S0 * D1
+ uop_calc(MUL, BANK_LO, LADDER_S1, LADDER_D0, BANK_HI, LADDER_S1D0, MOD_2P); // HI: S1D0 = LO: S1 * D0
- uop_sub (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T1_Z,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_D1, UOP_BANK_INT_B, UOP_DST_INT_LADDER_D1,
- &X25519_2P);
+ uop_calc(ADD, BANK_HI, LADDER_S1D0, LADDER_S0D1, BANK_LO, LADDER_TS, MOD_2P); // LO: TS = HI: S1D0 + S0D1
+ uop_calc(SUB, BANK_HI, LADDER_S1D0, LADDER_S0D1, BANK_LO, LADDER_TD, MOD_2P); // LO: TD = HI: S1D0 - S0D1
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_S0, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_S0,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_QS0, UOP_BANK_INT_D, UOP_DST_INT_LADDER_QS0);
+ uop_calc(MUL, BANK_LO, LADDER_TD, LADDER_TD, BANK_HI, LADDER_QTD, MOD_2P); // HI: QTD = LO: TD * TD
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_D0, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_D0,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_QD0, UOP_BANK_INT_D, UOP_DST_INT_LADDER_QD0);
-
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_S0, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_D1,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_S0D1, UOP_BANK_INT_D, UOP_DST_INT_LADDER_S0D1);
-
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_S1, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_D0,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_S1D0, UOP_BANK_INT_D, UOP_DST_INT_LADDER_S1D0);
-
- uop_add (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_S1D0, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_S0D1,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_TS, UOP_BANK_INT_B, UOP_DST_INT_LADDER_TS,
- &X25519_2P);
-
- uop_sub (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_S1D0, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_S0D1,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_TD, UOP_BANK_INT_B, UOP_DST_INT_LADDER_TD,
- &X25519_2P);
-
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_TD, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_TD,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_QTD, UOP_BANK_INT_D, UOP_DST_INT_LADDER_QTD);
-
- uop_sub (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_QS0, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_QD0,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_T0, UOP_BANK_INT_B, UOP_DST_INT_LADDER_T0,
- &X25519_2P);
-
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_T0, UOP_BANK_EXT, UOP_SRC_EXT_A24,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_TA, UOP_BANK_INT_D, UOP_DST_INT_LADDER_TA);
-
- uop_add (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_TA, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_QD0,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_T1, UOP_BANK_INT_B, UOP_DST_INT_LADDER_T1,
- &X25519_2P);
-
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_QS0, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_QD0,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_X);
-
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_T0, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_T1,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_T0_Z, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T0_Z);
-
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_TS, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_TS,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_T1_X);
-
- uop_mul (UOP_BANK_EXT, UOP_SRC_EXT_X, UOP_BANK_INT_C, UOP_SRC_INT_LADDER_QTD,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_R1_Z, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R1_Z);
+ uop_calc(SUB, BANK_HI, LADDER_QS0, LADDER_QD0, BANK_LO, LADDER_T0, MOD_2P); // LO: T0 = HI: QS0 - QD0
+ uop_calc(MUL, BANK_LO, LADDER_T0, CONST_A24, BANK_HI, LADDER_TA, MOD_2P); // HI: TA = LO: T0 * A24
+ uop_calc(ADD, BANK_HI, LADDER_TA, LADDER_QD0, BANK_LO, LADDER_T1, MOD_2P); // LO: T1 = HI: TA + QD0
+
+ uop_calc(MUL, BANK_HI, LADDER_QS0, LADDER_QD0, BANK_LO, LADDER_R0_X, MOD_2P); // LO: R0_X = HI: QS0 * QD0
+ uop_calc(MUL, BANK_LO, LADDER_T0, LADDER_T1, BANK_HI, LADDER_R0_Z, MOD_2P); // HI: R0_Z = LO: T0 * T1
+ uop_calc(MUL, BANK_LO, LADDER_TS, LADDER_TS, BANK_HI, LADDER_R1_X, MOD_2P); // HI: R1_X = LO: TS * TS
+ uop_calc(MUL, BANK_HI, LADDER_P_X, LADDER_QTD, BANK_LO, LADDER_R1_Z, MOD_2P); // LO: R1_Z = HI: PX * QTD
- uop_move (UOP_BANK_INT_C, UOP_SRC_INT_LADDER_T1_X, UOP_BANK_INT_D, UOP_SRC_INT_LADDER_T0_Z,
- UOP_BANK_INT_A, UOP_DST_INT_LADDER_R1_X, UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_Z);
+ uop_move(BANK_HI, LADDER_R0_Z, LADDER_R1_X, BANK_LO, LADDER_R0_Z, LADDER_R1_X); // LO: R0_Z, R1_X = HI: R0_Z, R1_X
}
}
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_LADDER_R0_X, &LADDER_R0_X);
- //bank2buffer(UOP_BANK_INT_B, UOP_DST_INT_LADDER_R0_Z, &LADDER_R0_Z);
+ // T_1
+ uop_move(BANK_HI, LADDER_R0_Z, LADDER_R0_Z, BANK_LO, REDUCE_T_1, REDUCE_T_1);
+ uop_move(BANK_LO, REDUCE_T_1, REDUCE_T_1, BANK_HI, REDUCE_T_1, REDUCE_T_1);
- // since the lower three bits of the private key are always 000,
- // the result is in R0X, R0z
+ // T_10
+ uop_calc(MUL, BANK_LO, REDUCE_T_1, REDUCE_T_1, BANK_HI, REDUCE_T_10, MOD_2P);
- // conversion to affine coordinates
- //fpga_multiword_copy(&LADDER_R0_Z, &REDUCE_T_1);
+ // T_1001
+ uop_calc(MUL, BANK_HI, REDUCE_T_10, REDUCE_T_10, BANK_LO, REDUCE_R1, MOD_2P);
+ uop_calc(MUL, BANK_LO, REDUCE_R1, REDUCE_R1, BANK_HI, REDUCE_R2, MOD_2P);
+ uop_calc(MUL, BANK_HI, REDUCE_R2, REDUCE_T_1, BANK_LO, REDUCE_T_1001, MOD_2P);
- uop_add (UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z, UOP_BANK_EXT, UOP_SRC_EXT_ZERO,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_1,
- &X25519_2P);
+ // T_1011
+ uop_move(BANK_HI, REDUCE_T_10, REDUCE_T_10, BANK_LO, REDUCE_T_10, REDUCE_T_10);
+ uop_calc(MUL, BANK_LO, REDUCE_T_1001, REDUCE_T_10, BANK_HI, REDUCE_T_1011, MOD_2P);
- uop_move (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_1, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_1);
+ // T_X5
+ uop_calc(MUL, BANK_HI, REDUCE_T_1011, REDUCE_T_1011, BANK_LO, REDUCE_R1, MOD_2P);
+ uop_calc(MUL, BANK_LO, REDUCE_R1, REDUCE_T_1001, BANK_HI, REDUCE_T_X5, MOD_2P);
- //uop_move (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z,
- // UOP_BANK_INT_C, UOP_DST_INT_LADDER_T0_X, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_1);
-
-
- //
- //fpga_modular_mul(&REDUCE_T_1, &REDUCE_T_1, &REDUCE_T_10, &X25519_2P);
- //
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_10, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_10);
-
-
-// fpga_modular_mul(&REDUCE_T_10, &REDUCE_T_10, &REDUCE_R1, &X25519_2P);
-// fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
-// fpga_modular_mul(&REDUCE_R2, &REDUCE_T_1, &REDUCE_T_1001, &X25519_2P);
-
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_10, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_10,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
-
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
-
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_1,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_1001, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_1001);
-
-
- //
- //fpga_modular_mul(&REDUCE_T_1001, &REDUCE_T_10, &REDUCE_T_1011, &X25519_2P);
- //
-
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_10, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_10,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_10, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_10);
-
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_1001, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_10,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_1011, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_1011);
-
-
-
- //fpga_modular_mul(&REDUCE_T_1011, &REDUCE_T_1011, &REDUCE_R1, &X25519_2P);
- //fpga_modular_mul(&REDUCE_R1, &REDUCE_T_1001, &REDUCE_T_X5, &X25519_2P);
-
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_1011, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_1011,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
-
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_1001,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X5, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X5);
-
-
-
-
- //
- //fpga_multiword_copy(&REDUCE_T_X5, &REDUCE_R1);
-
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X5, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X5,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
-
- //bank2buffer(UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, &REDUCE_R1);
-
- for (cyc_count=0; cyc_count<5; cyc_count++)
- { if (!(cyc_count % 2))
-
- //fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
-
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
- else
- //fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
-
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
- }
+ // T_X10
+ uop_move(BANK_HI, REDUCE_T_X5, REDUCE_T_X5, BANK_LO, REDUCE_R1, REDUCE_R1);
+ for (cyc_count=0; cyc_count<4; cyc_count++)
+ if (!(cyc_count % 2)) uop_calc(MUL, BANK_LO, REDUCE_R1, REDUCE_R1, BANK_HI, REDUCE_R2, MOD_2P);
+ else uop_calc(MUL, BANK_HI, REDUCE_R2, REDUCE_R2, BANK_LO, REDUCE_R1, MOD_2P);
- //fpga_modular_mul(&REDUCE_R2, &REDUCE_T_X5, &REDUCE_T_X10, &X25519_2P);
- //
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X5,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X10, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_X10);
+ uop_calc(MUL, BANK_LO, REDUCE_R1, REDUCE_R1, BANK_HI, REDUCE_R2, MOD_2P);
+ uop_calc(MUL, BANK_HI, REDUCE_R2, REDUCE_T_X5, BANK_LO, REDUCE_T_X10, MOD_2P);
-
-
- //fpga_multiword_copy(&REDUCE_T_X10, &REDUCE_R1);
- //for (cyc_count=0; cyc_count<10; cyc_count++)
- //{ if (!(cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
- //else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
- //}
- //fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X10, &REDUCE_T_X20, &X25519_2P);
-
- uop_move (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_X10, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X10,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
-
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+ // T_X20
+ uop_move(BANK_LO, REDUCE_T_X10, REDUCE_T_X10, BANK_HI, REDUCE_R1, REDUCE_R1);
+ uop_move(BANK_LO, REDUCE_T_X10, REDUCE_T_X10, BANK_HI, REDUCE_T_X10, REDUCE_T_X10);
for (cyc_count=0; cyc_count<10; cyc_count++)
- { if (!(cyc_count % 2))
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
- else
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
- }
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_X10, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X20, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X20);
-
-
+ if (!(cyc_count % 2)) uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_R1, BANK_LO, REDUCE_R2, MOD_2P);
+ else uop_calc(MUL, BANK_LO, REDUCE_R2, REDUCE_R2, BANK_HI, REDUCE_R1, MOD_2P);
+ uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_T_X10, BANK_LO, REDUCE_T_X20, MOD_2P);
-
-
- //
- //fpga_multiword_copy(&REDUCE_T_X20, &REDUCE_R1);
- //for (cyc_count=0; cyc_count<20; cyc_count++)
- //{ if (!(cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
- //else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
- //}
- //fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X20, &REDUCE_T_X40, &X25519_2P);
-
-
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X20, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X20,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
-
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X20, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X20,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X20, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_X20);
+ // T_X40
+ uop_move(BANK_LO, REDUCE_T_X20, REDUCE_T_X20, BANK_HI, REDUCE_R1, REDUCE_R1);
+ uop_move(BANK_LO, REDUCE_T_X20, REDUCE_T_X20, BANK_HI, REDUCE_T_X20, REDUCE_T_X20);
for (cyc_count=0; cyc_count<20; cyc_count++)
- { if (!(cyc_count % 2))
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
- else
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
- }
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_T_X20, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X40, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X40);
+ if (!(cyc_count % 2)) uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_R1, BANK_LO, REDUCE_R2, MOD_2P);
+ else uop_calc(MUL, BANK_LO, REDUCE_R2, REDUCE_R2, BANK_HI, REDUCE_R1, MOD_2P);
-
- //
-// fpga_multiword_copy(&REDUCE_T_X40, &REDUCE_R1);
-// for (cyc_count=0; cyc_count<10; cyc_count++)
-// { if (!(cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
-// else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
-// }
-// fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X10, &REDUCE_T_X50, &X25519_2P);
- //
-
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X40, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X40,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
+ uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_T_X20, BANK_LO, REDUCE_T_X40, MOD_2P);
- //uop_move (UOP_BANK_INT_, UOP_SRC_INT_REDUCE_, UOP_BANK_INT_, UOP_SRC_INT_REDUCE_,
- // UOP_BANK_INT_, UOP_DST_INT_REDUCE_, UOP_BANK_INT_, UOP_DST_INT_REDUCE_);
+ // T_X50
+ uop_move(BANK_LO, REDUCE_T_X40, REDUCE_T_X40, BANK_HI, REDUCE_R1, REDUCE_R1);
for (cyc_count=0; cyc_count<10; cyc_count++)
- { if (!(cyc_count % 2))
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
- else
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
- }
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X10,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X50, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X50);
-
-
+ if (!(cyc_count % 2)) uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_R1, BANK_LO, REDUCE_R2, MOD_2P);
+ else uop_calc(MUL, BANK_LO, REDUCE_R2, REDUCE_R2, BANK_HI, REDUCE_R1, MOD_2P);
+ uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_T_X10, BANK_LO, REDUCE_T_X50, MOD_2P);
-
-
-
-
-// fpga_multiword_copy(&REDUCE_T_X50, &REDUCE_R1);
-// for (cyc_count=0; cyc_count<50; cyc_count++)
-// { if (!(cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
-// else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
-// }
-// fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X50, &REDUCE_T_X100, &X25519_2P);
- //
-
-
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X50, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X50,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
-
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X50, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X50,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X50, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_X50);
+ // T_X100
+ uop_move(BANK_LO, REDUCE_T_X50, REDUCE_T_X50, BANK_HI, REDUCE_R1, REDUCE_R1);
+ uop_move(BANK_LO, REDUCE_T_X50, REDUCE_T_X50, BANK_HI, REDUCE_T_X50, REDUCE_T_X50);
for (cyc_count=0; cyc_count<50; cyc_count++)
- { if (!(cyc_count % 2))
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
- else
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
- }
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X50,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X100, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_T_X100);
-
+ if (!(cyc_count % 2)) uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_R1, BANK_LO, REDUCE_R2, MOD_2P);
+ else uop_calc(MUL, BANK_LO, REDUCE_R2, REDUCE_R2, BANK_HI, REDUCE_R1, MOD_2P);
+ uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_T_X50, BANK_LO, REDUCE_T_X100, MOD_2P);
-
-
-
-
-
-
- //fpga_multiword_copy(&REDUCE_T_X100, &REDUCE_R1);
- //for (cyc_count=0; cyc_count<100; cyc_count++)
- //{ if (!(cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
- //else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
- //}
- //
- //fpga_modular_mul(&REDUCE_R1, &REDUCE_T_X100, &REDUCE_R2, &X25519_2P);
- //
-
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X100, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X100,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
-
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_T_X100, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_X100,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X100, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_T_X100);
+ uop_move(BANK_LO, REDUCE_T_X100, REDUCE_T_X100, BANK_HI, REDUCE_R1, REDUCE_R1);
+ uop_move(BANK_LO, REDUCE_T_X100, REDUCE_T_X100, BANK_HI, REDUCE_T_X100, REDUCE_T_X100);
for (cyc_count=0; cyc_count<100; cyc_count++)
- { if (!(cyc_count % 2))
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
- else
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
- }
+ if (!(cyc_count % 2)) uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_R1, BANK_LO, REDUCE_R2, MOD_2P);
+ else uop_calc(MUL, BANK_LO, REDUCE_R2, REDUCE_R2, BANK_HI, REDUCE_R1, MOD_2P);
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X100,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+ uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_T_X100, BANK_LO, REDUCE_R2, MOD_2P);
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
-
-
-
-// for (cyc_count=0; cyc_count<50; cyc_count++)
-// { if ((cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P); // !!! (swapped sides)
-// else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
-// }
- //
for (cyc_count=0; cyc_count<50; cyc_count++)
- { if (!(cyc_count % 2))
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
- else
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
- }
-
-
+ if (!(cyc_count % 2)) uop_calc(MUL, BANK_LO, REDUCE_R2, REDUCE_R2, BANK_HI, REDUCE_R1, MOD_2P);
+ else uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_R1, BANK_LO, REDUCE_R2, MOD_2P);
+ uop_calc(MUL, BANK_LO, REDUCE_R2, REDUCE_T_X50, BANK_HI, REDUCE_R1, MOD_2P);
- //fpga_modular_mul(&REDUCE_R2, &REDUCE_T_X50, &REDUCE_R1, &X25519_2P);
- //fpga_multiword_copy(&REDUCE_R1, &REDUCE_R2);
+ for (cyc_count=0; cyc_count<4; cyc_count++)
+ if (!(cyc_count % 2)) uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_R1, BANK_LO, REDUCE_R2, MOD_2P);
+ else uop_calc(MUL, BANK_LO, REDUCE_R2, REDUCE_R2, BANK_HI, REDUCE_R1, MOD_2P);
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_T_X50,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
-
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
-
-
- //for (cyc_count=0; cyc_count<5; cyc_count++)
- //{ if (!(cyc_count % 2)) fpga_modular_mul(&REDUCE_R1, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
- // else fpga_modular_mul(&REDUCE_R2, &REDUCE_R2, &REDUCE_R1, &X25519_2P);
- //}
-
- for (cyc_count=0; cyc_count<5; cyc_count++)
- { if (!(cyc_count % 2))
- uop_mul (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
- else
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_R2,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
- }
-
-
- // result is in R2
-
- //
- //fpga_modular_mul(&REDUCE_R2, &REDUCE_T_1011, &REDUCE_R1, &X25519_2P);
- //fpga_modular_mul(&LADDER_R0_X, &REDUCE_R1, &REDUCE_R2, &X25519_2P);
-
-
- uop_mul (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_SRC_INT_REDUCE_T_1011,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1);
-
- uop_move (UOP_BANK_INT_A, UOP_SRC_INT_LADDER_R0_X, UOP_BANK_INT_B, UOP_SRC_INT_LADDER_R0_Z,
- UOP_BANK_INT_C, UOP_DST_INT_LADDER_R0_X, UOP_BANK_INT_D, UOP_DST_INT_LADDER_R0_Z);
-
- uop_mul (UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_C, UOP_SRC_INT_LADDER_R0_X,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_INT_B, UOP_DST_INT_REDUCE_R2);
+ uop_calc(MUL, BANK_HI, REDUCE_R1, REDUCE_R1, BANK_LO, REDUCE_R2, MOD_2P);
+ uop_move(BANK_HI, REDUCE_T_1011, REDUCE_T_1011, BANK_LO, REDUCE_T_1011, REDUCE_T_X100);
+ uop_calc(MUL, BANK_LO, REDUCE_R2, REDUCE_T_1011, BANK_HI, REDUCE_R2, MOD_2P);
+ uop_move(BANK_HI, REDUCE_R2, REDUCE_R2, BANK_LO, REDUCE_R2, REDUCE_R2);
+ uop_calc(MUL, BANK_LO, REDUCE_R2, LADDER_R0_X, BANK_HI, REDUCE_R1, MOD_2P);
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_1, &REDUCE_T_1);
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_10, &REDUCE_T_10);
- //bank2buffer(UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_1001, &REDUCE_T_1001);
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_1011, &REDUCE_T_1011);
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X5, &REDUCE_T_X5);
- //bank2buffer(UOP_BANK_INT_C, UOP_DST_INT_REDUCE_T_X10, &REDUCE_T_X10);
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X20, &REDUCE_T_X20);
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X40, &REDUCE_T_X40);
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X50, &REDUCE_T_X50);
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_T_X100, &REDUCE_T_X100);
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, &REDUCE_R2);
-
-
- //bank2buffer(UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, &REDUCE_R2);
-
- //
- //fpga_modular_add(&REDUCE_R2, &X25519_ZERO, QX, &X25519_1P); // 1P!
-
- uop_add (UOP_BANK_INT_A, UOP_SRC_INT_REDUCE_R2, UOP_BANK_EXT, UOP_SRC_EXT_ZERO,
- UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_DST_INT_REDUCE_R1,
- &X25519_1P);
-
- uop_move (UOP_BANK_INT_C, UOP_SRC_INT_REDUCE_R1, UOP_BANK_INT_D, UOP_SRC_INT_REDUCE_R1,
- UOP_BANK_INT_A, UOP_DST_INT_REDUCE_R2, UOP_BANK_EXT, UOP_DST_EXT_Y);
-
- // DST_EXT_Y -> QX
- bank2buffer(UOP_BANK_INT_C, UOP_DST_INT_REDUCE_R1, QX);
+ // finally reduce to just 1*P
+ uop_calc(ADD, BANK_HI, REDUCE_R1, CONST_ZERO, BANK_LO, REDUCE_R2, MOD_1P); // !!!
+ uop_stor(BANK_LO, REDUCE_R2, QX);
}
//------------------------------------------------------------------------------
-static void uop_move (UOP_BANK src_bank_x, UOP_SRC_OPERAND src_operand_x,
- UOP_BANK src_bank_y, UOP_SRC_OPERAND src_operand_y,
- UOP_BANK dst_bank_x, UOP_DST_OPERAND dst_operand_x,
- UOP_BANK dst_bank_y, UOP_DST_OPERAND dst_operand_y)
+static void uop_move (UOP_BANK src, UOP_OPERAND s_op1, UOP_OPERAND s_op2,
+ UOP_BANK dst, UOP_OPERAND d_op1, UOP_OPERAND d_op2)
//------------------------------------------------------------------------------
{
- if ((src_bank_x != UOP_BANK_EXT) && (src_bank_x == src_bank_y)) uop_fatal("ERROR: uop_move(): src_bank_x == src_bank_y!");
- if ((src_bank_y != UOP_BANK_EXT) && (src_bank_y == src_bank_x)) uop_fatal("ERROR: uop_move(): src_bank_y == src_bank_x!");
-
- if (dst_bank_x == dst_bank_y) uop_fatal("ERROR: uop_move(): dst_bank_x == dst_bank_y!");
+ FPGA_BUFFER *s_ptr1 = NULL;
+ FPGA_BUFFER *s_ptr2 = NULL;
+ FPGA_BUFFER *d_ptr1 = NULL;
+ FPGA_BUFFER *d_ptr2 = NULL;
- if (src_bank_x == dst_bank_x) uop_fatal("ERROR: uop_move(): src_bank_x == dst_bank_x!");
- if (src_bank_x == dst_bank_y) uop_fatal("ERROR: uop_move(): src_bank_x == dst_bank_y!");
-
- if (src_bank_y == dst_bank_x) uop_fatal("ERROR: uop_move(): src_bank_y == dst_bank_x!");
- if (src_bank_y == dst_bank_y) uop_fatal("ERROR: uop_move(): src_bank_y == dst_bank_y!");
-
- FPGA_BUFFER *ptr_src_x, *ptr_dst_x;
- FPGA_BUFFER *ptr_src_y, *ptr_dst_y;
-
- if (src_bank_x != UOP_BANK_EXT) ptr_src_x = &BANK_INT[src_bank_x][src_operand_x];
- if (src_bank_y != UOP_BANK_EXT) ptr_src_y = &BANK_INT[src_bank_y][src_operand_y];
-
- if (src_bank_x == UOP_BANK_EXT)
- { switch(src_operand_x)
- { case UOP_SRC_EXT_ZERO: ptr_src_x = &X25519_ZERO; break;
- case UOP_SRC_EXT_ONE: ptr_src_x = &X25519_ONE; break;
- case UOP_SRC_EXT_X: ptr_src_x = &BANK_EXT_X; break;
- case UOP_SRC_EXT_A24: ptr_src_x = &X25519_A24; break;
- }
+ // same bank?
+ if (src == dst) uop_fatal("ERROR: uop_move(): src == dst");
+
+ // same operands?
+ //if (s_op1 == d_op1) uop_fatal("ERROR: uop_move(): s_op1 == s_op2");
+ //if (d_op1 == d_op2) uop_fatal("ERROR: uop_move(): d_op1 == d_op2");
+
+ // source filled?
+ if (src == BANK_LO)
+ { if (!buf_flag_lo[s_op1])
+ uop_fatal("ERROR: uop_move(): !buf_flag_lo[s_op1]");
+ if (!buf_flag_lo[s_op2])
+ uop_fatal("ERROR: uop_move(): !buf_flag_lo[s_op2]");
+ s_ptr1 = &BUF_LO[s_op1];
+ s_ptr2 = &BUF_LO[s_op2];
}
-
- if (src_bank_y == UOP_BANK_EXT)
- { switch(src_operand_y)
- { case UOP_SRC_EXT_ZERO: ptr_src_y = &X25519_ZERO; break;
- case UOP_SRC_EXT_ONE: ptr_src_y = &X25519_ONE; break;
- case UOP_SRC_EXT_X: ptr_src_y = &BANK_EXT_X; break;
- case UOP_SRC_EXT_A24: ptr_src_y = &X25519_A24; break;
- }
+ if (src == BANK_HI)
+ { if (!buf_flag_hi[s_op1])
+ uop_fatal("ERROR: uop_move(): !buf_flag_hi[s_op1]");
+ if (!buf_flag_hi[s_op2])
+ uop_fatal("ERROR: uop_move(): !buf_flag_hi[s_op2]");
+ s_ptr1 = &BUF_HI[s_op1];
+ s_ptr2 = &BUF_HI[s_op2];
}
- if (dst_bank_x != UOP_BANK_EXT) ptr_dst_x = &BANK_INT[dst_bank_x][dst_operand_x];
- if (dst_bank_y != UOP_BANK_EXT) ptr_dst_y = &BANK_INT[dst_bank_y][dst_operand_y];
-
- if (dst_bank_x == UOP_BANK_EXT)
- { switch(dst_operand_x)
- { case UOP_DST_EXT_Y: ptr_dst_x = &BANK_EXT_Y; break;
- }
+ if (d_op1 == CONST_ZERO) uop_fatal("ERROR: uop_move(): d_op1 == CONST_ZERO");
+ if (d_op2 == CONST_ZERO) uop_fatal("ERROR: uop_move(): d_op2 == CONST_ZERO");
+ if (d_op1 == CONST_ONE) uop_fatal("ERROR: uop_move(): d_op1 == CONST_ONE");
+ if (d_op2 == CONST_ONE) uop_fatal("ERROR: uop_move(): d_op2 == CONST_ONE");
+ if (d_op1 == CONST_A24) uop_fatal("ERROR: uop_move(): d_op1 == CONST_A24");
+ if (d_op2 == CONST_A24) uop_fatal("ERROR: uop_move(): d_op2 == CONST_A24");
+
+ if (dst == BANK_LO)
+ { buf_flag_lo[d_op1] = true;
+ buf_flag_lo[d_op2] = true;
+ d_ptr1 = &BUF_LO[d_op1];
+ d_ptr2 = &BUF_LO[d_op2];
}
-
- if (dst_bank_y == UOP_BANK_EXT)
- { switch(dst_operand_y)
- { case UOP_DST_EXT_Y: ptr_dst_y = &BANK_EXT_Y; break;
- }
+ if (dst == BANK_HI)
+ { buf_flag_hi[d_op1] = true;
+ buf_flag_hi[d_op2] = true;
+ d_ptr1 = &BUF_HI[d_op1];
+ d_ptr2 = &BUF_HI[d_op2];
}
- fpga_multiword_copy(ptr_src_x, ptr_dst_x);
- fpga_multiword_copy(ptr_src_y, ptr_dst_y);
-
- if (dst_bank_x != UOP_BANK_EXT) bank_flags[dst_bank_x][dst_operand_x] = true;
- if (dst_bank_y != UOP_BANK_EXT) bank_flags[dst_bank_y][dst_operand_y] = true;
+ fpga_multiword_copy(s_ptr1, d_ptr1);
+ fpga_multiword_copy(s_ptr2, d_ptr2);
}
//------------------------------------------------------------------------------
-static void uop_add (UOP_BANK src_bank_x, UOP_SRC_OPERAND src_operand_x,
- UOP_BANK src_bank_y, UOP_SRC_OPERAND src_operand_y,
- UOP_BANK dst_bank_x, UOP_DST_OPERAND dst_operand_x,
- UOP_BANK dst_bank_y, UOP_DST_OPERAND dst_operand_y,
- FPGA_BUFFER *modulus)
+static void uop_calc (UOP_MATH math,
+ UOP_BANK src, UOP_OPERAND s_op1, UOP_OPERAND s_op2,
+ UOP_BANK dst, UOP_OPERAND d_op,
+ UOP_MODULUS mod)
//------------------------------------------------------------------------------
{
- if ((src_bank_x != UOP_BANK_EXT) && (src_bank_x == src_bank_y)) uop_fatal("ERROR: uop_move(): src_bank_x == src_bank_y!");
- if ((src_bank_y != UOP_BANK_EXT) && (src_bank_y == src_bank_x)) uop_fatal("ERROR: uop_move(): src_bank_y == src_bank_x!");
-
- if (dst_bank_x == dst_bank_y) uop_fatal("ERROR: uop_move(): dst_bank_x == dst_bank_y!");
-
- if (src_bank_x == dst_bank_x) uop_fatal("ERROR: uop_move(): src_bank_x == dst_bank_x!");
- if (src_bank_x == dst_bank_y) uop_fatal("ERROR: uop_move(): src_bank_x == dst_bank_y!");
-
- if (src_bank_y == dst_bank_x) uop_fatal("ERROR: uop_move(): src_bank_y == dst_bank_x!");
- if (src_bank_y == dst_bank_y) uop_fatal("ERROR: uop_move(): src_bank_y == dst_bank_y!");
-
- FPGA_BUFFER *ptr_src_x, *ptr_dst_x;
- FPGA_BUFFER *ptr_src_y, *ptr_dst_y;
-
- if (src_bank_x != UOP_BANK_EXT) ptr_src_x = &BANK_INT[src_bank_x][src_operand_x];
- if (src_bank_y != UOP_BANK_EXT) ptr_src_y = &BANK_INT[src_bank_y][src_operand_y];
-
- if (src_bank_x == UOP_BANK_EXT)
- { switch(src_operand_x)
- { case UOP_SRC_EXT_ZERO: ptr_src_x = &X25519_ZERO; break;
- case UOP_SRC_EXT_ONE: ptr_src_x = &X25519_ONE; break;
- case UOP_SRC_EXT_X: ptr_src_x = &BANK_EXT_X; break;
- case UOP_SRC_EXT_A24: ptr_src_x = &X25519_A24; break;
- }
+ FPGA_BUFFER *s_ptr1 = NULL;
+ FPGA_BUFFER *s_ptr2 = NULL;
+ FPGA_BUFFER *d_ptr = NULL;
+ FPGA_BUFFER *n_ptr = NULL;
+
+ // same bank?
+ if (src == dst)
+ uop_fatal("ERROR: uop_calc(): src == dst");
+
+ // same operands?
+ //if (s_op1 == s_op2)
+ //uop_fatal("ERROR: uop_calc(): s_op1 == s_op2");
+
+ // sources filled?
+ if (src == BANK_LO)
+ { if (!buf_flag_lo[s_op1])
+ uop_fatal("ERROR: uop_calc(): !buf_flag_lo[s_op1]");
+ if (!buf_flag_lo[s_op2])
+ uop_fatal("ERROR: uop_calc(): !buf_flag_lo[s_op2]");
+ s_ptr1 = &BUF_LO[s_op1];
+ s_ptr2 = &BUF_LO[s_op2];
}
-
- if (src_bank_y == UOP_BANK_EXT)
- { switch(src_operand_y)
- { case UOP_SRC_EXT_ZERO: ptr_src_y = &X25519_ZERO; break;
- case UOP_SRC_EXT_ONE: ptr_src_y = &X25519_ONE; break;
- case UOP_SRC_EXT_X: ptr_src_y = &BANK_EXT_X; break;
- case UOP_SRC_EXT_A24: ptr_src_y = &X25519_A24; break;
- }
+ if (src == BANK_HI)
+ { if (!buf_flag_hi[s_op1])
+ uop_fatal("ERROR: uop_calc(): !buf_flag_hi[s_op1]");
+ if (!buf_flag_hi[s_op2])
+ uop_fatal("ERROR: uop_calc(): !buf_flag_hi[s_op2]");
+ s_ptr1 = &BUF_HI[s_op1];
+ s_ptr2 = &BUF_HI[s_op2];
}
- if (dst_bank_x != UOP_BANK_EXT) ptr_dst_x = &BANK_INT[dst_bank_x][dst_operand_x];
- if (dst_bank_y != UOP_BANK_EXT) ptr_dst_y = &BANK_INT[dst_bank_y][dst_operand_y];
+ if (d_op == CONST_ZERO) uop_fatal("ERROR: uop_calc(): d_op == CONST_ZERO");
+ if (d_op == CONST_ONE) uop_fatal("ERROR: uop_calc(): d_op == CONST_ONE");
+ if (d_op == CONST_A24) uop_fatal("ERROR: uop_calc(): d_op == CONST_A24");
- if (dst_bank_x == UOP_BANK_EXT)
- { switch(dst_operand_x)
- { case UOP_DST_EXT_Y: ptr_dst_x = &BANK_EXT_Y; break;
- }
+ if (dst == BANK_LO)
+ { buf_flag_lo[d_op] = true;
+ d_ptr = &BUF_LO[d_op];
}
-
- if (dst_bank_y == UOP_BANK_EXT)
- { switch(dst_operand_y)
- { case UOP_DST_EXT_Y: ptr_dst_y = &BANK_EXT_Y; break;
- }
+ if (dst == BANK_HI)
+ { buf_flag_hi[d_op] = true;
+ d_ptr = &BUF_HI[d_op];
}
- FPGA_BUFFER S;
- fpga_modular_add(ptr_src_x, ptr_src_y, &S, modulus);
- fpga_multiword_copy(&S, ptr_dst_x);
- fpga_multiword_copy(&S, ptr_dst_y);
+ if (mod == MOD_1P) n_ptr = &X25519_1P;
+ if (mod == MOD_2P) n_ptr = &X25519_2P;
- if (dst_bank_x != UOP_BANK_EXT) bank_flags[dst_bank_x][dst_operand_x] = true;
- if (dst_bank_y != UOP_BANK_EXT) bank_flags[dst_bank_y][dst_operand_y] = true;
+ if (math == ADD) fpga_modular_add(s_ptr1, s_ptr2, d_ptr, n_ptr);
+ if (math == SUB) fpga_modular_sub(s_ptr1, s_ptr2, d_ptr, n_ptr);
+ if (math == MUL) fpga_modular_mul(s_ptr1, s_ptr2, d_ptr, n_ptr);
}
//------------------------------------------------------------------------------
-static void uop_sub (UOP_BANK src_bank_x, UOP_SRC_OPERAND src_operand_x,
- UOP_BANK src_bank_y, UOP_SRC_OPERAND src_operand_y,
- UOP_BANK dst_bank_x, UOP_DST_OPERAND dst_operand_x,
- UOP_BANK dst_bank_y, UOP_DST_OPERAND dst_operand_y,
- FPGA_BUFFER *modulus)
+static void uop_load(FPGA_BUFFER *mem, UOP_BANK dst, UOP_OPERAND d_op)
//------------------------------------------------------------------------------
{
- if ((src_bank_x != UOP_BANK_EXT) && (src_bank_x == src_bank_y)) uop_fatal("ERROR: uop_move(): src_bank_x == src_bank_y!");
- if ((src_bank_y != UOP_BANK_EXT) && (src_bank_y == src_bank_x)) uop_fatal("ERROR: uop_move(): src_bank_y == src_bank_x!");
-
- if (dst_bank_x == dst_bank_y) uop_fatal("ERROR: uop_move(): dst_bank_x == dst_bank_y!");
-
- if (src_bank_x == dst_bank_x) uop_fatal("ERROR: uop_move(): src_bank_x == dst_bank_x!");
- if (src_bank_x == dst_bank_y) uop_fatal("ERROR: uop_move(): src_bank_x == dst_bank_y!");
-
- if (src_bank_y == dst_bank_x) uop_fatal("ERROR: uop_move(): src_bank_y == dst_bank_x!");
- if (src_bank_y == dst_bank_y) uop_fatal("ERROR: uop_move(): src_bank_y == dst_bank_y!");
-
- FPGA_BUFFER *ptr_src_x, *ptr_dst_x;
- FPGA_BUFFER *ptr_src_y, *ptr_dst_y;
-
- if (src_bank_x != UOP_BANK_EXT) ptr_src_x = &BANK_INT[src_bank_x][src_operand_x];
- if (src_bank_y != UOP_BANK_EXT) ptr_src_y = &BANK_INT[src_bank_y][src_operand_y];
-
- if (src_bank_x == UOP_BANK_EXT)
- { switch(src_operand_x)
- { case UOP_SRC_EXT_ZERO: ptr_src_x = &X25519_ZERO; break;
- case UOP_SRC_EXT_ONE: ptr_src_x = &X25519_ONE; break;
- case UOP_SRC_EXT_X: ptr_src_x = &BANK_EXT_X; break;
- case UOP_SRC_EXT_A24: ptr_src_x = &X25519_A24; break;
- }
+ if (d_op == CONST_ZERO) uop_fatal("ERROR: uop_load(): d_op1 == CONST_ZERO");
+ if (d_op == CONST_ONE) uop_fatal("ERROR: uop_load(): d_op1 == CONST_ONE");
+ if (d_op == CONST_A24) uop_fatal("ERROR: uop_load(): d_op1 == CONST_A24");
+
+ FPGA_BUFFER *d_ptr = NULL;
+ if (dst == BANK_LO)
+ { d_ptr = &BUF_LO[d_op];
+ buf_flag_lo[d_op] = true;
}
-
- if (src_bank_y == UOP_BANK_EXT)
- { switch(src_operand_y)
- { case UOP_SRC_EXT_ZERO: ptr_src_y = &X25519_ZERO; break;
- case UOP_SRC_EXT_ONE: ptr_src_y = &X25519_ONE; break;
- case UOP_SRC_EXT_X: ptr_src_y = &BANK_EXT_X; break;
- case UOP_SRC_EXT_A24: ptr_src_y = &X25519_A24; break;
- }
+ if (dst == BANK_HI)
+ { d_ptr = &BUF_HI[d_op];
+ buf_flag_hi[d_op] = true;
}
- if (dst_bank_x != UOP_BANK_EXT) ptr_dst_x = &BANK_INT[dst_bank_x][dst_operand_x];
- if (dst_bank_y != UOP_BANK_EXT) ptr_dst_y = &BANK_INT[dst_bank_y][dst_operand_y];
-
- if (dst_bank_x == UOP_BANK_EXT)
- { switch(dst_operand_x)
- { case UOP_DST_EXT_Y: ptr_dst_x = &BANK_EXT_Y; break;
- }
- }
-
- if (dst_bank_y == UOP_BANK_EXT)
- { switch(dst_operand_y)
- { case UOP_DST_EXT_Y: ptr_dst_y = &BANK_EXT_Y; break;
- }
- }
-
- FPGA_BUFFER D;
- fpga_modular_sub(ptr_src_x, ptr_src_y, &D, modulus);
- fpga_multiword_copy(&D, ptr_dst_x);
- fpga_multiword_copy(&D, ptr_dst_y);
-
- if (dst_bank_x != UOP_BANK_EXT) bank_flags[dst_bank_x][dst_operand_x] = true;
- if (dst_bank_y != UOP_BANK_EXT) bank_flags[dst_bank_y][dst_operand_y] = true;
+ fpga_multiword_copy(mem, d_ptr);
}
//------------------------------------------------------------------------------
-static void uop_mul (UOP_BANK src_bank_x, UOP_SRC_OPERAND src_operand_x,
- UOP_BANK src_bank_y, UOP_SRC_OPERAND src_operand_y,
- UOP_BANK dst_bank_x, UOP_DST_OPERAND dst_operand_x,
- UOP_BANK dst_bank_y, UOP_DST_OPERAND dst_operand_y)
+static void uop_stor(UOP_BANK src, UOP_OPERAND s_op, FPGA_BUFFER *mem)
//------------------------------------------------------------------------------
{
- if ((src_bank_x != UOP_BANK_EXT) && (src_bank_x == src_bank_y)) uop_fatal("ERROR: uop_move(): src_bank_x == src_bank_y!");
- if ((src_bank_y != UOP_BANK_EXT) && (src_bank_y == src_bank_x)) uop_fatal("ERROR: uop_move(): src_bank_y == src_bank_x!");
-
- if (dst_bank_x == dst_bank_y) uop_fatal("ERROR: uop_move(): dst_bank_x == dst_bank_y!");
-
- if (src_bank_x == dst_bank_x) uop_fatal("ERROR: uop_move(): src_bank_x == dst_bank_x!");
- if (src_bank_x == dst_bank_y) uop_fatal("ERROR: uop_move(): src_bank_x == dst_bank_y!");
-
- if (src_bank_y == dst_bank_x) uop_fatal("ERROR: uop_move(): src_bank_y == dst_bank_x!");
- if (src_bank_y == dst_bank_y) uop_fatal("ERROR: uop_move(): src_bank_y == dst_bank_y!");
-
- FPGA_BUFFER *ptr_src_x, *ptr_dst_x;
- FPGA_BUFFER *ptr_src_y, *ptr_dst_y;
-
- if (src_bank_x != UOP_BANK_EXT) ptr_src_x = &BANK_INT[src_bank_x][src_operand_x];
- if (src_bank_y != UOP_BANK_EXT) ptr_src_y = &BANK_INT[src_bank_y][src_operand_y];
-
- if (src_bank_x == UOP_BANK_EXT)
- { switch(src_operand_x)
- { case UOP_SRC_EXT_ZERO: ptr_src_x = &X25519_ZERO; break;
- case UOP_SRC_EXT_ONE: ptr_src_x = &X25519_ONE; break;
- case UOP_SRC_EXT_X: ptr_src_x = &BANK_EXT_X; break;
- case UOP_SRC_EXT_A24: ptr_src_x = &X25519_A24; break;
- }
- }
-
- if (src_bank_y == UOP_BANK_EXT)
- { switch(src_operand_y)
- { case UOP_SRC_EXT_ZERO: ptr_src_y = &X25519_ZERO; break;
- case UOP_SRC_EXT_ONE: ptr_src_y = &X25519_ONE; break;
- case UOP_SRC_EXT_X: ptr_src_y = &BANK_EXT_X; break;
- case UOP_SRC_EXT_A24: ptr_src_y = &X25519_A24; break;
- }
- }
-
- if (dst_bank_x != UOP_BANK_EXT) ptr_dst_x = &BANK_INT[dst_bank_x][dst_operand_x];
- if (dst_bank_y != UOP_BANK_EXT) ptr_dst_y = &BANK_INT[dst_bank_y][dst_operand_y];
-
- if (dst_bank_x == UOP_BANK_EXT)
- { switch(dst_operand_x)
- { case UOP_DST_EXT_Y: ptr_dst_x = &BANK_EXT_Y; break;
- }
+ FPGA_BUFFER *s_ptr = NULL;
+ if (src == BANK_LO)
+ { if (!buf_flag_lo[s_op])
+ uop_fatal("ERROR: uop_stor(): !buf_flag_lo[s_op]");
+ s_ptr = &BUF_LO[s_op];
+ buf_flag_lo[s_op] = true;
}
-
- if (dst_bank_y == UOP_BANK_EXT)
- { switch(dst_operand_y)
- { case UOP_DST_EXT_Y: ptr_dst_y = &BANK_EXT_Y; break;
- }
+ if (src == BANK_HI)
+ { if (!buf_flag_hi[s_op])
+ uop_fatal("ERROR: uop_stor(): !buf_flag_hi[s_op]");
+ s_ptr = &BUF_HI[s_op];
+ buf_flag_hi[s_op] = true;
}
- FPGA_BUFFER P;
- fpga_modular_mul(ptr_src_x, ptr_src_y, &P, &X25519_2P);
- fpga_multiword_copy(&P, ptr_dst_x);
- fpga_multiword_copy(&P, ptr_dst_y);
-
- if (dst_bank_x != UOP_BANK_EXT) bank_flags[dst_bank_x][dst_operand_x] = true;
- if (dst_bank_y != UOP_BANK_EXT) bank_flags[dst_bank_y][dst_operand_y] = true;
-}
-
-
-//------------------------------------------------------------------------------
-static void bank2buffer(UOP_BANK bank, UOP_DST_OPERAND operand, FPGA_BUFFER *buffer)
-//------------------------------------------------------------------------------
-{
- if (bank == UOP_BANK_EXT) uop_fatal("ERROR: bank2buffer(): bank == UOP_BANK_EXT!");
- if (operand == UOP_DST_EXT_Y) uop_fatal("ERROR: bank2buffer(): operand == UOP_DST_EXT_Y!");
- if (!bank_flags[bank][operand])
- uop_fatal("ERROR: bank2buffer(): !bank_flags[bank][operand]!");
-
- fpga_multiword_copy(&BANK_INT[bank][operand], buffer);
-}
-
-
-//------------------------------------------------------------------------------
-static void buffer2bank(FPGA_BUFFER *buffer, UOP_BANK bank, UOP_SRC_OPERAND operand)
-//------------------------------------------------------------------------------
-{
- if (bank == UOP_BANK_EXT) uop_fatal("ERROR: buffer2bank(): bank == UOP_BANK_EXT!");
- if (operand == UOP_SRC_EXT_ZERO) uop_fatal("ERROR: buffer2bank(): operand == UOP_SRC_EXT_ZERO!");
- if (operand == UOP_SRC_EXT_ONE) uop_fatal("ERROR: buffer2bank(): operand == UOP_SRC_EXT_ONE!");
- if (operand == UOP_SRC_EXT_X) uop_fatal("ERROR: buffer2bank(): operand == UOP_SRC_EXT_X!");
-
- fpga_multiword_copy(buffer, &BANK_INT[bank][operand]);
+ fpga_multiword_copy(s_ptr, mem);
}
More information about the Commits mailing list