[Cryptech-Commits] [user/shatov/ecdsa_fpga_model] 03/06: * Microcode layer redesigned to take advantage of Montgomery ladder architecture. Instead of R and S there are now two working ("cycle") registers R0 and R1. After every cycle R0+R1 is placed in register S ("sum"), and either 2*R0 or 2*R1 (depending on the current multiplier bit) is placed in register T. Then the working variables are updated; the final result ends up in R0.
git at cryptech.is
git at cryptech.is
Sun Apr 11 14:46:53 UTC 2021
This is an automated email from the git hooks/post-receive script.
meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/ecdsa_fpga_model.
commit 516ca870fd3ad2a87e0ac56f0d453667e021c52d
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Sun Apr 11 17:21:36 2021 +0300
* Microcode layer redesigned to take advantage of Montgomery ladder
architecture. Instead of R and S there are now two working ("cycle") registers
R0 and R1. After every cycle R0+R1 is placed in register S ("sum"), and either
2*R0 or 2*R1 (depending on the current multiplier bit) is placed in register T.
Then the working variables are updated; the final result ends up in R0.
* Due to the change of working registers, modular inversion routines were
updated accordingly.
* Added optional debugging output of micro-operation results (enabled by defining DUMP_UOP_OUTPUTS).
---
ecdsa_fpga_microcode.cpp | 49 ++++++++++++------------
ecdsa_fpga_microcode.h | 96 ++++++++++++++++++++++--------------------------
2 files changed, 68 insertions(+), 77 deletions(-)
diff --git a/ecdsa_fpga_microcode.cpp b/ecdsa_fpga_microcode.cpp
index f02dc8a..2171ac2 100644
--- a/ecdsa_fpga_microcode.cpp
+++ b/ecdsa_fpga_microcode.cpp
@@ -59,10 +59,8 @@ FPGA_BUFFER BUF_HI[ECDSA_UOP_OPERAND_COUNT];
//------------------------------------------------------------------------------
// Global Flags
//------------------------------------------------------------------------------
-bool uop_flagz_sz;
-bool uop_flagz_rz;
-bool uop_flagz_e;
-bool uop_flagz_f;
+bool uop_flagz_r0z;
+bool uop_flagz_r1z;
//------------------------------------------------------------------------------
@@ -96,17 +94,11 @@ void uop_cmpz(UOP_BANK src, int s_op)
switch (s_op)
{
- case CYCLE_SZ:
- uop_flagz_sz = flagz;
+ case CYCLE_R0Z:
+ uop_flagz_r0z = flagz;
break;
- case CYCLE_RZ:
- uop_flagz_rz = flagz;
- break;
- case CYCLE_E:
- uop_flagz_e = flagz;
- break;
- case CYCLE_F:
- uop_flagz_f = flagz;
+ case CYCLE_R1Z:
+ uop_flagz_r1z = flagz;
break;
}
}
@@ -141,6 +133,13 @@ void uop_calc(UOP_MATH math,
if (math == ADD) fpga_modular_add(s_ptr1, s_ptr2, d_ptr);
if (math == SUB) fpga_modular_sub(s_ptr1, s_ptr2, d_ptr);
if (math == MUL) fpga_modular_mul(s_ptr1, s_ptr2, d_ptr);
+
+#ifdef DUMP_UOP_OUTPUTS
+ if (math == ADD) dump_uop_output("ADD", d_ptr);
+ if (math == SUB) dump_uop_output("SUB", d_ptr);
+ if (math == MUL) dump_uop_output("MUL", d_ptr);
+#endif
+
}
@@ -201,16 +200,16 @@ void fpga_modular_inv23_p256_microcode()
// first obtain intermediate helper quantities (X#)
// mirror X1 to HI bank (don't waste time copying to X1, just use RZ)
- uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ);
+ uop_move(BANK_LO, CYCLE_R0Z, BANK_HI, CYCLE_R0Z);
// compute X2 and mirror to the other bank
- uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1);
- uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2);
+ uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_R0Z, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, CYCLE_R0Z, INVERT_R1, BANK_LO, INVERT_X2);
uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2);
// compute X3 and mirror to the other bank
uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1);
- uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3);
+ uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_R0Z, BANK_LO, INVERT_X3);
uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3);
// compute X6 (stored in the lower bank)
@@ -257,7 +256,7 @@ void fpga_modular_inv23_p256_microcode()
uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
uop_repeat();
- uop_calc(MUL, BANK_LO, INVERT_R2, CYCLE_RZ, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R2, CYCLE_R0Z, BANK_HI, INVERT_R1);
uop_cycle(128);
uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
@@ -287,7 +286,7 @@ void fpga_modular_inv23_p256_microcode()
// A3 ends up in the upper bank by itself
uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1);
- uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3);
+ uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_R0Z, BANK_HI, INVERT_A3);
/* END_MICROCODE */
}
@@ -322,16 +321,16 @@ void fpga_modular_inv23_p384_microcode()
// first obtain intermediate helper quantities (X#)
// mirror X1 to HI bank (don't waste time copying to X1, just use RZ)
- uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ);
+ uop_move(BANK_LO, CYCLE_R0Z, BANK_HI, CYCLE_R0Z);
// compute X2 and mirror to the other bank
- uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1);
- uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2);
+ uop_calc(MUL, BANK_LO, CYCLE_R0Z, CYCLE_R0Z, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, CYCLE_R0Z, INVERT_R1, BANK_LO, INVERT_X2);
uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2);
// compute X3 and mirror to the other bank
uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1);
- uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3);
+ uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_R0Z, BANK_LO, INVERT_X3);
uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3);
// compute X6 (stored in the lower bank)
@@ -421,7 +420,7 @@ void fpga_modular_inv23_p384_microcode()
// A3 ends up in the upper bank by itself
uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1);
- uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3);
+ uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_R0Z, BANK_HI, INVERT_A3);
/* END_MICROCODE */
}
diff --git a/ecdsa_fpga_microcode.h b/ecdsa_fpga_microcode.h
index f551d96..32e061e 100644
--- a/ecdsa_fpga_microcode.h
+++ b/ecdsa_fpga_microcode.h
@@ -57,56 +57,50 @@ enum UOP_OPERAND
CONST_ZERO, // 0
CONST_ONE, // 1
CONST_DELTA, // 2
-
+
CONST_GX, // 3
CONST_GY, // 4
-
- CONST_HX, // 5
- CONST_HY, // 6
-
- CYCLE_RX, // 7
- CYCLE_RY, // 8
- CYCLE_RZ, // 9
-
- CYCLE_SX, // 10
- CYCLE_SY, // 11
- CYCLE_SZ, // 12
-
- CYCLE_A, // 13
- CYCLE_A2, // 14
- CYCLE_B, // 15
- CYCLE_C, // 16
- CYCLE_C2, // 17
- CYCLE_C2_2, // 18
- CYCLE_D, // 19
- CYCLE_E, // 20
- CYCLE_F, // 21
- CYCLE_G, // 22
- CYCLE_H, // 23
- CYCLE_J, // 24
-
- CYCLE_Z2, // 25
-
- CYCLE_T1, // 26
- CYCLE_T2, // 27
- CYCLE_T3, // 28
- CYCLE_T4, // 29
-
- INVERT_R1, // 30
- INVERT_R2, // 31
-
- INVERT_X2, // 32
- INVERT_X3, // 33
- INVERT_X6, // 34
- INVERT_X12, // 35
- INVERT_X15, // 36
- INVERT_X30, // 37
- INVERT_X32, // 38
- INVERT_X60, // 39
- INVERT_X120, // 40
-
- INVERT_A2, // 41
- INVERT_A3, // 42
+
+ CYCLE_R0X, // 5
+ CYCLE_R0Y, // 6
+ CYCLE_R0Z, // 7
+
+ CYCLE_R1X, // 8
+ CYCLE_R1Y, // 9
+ CYCLE_R1Z, // 10
+
+ CYCLE_SX, // 11
+ CYCLE_SY, // 12
+ CYCLE_SZ, // 13
+
+ CYCLE_TX, // 14
+ CYCLE_TY, // 15
+ CYCLE_TZ, // 16
+
+ CYCLE_T1, // 17
+ CYCLE_T2, // 18
+ CYCLE_T3, // 19
+ CYCLE_T4, // 20
+ CYCLE_T5, // 21
+ CYCLE_T6, // 22
+ CYCLE_T7, // 23
+ CYCLE_T8, // 24
+
+ INVERT_R1, // 25
+ INVERT_R2, // 26
+
+ INVERT_X2, // 27
+ INVERT_X3, // 28
+ INVERT_X6, // 29
+ INVERT_X12, // 30
+ INVERT_X15, // 31
+ INVERT_X30, // 32
+ INVERT_X32, // 33
+ INVERT_X60, // 34
+ INVERT_X120, // 35
+
+ INVERT_A2, // 36
+ INVERT_A3, // 37
ECDSA_UOP_OPERAND_COUNT
};
@@ -129,10 +123,8 @@ extern FPGA_BUFFER BUF_HI[ECDSA_UOP_OPERAND_COUNT];
//------------------------------------------------------------------------------
// Global Flags
//------------------------------------------------------------------------------
-extern bool uop_flagz_sz;
-extern bool uop_flagz_rz;
-extern bool uop_flagz_e;
-extern bool uop_flagz_f;
+extern bool uop_flagz_r0z;
+extern bool uop_flagz_r1z;
//------------------------------------------------------------------------------
More information about the Commits
mailing list