[Cryptech-Commits] [user/shatov/ecdsa_fpga_model] branch master updated: * New hardware architecture
git at cryptech.is
git at cryptech.is
Thu Dec 20 10:49:16 UTC 2018
This is an automated email from the git hooks/post-receive script.
meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/ecdsa_fpga_model.
The following commit(s) were added to refs/heads/master by this push:
new 1f8d13b * New hardware architecture
1f8d13b is described below
commit 1f8d13bf8d2e813f0c5da653c4abffb7a817db9a
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Wed Dec 19 16:03:08 2018 +0300
* New hardware architecture
* Randomized test vector
---
ecdsa_fpga_curve.h | 203 +++++++
ecdsa_fpga_curve_abstract.cpp | 313 ++++++++++
ecdsa_fpga_curve_microcode.cpp | 494 +++++++++++++++
fpga_lowlevel.cpp => ecdsa_fpga_lowlevel.cpp | 272 +++++----
fpga_lowlevel.h => ecdsa_fpga_lowlevel.h | 159 +++--
ecdsa_fpga_microcode.cpp | 432 +++++++++++++
fpga_lowlevel.cpp => ecdsa_fpga_microcode.h | 301 ++++-----
ecdsa_fpga_model.cpp | 539 +++++++++++++++++
fpga_lowlevel.cpp => ecdsa_fpga_model.h | 278 ++++-----
ecdsa_fpga_modular.cpp | 723 ++++++++++++++++++++++
fpga_lowlevel.cpp => ecdsa_fpga_modular.h | 281 ++++-----
fpga_modular.h => ecdsa_fpga_multiword.cpp | 194 +++---
fpga_util.cpp => ecdsa_fpga_multiword.h | 177 +++---
ecdsa_microcode_parser.py | 694 +++++++++++++++++++++
ecdsa_model.h | 205 -------
ecdsa_model_fpga.cpp | 414 -------------
fpga_curve.cpp | 340 -----------
fpga_curve.h | 61 --
fpga_modular.cpp | 838 --------------------------
fpga_util.h | 49 --
test_vectors/charlie_p256.key | 8 +
test_vectors/charlie_p384.key | 9 +
test_vectors/ecdsa_test_vector_nsa.h | 56 ++
test_vectors/ecdsa_test_vector_randomized.h | 29 +
test_vectors/ecdsa_test_vector_randomized.vh | 29 +
test_vectors/format_random_test_vector.py | 327 ++++++++++
test_vectors/regenerate_random_test_vector.py | 91 +++
27 files changed, 4808 insertions(+), 2708 deletions(-)
diff --git a/ecdsa_fpga_curve.h b/ecdsa_fpga_curve.h
new file mode 100644
index 0000000..00448eb
--- /dev/null
+++ b/ecdsa_fpga_curve.h
@@ -0,0 +1,203 @@
+//------------------------------------------------------------------------------
+//
+// ecdsa_fpga_curve.h
+// ----------------------------------------------
+// Elliptic curve arithmetic procedures for ECDSA
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// ECDSA Parameters (P-256)
+//------------------------------------------------------------------------------
+
+/* Base Point */
+#define ECDSA_P256_GX_INIT \
+ {0x6b17d1f2, 0xe12c4247, 0xf8bce6e5, 0x63a440f2, \
+ 0x77037d81, 0x2deb33a0, 0xf4a13945, 0xd898c296}
+
+#define ECDSA_P256_GY_INIT \
+ {0x4fe342e2, 0xfe1a7f9b, 0x8ee7eb4a, 0x7c0f9e16, \
+ 0x2bce3357, 0x6b315ece, 0xcbb64068, 0x37bf51f5}
+
+/* Doubled Base Point */
+#define ECDSA_P256_HX_INIT \
+ {0x7cf27b18, 0x8d034f7e, 0x8a523803, 0x04b51ac3, \
+ 0xc08969e2, 0x77f21b35, 0xa60b48fc, 0x47669978}
+
+#define ECDSA_P256_HY_INIT \
+ {0x07775510, 0xdb8ed040, 0x293d9ac6, 0x9f7430db, \
+ 0xba7dade6, 0x3ce98229, 0x9e04b79d, 0x227873d1}
+
+/* Order of the Base Point */
+#define ECDSA_P256_N_INIT \
+ {0xffffffff, 0x00000000, 0xffffffff, 0xffffffff, \
+ 0xbce6faad, 0xa7179e84, 0xf3b9cac2, 0xfc632551}
+
+
+//------------------------------------------------------------------------------
+// ECDSA Parameters (P-384)
+//------------------------------------------------------------------------------
+
+/* Base Point */
+#define ECDSA_P384_GX_INIT \
+ {0xaa87ca22, 0xbe8b0537, 0x8eb1c71e, 0xf320ad74, \
+ 0x6e1d3b62, 0x8ba79b98, 0x59f741e0, 0x82542a38, \
+ 0x5502f25d, 0xbf55296c, 0x3a545e38, 0x72760ab7}
+
+#define ECDSA_P384_GY_INIT \
+ {0x3617de4a, 0x96262c6f, 0x5d9e98bf, 0x9292dc29, \
+ 0xf8f41dbd, 0x289a147c, 0xe9da3113, 0xb5f0b8c0, \
+ 0x0a60b1ce, 0x1d7e819d, 0x7a431d7c, 0x90ea0e5f}
+
+/* Doubled Base Point */
+#define ECDSA_P384_HX_INIT \
+ {0x08d99905, 0x7ba3d2d9, 0x69260045, 0xc55b97f0, \
+ 0x89025959, 0xa6f434d6, 0x51d207d1, 0x9fb96e9e, \
+ 0x4fe0e86e, 0xbe0e64f8, 0x5b96a9c7, 0x5295df61}
+
+#define ECDSA_P384_HY_INIT \
+ {0x8e80f1fa, 0x5b1b3ced, 0xb7bfe8df, 0xfd6dba74, \
+ 0xb275d875, 0xbc6cc43e, 0x904e505f, 0x256ab425, \
+ 0x5ffd43e9, 0x4d39e22d, 0x61501e70, 0x0a940e80}
+
+/* Order of the Base Point */
+#define ECDSA_P384_N_INIT \
+ {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xc7634d81, 0xf4372ddf, \
+ 0x581a0db2, 0x48b0a77a, 0xecec196a, 0xccc52973}
+
+//------------------------------------------------------------------------------
+// ECDSA Parameters Switch
+//------------------------------------------------------------------------------
+#if USE_CURVE == 1
+
+#define ECDSA_GX_INIT ECDSA_P256_GX_INIT
+#define ECDSA_GY_INIT ECDSA_P256_GY_INIT
+#define ECDSA_HX_INIT ECDSA_P256_HX_INIT
+#define ECDSA_HY_INIT ECDSA_P256_HY_INIT
+
+#define ECDSA_N_INIT ECDSA_P256_N_INIT
+
+#elif USE_CURVE == 2
+
+#define ECDSA_GX_INIT ECDSA_P384_GX_INIT
+#define ECDSA_GY_INIT ECDSA_P384_GY_INIT
+#define ECDSA_HX_INIT ECDSA_P384_HX_INIT
+#define ECDSA_HY_INIT ECDSA_P384_HY_INIT
+
+#define ECDSA_N_INIT ECDSA_P384_N_INIT
+
+#else
+
+BAD_CURVE
+
+#endif
+
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+extern FPGA_BUFFER ECDSA_GX, ECDSA_GY;
+extern FPGA_BUFFER ECDSA_HX, ECDSA_HY;
+extern FPGA_BUFFER ECDSA_N;
+
+
+//------------------------------------------------------------------------------
+// Switch
+//------------------------------------------------------------------------------
+#ifdef USE_MICROCODE
+
+#define fpga_curve_base_scalar_multiply fpga_curve_base_scalar_multiply_microcode
+#define fpga_curve_add_jacobian fpga_curve_add_jacobian_microcode_wrapper
+#define fpga_curve_double_jacobian fpga_curve_double_jacobian_microcode_wrapper
+
+#else
+
+#define fpga_curve_base_scalar_multiply fpga_curve_base_scalar_multiply_abstract
+#define fpga_curve_add_jacobian fpga_curve_add_jacobian_abstract
+#define fpga_curve_double_jacobian fpga_curve_double_jacobian_abstract
+
+#endif
+
+
+//------------------------------------------------------------------------------
+// Prototypes
+//------------------------------------------------------------------------------
+void fpga_curve_init ();
+
+void fpga_curve_base_scalar_multiply_abstract (const FPGA_BUFFER *k,
+ FPGA_BUFFER *qx,
+ FPGA_BUFFER *qy);
+
+void fpga_curve_base_scalar_multiply_microcode (const FPGA_BUFFER *k,
+ FPGA_BUFFER *qx,
+ FPGA_BUFFER *qy);
+
+void fpga_curve_add_jacobian_abstract (const FPGA_BUFFER *px,
+ const FPGA_BUFFER *py,
+ const FPGA_BUFFER *pz,
+ FPGA_BUFFER *rx,
+ FPGA_BUFFER *ry,
+ FPGA_BUFFER *rz);
+
+void fpga_curve_double_jacobian_abstract (const FPGA_BUFFER *px,
+ const FPGA_BUFFER *py,
+ const FPGA_BUFFER *pz,
+ FPGA_BUFFER *rx,
+ FPGA_BUFFER *ry,
+ FPGA_BUFFER *rz);
+
+void fpga_curve_add_jacobian_microcode ();
+
+void fpga_curve_double_jacobian_microcode ();
+
+void fpga_curve_add_jacobian_microcode_wrapper (const FPGA_BUFFER *px,
+ const FPGA_BUFFER *py,
+ const FPGA_BUFFER *pz,
+ FPGA_BUFFER *rx,
+ FPGA_BUFFER *ry,
+ FPGA_BUFFER *rz);
+
+
+void fpga_curve_double_jacobian_microcode_wrapper (const FPGA_BUFFER *px,
+ const FPGA_BUFFER *py,
+ const FPGA_BUFFER *pz,
+ FPGA_BUFFER *rx,
+ FPGA_BUFFER *ry,
+ FPGA_BUFFER *rz);
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/ecdsa_fpga_curve_abstract.cpp b/ecdsa_fpga_curve_abstract.cpp
new file mode 100644
index 0000000..5510ac1
--- /dev/null
+++ b/ecdsa_fpga_curve_abstract.cpp
@@ -0,0 +1,313 @@
+//------------------------------------------------------------------------------
+//
+// ecdsa_fpga_curve_abstract.cpp
+// ----------------------------------------------
+// Elliptic curve arithmetic procedures for ECDSA
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "ecdsa_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+FPGA_BUFFER ECDSA_GX, ECDSA_GY;
+FPGA_BUFFER ECDSA_HX, ECDSA_HY;
+FPGA_BUFFER ECDSA_N;
+
+
+//------------------------------------------------------------------------------
+void fpga_curve_init()
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+
+ FPGA_BUFFER tmp_gx = ECDSA_GX_INIT, tmp_gy = ECDSA_GY_INIT;
+ FPGA_BUFFER tmp_hx = ECDSA_HX_INIT, tmp_hy = ECDSA_HY_INIT;
+ FPGA_BUFFER tmp_n = ECDSA_N_INIT;
+
+ /* fill buffers for large multi-word integers */
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ { ECDSA_GX.words[w] = tmp_gx.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+ ECDSA_GY.words[w] = tmp_gy.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+ ECDSA_HX.words[w] = tmp_hx.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+ ECDSA_HY.words[w] = tmp_hy.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+ ECDSA_N.words[w] = tmp_n.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+ }
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Elliptic curve base point scalar multiplication routine.
+//
+// Q(qx,qy) = k * G(px,py)
+//
+// Note, that Q is supposed to be in affine coordinates. Multiplication is done
+// using the double-and-add algorithm 3.27 from "Guide to Elliptic Curve
+// Cryptography".
+//
+// WARNING: Though this procedure always does the addition step, it only
+// updates the result when current bit of k is set. It does not take any
+// active measures to keep run-time constant. The main purpose of this model
+// is to help debug Verilog code for FPGA, so *DO NOT* use it anywhere near
+// production!
+//
+//------------------------------------------------------------------------------
+void fpga_curve_base_scalar_multiply_abstract(const FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy)
+//------------------------------------------------------------------------------
+{
+ int word_count, bit_count; // counters
+
+ FPGA_BUFFER rx, ry, rz; // intermediate result
+ FPGA_BUFFER tx, ty, tz; // temporary variable
+
+ /* set initial value of R to point at infinity */
+ fpga_multiword_copy(&ECDSA_ONE, &rx);
+ fpga_multiword_copy(&ECDSA_ONE, &ry);
+ fpga_multiword_copy(&ECDSA_ZERO, &rz);
+
+ /* process bits of k left-to-right */
+ for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--)
+ for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
+ {
+ /* calculate T = 2 * R */
+ fpga_curve_double_jacobian_abstract(&rx, &ry, &rz, &tx, &ty, &tz);
+
+ /* always calculate R = T + P for constant-time */
+ fpga_curve_add_jacobian_abstract(&tx, &ty, &tz, &rx, &ry, &rz);
+
+ /* revert to the value of T before addition if the current bit of k is not set */
+ if (!((k->words[word_count-1] >> (bit_count-1)) & 1))
+ { fpga_multiword_copy(&tx, &rx);
+ fpga_multiword_copy(&ty, &ry);
+ fpga_multiword_copy(&tz, &rz);
+ }
+
+ }
+
+ FPGA_BUFFER a2, a3;
+
+ fpga_modular_inv23(&rz, &a2, &a3);
+
+ fpga_modular_mul(&rx, &a2, qx); // qx = px * (pz^-1)^2 (mod q)
+ fpga_modular_mul(&ry, &a3, qy); // qy = py * (pz^-1)^3 (mod q)
+
+ // check, that rz is non-zero (not point at infinity)
+ bool rz_is_zero = fpga_multiword_is_zero(&rz);
+
+ // handle special case (result is point at infinity)
+ if (rz_is_zero)
+ { fpga_multiword_copy(&ECDSA_ZERO, qx);
+ fpga_multiword_copy(&ECDSA_ZERO, qy);
+ }
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Elliptic curve point doubling routine.
+//
+// R(rx,ry,rz) = 2 * P(px,py,pz)
+//
+// Note, that P(px,py,pz) is supposed to be in projective Jacobian coordinates,
+// R will be in projective Jacobian coordinates.
+//
+// This routine implements algorithm 3.21 from "Guide to Elliptic Curve
+// Cryptography", the only difference is that step 6. does T1 = T2 + T2 and
+// then T2 = T2 + T1 instead of T2 = 3 * T2, because our addition is much
+// faster, than multiplication.
+//
+// Note, that this routine also handles one special case, namely when P is at
+// infinity.
+//
+// Instead of actual modular division, multiplication by pre-computed constant
+// (2^-1 mod q) is done.
+//
+// Note, that FPGA modular multiplier can't multiply a given buffer by itself,
+// this way it's impossible to do eg. fpga_modular_mul(pz, pz, &t1). To overcome
+// the problem the algorithm was modified to do fpga_buffer_copy(pz, &t1) and
+// then fpga_modular_mul(pz, &t1, &t1) instead.
+//
+// WARNING: Though this procedure always does doubling steps, it does not take
+// any active measures to keep run-time constant. The main purpose of this
+// model is to help debug Verilog code for FPGA, so *DO NOT* use is anywhere
+// near production!
+//
+//------------------------------------------------------------------------------
+void fpga_curve_double_jacobian_abstract(const FPGA_BUFFER *px,
+ const FPGA_BUFFER *py,
+ const FPGA_BUFFER *pz,
+ FPGA_BUFFER *rx,
+ FPGA_BUFFER *ry,
+ FPGA_BUFFER *rz)
+//------------------------------------------------------------------------------
+{
+ FPGA_BUFFER t1, t2, t3; // temporary variables
+
+ // check, whether P is at infinity
+ bool pz_is_zero = fpga_multiword_is_zero(pz);
+
+ /* 2. */ fpga_multiword_copy(pz, &t1);
+ fpga_modular_mul(pz, &t1, &t1);
+ /* 3. */ fpga_modular_sub(px, &t1, &t2);
+ /* 4. */ fpga_modular_add(px, &t1, &t1);
+ /* 5. */ fpga_modular_mul(&t1, &t2, &t2);
+ /* 6. */ fpga_modular_add(&t2, &t2, &t1);
+ /* */ fpga_modular_add(&t1, &t2, &t2);
+ /* 7. */ fpga_modular_add(py, py, ry);
+ /* 8. */ fpga_modular_mul(pz, ry, rz);
+ /* 9. */ fpga_multiword_copy(ry, &t1);
+ fpga_multiword_copy(ry, &t3);
+ fpga_modular_mul(&t1, &t3, ry);
+ /* 10. */ fpga_modular_mul(px, ry, &t3);
+ /* 11. */ fpga_multiword_copy(ry, &t1);
+ fpga_modular_mul(ry, &t1, &t1);
+ /* 12. */ fpga_modular_mul(&t1, &ECDSA_DELTA, ry);
+ /* 13. */ fpga_multiword_copy(&t2, &t1);
+ fpga_modular_mul(&t1, &t2, rx);
+ /* 14. */ fpga_modular_add(&t3, &t3, &t1);
+ /* 15. */ fpga_modular_sub(rx, &t1, rx);
+ /* 16. */ fpga_modular_sub(&t3, rx, &t1);
+ /* 17. */ fpga_modular_mul(&t1, &t2, &t1);
+ /* 18. */ fpga_modular_sub(&t1, ry, ry);
+
+ // handle special case (input point is at infinity)
+ if (pz_is_zero)
+ { fpga_multiword_copy(&ECDSA_ONE, rx);
+ fpga_multiword_copy(&ECDSA_ONE, ry);
+ fpga_multiword_copy(&ECDSA_ZERO, rz);
+ }
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Elliptic curve point addition routine.
+//
+// R(rx,ry,rz) = P(px,py,pz) + Q(qx,qy)
+//
+// Note, that P(px, py, pz) is supposed to be in projective Jacobian
+// coordinates, while Q(qx,qy) is supposed to be in affine coordinates,
+// R(rx, ry, rz) will be in projective Jacobian coordinates. Moreover, in this
+// particular implementation Q is always the base point G.
+//
+// This routine implements algorithm 3.22 from "Guide to Elliptic Curve
+// Cryptography". Differences from the original algorithm:
+//
+// 1) Step 1. is omitted, because point Q is always the base point, which is
+// not at infinity by definition.
+//
+// 2) Step 9.1 just returns the pre-computed double of the base point instead
+// of actually doubling it.
+//
+// Note, that this routine also handles three special cases:
+//
+// 1) P is at infinity
+// 2) P == Q
+// 3) P == -Q
+//
+// Note, that FPGA modular multiplier can't multiply a given buffer by itself,
+// this way it's impossible to do eg. fpga_modular_mul(pz, pz, &t1). To overcome
+// the problem the algorithm was modified to do fpga_buffer_copy(pz, &t1) and
+// then fpga_modular_mul(pz, &t1, &t1) instead.
+//
+// WARNING: This procedure does not take any active measures to keep run-time
+// constant. The main purpose of this model is to help debug Verilog code for
+// FPGA, so *DO NOT* use is anywhere near production!
+//
+//------------------------------------------------------------------------------
+void fpga_curve_add_jacobian_abstract(const FPGA_BUFFER *px,
+ const FPGA_BUFFER *py,
+ const FPGA_BUFFER *pz,
+ FPGA_BUFFER *rx,
+ FPGA_BUFFER *ry,
+ FPGA_BUFFER *rz)
+//------------------------------------------------------------------------------
+{
+ FPGA_BUFFER t1, t2, t3, t4; // temporary variables
+
+ bool pz_is_zero = fpga_multiword_is_zero(pz); // Step 2.
+
+ /* 3. */ fpga_multiword_copy(pz, &t1);
+ fpga_modular_mul(pz, &t1, &t1);
+ /* 4. */ fpga_modular_mul(pz, &t1, &t2);
+ /* 5. */ fpga_modular_mul(&t1, &ECDSA_GX, &t1);
+ /* 6. */ fpga_modular_mul(&t2, &ECDSA_GY, &t2);
+ /* 7. */ fpga_modular_sub(&t1, px, &t1);
+ /* 8. */ fpga_modular_sub(&t2, py, &t2);
+
+ bool t1_is_zero = fpga_multiword_is_zero(&t1); // | Step 9.
+ bool t2_is_zero = fpga_multiword_is_zero(&t2); // |
+
+ /* 10. */ fpga_modular_mul(pz, &t1, rz);
+ /* 11. */ fpga_multiword_copy(&t1, &t3);
+ fpga_modular_mul(&t1, &t3, &t3);
+ /* 12. */ fpga_modular_mul(&t1, &t3, &t4);
+ /* 13. */ fpga_modular_mul(px, &t3, &t3);
+ /* 14. */ fpga_modular_add(&t3, &t3, &t1);
+ /* 15. */ fpga_multiword_copy(&t2, rx);
+ fpga_modular_mul(rx, &t2, rx);
+ /* 16. */ fpga_modular_sub(rx, &t1, rx);
+ /* 17. */ fpga_modular_sub(rx, &t4, rx);
+ /* 18. */ fpga_modular_sub(&t3, rx, &t3);
+ /* 19. */ fpga_modular_mul(&t2, &t3, &t3);
+ /* 20. */ fpga_modular_mul(py, &t4, &t4);
+ /* 21. */ fpga_modular_sub(&t3, &t4, ry);
+
+ //
+ // final selection
+ //
+ if (pz_is_zero) // P at infinity ?
+ { fpga_multiword_copy(&ECDSA_GX, rx);
+ fpga_multiword_copy(&ECDSA_GY, ry);
+ fpga_multiword_copy(&ECDSA_ONE, rz);
+ }
+ else if (t1_is_zero) // same x for P and Q ?
+ {
+ fpga_multiword_copy(t2_is_zero ? &ECDSA_HX : &ECDSA_ONE, rx); // | same y ? (P==Q => R=2*G) : (P==-Q => R=O)
+ fpga_multiword_copy(t2_is_zero ? &ECDSA_HY : &ECDSA_ONE, ry); // |
+ fpga_multiword_copy(t2_is_zero ? &ECDSA_ONE : &ECDSA_ZERO, rz); // |
+ }
+}
+
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/ecdsa_fpga_curve_microcode.cpp b/ecdsa_fpga_curve_microcode.cpp
new file mode 100644
index 0000000..553498c
--- /dev/null
+++ b/ecdsa_fpga_curve_microcode.cpp
@@ -0,0 +1,494 @@
+//------------------------------------------------------------------------------
+//
+// ecdsa_fpga_curve_microcode.cpp
+// ----------------------------------------------
+// Elliptic curve arithmetic procedures for ECDSA
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Required for Microcode Routines
+//------------------------------------------------------------------------------
+#define USE_MICROCODE
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "ecdsa_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+//
+// Doubles the point stored in CYCLE_R* and stores the result in CYCLE_S*.
+//
+//------------------------------------------------------------------------------
+void fpga_curve_double_jacobian_microcode()
+//------------------------------------------------------------------------------
+{
+ // fpga_modular_mul(RZ, RZ, RZ2 ); // 2. RZ2 = RZ * RZ
+ // fpga_modular_sub(RX, RZ2, T1 ); // 3. T1 = RX - RZ2
+ // fpga_modular_add(RX, RZ2, T2 ); // 4. T2 = RX + RZ2
+ // fpga_modular_mul(T1, T2, T3 ); // 5. T3 = T1 * T2
+ // fpga_modular_add(T3, T3, T4 ); // 6a. T4 = T3 + T3
+ // fpga_modular_add(T3, T4, A ); // 6b. A = T3 + T4
+ // fpga_modular_add(RY, RY, B ); // 7. B = RY + RY
+ // fpga_modular_mul(B, RZ, SZ ); // 8. SZ = B * RZ [output]
+ // fpga_modular_mul(B, B, C ); // 9. C = B * B
+ // fpga_modular_mul(C, RX, D ); // 10. D = C * RX
+ // fpga_modular_mul(C, C, C2 ); // 11. C2 = C * C
+ // fpga_modular_mul(C2, DELTA, C2_2); // 12. C2_2 = C / 2
+ // fpga_modular_mul(A, A, A2 ); // 13. A2 = A * A
+ // fpga_modular_add(D, D, T1 ); // 14. T1 = D + D
+ // fpga_modular_sub(A2, T1, SX ); // 15. SX = A2 - T1 [output]
+ // fpga_modular_sub(D, SX, T1 ); // 16. T1 = D - SX
+ // fpga_modular_mul(A , T1, T2 ); // 17. T2 = A * T1
+ // fpga_modular_sub(T2, C2_2, SY ); // 18. SY = T2 - C2_2 [output]
+
+ /* BEGIN_MICROCODE: CYCLE_DOUBLE */
+
+ FPGA_BUFFER TEMP;
+
+ uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, CYCLE_Z2);
+ uop_stor(BANK_HI, CYCLE_Z2, &TEMP); print_fpga_buffer("CYCLE_Z2 = ", &TEMP);
+
+ uop_calc(SUB, BANK_HI, CYCLE_RX, CYCLE_Z2, BANK_LO, CYCLE_T1);
+ uop_stor(BANK_LO, CYCLE_T1, &TEMP); print_fpga_buffer("CYCLE_T1 = ", &TEMP);
+
+ uop_calc(ADD, BANK_HI, CYCLE_RX, CYCLE_Z2, BANK_LO, CYCLE_T2);
+ uop_stor(BANK_LO, CYCLE_T2, &TEMP); print_fpga_buffer("CYCLE_T2 = ", &TEMP);
+
+ uop_calc(MUL, BANK_LO, CYCLE_T1, CYCLE_T2, BANK_HI, CYCLE_T3);
+ uop_stor(BANK_HI, CYCLE_T3, &TEMP); print_fpga_buffer("CYCLE_T3 = ", &TEMP);
+
+ uop_calc(ADD, BANK_HI, CYCLE_T3, CYCLE_T3, BANK_LO, CYCLE_T4);
+ uop_stor(BANK_LO, CYCLE_T4, &TEMP); print_fpga_buffer("CYCLE_T4 = ", &TEMP);
+
+ uop_move( BANK_LO, CYCLE_T4, BANK_HI, CYCLE_T4);
+
+ uop_calc(ADD, BANK_HI, CYCLE_T3, CYCLE_T4, BANK_LO, CYCLE_A);
+ uop_stor(BANK_LO, CYCLE_A, &TEMP); print_fpga_buffer("CYCLE_A = ", &TEMP);
+
+ uop_calc(ADD, BANK_HI, CYCLE_RY, CYCLE_RY, BANK_LO, CYCLE_B);
+ uop_stor(BANK_LO, CYCLE_B, &TEMP); print_fpga_buffer("CYCLE_B = ", &TEMP);
+
+ uop_calc(MUL, BANK_LO, CYCLE_B, CYCLE_RZ, BANK_HI, CYCLE_SZ);
+ uop_stor(BANK_HI, CYCLE_SZ, &TEMP); print_fpga_buffer("CYCLE_SZ = ", &TEMP);
+
+ uop_calc(MUL, BANK_LO, CYCLE_B, CYCLE_B, BANK_HI, CYCLE_C);
+ uop_stor(BANK_HI, CYCLE_C, &TEMP); print_fpga_buffer("CYCLE_C = ", &TEMP);
+
+ uop_calc(MUL, BANK_HI, CYCLE_C, CYCLE_RX, BANK_LO, CYCLE_D);
+ uop_stor(BANK_LO, CYCLE_D, &TEMP); print_fpga_buffer("CYCLE_D = ", &TEMP);
+
+ uop_calc(MUL, BANK_HI, CYCLE_C, CYCLE_C, BANK_LO, CYCLE_C2);
+ uop_stor(BANK_LO, CYCLE_C2, &TEMP); print_fpga_buffer("CYCLE_C2 = ", &TEMP);
+
+ uop_calc(MUL, BANK_LO, CYCLE_C2, CONST_DELTA, BANK_HI, CYCLE_C2_2);
+ uop_stor(BANK_HI, CYCLE_C2_2, &TEMP); print_fpga_buffer("CYCLE_C2_2 = ", &TEMP);
+
+ uop_calc(MUL, BANK_LO, CYCLE_A, CYCLE_A, BANK_HI, CYCLE_A2);
+ uop_stor(BANK_HI, CYCLE_A2, &TEMP); print_fpga_buffer("CYCLE_A2 = ", &TEMP);
+
+ uop_calc(ADD, BANK_LO, CYCLE_D, CYCLE_D, BANK_HI, CYCLE_T1);
+ uop_stor(BANK_HI, CYCLE_T1, &TEMP); print_fpga_buffer("CYCLE_T1 = ", &TEMP);
+
+ uop_calc(SUB, BANK_HI, CYCLE_A2, CYCLE_T1, BANK_LO, CYCLE_SX);
+ uop_stor(BANK_LO, CYCLE_SX, &TEMP); print_fpga_buffer("CYCLE_SX = ", &TEMP);
+
+ uop_calc(SUB, BANK_LO, CYCLE_D, CYCLE_SX, BANK_HI, CYCLE_T1);
+ uop_stor(BANK_HI, CYCLE_T1, &TEMP); print_fpga_buffer("CYCLE_T1 = ", &TEMP);
+
+ uop_move( BANK_HI, CYCLE_T1, BANK_LO, CYCLE_T1);
+
+ uop_calc(MUL, BANK_LO, CYCLE_A, CYCLE_T1, BANK_HI, CYCLE_T2);
+ uop_stor(BANK_HI, CYCLE_T2, &TEMP); print_fpga_buffer("CYCLE_T2 = ", &TEMP);
+
+ uop_calc(SUB, BANK_HI, CYCLE_T2, CYCLE_C2_2, BANK_LO, CYCLE_SY);
+ uop_stor(BANK_LO, CYCLE_SY, &TEMP); print_fpga_buffer("CYCLE_SY = ", &TEMP);
+
+ /* END_MICROCODE */
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Adds the base point G to the point stored in CYCLE_S* and stores the result
+// again in CYCLE_R*.
+//
+//------------------------------------------------------------------------------
+void fpga_curve_add_jacobian_microcode()
+{
+ //fpga_modular_mul(SZ, SZ, A) ; // 3. A = SZ * SZ
+ //fpga_modular_mul(A, SZ, B ); // 4. B = A * SZ
+ //fpga_modular_mul(A, &ECDSA_GX, C ); // 5. C = A * GX
+ //fpga_modular_mul(B, &ECDSA_GY, D ); // 6. D = B * GY
+ //fpga_modular_sub(C, SX, E ); // 7. E = C - SX
+ //fpga_modular_sub(D, SY, F ); // 8. F = D - SY
+ //fpga_modular_mul(E, SZ, RZ); // 10. RZ = E * SZ [output]
+ //fpga_modular_mul(E, E, G ); // 11. G = E * E
+ //fpga_modular_mul(E, G, H ); // 12. H = E * G
+ //fpga_modular_mul(G, SX, J ); // 13. J = G * SX
+ //fpga_modular_add(J, J, T1); // 14. T1 = J + J
+ //fpga_modular_mul(F, F, T2); // 15. T2 = F * F
+ //fpga_modular_sub(T2, T1, T3); // 16. T3 = T2 - T1
+ //fpga_modular_sub(T3, H, RX); // 17. RX = T3 - H [output]
+ //fpga_modular_sub(J, RX, T1); // 18. T1 = J - RX
+ //fpga_modular_mul(F, T1, T2); // 19. T2 = F * T1
+ //fpga_modular_mul(H, SY, T3); // 20. T3 = H * SY
+ //fpga_modular_sub(T2, T3, RY); // 21. RY = T2 - T3 [output]
+
+ /* BEGIN_MICROCODE: CYCLE_ADD */
+
+ uop_cmpz( BANK_HI, CYCLE_SZ);
+ uop_move( BANK_HI, CYCLE_SZ, BANK_LO, CYCLE_SZ);
+ uop_calc(MUL, BANK_LO, CYCLE_SZ, CYCLE_SZ, BANK_HI, CYCLE_A);
+ uop_calc(MUL, BANK_HI, CYCLE_A, CYCLE_SZ, BANK_LO, CYCLE_B);
+ uop_move( BANK_LO, CYCLE_B, BANK_HI, CYCLE_B);
+ uop_calc(MUL, BANK_HI, CYCLE_A, CONST_GX, BANK_LO, CYCLE_C);
+ uop_calc(MUL, BANK_HI, CYCLE_B, CONST_GY, BANK_LO, CYCLE_D);
+ uop_calc(SUB, BANK_LO, CYCLE_C, CYCLE_SX, BANK_HI, CYCLE_E);
+ uop_calc(SUB, BANK_LO, CYCLE_D, CYCLE_SY, BANK_HI, CYCLE_F);
+ uop_cmpz( BANK_HI, CYCLE_E);
+ uop_cmpz( BANK_HI, CYCLE_F);
+ uop_calc(MUL, BANK_HI, CYCLE_E, CYCLE_SZ, BANK_LO, CYCLE_RZ);
+ uop_calc(MUL, BANK_HI, CYCLE_E, CYCLE_E, BANK_LO, CYCLE_G);
+ uop_move( BANK_LO, CYCLE_G, BANK_HI, CYCLE_G);
+ uop_calc(MUL, BANK_HI, CYCLE_E, CYCLE_G, BANK_LO, CYCLE_H);
+ uop_calc(MUL, BANK_LO, CYCLE_G, CYCLE_SX, BANK_HI, CYCLE_J);
+ uop_calc(ADD, BANK_HI, CYCLE_J, CYCLE_J, BANK_LO, CYCLE_T1);
+ uop_calc(MUL, BANK_HI, CYCLE_F, CYCLE_F, BANK_LO, CYCLE_T2);
+ uop_calc(SUB, BANK_LO, CYCLE_T2, CYCLE_T1, BANK_HI, CYCLE_T3);
+ uop_move( BANK_HI, CYCLE_T3, BANK_LO, CYCLE_T3);
+ uop_calc(SUB, BANK_LO, CYCLE_T3, CYCLE_H, BANK_HI, CYCLE_RX);
+ uop_calc(SUB, BANK_HI, CYCLE_J, CYCLE_RX, BANK_LO, CYCLE_T1);
+ uop_move( BANK_HI, CYCLE_F, BANK_LO, CYCLE_F);
+ uop_calc(MUL, BANK_LO, CYCLE_F, CYCLE_T1, BANK_HI, CYCLE_T2);
+ uop_calc(MUL, BANK_LO, CYCLE_H, CYCLE_SY, BANK_HI, CYCLE_T3);
+ uop_calc(SUB, BANK_HI, CYCLE_T2, CYCLE_T3, BANK_LO, CYCLE_RY);
+ uop_move( BANK_LO, CYCLE_RY, BANK_HI, CYCLE_RY);
+
+ /* END_MICROCODE */
+
+ //
+ // handle special corner cases
+ //
+ if (uop_flagz_sz)
+ {
+ /* BEGIN_MICROCODE: CYCLE_ADD_AT_INFINITY */
+
+ uop_move(BANK_LO, CONST_GX, BANK_HI, CYCLE_RX);
+ uop_move(BANK_LO, CONST_GY, BANK_HI, CYCLE_RY);
+ uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_RZ);
+
+ /* END_MICROCODE */
+ }
+ else
+ {
+ if (uop_flagz_e)
+ {
+ if (uop_flagz_f)
+ {
+ /* BEGIN_MICROCODE: CYCLE_ADD_SAME_X_SAME_Y */
+
+ uop_move(BANK_LO, CONST_HX, BANK_HI, CYCLE_RX);
+ uop_move(BANK_LO, CONST_HY, BANK_HI, CYCLE_RY);
+ uop_move(BANK_HI, CONST_ONE, BANK_LO, CYCLE_RZ);
+
+ /* END_MICROCODE */
+ }
+ else
+ {
+ /* BEGIN_MICROCODE: CYCLE_ADD_SAME_X */
+
+ uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RX);
+ uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RY);
+ uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_RZ);
+
+ /* END_MICROCODE */
+ }
+ }
+ else
+ {
+ /* BEGIN_MICROCODE: CYCLE_ADD_REGULAR */
+
+ uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_T1);
+ uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_T2);
+ uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_T3);
+
+ /* END_MICROCODE */
+ }
+ }
+}
+
+
+#ifdef USE_MICROCODE
+//------------------------------------------------------------------------------
+void fpga_curve_base_scalar_multiply_microcode(const FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy)
+//------------------------------------------------------------------------------
+{
+ int word_count, bit_count; // counters
+ FPGA_WORD k_word;
+ bool k_bit;
+
+ // initialize internal banks
+ fpga_multiword_copy(&ECDSA_ZERO, &BUF_LO[CONST_ZERO]);
+ fpga_multiword_copy(&ECDSA_ZERO, &BUF_HI[CONST_ZERO]);
+
+ fpga_multiword_copy(&ECDSA_ONE, &BUF_LO[CONST_ONE]);
+ fpga_multiword_copy(&ECDSA_ONE, &BUF_HI[CONST_ONE]);
+
+ fpga_multiword_copy(&ECDSA_DELTA, &BUF_LO[CONST_DELTA]);
+ fpga_multiword_copy(&ECDSA_DELTA, &BUF_HI[CONST_DELTA]);
+
+ fpga_multiword_copy(&ECDSA_GX, &BUF_LO[CONST_GX]);
+ fpga_multiword_copy(&ECDSA_GX, &BUF_HI[CONST_GX]);
+
+ fpga_multiword_copy(&ECDSA_GY, &BUF_LO[CONST_GY]);
+ fpga_multiword_copy(&ECDSA_GY, &BUF_HI[CONST_GY]);
+
+ fpga_multiword_copy(&ECDSA_HX, &BUF_LO[CONST_HX]);
+ fpga_multiword_copy(&ECDSA_HX, &BUF_HI[CONST_HX]);
+
+ fpga_multiword_copy(&ECDSA_HY, &BUF_LO[CONST_HY]);
+ fpga_multiword_copy(&ECDSA_HY, &BUF_HI[CONST_HY]);
+
+ /* BEGIN_MICROCODE: PREPARE */
+
+ // set initial value of R to point at infinity
+ uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RX);
+ uop_move(BANK_LO, CONST_ONE, BANK_HI, CYCLE_RY);
+ uop_move(BANK_HI, CONST_ZERO, BANK_LO, CYCLE_RZ);
+
+ /* END_MICROCODE */
+
+ /* process bits of k left-to-right */
+ for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--)
+ for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
+ {
+ k_word = k->words[word_count-1];
+ k_bit = (k_word & (FPGA_WORD)(1 << (bit_count-1))) > 0;
+
+ // Banks of working cycle operands
+ // -------------------------------
+ // RX: HI
+ // RY: HI
+ // RZ: LO
+
+ // calculate S = 2 * R
+ fpga_curve_double_jacobian_microcode();
+
+ // Banks of working cycle operands
+ // -------------------------------
+ // SX: LO
+ // SY: LO
+ // SZ: HI
+
+ // always calculate R = S * G for constant-time operation
+ fpga_curve_add_jacobian_microcode();
+
+ // Banks of working cycle operands
+ // -------------------------------
+ // RX: HI
+ // RY: HI
+ // RZ: LO
+
+
+ if (!k_bit)
+ {
+ /* BEGIN_MICROCODE: CYCLE_K0 */
+
+ // revert to the value of S before addition if the current bit of k is not set
+ uop_move(BANK_LO, CYCLE_SX, BANK_HI, CYCLE_RX);
+ uop_move(BANK_LO, CYCLE_SY, BANK_HI, CYCLE_RY);
+ uop_move(BANK_HI, CYCLE_SZ, BANK_LO, CYCLE_RZ);
+
+ /* END_MICROCODE */
+ }
+ else
+ {
+ /* BEGIN_MICROCODE: CYCLE_K1 */
+
+ // do dummy overwrite for constant-time operation
+ uop_move(BANK_HI, CYCLE_RX, BANK_LO, CYCLE_SX);
+ uop_move(BANK_HI, CYCLE_RY, BANK_LO, CYCLE_SY);
+ uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_SZ);
+
+ /* END_MICROCODE */
+ }
+
+ FPGA_BUFFER TEMP;
+
+ //printf("wc = %d, bc = %d\n", word_count-1, bit_count-1);
+
+ uop_stor(BANK_LO, CYCLE_RX, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_RX = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_RY, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_RY = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_RZ, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_RZ = ", &TEMP);
+
+ uop_stor(BANK_LO, CYCLE_SX, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_SX = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_SY, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_SY = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_SZ, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_SZ = ", &TEMP);
+
+ uop_stor(BANK_LO, CYCLE_A, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_A = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_A2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_A2 = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_B, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_B = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_C, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_C = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_C2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_C2 = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_C2_2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_C2_2 = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_D, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_D = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_E, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_E = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_F, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_F = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_G, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_G = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_H, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_H = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_J, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_J = ", &TEMP);
+
+ uop_stor(BANK_LO, CYCLE_Z2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_Z2 = ", &TEMP);
+
+ uop_stor(BANK_LO, CYCLE_T1, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T1 = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_T2, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T2 = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_T3, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T3 = ", &TEMP);
+ uop_stor(BANK_LO, CYCLE_T4, &TEMP); print_fpga_buffer_nodelim("LO:CYCLE_T4 = ", &TEMP);
+
+ uop_stor(BANK_HI, CYCLE_RX, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_RX = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_RY, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_RY = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_RZ, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_RZ = ", &TEMP);
+
+ uop_stor(BANK_HI, CYCLE_SX, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_SX = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_SY, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_SY = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_SZ, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_SZ = ", &TEMP);
+
+ uop_stor(BANK_HI, CYCLE_A, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_A = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_A2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_A2 = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_B, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_B = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_C, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_C = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_C2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_C2 = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_C2_2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_C2_2 = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_D, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_D = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_E, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_E = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_F, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_F = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_G, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_G = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_H, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_H = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_J, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_J = ", &TEMP);
+
+ uop_stor(BANK_HI, CYCLE_Z2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_Z2 = ", &TEMP);
+
+ uop_stor(BANK_HI, CYCLE_T1, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T1 = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_T2, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T2 = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_T3, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T3 = ", &TEMP);
+ uop_stor(BANK_HI, CYCLE_T4, &TEMP); print_fpga_buffer_nodelim("HI:CYCLE_T4 = ", &TEMP);
+
+ }
+
+ // now convert to affine coordinates
+ fpga_modular_inv23_microcode();
+
+ /* BEGIN_MICROCODE: CONVERT */
+
+ uop_calc(MUL, BANK_HI, INVERT_A2, CYCLE_RX, BANK_LO, CYCLE_SX);
+ uop_calc(MUL, BANK_HI, INVERT_A3, CYCLE_RY, BANK_LO, CYCLE_SY);
+ uop_cmpz(BANK_LO, CYCLE_RZ);
+
+ /* END_MICROCODE */
+
+ if (uop_flagz_rz)
+ {
+ /* BEGIN_MICROCODE: CONVERT_AT_INFINITY */
+
+ uop_move(BANK_LO, CONST_ZERO, BANK_HI, CYCLE_RX);
+ uop_move(BANK_LO, CONST_ZERO, BANK_HI, CYCLE_RY);
+
+ /* END_MICROCODE */
+ }
+ else
+ {
+ /* BEGIN_MICROCODE: CONVERT_REGULAR */
+
+ uop_move(BANK_LO, CYCLE_SX, BANK_HI, CYCLE_RX);
+ uop_move(BANK_LO, CYCLE_SY, BANK_HI, CYCLE_RY);
+
+ /* END_MICROCODE */
+ }
+
+ // return
+ uop_stor(BANK_HI, CYCLE_RX, qx);
+ uop_stor(BANK_HI, CYCLE_RY, qy);
+}
+#endif USE_MICROCODE
+
+
+//------------------------------------------------------------------------------
+void fpga_curve_double_jacobian_microcode_wrapper(const FPGA_BUFFER *rx,
+ const FPGA_BUFFER *ry,
+ const FPGA_BUFFER *rz,
+ FPGA_BUFFER *sx,
+ FPGA_BUFFER *sy,
+ FPGA_BUFFER *sz)
+//------------------------------------------------------------------------------
+{
+ uop_load(rx, BANK_HI, CYCLE_RX);
+ uop_load(ry, BANK_HI, CYCLE_RY);
+ uop_load(rz, BANK_LO, CYCLE_RZ);
+
+ fpga_curve_double_jacobian_microcode();
+
+ uop_stor(BANK_LO, CYCLE_SX, sx);
+ uop_stor(BANK_LO, CYCLE_SY, sy);
+ uop_stor(BANK_HI, CYCLE_SZ, sz);
+}
+
+
+//------------------------------------------------------------------------------
+void fpga_curve_add_jacobian_microcode_wrapper(const FPGA_BUFFER *sx,
+ const FPGA_BUFFER *sy,
+ const FPGA_BUFFER *sz,
+ FPGA_BUFFER *rx,
+ FPGA_BUFFER *ry,
+ FPGA_BUFFER *rz)
+//------------------------------------------------------------------------------
+{
+ uop_load(sx, BANK_LO, CYCLE_SX);
+ uop_load(sy, BANK_LO, CYCLE_SY);
+ uop_load(sz, BANK_HI, CYCLE_SZ);
+
+ fpga_curve_add_jacobian_microcode();
+
+ uop_stor(BANK_HI, CYCLE_RX, rx);
+ uop_stor(BANK_HI, CYCLE_RY, ry);
+ uop_stor(BANK_LO, CYCLE_RZ, rz);
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/fpga_lowlevel.cpp b/ecdsa_fpga_lowlevel.cpp
similarity index 74%
copy from fpga_lowlevel.cpp
copy to ecdsa_fpga_lowlevel.cpp
index 511856a..c7ab322 100644
--- a/fpga_lowlevel.cpp
+++ b/ecdsa_fpga_lowlevel.cpp
@@ -1,137 +1,135 @@
-//------------------------------------------------------------------------------
-//
-// fpga_lowlevel.cpp
-// -----------------------------------
-// Models of Low-level FPGA primitives
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdint.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 32-bit adder with carry input and carry output.
-//
-// Carries are 1 bit wide.
-//
-// {c_out, s} = x + y + c_in
-//
-//------------------------------------------------------------------------------
-void fpga_lowlevel_add32(FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out)
-//------------------------------------------------------------------------------
-{
- // calculate sum
- FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x + (FPGA_WORD_EXTENDED)y;
-
- // process carry input
- if (c_in) r++;
-
- // store sum
- *s = (FPGA_WORD)r;
-
- // shift sum to obtain carry flag
- r >>= FPGA_WORD_WIDTH;
-
- // store carry
- *c_out = (r != 0);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 32-bit subtractor with borrow input and borrow output.
-//
-// Borrows are 1 bit wide.
-//
-// {b_out, d} = x - y - b_in
-//
-void fpga_lowlevel_sub32(FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out)
-//------------------------------------------------------------------------------
-{
- // calculate difference
- FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x - (FPGA_WORD_EXTENDED)y;
-
- // process borrow input
- if (b_in) r--;
-
- // store difference
- *d = (FPGA_WORD)r;
-
- // shift difference to obtain borrow flag
- r >>= FPGA_WORD_WIDTH;
-
- // store borrow
- *b_out = (r != 0);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 16x16-bit multiplier.
-//
-// Inputs are 16-bit wide, outputs are 32-bit wide.
-//
-// p = x * y
-//
-void fpga_lowlevel_mul16(FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p)
-//------------------------------------------------------------------------------
-{
- // multiply and store result
- *p = (FPGA_WORD)x * (FPGA_WORD)y;
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 48-bit adder without carries.
-//
-// s = (x + y)[47:0]
-//
-void fpga_lowlevel_add48(FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s)
-//------------------------------------------------------------------------------
-{
- // add and store result truncated to 48 bits
- *s = (x + y) & FPGA_MASK_ADDER48;
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// fpga_ecdsa_lowlevel.cpp
+// -----------------------------------
+// Models of low-level FPGA primitives
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "ecdsa_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+//
+// Low-level 32-bit adder with carry input and carry output.
+//
+// Carries are 1 bit wide.
+//
+// {c_out, s} = x + y + c_in
+//
+//------------------------------------------------------------------------------
+void fpga_lowlevel_add32(FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out)
+//------------------------------------------------------------------------------
+{
+ // calculate sum
+ FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x + (FPGA_WORD_EXTENDED)y;
+
+ // process carry input
+ if (c_in) r++;
+
+ // store sum
+ *s = (FPGA_WORD)r;
+
+ // shift sum to obtain carry flag
+ r >>= FPGA_WORD_WIDTH;
+
+ // store carry
+ *c_out = (r != 0);
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Low-level 32-bit subtractor with borrow input and borrow output.
+//
+// Borrows are 1 bit wide.
+//
+// {b_out, d} = x - y - b_in
+//
+void fpga_lowlevel_sub32(FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out)
+//------------------------------------------------------------------------------
+{
+ // calculate difference
+ FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x - (FPGA_WORD_EXTENDED)y;
+
+ // process borrow input
+ if (b_in) r--;
+
+ // store difference
+ *d = (FPGA_WORD)r;
+
+ // shift difference to obtain borrow flag
+ r >>= FPGA_WORD_WIDTH;
+
+ // store borrow
+ *b_out = (r != 0);
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Low-level 16x16-bit multiplier.
+//
+// Inputs are 16-bit wide, output is 32-bit wide.
+//
+// p = x * y
+//
+void fpga_lowlevel_mul16(FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p)
+//------------------------------------------------------------------------------
+{
+ // multiply and store result
+ *p = (FPGA_WORD)x * (FPGA_WORD)y;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Low-level 47-bit adder without carries.
+//
+// s = (x + y)[46:0]
+//
+void fpga_lowlevel_add47(FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s)
+//------------------------------------------------------------------------------
+{
+ // add and store result truncated to 47 bits
+ *s = (x + y) & FPGA_MASK_ADDER47;
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/fpga_lowlevel.h b/ecdsa_fpga_lowlevel.h
similarity index 77%
rename from fpga_lowlevel.h
rename to ecdsa_fpga_lowlevel.h
index eeb5c4f..8d0dccd 100644
--- a/fpga_lowlevel.h
+++ b/ecdsa_fpga_lowlevel.h
@@ -1,80 +1,79 @@
-//------------------------------------------------------------------------------
-//
-// fpga_lowlevel.h
-// -----------------------------------
-// Models of Low-level FPGA primitives
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// FPGA Pipeline Settings
-//------------------------------------------------------------------------------
-#define FPGA_WORD_WIDTH (32)
-#define OPERAND_NUM_WORDS (OPERAND_WIDTH / FPGA_WORD_WIDTH)
-
-
-//------------------------------------------------------------------------------
-// Word Types (Normal, Extended, Reduced)
-//------------------------------------------------------------------------------
-typedef uint32_t FPGA_WORD;
-typedef uint64_t FPGA_WORD_EXTENDED;
-typedef uint16_t FPGA_WORD_REDUCED;
-
-
-//------------------------------------------------------------------------------
-// Extended Adder Mask
-//------------------------------------------------------------------------------
-#define FPGA_MASK_ADDER48 ((FPGA_WORD_EXTENDED)0x0000FFFFFFFFFFFF)
-
-
-//------------------------------------------------------------------------------
-struct FPGA_BUFFER
-//------------------------------------------------------------------------------
-{
- FPGA_WORD words[OPERAND_NUM_WORDS];
-};
-
-
-//------------------------------------------------------------------------------
-// Prototypes
-//------------------------------------------------------------------------------
-void fpga_lowlevel_add32 (FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out);
-void fpga_lowlevel_sub32 (FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out);
-void fpga_lowlevel_mul16 (FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p);
-void fpga_lowlevel_add48 (FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s);
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// fpga_ecdsa_lowlevel.h
+// -----------------------------------
+// Models of low-level FPGA primitives
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include <stdint.h>
+
+
+//------------------------------------------------------------------------------
+// FPGA Pipeline Settings
+//------------------------------------------------------------------------------
+#define FPGA_WORD_WIDTH (32)
+
+
+//------------------------------------------------------------------------------
+// Word Types (Normal 32-bit, Extended 47-bit, Reduced 16-bit)
+//------------------------------------------------------------------------------
+typedef uint32_t FPGA_WORD;
+typedef uint64_t FPGA_WORD_EXTENDED;
+typedef uint16_t FPGA_WORD_REDUCED;
+
+
+//------------------------------------------------------------------------------
+// Wide 47-bit Adder Mask
+//------------------------------------------------------------------------------
+#define FPGA_MASK_ADDER47 ((FPGA_WORD_EXTENDED)0x00007FFFFFFFFFFF)
+
+
+//------------------------------------------------------------------------------
+// Prototypes
+//------------------------------------------------------------------------------
+void fpga_lowlevel_add32 (FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out);
+void fpga_lowlevel_sub32 (FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out);
+
+void fpga_lowlevel_mul16 (FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p);
+
+void fpga_lowlevel_add47 (FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s);
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/ecdsa_fpga_microcode.cpp b/ecdsa_fpga_microcode.cpp
new file mode 100644
index 0000000..f02dc8a
--- /dev/null
+++ b/ecdsa_fpga_microcode.cpp
@@ -0,0 +1,432 @@
+//------------------------------------------------------------------------------
+//
+// ecdsa_fpga_microcode.cpp
+// --------------------------------
+// Microcode Architecture for ECDSA
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Required for Microcode Routines
+//------------------------------------------------------------------------------
+#define USE_MICROCODE
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "ecdsa_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Global Buffers
+//------------------------------------------------------------------------------
+FPGA_BUFFER BUF_LO[ECDSA_UOP_OPERAND_COUNT];
+FPGA_BUFFER BUF_HI[ECDSA_UOP_OPERAND_COUNT];
+
+
+//------------------------------------------------------------------------------
+// Global Flags
+//------------------------------------------------------------------------------
+bool uop_flagz_sz;
+bool uop_flagz_rz;
+bool uop_flagz_e;
+bool uop_flagz_f;
+
+
+//------------------------------------------------------------------------------
+void uop_move(UOP_BANK src, int s_op, UOP_BANK dst, int d_op)
+//------------------------------------------------------------------------------
+{
+ FPGA_BUFFER *s_ptr = NULL;
+ FPGA_BUFFER *d_ptr = NULL;
+
+ if (src == BANK_LO) s_ptr = &BUF_LO[s_op];
+ if (src == BANK_HI) s_ptr = &BUF_HI[s_op];
+ if (dst == BANK_LO) d_ptr = &BUF_LO[d_op];
+ if (dst == BANK_HI) d_ptr = &BUF_HI[d_op];
+
+ fpga_multiword_copy(s_ptr, d_ptr);
+}
+
+
+//------------------------------------------------------------------------------
+void uop_cmpz(UOP_BANK src, int s_op)
+//------------------------------------------------------------------------------
+{
+ bool flagz;
+
+ FPGA_BUFFER *s_ptr = NULL;
+
+ if (src == BANK_LO) s_ptr = &BUF_LO[s_op];
+ if (src == BANK_HI) s_ptr = &BUF_HI[s_op];
+
+ flagz = fpga_multiword_is_zero(s_ptr);
+
+ switch (s_op)
+ {
+ case CYCLE_SZ:
+ uop_flagz_sz = flagz;
+ break;
+ case CYCLE_RZ:
+ uop_flagz_rz = flagz;
+ break;
+ case CYCLE_E:
+ uop_flagz_e = flagz;
+ break;
+ case CYCLE_F:
+ uop_flagz_f = flagz;
+ break;
+ }
+}
+
+
+//------------------------------------------------------------------------------
+void uop_calc(UOP_MATH math,
+ UOP_BANK src, int s_op1, int s_op2,
+ UOP_BANK dst, int d_op)
+//------------------------------------------------------------------------------
+{
+ FPGA_BUFFER *s_ptr1 = NULL;
+ FPGA_BUFFER *s_ptr2 = NULL;
+ FPGA_BUFFER *d_ptr = NULL;
+ FPGA_BUFFER *n_ptr = NULL;
+
+ if (src == BANK_LO)
+ { s_ptr1 = &BUF_LO[s_op1];
+ s_ptr2 = &BUF_LO[s_op2];
+ }
+ if (src == BANK_HI)
+ { s_ptr1 = &BUF_HI[s_op1];
+ s_ptr2 = &BUF_HI[s_op2];
+ }
+ if (dst == BANK_LO)
+ { d_ptr = &BUF_LO[d_op];
+ }
+ if (dst == BANK_HI)
+ { d_ptr = &BUF_HI[d_op];
+ }
+
+ if (math == ADD) fpga_modular_add(s_ptr1, s_ptr2, d_ptr);
+ if (math == SUB) fpga_modular_sub(s_ptr1, s_ptr2, d_ptr);
+ if (math == MUL) fpga_modular_mul(s_ptr1, s_ptr2, d_ptr);
+}
+
+
+//------------------------------------------------------------------------------
+void uop_load(const FPGA_BUFFER *mem, UOP_BANK dst, int d_op)
+//------------------------------------------------------------------------------
+{
+ FPGA_BUFFER *d_ptr = NULL;
+ if (dst == BANK_LO) d_ptr = &BUF_LO[d_op];
+ if (dst == BANK_HI) d_ptr = &BUF_HI[d_op];
+
+ fpga_multiword_copy(mem, d_ptr);
+}
+
+
+//------------------------------------------------------------------------------
+void uop_stor(UOP_BANK src, int s_op, FPGA_BUFFER *mem)
+//------------------------------------------------------------------------------
+{
+ FPGA_BUFFER *s_ptr = NULL;
+ if (src == BANK_LO)
+ { s_ptr = &BUF_LO[s_op];
+ }
+ if (src == BANK_HI)
+ { s_ptr = &BUF_HI[s_op];
+ }
+
+ fpga_multiword_copy(s_ptr, mem);
+}
+
+
+//------------------------------------------------------------------------------
+void fpga_modular_inv23_p256_microcode()
+//------------------------------------------------------------------------------
+//
+// This computes A2 = RZ^-2 and A3 = RZ^-3.
+//
+// RZ is read from the lower bank, A2 and A3 are written to the upper bank.
+//
+//------------------------------------------------------------------------------
+{
+ uop_loop;
+
+ //
+ // operand placement map:
+ //
+ // X1 - LO,HI (RZ)
+ // X2 - LO,HI
+ // X3 - LO,HI
+ // X6 - LO
+ // X12 - HI
+ // X15 - LO,HI
+ // X30 - HI
+ // X32 - LO,HI
+
+ /* BEGIN_MICROCODE: INVERT_P256 */
+
+ // first obtain intermediate helper quantities (X#)
+
+ // mirror X1 to HI bank (don't waste time copying to X1, just use RZ)
+ uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ);
+
+ // compute X2 and mirror to the other bank
+ uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2);
+ uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2);
+
+ // compute X3 and mirror to the other bank
+ uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3);
+ uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3);
+
+ // compute X6 (stored in the lower bank)
+ uop_calc(MUL, BANK_LO, INVERT_X3, INVERT_X3, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X3, BANK_LO, INVERT_X6);
+
+ // compute X12 (stored in the upper bank)
+ uop_calc(MUL, BANK_LO, INVERT_X6, INVERT_X6, BANK_HI, INVERT_R1);
+ uop_cycle(5);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_repeat();
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X6, BANK_HI, INVERT_X12);
+
+ // compute X15 and mirror to the other bank
+ uop_calc(MUL, BANK_HI, INVERT_X12, INVERT_X12, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X3, BANK_HI, INVERT_X15);
+ uop_move(BANK_HI, INVERT_X15, BANK_LO, INVERT_X15);
+
+ // compute X30 (stored in the upper bank)
+ uop_calc(MUL, BANK_HI, INVERT_X15, INVERT_X15, BANK_LO, INVERT_R1);
+ uop_cycle(14);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_repeat();
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X15, BANK_HI, INVERT_X30);
+
+ // compute X32 and mirror to the other bank
+ uop_calc(MUL, BANK_HI, INVERT_X30, INVERT_X30, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X2, BANK_LO, INVERT_X32);
+ uop_move(BANK_LO, INVERT_X32, BANK_HI, INVERT_X32);
+
+ // now compute the final results
+
+ uop_calc(MUL, BANK_LO, INVERT_X32, INVERT_X32, BANK_HI, INVERT_R1);
+
+ uop_cycle(31);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_LO, INVERT_R2, CYCLE_RZ, BANK_HI, INVERT_R1);
+
+ uop_cycle(128);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X32, BANK_LO, INVERT_R2);
+
+ uop_cycle(32);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X32, BANK_HI, INVERT_R1);
+
+ uop_cycle(30);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X30, BANK_LO, INVERT_R2);
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+
+ // move A2 into the upper bank
+ uop_move(BANK_LO, INVERT_R2, BANK_HI, INVERT_A2);
+
+ // A3 ends up in the upper bank by itself
+ uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3);
+
+ /* END_MICROCODE */
+}
+
+
+//------------------------------------------------------------------------------
+void fpga_modular_inv23_p384_microcode()
+//------------------------------------------------------------------------------
+//
+// This computes A2 = RZ^-2 and A3 = RZ^-3.
+//
+// RZ is read from the lower bank, A2 and A3 are written to the upper bank.
+//
+//------------------------------------------------------------------------------
+{
+ uop_loop;
+
+ //
+ // operand placement map:
+ //
+ // X1 - LO,HI (RZ)
+ // X2 - LO,HI
+ // X3 - LO,HI
+ // X6 - LO
+ // X12 - HI
+ // X15 - LO,HI
+ // X30 - HI
+ // X32 - LO,HI
+
+ /* BEGIN_MICROCODE: INVERT_P384 */
+
+ // first obtain intermediate helper quantities (X#)
+
+ // mirror X1 to HI bank (don't waste time copying to X1, just use RZ)
+ uop_move(BANK_LO, CYCLE_RZ, BANK_HI, CYCLE_RZ);
+
+ // compute X2 and mirror to the other bank
+ uop_calc(MUL, BANK_LO, CYCLE_RZ, CYCLE_RZ, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, CYCLE_RZ, INVERT_R1, BANK_LO, INVERT_X2);
+ uop_move(BANK_LO, INVERT_X2, BANK_HI, INVERT_X2);
+
+ // compute X3 and mirror to the other bank
+ uop_calc(MUL, BANK_LO, INVERT_X2, INVERT_X2, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, CYCLE_RZ, BANK_LO, INVERT_X3);
+ uop_move(BANK_LO, INVERT_X3, BANK_HI, INVERT_X3);
+
+ // compute X6 (stored in the lower bank)
+ uop_calc(MUL, BANK_LO, INVERT_X3, INVERT_X3, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_calc(MUL, BANK_HI, INVERT_R1, INVERT_X3, BANK_LO, INVERT_X6);
+
+ // compute X12 (stored in the upper bank)
+ uop_calc(MUL, BANK_LO, INVERT_X6, INVERT_X6, BANK_HI, INVERT_R1);
+ uop_cycle(5);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_repeat();
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X6, BANK_HI, INVERT_X12);
+
+ // compute X15 and mirror to the other bank
+ uop_calc(MUL, BANK_HI, INVERT_X12, INVERT_X12, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X3, BANK_HI, INVERT_X15);
+ uop_move(BANK_HI, INVERT_X15, BANK_LO, INVERT_X15);
+
+ // compute X30 (stored in the upper bank)
+ uop_calc(MUL, BANK_HI, INVERT_X15, INVERT_X15, BANK_LO, INVERT_R1);
+ uop_cycle(14);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_repeat();
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X15, BANK_HI, INVERT_X30);
+
+ // compute X60 (stored in the lower bank)
+ uop_calc(MUL, BANK_HI, INVERT_X30, INVERT_X30, BANK_LO, INVERT_R1);
+ uop_cycle(29);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_repeat();
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_X60);
+
+ // compute X120 (stored in the upper bank)
+ uop_calc(MUL, BANK_LO, INVERT_X60, INVERT_X60, BANK_HI, INVERT_R1);
+ uop_cycle(59);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R2, INVERT_R2, BANK_HI, INVERT_R1);
+ uop_repeat();
+ uop_calc(MUL, BANK_LO, INVERT_R2, INVERT_X60, BANK_HI, INVERT_X120);
+
+ // now compute the final results
+
+ uop_calc(MUL, BANK_HI, INVERT_X120, INVERT_X120, BANK_LO, INVERT_R1);
+
+ uop_cycle(119);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X120, BANK_LO, INVERT_R1);
+
+ uop_cycle(15);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X15, BANK_LO, INVERT_R1);
+
+ uop_cycle(31);
+ uop_calc_if_even(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc_if_odd (MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_X2, BANK_HI, INVERT_R2);
+
+ uop_cycle(94);
+ uop_calc_if_even(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+ uop_calc_if_odd (MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_repeat();
+
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_X30, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, INVERT_R1, BANK_HI, INVERT_R2);
+ uop_calc(MUL, BANK_HI, INVERT_R2, INVERT_R2, BANK_LO, INVERT_R1);
+
+ // move A2 into the upper bank
+ uop_move(BANK_LO, INVERT_R1, BANK_HI, INVERT_A2);
+
+ // A3 ends up in the upper bank by itself
+ uop_calc(MUL, BANK_HI, INVERT_A2, INVERT_A2, BANK_LO, INVERT_R1);
+ uop_calc(MUL, BANK_LO, INVERT_R1, CYCLE_RZ, BANK_HI, INVERT_A3);
+
+ /* END_MICROCODE */
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/fpga_lowlevel.cpp b/ecdsa_fpga_microcode.h
similarity index 52%
copy from fpga_lowlevel.cpp
copy to ecdsa_fpga_microcode.h
index 511856a..f551d96 100644
--- a/fpga_lowlevel.cpp
+++ b/ecdsa_fpga_microcode.h
@@ -1,137 +1,164 @@
-//------------------------------------------------------------------------------
-//
-// fpga_lowlevel.cpp
-// -----------------------------------
-// Models of Low-level FPGA primitives
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdint.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 32-bit adder with carry input and carry output.
-//
-// Carries are 1 bit wide.
-//
-// {c_out, s} = x + y + c_in
-//
-//------------------------------------------------------------------------------
-void fpga_lowlevel_add32(FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out)
-//------------------------------------------------------------------------------
-{
- // calculate sum
- FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x + (FPGA_WORD_EXTENDED)y;
-
- // process carry input
- if (c_in) r++;
-
- // store sum
- *s = (FPGA_WORD)r;
-
- // shift sum to obtain carry flag
- r >>= FPGA_WORD_WIDTH;
-
- // store carry
- *c_out = (r != 0);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 32-bit subtractor with borrow input and borrow output.
-//
-// Borrows are 1 bit wide.
-//
-// {b_out, d} = x - y - b_in
-//
-void fpga_lowlevel_sub32(FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out)
-//------------------------------------------------------------------------------
-{
- // calculate difference
- FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x - (FPGA_WORD_EXTENDED)y;
-
- // process borrow input
- if (b_in) r--;
-
- // store difference
- *d = (FPGA_WORD)r;
-
- // shift difference to obtain borrow flag
- r >>= FPGA_WORD_WIDTH;
-
- // store borrow
- *b_out = (r != 0);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 16x16-bit multiplier.
-//
-// Inputs are 16-bit wide, outputs are 32-bit wide.
-//
-// p = x * y
-//
-void fpga_lowlevel_mul16(FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p)
-//------------------------------------------------------------------------------
-{
- // multiply and store result
- *p = (FPGA_WORD)x * (FPGA_WORD)y;
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 48-bit adder without carries.
-//
-// s = (x + y)[47:0]
-//
-void fpga_lowlevel_add48(FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s)
-//------------------------------------------------------------------------------
-{
- // add and store result truncated to 48 bits
- *s = (x + y) & FPGA_MASK_ADDER48;
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// ecdsa_fpga_microcode.h
+// --------------------------------
+// Microcode Architecture for ECDSA
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include <stdlib.h> // NULL
+
+
+//------------------------------------------------------------------------------
+enum UOP_BANK
+//------------------------------------------------------------------------------
+{
+ BANK_LO, BANK_HI
+};
+
+//--------------------------
+enum UOP_OPERAND
+//--------------------------
+{
+ CONST_ZERO, // 0
+ CONST_ONE, // 1
+ CONST_DELTA, // 2
+
+ CONST_GX, // 3
+ CONST_GY, // 4
+
+ CONST_HX, // 5
+ CONST_HY, // 6
+
+ CYCLE_RX, // 7
+ CYCLE_RY, // 8
+ CYCLE_RZ, // 9
+
+ CYCLE_SX, // 10
+ CYCLE_SY, // 11
+ CYCLE_SZ, // 12
+
+ CYCLE_A, // 13
+ CYCLE_A2, // 14
+ CYCLE_B, // 15
+ CYCLE_C, // 16
+ CYCLE_C2, // 17
+ CYCLE_C2_2, // 18
+ CYCLE_D, // 19
+ CYCLE_E, // 20
+ CYCLE_F, // 21
+ CYCLE_G, // 22
+ CYCLE_H, // 23
+ CYCLE_J, // 24
+
+ CYCLE_Z2, // 25
+
+ CYCLE_T1, // 26
+ CYCLE_T2, // 27
+ CYCLE_T3, // 28
+ CYCLE_T4, // 29
+
+ INVERT_R1, // 30
+ INVERT_R2, // 31
+
+ INVERT_X2, // 32
+ INVERT_X3, // 33
+ INVERT_X6, // 34
+ INVERT_X12, // 35
+ INVERT_X15, // 36
+ INVERT_X30, // 37
+ INVERT_X32, // 38
+ INVERT_X60, // 39
+ INVERT_X120, // 40
+
+ INVERT_A2, // 41
+ INVERT_A3, // 42
+
+ ECDSA_UOP_OPERAND_COUNT
+};
+
+//------------------------------------------------------------------------------
+enum UOP_MATH
+//------------------------------------------------------------------------------
+{
+ ADD, SUB, MUL
+};
+
+
+//------------------------------------------------------------------------------
+// Global Storage Buffers
+//------------------------------------------------------------------------------
+extern FPGA_BUFFER BUF_LO[ECDSA_UOP_OPERAND_COUNT];
+extern FPGA_BUFFER BUF_HI[ECDSA_UOP_OPERAND_COUNT];
+
+
+//------------------------------------------------------------------------------
+// Global Flags
+//------------------------------------------------------------------------------
+extern bool uop_flagz_sz;
+extern bool uop_flagz_rz;
+extern bool uop_flagz_e;
+extern bool uop_flagz_f;
+
+
+//------------------------------------------------------------------------------
+// Loop Macros
+//------------------------------------------------------------------------------
+#define uop_loop int uop_cnt
+#define uop_cycle(iters); for (uop_cnt=0; uop_cnt<iters; uop_cnt++) {
+#define uop_repeat(); }
+#define uop_calc_if_even(...) if (!(uop_cnt % 2)) uop_calc(__VA_ARGS__)
+#define uop_calc_if_odd(...) else uop_calc(__VA_ARGS__)
+
+
+//------------------------------------------------------------------------------
+// Prototypes (Micro-Operations)
+//------------------------------------------------------------------------------
+void uop_move(enum UOP_BANK src, int s_op1, UOP_BANK dst, int d_op1);
+void uop_cmpz(UOP_BANK src, int s_op);
+
+void uop_calc (UOP_MATH math,
+ UOP_BANK src, int s_op1, int s_op2,
+ UOP_BANK dst, int d_op);
+
+void uop_load(const FPGA_BUFFER *mem, UOP_BANK dst, int d_op);
+void uop_stor(UOP_BANK src, int s_op, FPGA_BUFFER *mem);
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/ecdsa_fpga_model.cpp b/ecdsa_fpga_model.cpp
new file mode 100644
index 0000000..182c2ab
--- /dev/null
+++ b/ecdsa_fpga_model.cpp
@@ -0,0 +1,539 @@
+//------------------------------------------------------------------------------
+//
+// ecdsa_fpga_model.cpp
+// --------------------------------------------
+// Base point scalar multiplier model for ECDSA
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include <stdio.h>
+#include <stdlib.h>
+
+
+//------------------------------------------------------------------------------
+// Microcode Switch
+//------------------------------------------------------------------------------
+#define USE_MICROCODE
+
+
+//------------------------------------------------------------------------------
+// More Headers
+//------------------------------------------------------------------------------
+#include "ecdsa_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Prototypes
+//------------------------------------------------------------------------------
+void fpga_model_init ();
+
+bool test_base_point_multiplier (const FPGA_BUFFER *k,
+ const FPGA_BUFFER *qx,
+ const FPGA_BUFFER *qy);
+
+bool abuse_internal_point_adder ();
+bool abuse_internal_point_doubler ();
+
+
+//------------------------------------------------------------------------------
+// Locals
+//------------------------------------------------------------------------------
+static FPGA_BUFFER ecdsa_d_nsa;
+static FPGA_BUFFER ecdsa_qx_nsa;
+static FPGA_BUFFER ecdsa_qy_nsa;
+
+static FPGA_BUFFER ecdsa_k_nsa;
+static FPGA_BUFFER ecdsa_rx_nsa;
+static FPGA_BUFFER ecdsa_ry_nsa;
+
+static FPGA_BUFFER ecdsa_d_random;
+static FPGA_BUFFER ecdsa_qx_random;
+static FPGA_BUFFER ecdsa_qy_random;
+
+
+//------------------------------------------------------------------------------
+int main()
+//------------------------------------------------------------------------------
+{
+ bool ok; // flag
+
+ // initialize buffers
+ fpga_multiword_init();
+ fpga_modular_init();
+ fpga_curve_init();
+ fpga_model_init();
+
+ printf("ECDSA_UOP_OPERAND_COUNT == %d\n\n", ECDSA_UOP_OPERAND_COUNT);
+
+ // test base point multiplier: Q = d * G
+ printf("Trying to derive public key from private key (NSA test vector)...\n\n");
+ ok = test_base_point_multiplier(&ecdsa_d_nsa, &ecdsa_qx_nsa, &ecdsa_qy_nsa);
+ if (!ok) return EXIT_FAILURE;
+
+ // test base point multiplier: R = k * G
+ printf("Trying to sign something (NSA test vector)...\n\n");
+ ok = test_base_point_multiplier(&ecdsa_k_nsa, &ecdsa_rx_nsa, &ecdsa_ry_nsa);
+ if (!ok) return EXIT_FAILURE;
+
+ // test base point multiplier: Q = d * G
+ printf("Trying to derive public key from private key (random test vector)...\n\n");
+ ok = test_base_point_multiplier(&ecdsa_d_random, &ecdsa_qx_random, &ecdsa_qy_random);
+ if (!ok) return EXIT_FAILURE;
+
+ // test base point multiplier: O = n * G
+ printf("Trying to multiply the base point by its order...\n\n");
+ ok = test_base_point_multiplier(&ECDSA_N, &ECDSA_ZERO, &ECDSA_ZERO);
+ if (!ok) return EXIT_FAILURE;
+
+ // now run some intricate tests...
+
+ /* Excerpt from the commit message e718fdfae6443466e566ed6ce1515cdecc215ac0:
+
+ The model does multiplication using the double-and-add algorithm. When adding
+ two points P and Q on curves P-256 and P-384, four special cases must be
+ considered. One of them is P = Q, in that situation the explicit addition
+ formulae don't work and either 2*P or 2*Q must be returned from the addition
+ routine. In this model Q is always the base point G, so when P = G, then 2*G
+ must be returned. Since G is fixed, this model stores precomputed point H = 2*G
+ and returns it when adding G + G for true constant-time operation.
+
+ During multiplication the bits of k are scanned left-to-right, so doubling is
+ done before addition. This way the only situation when both inputs to the
+ addition routine are equal to G is when after doubling the result is G. This in
+ its turn is only possible when k = n + 2 (where n is the order of the base
+ point G). ECDSA requires integer k to be [1, n-1], one of the side effects
+ is that the model has a code path that will never be used under normal
+ operation. This code path can be verified by first multiplying by k = 2
+ (special handling for P = G not triggered), then multiplying by k = n + 2
+ (special handling for P = G triggered). Both multiplications should produce
+ the same output. In the former case the output will be calculated on-the-fly,
+ in the latter case the pre-computed coordinates of H will be used.
+ */
+
+
+ // test base point multiplier: H = 2 * G
+ FPGA_BUFFER two;
+ fpga_modular_add(&ECDSA_ONE, &ECDSA_ONE, &two);
+
+ printf("Trying to double the base point...\n\n");
+ ok = test_base_point_multiplier(&two, &ECDSA_HX, &ECDSA_HY);
+ if (!ok) return EXIT_FAILURE;
+
+ // test base point multiplier: G = (n + 1) * G
+ FPGA_BUFFER n1;
+ fpga_modular_add(&ECDSA_N, &ECDSA_ONE, &n1); // n1 = n + 1
+
+ printf("Trying to multiply the base point by its order plus one...\n\n");
+ ok = test_base_point_multiplier(&n1, &ECDSA_GX, &ECDSA_GY);
+ if (!ok) return EXIT_FAILURE;
+
+ // test base point multiplier: H = (n + 2) * G
+ FPGA_BUFFER n2;
+ fpga_modular_add(&ECDSA_N, &two, &n2); // n2 = n + two
+
+ printf("Trying to multiply the base point by its order plus two...\n\n");
+ ok = test_base_point_multiplier(&n2, &ECDSA_HX, &ECDSA_HY);
+ if (!ok) return EXIT_FAILURE;
+
+ // ..and some more tests
+
+ // try to abuse internal point doubler
+ ok = abuse_internal_point_doubler();
+ if (!ok) return EXIT_FAILURE;
+
+ // try to abuse internal point adder
+ ok = abuse_internal_point_adder();
+ if (!ok) return EXIT_FAILURE;
+
+ // everything went just fine
+ return EXIT_SUCCESS;
+}
+
+
+//------------------------------------------------------------------------------
+void fpga_model_init()
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+
+ FPGA_BUFFER tmp_d_nsa = ECDSA_D_NSA_INIT, tmp_k_nsa = ECDSA_K_NSA_INIT;
+ FPGA_BUFFER tmp_qx_nsa = ECDSA_QX_NSA_INIT, tmp_rx_nsa = ECDSA_RX_NSA_INIT;
+ FPGA_BUFFER tmp_qy_nsa = ECDSA_QY_NSA_INIT, tmp_ry_nsa = ECDSA_RY_NSA_INIT;
+
+ FPGA_BUFFER tmp_d_random = ECDSA_D_RANDOM_INIT;
+ FPGA_BUFFER tmp_qx_random = ECDSA_QX_RANDOM_INIT;
+ FPGA_BUFFER tmp_qy_random = ECDSA_QY_RANDOM_INIT;
+
+ // fill buffers for large multi-word integers
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ {
+ ecdsa_d_nsa.words[w] = tmp_d_nsa.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+ ecdsa_qx_nsa.words[w] = tmp_qx_nsa.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+ ecdsa_qy_nsa.words[w] = tmp_qy_nsa.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+
+ ecdsa_k_nsa.words[w] = tmp_k_nsa.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+ ecdsa_rx_nsa.words[w] = tmp_rx_nsa.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+ ecdsa_ry_nsa.words[w] = tmp_ry_nsa.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+
+ ecdsa_d_random.words[w] = tmp_d_random.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+ ecdsa_qx_random.words[w] = tmp_qx_random.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+ ecdsa_qy_random.words[w] = tmp_qy_random.words[FPGA_OPERAND_NUM_WORDS - (w+1)];
+ }
+}
+
+
+//------------------------------------------------------------------------------
+bool test_base_point_multiplier (const FPGA_BUFFER *k,
+ const FPGA_BUFFER *qx,
+ const FPGA_BUFFER *qy)
+//------------------------------------------------------------------------------
+//
+// k - multiplier
+//
+// qx, qy - expected coordinates of product
+//
+// Returns true when point (rx,ry) = k * G matches the point (qx,qy).
+//
+//------------------------------------------------------------------------------
+{
+ bool ok; // flag
+ FPGA_BUFFER rx, ry; // result
+
+ // run the model
+ fpga_curve_base_scalar_multiply(k, &rx, &ry);
+
+ // handle result
+ ok = compare_fpga_buffers(qx, qy, &rx, &ry);
+
+ if (!ok)
+ { printf("\n ERROR\n\n");
+ return false;
+ }
+ else printf("\n OK\n\n");
+
+ // everything went just fine
+ return true;
+}
+
+
+//------------------------------------------------------------------------------
+bool compare_fpga_buffers( const FPGA_BUFFER *az,
+ const FPGA_BUFFER *bz)
+//------------------------------------------------------------------------------
+//
+// Compare z-coordinates of two points and return true when they match.
+//
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+
+ // print all the values
+ print_fpga_buffer(" Expected: Z = ", az);
+ print_fpga_buffer(" Calculated: Z = ", bz);
+
+ // compare values
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ {
+ // compare x
+ if (az->words[w] != bz->words[w]) return false;
+ }
+
+ // values are the same
+ return true;
+}
+
+
+//------------------------------------------------------------------------------
+bool compare_fpga_buffers( const FPGA_BUFFER *ax,
+ const FPGA_BUFFER *ay,
+ const FPGA_BUFFER *bx,
+ const FPGA_BUFFER *by)
+//------------------------------------------------------------------------------
+//
+// Compare affine coordinates of two points and return true when they match.
+//
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+
+ // print all the values
+ print_fpga_buffer(" Expected: X = ", ax);
+ print_fpga_buffer(" Calculated: X = ", bx);
+ printf("\n");
+ print_fpga_buffer(" Expected: Y = ", ay);
+ print_fpga_buffer(" Calculated: Y = ", by);
+
+ // compare values
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ {
+ // compare x
+ if (ax->words[w] != bx->words[w]) return false;
+
+ // compare y
+ if (ay->words[w] != by->words[w]) return false;
+ }
+
+ // values are the same
+ return true;
+}
+
+
+//------------------------------------------------------------------------------
+bool compare_fpga_buffers( const FPGA_BUFFER *ax,
+ const FPGA_BUFFER *ay,
+ const FPGA_BUFFER *az,
+ const FPGA_BUFFER *bx,
+ const FPGA_BUFFER *by,
+ const FPGA_BUFFER *bz)
+//------------------------------------------------------------------------------
+//
+// Compare projective coordinates of two points and return true when they match.
+//
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+
+ // print all the values
+ print_fpga_buffer(" Expected: X = ", ax);
+ print_fpga_buffer(" Calculated: X = ", bx);
+ printf("\n");
+ print_fpga_buffer(" Expected: Y = ", ay);
+ print_fpga_buffer(" Calculated: Y = ", by);
+ printf("\n");
+ print_fpga_buffer(" Expected: Z = ", az);
+ print_fpga_buffer(" Calculated: Z = ", bz);
+
+ // compare values
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ {
+ // compare x
+ if (ax->words[w] != bx->words[w]) return false;
+
+ // compare y
+ if (ay->words[w] != by->words[w]) return false;
+
+ // compare z
+ if (az->words[w] != bz->words[w]) return false;
+ }
+
+ // values are the same
+ return true;
+}
+
+
+//------------------------------------------------------------------------------
+void print_fpga_buffer(const char *s, const FPGA_BUFFER *buf)
+//------------------------------------------------------------------------------
+//
+// Pretty print large multi-word integer.
+//
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+
+ // print header
+ printf("%s", s);
+
+ // print all bytes
+ for (w=FPGA_OPERAND_NUM_WORDS; w>0; w--)
+ {
+ printf("%08x", buf->words[w-1]);
+
+ // insert space after every group of 4 bytes
+ if (w > 1) printf(" ");
+ }
+
+ // print footer
+ printf("\n");
+}
+
+
+//------------------------------------------------------------------------------
+void print_fpga_buffer_nodelim(const char *s, const FPGA_BUFFER *buf)
+//------------------------------------------------------------------------------
+//
+// Pretty print large multi-word integer.
+//
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+
+ // print header
+ printf("%s", s);
+
+ // print all bytes
+ for (w=FPGA_OPERAND_NUM_WORDS; w>0; w--)
+ printf("%08x", buf->words[w-1]);
+
+ // print footer
+ printf("\n");
+}
+
+
+//------------------------------------------------------------------------------
+bool abuse_internal_point_doubler()
+//------------------------------------------------------------------------------
+//
+// This routine tries to abuse the internal curve point doubler by forcing it
+// to double point at infinity.
+//
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+ bool ok; // flag
+
+ FPGA_BUFFER px, py, pz; // input
+ FPGA_BUFFER qx, qy, qz; // output
+
+ // set P.X and P.Y to some "random" garbage and P.Z to zero
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ { px.words[w] = ECDSA_GX.words[w] ^ ECDSA_HX.words[w];
+ py.words[w] = ECDSA_GY.words[w] ^ ECDSA_HY.words[w];
+ }
+ fpga_multiword_copy(&ECDSA_ZERO, &pz);
+
+ // try to double point at infinity (should produce point at infinity)
+ printf("Trying to double something at infinity...\n\n");
+ fpga_curve_double_jacobian(&px, &py, &pz, &qx, &qy, &qz);
+
+ // handle result
+ ok = compare_fpga_buffers(&ECDSA_ZERO, &qz);
+ if (! ok)
+ { printf("\n ERROR\n\n");
+ return false;
+ }
+ else printf("\n OK\n\n");
+
+ // everything went just fine
+ return true;
+}
+
+
+//------------------------------------------------------------------------------
+bool abuse_internal_point_adder()
+//------------------------------------------------------------------------------
+//
+// This routine tries to abuse the internal curve point adder by forcing it to
+// go throgh all the possible "corner cases".
+//
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+ bool ok; // flag
+
+ FPGA_BUFFER px, py, pz; // input
+ FPGA_BUFFER rx, ry, rz; // output
+
+ //
+ // try to add point at infinity to the base point
+ //
+ {
+ // set P.X and P.Y to some "random" garbage and P.Z to zero
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ { px.words[w] = ECDSA_GX.words[w] ^ ECDSA_HX.words[w];
+ py.words[w] = ECDSA_GY.words[w] ^ ECDSA_HY.words[w];
+ }
+ fpga_multiword_copy(&ECDSA_ZERO, &pz);
+
+ // run addition proceduce
+ printf("Trying to add something at infinity to the base point...\n\n");
+ fpga_curve_add_jacobian(&px, &py, &pz, &rx, &ry, &rz);
+
+ // handle result
+ ok = compare_fpga_buffers(&ECDSA_GX, &ECDSA_GY, &ECDSA_ONE, &rx, &ry, &rz);
+ if (! ok)
+ { printf("\n ERROR\n\n");
+ return false;
+ }
+ else printf("\n OK\n\n");
+ }
+
+ //
+ // try to add the base point to itself
+ //
+ {
+ // set P to G
+ fpga_multiword_copy(&ECDSA_GX, &px);
+ fpga_multiword_copy(&ECDSA_GY, &py);
+ fpga_multiword_copy(&ECDSA_ONE, &pz);
+
+ // run addition proceduce
+ printf("Trying to add the base point to itself...\n\n");
+ fpga_curve_add_jacobian(&px, &py, &pz, &rx, &ry, &rz);
+
+ // handle result
+ ok = compare_fpga_buffers(&ECDSA_HX, &ECDSA_HY, &ECDSA_ONE, &rx, &ry, &rz);
+ if (! ok)
+ { printf("\n ERROR\n\n");
+ return false;
+ }
+ else printf("\n OK\n\n");
+ }
+
+ //
+ // try to add the base point to its opposite
+ //
+ {
+ // set P to (G.X, -G.Y, 1)
+ fpga_multiword_copy(&ECDSA_ZERO, &px);
+ fpga_multiword_copy(&ECDSA_ZERO, &py);
+ fpga_multiword_copy(&ECDSA_ONE, &pz);
+
+ fpga_modular_add(&ECDSA_ZERO, &ECDSA_GX, &px);
+ fpga_modular_sub(&ECDSA_ZERO, &ECDSA_GY, &py);
+
+ // run addition proceduce
+ printf("Trying to add the base point to its opposite...\n\n");
+ fpga_curve_add_jacobian(&px, &py, &pz, &rx, &ry, &rz);
+
+ // handle result
+ ok = compare_fpga_buffers(&ECDSA_ONE, &ECDSA_ONE, &ECDSA_ZERO, &rx, &ry, &rz);
+ if (! ok)
+ { printf("\n ERROR\n\n");
+ return false;
+ }
+ else printf("\n OK\n\n");
+ }
+
+ // everything went just fine
+ return true;
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/fpga_lowlevel.cpp b/ecdsa_fpga_model.h
similarity index 51%
copy from fpga_lowlevel.cpp
copy to ecdsa_fpga_model.h
index 511856a..ea045b9 100644
--- a/fpga_lowlevel.cpp
+++ b/ecdsa_fpga_model.h
@@ -1,137 +1,141 @@
-//------------------------------------------------------------------------------
-//
-// fpga_lowlevel.cpp
-// -----------------------------------
-// Models of Low-level FPGA primitives
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdint.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 32-bit adder with carry input and carry output.
-//
-// Carries are 1 bit wide.
-//
-// {c_out, s} = x + y + c_in
-//
-//------------------------------------------------------------------------------
-void fpga_lowlevel_add32(FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out)
-//------------------------------------------------------------------------------
-{
- // calculate sum
- FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x + (FPGA_WORD_EXTENDED)y;
-
- // process carry input
- if (c_in) r++;
-
- // store sum
- *s = (FPGA_WORD)r;
-
- // shift sum to obtain carry flag
- r >>= FPGA_WORD_WIDTH;
-
- // store carry
- *c_out = (r != 0);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 32-bit subtractor with borrow input and borrow output.
-//
-// Borrows are 1 bit wide.
-//
-// {b_out, d} = x - y - b_in
-//
-void fpga_lowlevel_sub32(FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out)
-//------------------------------------------------------------------------------
-{
- // calculate difference
- FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x - (FPGA_WORD_EXTENDED)y;
-
- // process borrow input
- if (b_in) r--;
-
- // store difference
- *d = (FPGA_WORD)r;
-
- // shift difference to obtain borrow flag
- r >>= FPGA_WORD_WIDTH;
-
- // store borrow
- *b_out = (r != 0);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 16x16-bit multiplier.
-//
-// Inputs are 16-bit wide, outputs are 32-bit wide.
-//
-// p = x * y
-//
-void fpga_lowlevel_mul16(FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p)
-//------------------------------------------------------------------------------
-{
- // multiply and store result
- *p = (FPGA_WORD)x * (FPGA_WORD)y;
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 48-bit adder without carries.
-//
-// s = (x + y)[47:0]
-//
-void fpga_lowlevel_add48(FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s)
-//------------------------------------------------------------------------------
-{
- // add and store result truncated to 48 bits
- *s = (x + y) & FPGA_MASK_ADDER48;
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// ecdsa_fpga_model.h
+// --------------------------------------------
+// Base point scalar multiplier model for ECDSA
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+//
+// Curve Selection (don't override if already selected, handy for testing
+// from the STM32 sample driver)
+//
+// USE_CURVE == 1 -> P-256
+// USE_CURVE == 2 -> P-384
+//
+//------------------------------------------------------------------------------
+#ifndef USE_CURVE
+#define USE_CURVE 2
+#endif
+//------------------------------------------------------------------------------
+#define BAD_CURVE #error USE_CURVE must be either 1 or 2!
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "ecdsa_fpga_lowlevel.h"
+#include "ecdsa_fpga_multiword.h"
+#include "ecdsa_fpga_modular.h"
+#include "ecdsa_fpga_curve.h"
+#ifdef USE_MICROCODE
+#include "ecdsa_fpga_microcode.h"
+#endif
+
+
+//------------------------------------------------------------------------------
+// Test Vectors
+//------------------------------------------------------------------------------
+#include "ecdsa_test_vector_nsa.h"
+#include "ecdsa_test_vector_randomized.h"
+
+
+//------------------------------------------------------------------------------
+// Test Vectors Switch
+//------------------------------------------------------------------------------
+#if USE_CURVE == 1
+
+#define ECDSA_D_NSA_INIT ECDSA_P256_D_NSA_INIT
+#define ECDSA_QX_NSA_INIT ECDSA_P256_QX_NSA_INIT
+#define ECDSA_QY_NSA_INIT ECDSA_P256_QY_NSA_INIT
+
+#define ECDSA_K_NSA_INIT ECDSA_P256_K_NSA_INIT
+#define ECDSA_RX_NSA_INIT ECDSA_P256_RX_NSA_INIT
+#define ECDSA_RY_NSA_INIT ECDSA_P256_RY_NSA_INIT
+
+#define ECDSA_D_RANDOM_INIT ECDSA_P256_D_RANDOM_INIT
+#define ECDSA_QX_RANDOM_INIT ECDSA_P256_QX_RANDOM_INIT
+#define ECDSA_QY_RANDOM_INIT ECDSA_P256_QY_RANDOM_INIT
+
+#elif USE_CURVE == 2
+
+#define ECDSA_D_NSA_INIT ECDSA_P384_D_NSA_INIT
+#define ECDSA_QX_NSA_INIT ECDSA_P384_QX_NSA_INIT
+#define ECDSA_QY_NSA_INIT ECDSA_P384_QY_NSA_INIT
+
+#define ECDSA_K_NSA_INIT ECDSA_P384_K_NSA_INIT
+#define ECDSA_RX_NSA_INIT ECDSA_P384_RX_NSA_INIT
+#define ECDSA_RY_NSA_INIT ECDSA_P384_RY_NSA_INIT
+
+#define ECDSA_D_RANDOM_INIT ECDSA_P384_D_RANDOM_INIT
+#define ECDSA_QX_RANDOM_INIT ECDSA_P384_QX_RANDOM_INIT
+#define ECDSA_QY_RANDOM_INIT ECDSA_P384_QY_RANDOM_INIT
+
+#else
+
+BAD_CURVE
+
+#endif
+
+
+//------------------------------------------------------------------------------
+// Prototypes
+//------------------------------------------------------------------------------
+void print_fpga_buffer (const char *s,
+ const FPGA_BUFFER *v);
+
+void print_fpga_buffer_nodelim (const char *s,
+ const FPGA_BUFFER *v);
+
+bool compare_fpga_buffers (const FPGA_BUFFER *az,
+ const FPGA_BUFFER *bz);
+
+bool compare_fpga_buffers (const FPGA_BUFFER *ax,
+ const FPGA_BUFFER *ay,
+ const FPGA_BUFFER *bx,
+ const FPGA_BUFFER *by);
+
+bool compare_fpga_buffers (const FPGA_BUFFER *ax,
+ const FPGA_BUFFER *ay,
+ const FPGA_BUFFER *az,
+ const FPGA_BUFFER *bx,
+ const FPGA_BUFFER *by,
+ const FPGA_BUFFER *bz);
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/ecdsa_fpga_modular.cpp b/ecdsa_fpga_modular.cpp
new file mode 100644
index 0000000..9d22c05
--- /dev/null
+++ b/ecdsa_fpga_modular.cpp
@@ -0,0 +1,723 @@
+//------------------------------------------------------------------------------
+//
+// ecdsa_fpga_modular.cpp
+// -------------------------------------
+// Modular arithmetic routines for ECDSA
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "ecdsa_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+FPGA_BUFFER ECDSA_Q;
+FPGA_BUFFER ECDSA_DELTA;
+
+
+//------------------------------------------------------------------------------
+void fpga_modular_init()
+//------------------------------------------------------------------------------
+{
+ int w_src, w_dst; // word counters
+
+ // temporary things
+ FPGA_WORD TMP_Q [FPGA_OPERAND_NUM_WORDS] = ECDSA_Q_INIT;
+ FPGA_WORD TMP_DELTA[FPGA_OPERAND_NUM_WORDS] = ECDSA_DELTA_INIT;
+
+ /* fill buffers for large multi-word integers, we need to fill them in
+ * reverse order because of the way C arrays are initialized
+ */
+ for ( w_src = 0, w_dst = FPGA_OPERAND_NUM_WORDS - 1;
+ w_src < FPGA_OPERAND_NUM_WORDS;
+ w_src++, w_dst--)
+ {
+ ECDSA_Q.words[w_dst] = TMP_Q[w_src];
+ ECDSA_DELTA.words[w_dst] = TMP_DELTA[w_src];
+ }
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Modular addition.
+//
+// This routine implements algorithm 3. from "Ultra High Performance ECC over
+// NIST Primes on Commercial FPGAs".
+//
+// s = (a + b) mod q
+//
+// The naive approach is like the following:
+//
+// 1. s = a + b
+// 2. if (s >= q) s -= q
+//
+// The speed-up trick is to simultaneously calculate (a + b) and (a + b - q)
+// and then select the right variant.
+//
+//------------------------------------------------------------------------------
+void fpga_modular_add(const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_BUFFER *s)
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+ FPGA_BUFFER ab, ab_n; // intermediate buffers
+ bool c_in, c_out; // carries
+ bool b_in, b_out; // borrows
+
+ c_in = false; // first word has no carry
+ b_in = false; // first word has no borrow
+
+ // run parallel addition and subtraction
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ {
+ fpga_lowlevel_add32(a->words[w], b->words[w], c_in, &ab.words[w], &c_out);
+ fpga_lowlevel_sub32(ab.words[w], ECDSA_Q.words[w], b_in, &ab_n.words[w], &b_out);
+
+ c_in = c_out; // propagate carry
+ b_in = b_out; // propagate borrow
+ }
+
+ // now select the right buffer
+
+ /*
+ * We select the right variant based on borrow and carry flags after
+ * addition and subtraction of the very last pair of words. Note, that
+ * we only need to select the first variant (a + b) when (a + b) < q.
+ * This way if we get negative number after subtraction, we discard it
+ * and use the output of the adder instead. The subtractor output is
+ * negative when borrow flag is set *and* carry flag is not set. When
+ * both borrow and carry are set, the number is non-negative, because
+ * borrow and carry cancel each other out.
+ */
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ s->words[w] = (b_out && !c_out) ? ab.words[w] : ab_n.words[w];
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Modular subtraction.
+//
+// This routine implements algorithm 3. from "Ultra High Performance ECC over
+// NIST Primes on Commercial FPGAs".
+//
+// d = (a - b) mod q
+//
+// The naive approach is like the following:
+//
+// 1. d = a - b
+// 2. if (a < b) d += q
+//
+// The speed-up trick is to simultaneously calculate (a - b) and (a - b + q)
+// and then select the right variant.
+//
+//------------------------------------------------------------------------------
+void fpga_modular_sub(const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_BUFFER *d)
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+ FPGA_BUFFER ab, ab_n; // intermediate buffers
+ bool c_in, c_out; // carries
+ bool b_in, b_out; // borrows
+
+ c_in = false; // first word has no carry
+ b_in = false; // first word has no borrow
+
+ // run parallel subtraction and addition
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ {
+ fpga_lowlevel_sub32(a->words[w], b->words[w], b_in, &ab.words[w], &b_out);
+ fpga_lowlevel_add32(ab.words[w], ECDSA_Q.words[w], c_in, &ab_n.words[w], &c_out);
+
+ b_in = b_out; // propagate borrow
+ c_in = c_out; // propagate carry
+ }
+
+ // now select the right buffer
+
+ /*
+ * We select the right variant based on borrow flag after subtraction
+ * and addition of the very last pair of words. Note, that we only
+ * need to select the second variant (a - b + q) when a < b. This way
+ * if we get negative number after subtraction, we discard it
+ * and use the output of the adder instead. The Subtractor output is
+ * negative when borrow flag is set.
+ */
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ d->words[w] = b_out ? ab_n.words[w] : ab.words[w];
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Modular multiplication.
+//
+// This routine implements modular multiplication algorithm from "Ultra High
+// Performance ECC over NIST Primes on Commercial FPGAs".
+//
+// p = (a * b) mod q
+//
+// The complex algorithm is split into three parts:
+//
+// 1. Calculation of partial words
+// 2. Acccumulation of partial words into full-size product
+// 3. Modular reduction of the full-size product
+//
+// See comments for corresponding helper routines for more information.
+//
+//------------------------------------------------------------------------------
+void fpga_modular_mul(const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_BUFFER *p)
+//------------------------------------------------------------------------------
+{
+ FPGA_WORD_EXTENDED si[4*FPGA_OPERAND_NUM_WORDS-1]; // parts of intermediate product
+ FPGA_WORD c[2*FPGA_OPERAND_NUM_WORDS]; // full-size intermediate product
+
+ /* multiply to get partial words */
+ fpga_modular_mul_helper_multiply(a, b, si);
+
+ /* accumulate partial words into full-size product */
+ fpga_modular_mul_helper_accumulate(si, c);
+
+ /* reduce full-size product using special routine */
+ fpga_modular_mul_helper_reduce(c, p);
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Parallelized multiplication.
+//
+// This routine implements the algorithm in Fig. 3. from "Ultra High
+// Performance ECC over NIST Primes on Commercial FPGAs".
+//
+// Inputs a and b are split into 2*OPERAND_NUM_WORDS words of FPGA_WORD_WIDTH/2
+// bits each, because FPGA multipliers can't handle full FPGA_WORD_WIDTH-wide
+// inputs. These smaller words are multiplied by an array of 2*OPERAND_NUM_WORDS
+// multiplers and accumulated into an array of 4*OPERAND_NUM_WORDS-1 partial
+// output words si[].
+//
+// The order of loading a and b into the multipliers is a bit complicated,
+// during the first 2*OPERAND_NUM_WORDS-1 cycles one si word per cycle is
+// obtained, during the very last 2*OPERAND_NUM_WORDS'th cycle all the
+// remaining 2*OPERAND_NUM_WORDS partial words are obtained simultaneously.
+//
+//------------------------------------------------------------------------------
+void fpga_modular_mul_helper_multiply(const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_WORD_EXTENDED *si)
+//------------------------------------------------------------------------------
+{
+ int w; // counter
+ int t, x; // more counters
+ int j, i; // word indices
+ FPGA_WORD p; // product
+
+ // buffers for smaller words that multipliers can handle
+ FPGA_WORD_REDUCED ai[2*FPGA_OPERAND_NUM_WORDS];
+ FPGA_WORD_REDUCED bj[2*FPGA_OPERAND_NUM_WORDS];
+
+ // split a and b into smaller words
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ ai[2*w] = (FPGA_WORD_REDUCED)a->words[w], ai[2*w + 1] = (FPGA_WORD_REDUCED)(a->words[w] >> (FPGA_WORD_WIDTH / 2)),
+ bj[2*w] = (FPGA_WORD_REDUCED)b->words[w], bj[2*w + 1] = (FPGA_WORD_REDUCED)(b->words[w] >> (FPGA_WORD_WIDTH / 2));
+
+ // accumulators
+ FPGA_WORD_EXTENDED mac[2*FPGA_OPERAND_NUM_WORDS];
+
+ // clear accumulators
+ for (w=0; w<(2*FPGA_OPERAND_NUM_WORDS); w++) mac[w] = 0;
+
+ // run the crazy algorithm :)
+ for (t=0; t<(2*FPGA_OPERAND_NUM_WORDS); t++)
+ {
+ // save upper half of si[] (one word per cycle)
+ if (t > 0)
+ { si[4*FPGA_OPERAND_NUM_WORDS - (t+1)] = mac[t];
+ mac[t] = 0;
+ }
+
+ // update index
+ j = 2*FPGA_OPERAND_NUM_WORDS - (t+1);
+
+ // parallel multiplication
+ for (x=0; x<(2*FPGA_OPERAND_NUM_WORDS); x++)
+ {
+ // update index
+ i = t - x;
+ if (i < 0) i += 2*FPGA_OPERAND_NUM_WORDS;
+
+ // multiply...
+ fpga_lowlevel_mul16(ai[i], bj[j], &p);
+
+ // ...accumulate
+ mac[x] += p;
+ }
+
+ }
+
+ // now finally save lower half of si[] (2*OPERAND_NUM_WORDS words at once)
+ for (w=0; w<(2*FPGA_OPERAND_NUM_WORDS); w++)
+ si[w] = mac[2*FPGA_OPERAND_NUM_WORDS - (w+1)];
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Accumulation of partial words into full-size product.
+//
+// This routine implements the Algorithm 4. from "Ultra High Performance ECC
+// over NIST Primes on Commercial FPGAs".
+//
+// Input words si[] are accumulated into full-size product c[].
+//
+// The algorithm is a bit tricky, there are 4*OPERAND_NUM_WORDS-1 words in
+// si[]. Complete operation takes 2*OPERAND_NUM_WORDS cycles, even words are
+// summed in full, odd words are split into two parts. During every cycle the
+// upper part of the previous word and the lower part of the following word are
+// summed too.
+//
+//------------------------------------------------------------------------------
+void fpga_modular_mul_helper_accumulate(const FPGA_WORD_EXTENDED *si, FPGA_WORD *c)
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+ FPGA_WORD_EXTENDED cw0, cw1; // intermediate sums
+ FPGA_WORD_REDUCED cw_carry; // wide carry
+
+ // clear carry
+ cw_carry = 0;
+
+ // execute the algorithm
+ for (w=0; w<(2*FPGA_OPERAND_NUM_WORDS); w++)
+ {
+ // handy flags
+ bool w_is_first = (w == 0);
+ bool w_is_last = (w == (2*FPGA_OPERAND_NUM_WORDS-1));
+
+ // accumulate full current even word...
+ // ...and also the upper part of the previous odd word (if not the first word)
+ fpga_lowlevel_add47(si[2*w], w_is_first ? 0 : si[2*w-1] >> (FPGA_WORD_WIDTH / 2), &cw0);
+
+ // generate another word from "carry" part of the previous even word...
+ // ...and also the lower part of the following odd word (if not the last word)
+ cw1 = w_is_last ? 0 : (FPGA_WORD)(si[2*w+1] << (FPGA_WORD_WIDTH / 2));
+ cw1 |= (FPGA_WORD_EXTENDED)cw_carry;
+
+ // accumulate once again
+ fpga_lowlevel_add47(cw0, cw1, &cw1);
+
+ // store current word...
+ c[w] = (FPGA_WORD)cw1;
+
+ // ...and carry
+ cw_carry = (FPGA_WORD_REDUCED) (cw1 >> FPGA_WORD_WIDTH);
+ }
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Fast modular reduction for NIST prime P-256.
+//
+// p = c mod p256
+//
+// This routine implements the algorithm 2.29 from "Guide to Elliptic Curve
+// Cryptography".
+//
+// Output p is OPERAND_WIDTH wide (contains OPERAND_NUM_WORDS words), input c
+// on the other hand is the output of the parallelized Comba multiplier, so it
+// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS).
+//
+// To save FPGA resources, the calculation is done using only two adders and
+// one subtractor. The algorithm is split into five steps.
+//
+//------------------------------------------------------------------------------
+#if USE_CURVE == 1
+void fpga_modular_mul_helper_reduce_p256(const FPGA_WORD *c, FPGA_BUFFER *p)
+{
+ // "funny" words
+ FPGA_BUFFER s1, s2, s3, s4, s5, s6, s7, s8, s9;
+
+ // compose "funny" words out of input words
+ s1.words[7] = c[ 7], s1.words[6] = c[ 6], s1.words[5] = c[ 5], s1.words[4] = c[ 4], s1.words[3] = c[ 3], s1.words[2] = c[ 2], s1.words[1] = c[ 1], s1.words[0] = c[ 0];
+ s2.words[7] = c[15], s2.words[6] = c[14], s2.words[5] = c[13], s2.words[4] = c[12], s2.words[3] = c[11], s2.words[2] = 0, s2.words[1] = 0, s2.words[0] = 0;
+ s3.words[7] = 0, s3.words[6] = c[15], s3.words[5] = c[14], s3.words[4] = c[13], s3.words[3] = c[12], s3.words[2] = 0, s3.words[1] = 0, s3.words[0] = 0;
+ s4.words[7] = c[15], s4.words[6] = c[14], s4.words[5] = 0, s4.words[4] = 0, s4.words[3] = 0, s4.words[2] = c[10], s4.words[1] = c[ 9], s4.words[0] = c[ 8];
+ s5.words[7] = c[ 8], s5.words[6] = c[13], s5.words[5] = c[15], s5.words[4] = c[14], s5.words[3] = c[13], s5.words[2] = c[11], s5.words[1] = c[10], s5.words[0] = c[ 9];
+ s6.words[7] = c[10], s6.words[6] = c[ 8], s6.words[5] = 0, s6.words[4] = 0, s6.words[3] = 0, s6.words[2] = c[13], s6.words[1] = c[12], s6.words[0] = c[11];
+ s7.words[7] = c[11], s7.words[6] = c[ 9], s7.words[5] = 0, s7.words[4] = 0, s7.words[3] = c[15], s7.words[2] = c[14], s7.words[1] = c[13], s7.words[0] = c[12];
+ s8.words[7] = c[12], s8.words[6] = 0, s8.words[5] = c[10], s8.words[4] = c[ 9], s8.words[3] = c[ 8], s8.words[2] = c[15], s8.words[1] = c[14], s8.words[0] = c[13];
+ s9.words[7] = c[13], s9.words[6] = 0, s9.words[5] = c[11], s9.words[4] = c[10], s9.words[3] = c[ 9], s9.words[2] = 0, s9.words[1] = c[15], s9.words[0] = c[14];
+
+ // intermediate results
+ FPGA_BUFFER sum0, sum1, difference;
+
+ /* Step 1. */
+ fpga_modular_add(&s2, &s2, &sum0); // sum0 = 2*s2
+ fpga_modular_add(&s3, &s3, &sum1); // sum1 = 2*s3
+ fpga_modular_sub(&ECDSA_ZERO, &s6, &difference); // difference = -s6
+
+ /* Step 2. */
+ fpga_modular_add(&sum0, &s1, &sum0); // sum0 = s1 + 2*s2
+ fpga_modular_add(&sum1, &s4, &sum1); // sum1 = s4 + 2*s3
+ fpga_modular_sub(&difference, &s7, &difference); // difference = -(s6 + s7)
+
+ /* Step 3. */
+ fpga_modular_add(&sum0, &s5, &sum0); // sum0 = s1 + 2*s2 + s5
+ fpga_modular_add(&sum1, &ECDSA_ZERO, &sum1); // compulsory cycle to keep sum1 constant for next stage
+ fpga_modular_sub(&difference, &s8, &difference); // difference = -(s6 + s7 + s8)
+
+ /* Step 4. */
+ fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + 2*s3 + s4 + s5
+// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
+ fpga_modular_sub(&difference, &s9, &difference); // difference = -(s6 + s7 + s8 + s9)
+
+ /* Step 5. */
+ fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + 2*s3 + s4 + s5 - s6 - s7 - s8 - s9
+// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
+// fpga_modular_add(<dummy>, <dummy>, &difference); // dummy cycle, result ignored
+}
+#endif
+
+
+//------------------------------------------------------------------------------
+//
+// Fast modular reduction for NIST prime P-384.
+//
+// p = c mod p384
+//
+// This routine implements the algorithm 2.30 from "Guide to Elliptic Curve
+// Cryptography".
+//
+// Output p is OPERAND_WIDTH wide (contains OPERAND_NUM_WORDS words), input c
+// on the other hand is the output of the parallelized Comba multiplier, so it
+// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS).
+//
+// To save FPGA resources, the calculation is done using only two adders and
+// one subtractor. The algorithm is split into five steps.
+//
+//------------------------------------------------------------------------------
+#if USE_CURVE == 2
+void fpga_modular_mul_helper_reduce_p384(const FPGA_WORD *c, FPGA_BUFFER *p)
+{
+ // "funny" words
+ FPGA_BUFFER s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+
+ // compose "funny" words
+ s1.words[11] = c[11], s1.words[10] = c[10], s1.words[ 9] = c[ 9], s1.words[ 8] = c[ 8], s1.words[ 7] = c[ 7], s1.words[ 6] = c[ 6], s1.words[ 5] = c[ 5], s1.words[ 4] = c[ 4], s1.words[ 3] = c[ 3], s1.words[ 2] = c[ 2], s1.words[ 1] = c[ 1], s1.words[ 0] = c[ 0];
+ s2.words[11] = 0, s2.words[10] = 0, s2.words[ 9] = 0, s2.words[ 8] = 0, s2.words[ 7] = 0, s2.words[ 6] = c[23], s2.words[ 5] = c[22], s2.words[ 4] = c[21], s2.words[ 3] = 0, s2.words[ 2] = 0, s2.words[ 1] = 0, s2.words[ 0] = 0;
+ s3.words[11] = c[23], s3.words[10] = c[22], s3.words[ 9] = c[21], s3.words[ 8] = c[20], s3.words[ 7] = c[19], s3.words[ 6] = c[18], s3.words[ 5] = c[17], s3.words[ 4] = c[16], s3.words[ 3] = c[15], s3.words[ 2] = c[14], s3.words[ 1] = c[13], s3.words[ 0] = c[12];
+ s4.words[11] = c[20], s4.words[10] = c[19], s4.words[ 9] = c[18], s4.words[ 8] = c[17], s4.words[ 7] = c[16], s4.words[ 6] = c[15], s4.words[ 5] = c[14], s4.words[ 4] = c[13], s4.words[ 3] = c[12], s4.words[ 2] = c[23], s4.words[ 1] = c[22], s4.words[ 0] = c[21];
+ s5.words[11] = c[19], s5.words[10] = c[18], s5.words[ 9] = c[17], s5.words[ 8] = c[16], s5.words[ 7] = c[15], s5.words[ 6] = c[14], s5.words[ 5] = c[13], s5.words[ 4] = c[12], s5.words[ 3] = c[20], s5.words[ 2] = 0, s5.words[ 1] = c[23], s5.words[ 0] = 0;
+ s6.words[11] = 0, s6.words[10] = 0, s6.words[ 9] = 0, s6.words[ 8] = 0, s6.words[ 7] = c[23], s6.words[ 6] = c[22], s6.words[ 5] = c[21], s6.words[ 4] = c[20], s6.words[ 3] = 0, s6.words[ 2] = 0, s6.words[ 1] = 0, s6.words[ 0] = 0;
+ s7.words[11] = 0, s7.words[10] = 0, s7.words[ 9] = 0, s7.words[ 8] = 0, s7.words[ 7] = 0, s7.words[ 6] = 0, s7.words[ 5] = c[23], s7.words[ 4] = c[22], s7.words[ 3] = c[21], s7.words[ 2] = 0, s7.words[ 1] = 0, s7.words[ 0] = c[20];
+ s8.words[11] = c[22], s8.words[10] = c[21], s8.words[ 9] = c[20], s8.words[ 8] = c[19], s8.words[ 7] = c[18], s8.words[ 6] = c[17], s8.words[ 5] = c[16], s8.words[ 4] = c[15], s8.words[ 3] = c[14], s8.words[ 2] = c[13], s8.words[ 1] = c[12], s8.words[ 0] = c[23];
+ s9.words[11] = 0, s9.words[10] = 0, s9.words[ 9] = 0, s9.words[ 8] = 0, s9.words[ 7] = 0, s9.words[ 6] = 0, s9.words[ 5] = 0, s9.words[ 4] = c[23], s9.words[ 3] = c[22], s9.words[ 2] = c[21], s9.words[ 1] = c[20], s9.words[ 0] = 0;
+ s10.words[11] = 0, s10.words[10] = 0, s10.words[ 9] = 0, s10.words[ 8] = 0, s10.words[ 7] = 0, s10.words[ 6] = 0, s10.words[ 5] = 0, s10.words[ 4] = c[23], s10.words[ 3] = c[23], s10.words[ 2] = 0, s10.words[ 1] = 0, s10.words[ 0] = 0;
+
+ // intermediate results
+ FPGA_BUFFER sum0, sum1, difference;
+
+ /* Step 1. */
+ fpga_modular_add(&s1, &s3, &sum0); // sum0 = s1 + s3
+ fpga_modular_add(&s2, &s2, &sum1); // sum1 = 2*s2
+ fpga_modular_sub(&ECDSA_ZERO, &s8, &difference); // difference = -s8
+
+ /* Step 2. */
+ fpga_modular_add(&sum0, &s4, &sum0); // sum0 = s1 + s3 + s4
+ fpga_modular_add(&sum1, &s5, &sum1); // sum1 = 2*s2 + s5
+ fpga_modular_sub(&difference, &s9, &difference); // difference = -(s8 + s9)
+
+ /* Step 3. */
+ fpga_modular_add(&sum0, &s6, &sum0); // sum0 = s1 + s3 + s4 + s6
+ fpga_modular_add(&sum1, &s7, &sum1); // sum1 = 2*s2 + s5 + s7
+ fpga_modular_sub(&difference, &s10, &difference); // difference = -(s8 + s9 + s10)
+
+ /* Step 4. */
+ fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + 2*s3 + s4 + s5
+// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
+ fpga_modular_sub(&difference, &ECDSA_ZERO, &difference); // compulsory cycle to keep difference constant for next stage
+
+ /* Step 5. */
+ fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + s3 + s4 + s5 + s6 + s7 - s8 - s9 - s10
+// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
+// fpga_modular_add(<dummy>, <dummy>, &difference); // dummy cycle, result ignored
+}
+#endif
+
+
+#if USE_CURVE == 1
+//------------------------------------------------------------------------------
+void fpga_modular_inv23_p256(const FPGA_BUFFER *A, FPGA_BUFFER *A2, FPGA_BUFFER *A3)
+//------------------------------------------------------------------------------
+//
+// This uses the addition chain from
+//
+// < https://briansmith.org/ecc-inversion-addition-chains-01 >
+//
+// to calculate A2 = A^-2 and A3 = A^-3.
+//
+//------------------------------------------------------------------------------
+{
+ // counter
+ int cyc_cnt;
+
+ // working variables
+ FPGA_BUFFER R1, R2, X1, X2, X3, X6, X12, X15, X30, X32;
+
+ // first obtain intermediate helper quantities (X1..X32)
+
+ // X1
+ fpga_multiword_copy(A, &X1);
+
+ // X2
+ fpga_modular_mul(&X1, &X1, &R1);
+ fpga_modular_mul(&R1, &X1, &X2);
+
+ // X3
+ fpga_modular_mul(&X2, &X2, &R1);
+ fpga_modular_mul(&R1, &X1, &X3);
+
+ // X6
+ fpga_multiword_copy(&X3, &R1);
+ for (cyc_cnt=0; cyc_cnt<3; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R2, &X3, &X6);
+
+ // X12
+ fpga_multiword_copy(&X6, &R1);
+ for (cyc_cnt=0; cyc_cnt<6; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R1, &X6, &X12);
+
+ // X15
+ fpga_multiword_copy(&X12, &R1);
+ for (cyc_cnt=0; cyc_cnt<3; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R2, &X3, &X15);
+
+ // X30
+ fpga_multiword_copy(&X15, &R1);
+ for (cyc_cnt=0; cyc_cnt<15; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R2, &X15, &X30);
+
+ // X32
+ fpga_multiword_copy(&X30, &R1);
+ for (cyc_cnt=0; cyc_cnt<2; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R1, &X2, &X32);
+
+ // now compute the final results
+
+ fpga_multiword_copy(&X32, &R1);
+ for (cyc_cnt=0; cyc_cnt<32; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R1, &X1, &R2);
+
+ for (cyc_cnt=0; cyc_cnt<128; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R2, &R2, &R1);
+ else fpga_modular_mul(&R1, &R1, &R2);
+ }
+ fpga_modular_mul(&R2, &X32, &R1);
+
+ for (cyc_cnt=0; cyc_cnt<32; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R1, &X32, &R2);
+
+ for (cyc_cnt=0; cyc_cnt<30; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R2, &R2, &R1);
+ else fpga_modular_mul(&R1, &R1, &R2);
+ }
+ fpga_modular_mul(&R2, &X30, &R1);
+
+ fpga_modular_mul(&R1, &R1, &R2);
+ fpga_modular_mul(&R2, &R2, &R1);
+
+ // A2 obtained
+ fpga_multiword_copy(&R1, A2);
+
+ // now calculate compute inverse cubed from inverse squared
+ fpga_modular_mul(&R1, &R1, &R2);
+ fpga_modular_mul(&R2, A, &R1);
+
+ // A3 obtained
+ fpga_multiword_copy(&R1, A3);
+}
+#endif
+
+
+#if USE_CURVE == 2
+//------------------------------------------------------------------------------
+void fpga_modular_inv23_p384(const FPGA_BUFFER *A, FPGA_BUFFER *A2, FPGA_BUFFER *A3)
+//------------------------------------------------------------------------------
+//
+// This uses the addition chain from
+//
+// < https://briansmith.org/ecc-inversion-addition-chains-01 >
+//
+// to calculate A2 = A^-2 and A3 = A^-3.
+//
+//------------------------------------------------------------------------------
+{
+ // counter
+ int cyc_cnt;
+
+ // working variables
+ FPGA_BUFFER R1, R2, X1, X2, X3, X6, X12, X15, X30, X60, X120;
+
+ // first obtain intermediate helper quantities (X1..X120)
+
+ // X1
+ fpga_multiword_copy(A, &X1);
+
+ // X2
+ fpga_modular_mul(&X1, &X1, &R1);
+ fpga_modular_mul(&R1, &X1, &X2);
+
+ // X3
+ fpga_modular_mul(&X2, &X2, &R1);
+ fpga_modular_mul(&R1, &X1, &X3);
+
+ // X6
+ fpga_multiword_copy(&X3, &R1);
+ for (cyc_cnt=0; cyc_cnt<3; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R2, &X3, &X6);
+
+ // X12
+ fpga_multiword_copy(&X6, &R1);
+ for (cyc_cnt=0; cyc_cnt<6; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R1, &X6, &X12);
+
+ // X15
+ fpga_multiword_copy(&X12, &R1);
+ for (cyc_cnt=0; cyc_cnt<3; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R2, &X3, &X15);
+
+ // X30
+ fpga_multiword_copy(&X15, &R1);
+ for (cyc_cnt=0; cyc_cnt<15; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R2, &X15, &X30);
+
+ // X60
+ fpga_multiword_copy(&X30, &R1);
+ for (cyc_cnt=0; cyc_cnt<30; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R1, &X30, &X60);
+
+ // X120
+ fpga_multiword_copy(&X60, &R1);
+ for (cyc_cnt=0; cyc_cnt<60; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R1, &X60, &X120);
+
+ // now compute the final results
+
+ fpga_multiword_copy(&X120, &R1);
+ for (cyc_cnt=0; cyc_cnt<120; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R1, &X120, &R2);
+
+ for (cyc_cnt=0; cyc_cnt<15; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R2, &R2, &R1);
+ else fpga_modular_mul(&R1, &R1, &R2);
+ }
+ fpga_modular_mul(&R1, &X15, &R2);
+
+ for (cyc_cnt=0; cyc_cnt<31; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R2, &R2, &R1);
+ else fpga_modular_mul(&R1, &R1, &R2);
+ }
+ fpga_modular_mul(&R1, &X30, &R2);
+ fpga_modular_mul(&R2, &R2, &R1);
+ fpga_modular_mul(&R1, &R1, &R2);
+ fpga_modular_mul(&R2, &X2, &R1);
+
+ for (cyc_cnt=0; cyc_cnt<94; cyc_cnt++)
+ { if (!(cyc_cnt % 2)) fpga_modular_mul(&R1, &R1, &R2);
+ else fpga_modular_mul(&R2, &R2, &R1);
+ }
+ fpga_modular_mul(&R1, &X30, &R2);
+ fpga_modular_mul(&R2, &R2, &R1);
+ fpga_modular_mul(&R1, &R1, &R2);
+
+ // A2 obtained
+ fpga_multiword_copy(&R2, A2);
+
+ // now calculate compute inverse cubed from inverse squared
+ fpga_modular_mul(&R2, &R2, &R1);
+ fpga_modular_mul(&R1, A, &R2);
+
+ // A3 obtained
+ fpga_multiword_copy(&R2, A3);
+}
+#endif
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/fpga_lowlevel.cpp b/ecdsa_fpga_modular.h
similarity index 50%
rename from fpga_lowlevel.cpp
rename to ecdsa_fpga_modular.h
index 511856a..3b75779 100644
--- a/fpga_lowlevel.cpp
+++ b/ecdsa_fpga_modular.h
@@ -1,137 +1,144 @@
-//------------------------------------------------------------------------------
-//
-// fpga_lowlevel.cpp
-// -----------------------------------
-// Models of Low-level FPGA primitives
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdint.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 32-bit adder with carry input and carry output.
-//
-// Carries are 1 bit wide.
-//
-// {c_out, s} = x + y + c_in
-//
-//------------------------------------------------------------------------------
-void fpga_lowlevel_add32(FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out)
-//------------------------------------------------------------------------------
-{
- // calculate sum
- FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x + (FPGA_WORD_EXTENDED)y;
-
- // process carry input
- if (c_in) r++;
-
- // store sum
- *s = (FPGA_WORD)r;
-
- // shift sum to obtain carry flag
- r >>= FPGA_WORD_WIDTH;
-
- // store carry
- *c_out = (r != 0);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 32-bit subtractor with borrow input and borrow output.
-//
-// Borrows are 1 bit wide.
-//
-// {b_out, d} = x - y - b_in
-//
-void fpga_lowlevel_sub32(FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out)
-//------------------------------------------------------------------------------
-{
- // calculate difference
- FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x - (FPGA_WORD_EXTENDED)y;
-
- // process borrow input
- if (b_in) r--;
-
- // store difference
- *d = (FPGA_WORD)r;
-
- // shift difference to obtain borrow flag
- r >>= FPGA_WORD_WIDTH;
-
- // store borrow
- *b_out = (r != 0);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 16x16-bit multiplier.
-//
-// Inputs are 16-bit wide, outputs are 32-bit wide.
-//
-// p = x * y
-//
-void fpga_lowlevel_mul16(FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p)
-//------------------------------------------------------------------------------
-{
- // multiply and store result
- *p = (FPGA_WORD)x * (FPGA_WORD)y;
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Low-level 48-bit adder without carries.
-//
-// s = (x + y)[47:0]
-//
-void fpga_lowlevel_add48(FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s)
-//------------------------------------------------------------------------------
-{
- // add and store result truncated to 48 bits
- *s = (x + y) & FPGA_MASK_ADDER48;
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// ecdsa_fpga_modular.h
+// -------------------------------------
+// Modular arithmetic routines for ECDSA
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// ECDSA Parameters (P-256)
+//------------------------------------------------------------------------------
+
+/* Field Size */
+#define ECDSA_P256_Q_INIT \
+ {0xffffffff, 0x00000001, 0x00000000, 0x00000000, \
+ 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff}
+
+/* Division Factor */
+#define ECDSA_P256_DELTA_INIT \
+ {0x7fffffff, 0x80000000, 0x80000000, 0x00000000, \
+ 0x00000000, 0x80000000, 0x00000000, 0x00000000}
+
+
+//------------------------------------------------------------------------------
+// ECDSA Parameters (P-384)
+//------------------------------------------------------------------------------
+
+/* Field Size */
+#define ECDSA_P384_Q_INIT \
+ {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, \
+ 0xffffffff, 0x00000000, 0x00000000, 0xffffffff}
+
+/* Division Factor */
+#define ECDSA_P384_DELTA_INIT \
+ {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0x7fffffff, 0x80000000, 0x00000000, 0x80000000}
+
+
+//------------------------------------------------------------------------------
+// ECDSA Parameters Switch
+//------------------------------------------------------------------------------
+#if USE_CURVE == 1
+
+#define ECDSA_Q_INIT ECDSA_P256_Q_INIT
+#define ECDSA_DELTA_INIT ECDSA_P256_DELTA_INIT
+
+#elif USE_CURVE == 2
+
+#define ECDSA_Q_INIT ECDSA_P384_Q_INIT
+#define ECDSA_DELTA_INIT ECDSA_P384_DELTA_INIT
+
+#else
+
+BAD_CURVE
+
+#endif
+
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+extern FPGA_BUFFER ECDSA_Q;
+extern FPGA_BUFFER ECDSA_DELTA;
+
+
+//------------------------------------------------------------------------------
+// Prototypes
+//------------------------------------------------------------------------------
+void fpga_modular_init ();
+
+void fpga_modular_add (const FPGA_BUFFER *A, const FPGA_BUFFER *B, FPGA_BUFFER *S);
+void fpga_modular_sub (const FPGA_BUFFER *A, const FPGA_BUFFER *B, FPGA_BUFFER *D);
+void fpga_modular_mul (const FPGA_BUFFER *A, const FPGA_BUFFER *B, FPGA_BUFFER *P);
+
+void fpga_modular_mul_helper_multiply (const FPGA_BUFFER *a, const FPGA_BUFFER *b, FPGA_WORD_EXTENDED *si);
+void fpga_modular_mul_helper_accumulate (const FPGA_WORD_EXTENDED *si, FPGA_WORD *c);
+void fpga_modular_mul_helper_reduce_p256 (const FPGA_WORD *c, FPGA_BUFFER *p);
+void fpga_modular_mul_helper_reduce_p384 (const FPGA_WORD *c, FPGA_BUFFER *p);
+
+void fpga_modular_inv23_p256 (const FPGA_BUFFER *A, FPGA_BUFFER *A2, FPGA_BUFFER *A3);
+void fpga_modular_inv23_p384 (const FPGA_BUFFER *A, FPGA_BUFFER *A2, FPGA_BUFFER *A3);
+
+void fpga_modular_inv23_p256_microcode ();
+void fpga_modular_inv23_p384_microcode ();
+
+
+//------------------------------------------------------------------------------
+// ECDSA Prototype Switch
+//------------------------------------------------------------------------------
+#if USE_CURVE == 1
+
+#define fpga_modular_mul_helper_reduce fpga_modular_mul_helper_reduce_p256
+#define fpga_modular_inv23 fpga_modular_inv23_p256
+#define fpga_modular_inv23_microcode fpga_modular_inv23_p256_microcode
+
+#elif USE_CURVE == 2
+
+#define fpga_modular_mul_helper_reduce fpga_modular_mul_helper_reduce_p384
+#define fpga_modular_inv23 fpga_modular_inv23_p384
+#define fpga_modular_inv23_microcode fpga_modular_inv23_p384_microcode
+
+#else
+
+BAD_CURVE
+
+#endif
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/fpga_modular.h b/ecdsa_fpga_multiword.cpp
similarity index 58%
rename from fpga_modular.h
rename to ecdsa_fpga_multiword.cpp
index 4d31725..ab65e7a 100644
--- a/fpga_modular.h
+++ b/ecdsa_fpga_multiword.cpp
@@ -1,87 +1,107 @@
-//------------------------------------------------------------------------------
-//
-// fpga_modular.h
-// ---------------------------
-// Modular arithmetic routines
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Globals
-//------------------------------------------------------------------------------
-extern FPGA_BUFFER ecdsa_q;
-extern FPGA_BUFFER ecdsa_zero;
-extern FPGA_BUFFER ecdsa_one;
-extern FPGA_BUFFER ecdsa_delta;
-
-
-//------------------------------------------------------------------------------
-// Prototypes
-//------------------------------------------------------------------------------
-void fpga_modular_init ();
-
-void fpga_modular_add (FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *s);
-void fpga_modular_sub (FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *d);
-void fpga_modular_mul (FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *p);
-void fpga_modular_inv (FPGA_BUFFER *a, FPGA_BUFFER *a1);
-
-void fpga_modular_mul_helper_multiply (FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_WORD_EXTENDED *si);
-void fpga_modular_mul_helper_accumulate (FPGA_WORD_EXTENDED *si, FPGA_WORD *c);
-void fpga_modular_mul_helper_reduce_p256 (FPGA_WORD *c, FPGA_BUFFER *p);
-void fpga_modular_mul_helper_reduce_p384 (FPGA_WORD *c, FPGA_BUFFER *p);
-
-void fpga_modular_inv_helper_shl (FPGA_WORD *x, FPGA_WORD *y);
-void fpga_modular_inv_helper_shr (FPGA_WORD *x, FPGA_WORD *y);
-void fpga_modular_inv_helper_add (FPGA_WORD *x, FPGA_WORD *y, FPGA_WORD *s);
-void fpga_modular_inv_helper_sub (FPGA_WORD *x, FPGA_WORD *y, FPGA_WORD *d);
-void fpga_modular_inv_helper_cpy (FPGA_WORD *dst, FPGA_WORD *src);
-void fpga_modular_inv_helper_cmp (FPGA_WORD *a, FPGA_WORD *b, int *c);
-
-
-//------------------------------------------------------------------------------
-// Reduction Routine Selection
-//------------------------------------------------------------------------------
-
-#if USE_CURVE == 1
-#define fpga_modular_mul_helper_reduce fpga_modular_mul_helper_reduce_p256
-#elif USE_CURVE == 2
-#define fpga_modular_mul_helper_reduce fpga_modular_mul_helper_reduce_p384
-#else
-#error USE_CURVE must be either 1 or 2!
-#endif
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// fpga_ecdsa_multiword.cpp
+// -----------------------------------------
+// Models of multi-precision FPGA primitives
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "ecdsa_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+FPGA_BUFFER ECDSA_ZERO;
+FPGA_BUFFER ECDSA_ONE;
+
+
+//------------------------------------------------------------------------------
+void fpga_multiword_init()
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+
+ /* fill buffers for large multi-word integers */
+ for ( w = FPGA_OPERAND_NUM_WORDS - 1;
+ w >= 0;
+ w -= 1)
+ {
+ ECDSA_ZERO.words[w] = 0; // all words are zero
+ ECDSA_ONE.words[w] = w ? 0 : 1; // only the lowest word is 1
+ }
+}
+
+
+//------------------------------------------------------------------------------
+void fpga_multiword_copy(const FPGA_BUFFER *src, FPGA_BUFFER *dst)
+//------------------------------------------------------------------------------
+//
+// Copies large multi-word integer from src into dst.
+//
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+
+ // copy all the words from src into dst
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ dst->words[w] = src->words[w];
+}
+
+
+//------------------------------------------------------------------------------
+bool fpga_multiword_is_zero(const FPGA_BUFFER *x)
+//------------------------------------------------------------------------------
+//
+// Returns true when large multi-word integer x is zero.
+//
+//------------------------------------------------------------------------------
+{
+ int w; // word counter
+
+ // try to find non-zero words
+ for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+ if (x->words[w]) return false;
+
+ // no non-zero words found
+ return true;
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/fpga_util.cpp b/ecdsa_fpga_multiword.h
similarity index 71%
rename from fpga_util.cpp
rename to ecdsa_fpga_multiword.h
index 6f7a834..9190fd5 100644
--- a/fpga_util.cpp
+++ b/ecdsa_fpga_multiword.h
@@ -1,86 +1,91 @@
-//------------------------------------------------------------------------------
-//
-// fpga_util.cpp
-// ---------------------
-// Utility FPGA routines
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdint.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-#include "fpga_util.h"
-
-
-//------------------------------------------------------------------------------
-bool fpga_buffer_is_zero(FPGA_BUFFER *x)
-//------------------------------------------------------------------------------
-//
-// Returns true when large multi-word integer x is zero.
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- // try to find non-zero words
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- if (x->words[w]) return false;
-
- // no non-zero words found
- return true;
-}
-
-
-//------------------------------------------------------------------------------
-void fpga_buffer_copy(FPGA_BUFFER *src, FPGA_BUFFER *dst)
-//------------------------------------------------------------------------------
-//
-// Copies large multi-word integer from src into dst.
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- // copy all the words from src into dst
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- dst->words[w] = src->words[w];
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// fpga_ecdsa_multiword.h
+// -----------------------------------------
+// Models of multi-precision FPGA primitives
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Model Parameters
+//------------------------------------------------------------------------------
+#if USE_CURVE == 1
+
+#define FPGA_OPERAND_WIDTH (256) // largest supported operand width in bits
+
+#elif USE_CURVE == 2
+
+#define FPGA_OPERAND_WIDTH (384) // largest supported operand width in bits
+
+#else
+
+BAD_CURVE
+
+#endif
+
+
+//------------------------------------------------------------------------------
+// FPGA Pipeline Settings
+//------------------------------------------------------------------------------
+#define FPGA_OPERAND_NUM_WORDS (FPGA_OPERAND_WIDTH / FPGA_WORD_WIDTH)
+
+
+//------------------------------------------------------------------------------
+// Operand Data Type
+//------------------------------------------------------------------------------
+typedef struct FPGA_BUFFER
+{
+ FPGA_WORD words[FPGA_OPERAND_NUM_WORDS];
+}
+FPGA_BUFFER;
+
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+extern FPGA_BUFFER ECDSA_ZERO;
+extern FPGA_BUFFER ECDSA_ONE;
+
+
+//------------------------------------------------------------------------------
+// Prototpes
+//------------------------------------------------------------------------------
+void fpga_multiword_init ();
+void fpga_multiword_copy (const FPGA_BUFFER *src, FPGA_BUFFER *dst);
+bool fpga_multiword_is_zero (const FPGA_BUFFER *x);
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/ecdsa_microcode_parser.py b/ecdsa_microcode_parser.py
new file mode 100644
index 0000000..56aa94c
--- /dev/null
+++ b/ecdsa_microcode_parser.py
@@ -0,0 +1,694 @@
+#
+# ecdsa_microcode_parser.py
+#
+# Author: Pavel Shatov
+#
+# Copyright (c) 2018, NORDUnet A/S
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of the NORDUnet A/S nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Imports
+import re
+import sys
+from enum import Enum
+
+# Source File
+C_FILES = [ "ecdsa_fpga_curve_microcode.cpp",
+ "ecdsa_fpga_microcode.cpp"]
+
+class MICROCODE_PARSER:
+
+ # enumerate microcode pieces
+ class MICROCODE_PIECE_ENUM(Enum):
+
+ NONE = -1
+
+ PREPARE = 0
+
+ CYCLE_DOUBLE = 1
+ CYCLE_ADD = 2
+
+ CYCLE_ADD_AT_INFINITY = 3
+ CYCLE_ADD_SAME_X_SAME_Y = 4
+ CYCLE_ADD_SAME_X = 5
+ CYCLE_ADD_REGULAR = 6
+
+ CYCLE_K0 = 7
+ CYCLE_K1 = 8
+
+ CONVERT = 9
+
+ CONVERT_AT_INFINITY = 10
+ CONVERT_REGULAR = 11
+
+ INVERT_P256 = 12
+ INVERT_P384 = 13
+
+
+ # magic pair of begin/end markers
+ MARKER_BEGIN_MICROCODE = "BEGIN_MICROCODE:"
+ MARKER_END_MICROCODE = "END_MICROCODE"
+
+ # names of banks
+ MICROCODE_C_NAME_BANK_LO = "BANK_LO"
+ MICROCODE_C_NAME_BANK_HI = "BANK_HI"
+
+ # micro-operation names
+ MICROCODE_C_NAME_UOP_CMPZ = "uop_cmpz"
+ MICROCODE_C_NAME_UOP_MOVE = "uop_move"
+ MICROCODE_C_NAME_UOP_CALC = "uop_calc"
+ MICROCODE_C_NAME_UOP_CYCLE = "uop_cycle"
+ MICROCODE_C_NAME_UOP_REPEAT = "uop_repeat"
+ MICROCODE_C_NAME_UOP_CALC_EVEN = "uop_calc_if_even"
+ MICROCODE_C_NAME_UOP_CALC_ODD = "uop_calc_if_odd"
+
+ # calculate micro-operations
+ MICROCODE_C_NAME_UOP_MATH_MUL = "MUL"
+ MICROCODE_C_NAME_UOP_MATH_ADD = "ADD"
+ MICROCODE_C_NAME_UOP_MATH_SUB = "SUB"
+
+ # names of banks in C source
+ MICROCODE_C_NAMES_BANKS = [MICROCODE_C_NAME_BANK_LO, MICROCODE_C_NAME_BANK_HI]
+
+ # names of operands in C source
+ MICROCODE_C_NAMES_OPERANDS = [ "CONST_ZERO", "CONST_ONE",
+ "CONST_DELTA",
+ "CONST_GX", "CONST_GY",
+ "CONST_HX", "CONST_HY",
+ "INVERT_R1", "INVERT_R2",
+ "INVERT_X2", "INVERT_X3", "INVERT_X6",
+ "INVERT_X12", "INVERT_X15", "INVERT_X30",
+ "INVERT_X32", "INVERT_X60", "INVERT_X120",
+ "INVERT_A2", "INVERT_A3",
+ "CYCLE_RX", "CYCLE_RY", "CYCLE_RZ",
+ "CYCLE_SX", "CYCLE_SY", "CYCLE_SZ",
+ "CYCLE_A", "CYCLE_A2", "CYCLE_B",
+ "CYCLE_C", "CYCLE_C2", "CYCLE_C2_2",
+ "CYCLE_D", "CYCLE_E", "CYCLE_F",
+ "CYCLE_G", "CYCLE_H", "CYCLE_J",
+ "CYCLE_Z2",
+ "CYCLE_T1", "CYCLE_T2",
+ "CYCLE_T3", "CYCLE_T4"]
+
+ # names of banks in Verilog source
+ MICROCODE_V_NAME_BANKS_DUMMY = "UOP_BANKS_DUMMY"
+
+ MICROCODE_V_NAMES_BANKS = ["UOP_BANKS_LO2HI", "UOP_BANKS_HI2LO"]
+
+ # names of operands in Verilog source
+ MICROCODE_V_NAMES_OPERANDS = ["UOP_OPERAND_" + op for op in MICROCODE_C_NAMES_OPERANDS]
+
+ # names of opcodes in Verilog source
+ MICROCODE_V_NAME_OPCODE_CMPZ = "UOP_OPCODE_CMPZ"
+ MICROCODE_V_NAME_OPCODE_MOVE = "UOP_OPCODE_COPY"
+ MICROCODE_V_NAME_OPCODE_MUL = "UOP_OPCODE_MUL"
+ MICROCODE_V_NAME_OPCODE_ADD = "UOP_OPCODE_ADD"
+ MICROCODE_V_NAME_OPCODE_SUB = "UOP_OPCODE_SUB"
+ MICROCODE_V_NAME_OPCODE_STOP = "UOP_OPCODE_STOP"
+
+ MICROCODE_V_NAME_OPERAND_DONTCARE = "UOP_OPERAND_DONTCARE"
+
+ # match group to catch operand names
+ MATCH_GROUP_09AZ = "([0-9A-Z_]+)"
+
+ # match group to catch bank names
+ MATCH_GROUP_BANK = "(" + MICROCODE_C_NAME_BANK_LO + "|" + MICROCODE_C_NAME_BANK_HI + ")"
+
+ # match group to catch number of loop iterations
+ MATCH_GROUP_NUMBER = "(\d+)"
+
+ # match group to catch math instruction
+ MATCH_GROUP_MATH = "(" + MICROCODE_C_NAME_UOP_MATH_MUL + "|" + MICROCODE_C_NAME_UOP_MATH_ADD + "|" + MICROCODE_C_NAME_UOP_MATH_SUB + ")"
+
+ # match group to catch calculation micro-operation
+ MATCH_GROUP_CALC = "(" + MICROCODE_C_NAME_UOP_CALC + "|" + MICROCODE_C_NAME_UOP_CALC_EVEN + "|" + MICROCODE_C_NAME_UOP_CALC_ODD + ")"
+
+ # map string microcode piece names to enum values
+ MICROCODE_PIECE_DICT = { "PREPARE": MICROCODE_PIECE_ENUM.PREPARE,
+
+ "CYCLE_DOUBLE": MICROCODE_PIECE_ENUM.CYCLE_DOUBLE,
+ "CYCLE_ADD": MICROCODE_PIECE_ENUM.CYCLE_ADD,
+
+ "CYCLE_ADD_AT_INFINITY": MICROCODE_PIECE_ENUM.CYCLE_ADD_AT_INFINITY,
+ "CYCLE_ADD_SAME_X_SAME_Y": MICROCODE_PIECE_ENUM.CYCLE_ADD_SAME_X_SAME_Y,
+ "CYCLE_ADD_SAME_X": MICROCODE_PIECE_ENUM.CYCLE_ADD_SAME_X,
+ "CYCLE_ADD_REGULAR": MICROCODE_PIECE_ENUM.CYCLE_ADD_REGULAR,
+
+ "CYCLE_K0": MICROCODE_PIECE_ENUM.CYCLE_K0,
+ "CYCLE_K1": MICROCODE_PIECE_ENUM.CYCLE_K1,
+
+ "INVERT_P256": MICROCODE_PIECE_ENUM.INVERT_P256,
+ "INVERT_P384": MICROCODE_PIECE_ENUM.INVERT_P384,
+
+ "CONVERT": MICROCODE_PIECE_ENUM.CONVERT,
+
+ "CONVERT_AT_INFINITY": MICROCODE_PIECE_ENUM.CONVERT_AT_INFINITY,
+ "CONVERT_REGULAR": MICROCODE_PIECE_ENUM.CONVERT_REGULAR}
+
+
+ # map C bank names to Verilog bank names
+ MICROCODE_BANK_DICT = dict(zip(MICROCODE_C_NAMES_BANKS, MICROCODE_V_NAMES_BANKS))
+
+ # map C operand names to Verilog operand names
+ MICROCODE_OPERAND_DICT = dict(zip(MICROCODE_C_NAMES_OPERANDS, MICROCODE_V_NAMES_OPERANDS))
+
+ # map C calculation names to Verilog opcode names
+ MICROCODE_MATH_DICT = { MICROCODE_C_NAME_UOP_MATH_MUL: MICROCODE_V_NAME_OPCODE_MUL,
+ MICROCODE_C_NAME_UOP_MATH_ADD: MICROCODE_V_NAME_OPCODE_ADD,
+ MICROCODE_C_NAME_UOP_MATH_SUB: MICROCODE_V_NAME_OPCODE_SUB}
+
+ # microcode format
+ MICROCODE_FORMAT_ADDR = "9'd%03d"
+ MICROCODE_FORMAT_LINE = MICROCODE_FORMAT_ADDR + ": data <= %s;"
+ MICROCODE_FORMAT_OFFSET = "localparam [UOP_ADDR_WIDTH-1:0] %s = " + MICROCODE_FORMAT_ADDR + ";"
+
+ # pieces of microcode
+ MICROCODE_LINES_PREPARE = []
+
+ MICROCODE_LINES_CYCLE_DOUBLE = []
+ MICROCODE_LINES_CYCLE_ADD = []
+
+ MICROCODE_LINES_CYCLE_ADD_AT_INFINITY = []
+ MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y = []
+ MICROCODE_LINES_CYCLE_ADD_SAME_X = []
+ MICROCODE_LINES_CYCLE_ADD_REGULAR = []
+
+ MICROCODE_LINES_CYCLE_K0 = []
+ MICROCODE_LINES_CYCLE_K1 = []
+
+ MICROCODE_LINES_INVERT_P256 = []
+ MICROCODE_LINES_INVERT_P384 = []
+
+ MICROCODE_LINES_CONVERT = []
+
+ MICROCODE_LINES_CONVERT_AT_INFINITY = []
+ MICROCODE_LINES_CONVERT_REGULAR = []
+
+ MICROCODE_LINE_STOP = "{%s, %s, %s, %s, %s}" % ( MICROCODE_V_NAME_OPCODE_STOP,
+ MICROCODE_V_NAME_BANKS_DUMMY,
+ MICROCODE_V_NAME_OPERAND_DONTCARE,
+ MICROCODE_V_NAME_OPERAND_DONTCARE,
+ MICROCODE_V_NAME_OPERAND_DONTCARE)
+
+ def __init__(self, filenames):
+ self.__filenames = filenames
+
+ def parse(self):
+ for next_filename in self.__filenames:
+ print("Parsing '%s'..." % (next_filename))
+ parsing_now = False
+ line_num = 0
+ with open(next_filename, "r") as c_file:
+ c_lines = c_file.readlines()
+ for next_c_line in c_lines:
+ line_num += 1
+ self.__line_num = line_num
+ self.__next_c_line = next_c_line.strip()
+ if len(self.__next_c_line) == 0: continue
+ if self.__next_c_line.startswith("//"): continue
+ if not parsing_now:
+ self.__current_piece = self.__try_start_parsing()
+ if self.__current_piece != self.MICROCODE_PIECE_ENUM.NONE:
+ print(" Found piece of microcode: %s" % (str(self.__current_piece)))
+ parsing_now = True
+ else:
+ parsing_now = self.__continue_parsing()
+
+
+ def format(self):
+
+ mode = 0
+
+ if len(sys.argv) == 2:
+ if sys.argv[1] == "P256": mode = 1
+ if sys.argv[1] == "P384": mode = 2
+
+ if mode == 0:
+ sys.exit("sys.exit(): Usage: ecdsa_microcode_parser.py <P256|P384>!")
+
+ if len(self.MICROCODE_LINES_PREPARE) == 0: sys.exit("sys.exit(): Empty PREPARE piece!")
+
+ if len(self.MICROCODE_LINES_CYCLE_DOUBLE) == 0: sys.exit("sys.exit(): Empty CYCLE_DOUBLE piece!")
+ if len(self.MICROCODE_LINES_CYCLE_ADD) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD piece!")
+
+ if len(self.MICROCODE_LINES_CYCLE_ADD_AT_INFINITY) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD_AT_INFINITY piece!")
+ if len(self.MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD_SAME_X_SAME_Y piece!")
+ if len(self.MICROCODE_LINES_CYCLE_ADD_SAME_X) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD_SAME_X piece!")
+ if len(self.MICROCODE_LINES_CYCLE_ADD_REGULAR) == 0: sys.exit("sys.exit(): Empty CYCLE_ADD_REGULAR piece!")
+
+ if len(self.MICROCODE_LINES_CYCLE_K0) == 0: sys.exit("sys.exit(): Empty CYCLE_K0 piece!")
+ if len(self.MICROCODE_LINES_CYCLE_K1) == 0: sys.exit("sys.exit(): Empty CYCLE_K1 piece!")
+
+ if len(self.MICROCODE_LINES_INVERT_P256) == 0: sys.exit("sys.exit(): Empty INVERT_P256 piece!")
+ if len(self.MICROCODE_LINES_INVERT_P384) == 0: sys.exit("sys.exit(): Empty INVERT_P384 piece!")
+
+ if len(self.MICROCODE_LINES_CONVERT) == 0: sys.exit("sys.exit(): Empty CONVERT piece!")
+
+ if len(self.MICROCODE_LINES_CONVERT_AT_INFINITY) == 0: sys.exit("sys.exit(): Empty CONVERT_AT_INFINITY piece!")
+ if len(self.MICROCODE_LINES_CONVERT_REGULAR) == 0: sys.exit("sys.exit(): Empty CONVERT_REGULAR piece!")
+
+ length = 0
+ length += len(self.MICROCODE_LINES_PREPARE)
+
+ length += len(self.MICROCODE_LINES_CYCLE_DOUBLE)
+ length += len(self.MICROCODE_LINES_CYCLE_ADD)
+
+ length += len(self.MICROCODE_LINES_CYCLE_ADD_AT_INFINITY)
+ length += len(self.MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y)
+ length += len(self.MICROCODE_LINES_CYCLE_ADD_SAME_X)
+ length += len(self.MICROCODE_LINES_CYCLE_ADD_REGULAR)
+
+ length += len(self.MICROCODE_LINES_CYCLE_K0)
+ length += len(self.MICROCODE_LINES_CYCLE_K1)
+
+ length += len(self.MICROCODE_LINES_CONVERT)
+
+ length += len(self.MICROCODE_LINES_CONVERT_AT_INFINITY)
+ length += len(self.MICROCODE_LINES_CONVERT_REGULAR)
+
+ if mode == 1: length += len(self.MICROCODE_LINES_INVERT_P256)
+ if mode == 2: length += len(self.MICROCODE_LINES_INVERT_P384)
+
+ print("Total number of micro-operations (w/o stops): %s" % (length))
+
+
+ self.__addr = 0
+
+ print("\n -=-=-=-=-=- CUT AND PASTE BELOW -=-=-=-=-=-\n")
+
+ num_mul_cycle = 0
+ num_mul_invert_p256 = 0
+ num_mul_invert_p384 = 0
+
+ offset_prepare = self.__addr;
+ print("// PREPARE");
+ for line in self.MICROCODE_LINES_PREPARE:
+ self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ offset_cycle_double = self.__addr;
+ print("// CYCLE_DOUBLE");
+ for line in self.MICROCODE_LINES_CYCLE_DOUBLE:
+ num_mul_cycle += self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ offset_cycle_add = self.__addr;
+ print("// CYCLE_ADD");
+ for line in self.MICROCODE_LINES_CYCLE_ADD:
+ num_mul_cycle += self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ offset_cycle_add_at_infinity = self.__addr;
+ print("// CYCLE_ADD_AT_INFINITY");
+ for line in self.MICROCODE_LINES_CYCLE_ADD_AT_INFINITY:
+ self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ offset_cycle_add_same_x_same_y = self.__addr;
+ print("// CYCLE_ADD_SAME_X_SAME_Y");
+ for line in self.MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y:
+ self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ offset_cycle_add_same_x = self.__addr;
+ print("// CYCLE_ADD_SAME_X");
+ for line in self.MICROCODE_LINES_CYCLE_ADD_SAME_X:
+ self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ offset_cycle_add_regular = self.__addr;
+ print("// CYCLE_ADD_REGULAR");
+ for line in self.MICROCODE_LINES_CYCLE_ADD_REGULAR:
+ num_mul_cycle += self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ offset_cycle_k0 = self.__addr;
+ print("// CYCLE_K0");
+ for line in self.MICROCODE_LINES_CYCLE_K0:
+ num_mul_cycle += self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ offset_cycle_k1 = self.__addr;
+ print("// CYCLE_K1");
+ for line in self.MICROCODE_LINES_CYCLE_K1:
+ self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ offset_convert = self.__addr;
+ print("// CONVERT");
+ for line in self.MICROCODE_LINES_CONVERT:
+ num_mul_cycle += self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ offset_convert_at_infinity = self.__addr;
+ print("// CONVERT_AT_INFINITY");
+ for line in self.MICROCODE_LINES_CONVERT_AT_INFINITY:
+ self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ offset_convert_regular = self.__addr;
+ print("// CONVERT_REGULAR");
+ for line in self.MICROCODE_LINES_CONVERT_REGULAR:
+ num_mul_cycle += self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ if mode == 1:
+ offset_invert_p256 = self.__addr;
+ print("// INVERT_P256");
+ for line in self.MICROCODE_LINES_INVERT_P256:
+ num_mul_invert_p256 += self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ if mode == 2:
+ offset_invert_p384 = self.__addr;
+ print("// INVERT_P384");
+ for line in self.MICROCODE_LINES_INVERT_P384:
+ num_mul_invert_p384 += self.__format_line(line)
+ self.__format_line(self.MICROCODE_LINE_STOP)
+
+ print("\n")
+ self.__format_offset("UOP_OFFSET_PREPARE ", offset_prepare)
+ self.__format_offset("UOP_OFFSET_CYCLE_DOUBLE ", offset_cycle_double)
+ self.__format_offset("UOP_OFFSET_CYCLE_ADD ", offset_cycle_add)
+ self.__format_offset("UOP_OFFSET_CYCLE_ADD_AT_INFINITY ", offset_cycle_add_at_infinity)
+ self.__format_offset("UOP_OFFSET_CYCLE_ADD_SAME_X_SAME_Y", offset_cycle_add_same_x_same_y)
+ self.__format_offset("UOP_OFFSET_CYCLE_ADD_SAME_X ", offset_cycle_add_same_x)
+ self.__format_offset("UOP_OFFSET_CYCLE_ADD_REGULAR ", offset_cycle_add_regular)
+ self.__format_offset("UOP_OFFSET_CYCLE_K0 ", offset_cycle_k0)
+ self.__format_offset("UOP_OFFSET_CYCLE_K1 ", offset_cycle_k1)
+ self.__format_offset("UOP_OFFSET_CONVERT ", offset_convert)
+ self.__format_offset("UOP_OFFSET_CONVERT_AT_INFINITY ", offset_convert_at_infinity)
+ self.__format_offset("UOP_OFFSET_CONVERT_REGULAR ", offset_convert_regular)
+ if mode == 1: self.__format_offset("UOP_OFFSET_INVERT_P256 ", offset_invert_p256)
+ if mode == 2: self.__format_offset("UOP_OFFSET_INVERT_P384 ", offset_invert_p384)
+
+ print("")
+ print("num_mul_cycle = %d" % num_mul_cycle)
+ print("num_mul_invert_p256 = %d" % num_mul_invert_p256)
+ print("num_mul_invert_p384 = %d" % num_mul_invert_p384)
+
+ def __format_line(self, line):
+ line = self.MICROCODE_FORMAT_LINE % (self.__addr, line)
+ print(line)
+ self.__addr += 1
+ is_mul = line.find(self.MICROCODE_V_NAME_OPCODE_MUL)
+ if is_mul >= 0: return 1
+ return 0
+
+ def __format_offset(self, name, offset):
+ print(self.MICROCODE_FORMAT_OFFSET % (name, offset))
+
+ def __try_start_parsing(self):
+ piece = self.MICROCODE_PIECE_ENUM.NONE
+ begin_marker = re.match("^/\*\s*" + self.MARKER_BEGIN_MICROCODE + "\s*" +
+ self.MATCH_GROUP_09AZ + "\s*\*/$",
+ self.__next_c_line);
+ if begin_marker:
+ piece = self.MICROCODE_PIECE_DICT[begin_marker.group(1)]
+
+ return piece
+
+ def __encode_uop(self, opcode, banks, src1, src2, dst):
+ return "{%s, %s, %s, %s, %s}" % (opcode, banks, src1, src2, dst)
+
+ def __continue_parsing(self):
+
+ end_marker = re.match("^/\*\s*" + self.MARKER_END_MICROCODE + "\s*\*/$",
+ self.__next_c_line);
+ if end_marker: return False
+
+ # cmpz?
+ uop_cmpz = re.match("^" + self.MICROCODE_C_NAME_UOP_CMPZ + "\(\s*" +
+ self.MATCH_GROUP_BANK + "\,\s*" +
+ self.MATCH_GROUP_09AZ + "\)\;$",
+ self.__next_c_line, re.IGNORECASE)
+ if uop_cmpz:
+ ok = self.__parse_uop_cmpz(uop_cmpz)
+ if ok: return True
+ else:
+ self.__print_parse_error("parse_uop_cmpz() failed!")
+ self.__abort()
+
+ # move?
+ uop_move = re.match("^" + self.MICROCODE_C_NAME_UOP_MOVE + "\(\s*" +
+ self.MATCH_GROUP_BANK + "\,\s*" +
+ self.MATCH_GROUP_09AZ + "\,\s*" +
+ self.MATCH_GROUP_BANK + "\,\s*" +
+ self.MATCH_GROUP_09AZ + "\)\;$",
+ self.__next_c_line, re.IGNORECASE)
+ if uop_move:
+ ok = self.__parse_uop_move(uop_move)
+ if ok: return True
+ else:
+ self.__print_parse_error("parse_uop_move() failed!")
+ self.__abort()
+
+ # calc?
+ uop_calc = re.match("^" + self.MATCH_GROUP_CALC + "\s*\(" +
+ self.MATCH_GROUP_MATH + "\,\s*" +
+ self.MATCH_GROUP_BANK + "\,\s*" +
+ self.MATCH_GROUP_09AZ + "\,\s*" +
+ self.MATCH_GROUP_09AZ + "\,\s*" +
+ self.MATCH_GROUP_BANK + "\,\s*" +
+ self.MATCH_GROUP_09AZ + "\)\;$",
+ self.__next_c_line, re.IGNORECASE)
+ if uop_calc:
+ ok = self.__parse_uop_calc(uop_calc)
+ if ok: return True
+ else:
+ self.__print_parse_error("parse_uop_calc() failed!")
+ self.__abort()
+
+ # cycle?
+ uop_cycle = re.match("^" + self.MICROCODE_C_NAME_UOP_CYCLE + "\(" +
+ self.MATCH_GROUP_NUMBER + "\)\;$",
+ self.__next_c_line, re.IGNORECASE)
+ if uop_cycle:
+ ok = self.__parse_uop_cycle(uop_cycle)
+ if ok: return True
+ else:
+ self.__print_parse_error("parse_uop_cycle() failed!")
+ self.__abort()
+
+ # repeat?
+ uop_repeat = re.match("^" + self.MICROCODE_C_NAME_UOP_REPEAT + "\(" + "\)\;$",
+ self.__next_c_line, re.IGNORECASE)
+ if uop_repeat:
+ ok = self.__parse_uop_repeat()
+ if ok: return True
+ else:
+ self.__print_parse_error("__parse_uop_repeat() failed!")
+ self.__abort()
+
+ self.__print_parse_error("unknown micro-operation!")
+ self.__abort()
+
+ def __check_math(self, math):
+ if not math in self.MICROCODE_MATH_DICT:
+ print_parse_error("bad math!")
+ return False
+
+ return True
+
+ def __check_banks(self, src, dst):
+ if not src in self.MICROCODE_BANK_DICT:
+ print_parse_error("bad src bank!")
+ return False
+
+ if not dst in self.MICROCODE_BANK_DICT:
+ print_parse_error("bad dst bank!")
+ return False
+
+ if src == dst:
+ print_parse_error("src bank == dst bank!")
+ return False
+
+ return True
+
+ def __check_bank(self, src):
+ if not src in self.MICROCODE_BANK_DICT:
+ print_parse_error("bad src bank!")
+ return False
+
+ return True
+
+ def __check_op3(self, src1, src2, dst):
+
+ if not src1 in self.MICROCODE_OPERAND_DICT:
+ self.__print_parse_error("bad src1 operand!")
+ return False
+
+ if src2 != "" and not src2 in self.MICROCODE_OPERAND_DICT:
+ self.__print_parse_error("bad src2 operand!")
+ return False
+
+ if dst != "" and not dst in self.MICROCODE_OPERAND_DICT:
+ self.__print_parse_error("bad dst operand!")
+ return False
+
+ return True
+
+ def __check_op2(self, src, dst):
+ return self.__check_op3(src, "", dst)
+
+ def __check_op1(self, src):
+ return self.__check_op2(src, "")
+
+ def __parse_uop_move(self, params):
+ bank_src = params.group(1)
+ bank_dst = params.group(3)
+ op_src = params.group(2)
+ op_dst = params.group(4)
+
+ if not self.__check_banks(bank_src, bank_dst):
+ self.__print_parse_error("check_banks() failed!")
+ return False
+
+ if not self.__check_op2(op_src, op_dst):
+ self.__print_parse_error("check_op2() failed!")
+ return False
+
+ opcode = self.MICROCODE_V_NAME_OPCODE_MOVE
+ bank = self.MICROCODE_BANK_DICT[bank_src]
+ src1 = self.MICROCODE_OPERAND_DICT[op_src]
+ src2 = self.MICROCODE_V_NAME_OPERAND_DONTCARE
+ dst = self.MICROCODE_OPERAND_DICT[op_dst]
+
+ data = self.__encode_uop(opcode, bank, src1, src2, dst)
+ self.__store_uop(data)
+ return True
+
+ def __parse_uop_cmpz(self, params):
+ bank_src = params.group(1)
+ op_src = params.group(2)
+
+ if not self.__check_bank(bank_src):
+ self.__print_parse_error("check_bank() failed!")
+ return False
+
+ if not self.__check_op1(op_src):
+ self.__print_parse_error("check_op1() failed!")
+ return False
+
+ opcode = self.MICROCODE_V_NAME_OPCODE_CMPZ
+ bank = self.MICROCODE_BANK_DICT[bank_src]
+ src1 = self.MICROCODE_OPERAND_DICT[op_src]
+ src2 = self.MICROCODE_V_NAME_OPERAND_DONTCARE
+ dst = self.MICROCODE_V_NAME_OPERAND_DONTCARE
+
+ data = self.__encode_uop(opcode, bank, src1, src2, dst)
+ self.__store_uop(data)
+ return True
+
+ def __parse_uop_calc(self, params):
+ calc = params.group(1)
+ math = params.group(2)
+ bank_src = params.group(3)
+ bank_dst = params.group(6)
+ op_src1 = params.group(4)
+ op_src2 = params.group(5)
+ op_dst = params.group(7)
+
+ if not self.__check_math(math):
+ self.__print_parse_error("check_calc() failed!")
+ return False
+
+ if not self.__check_banks(bank_src, bank_dst):
+ self.__print_parse_error("check_banks() failed!")
+ return False
+
+ if not self.__check_op3(op_src1, op_src2, op_dst):
+ self.__print_parse_error("check_op3() failed!")
+ return False
+
+ opcode = self.MICROCODE_MATH_DICT[math]
+ banks = self.MICROCODE_BANK_DICT[bank_src]
+ src1 = self.MICROCODE_OPERAND_DICT[op_src1]
+ src2 = self.MICROCODE_OPERAND_DICT[op_src2]
+ dst = self.MICROCODE_OPERAND_DICT[op_dst]
+
+ data = self.__encode_uop(opcode, banks, src1, src2, dst)
+ if calc == self.MICROCODE_C_NAME_UOP_CALC: self.__store_uop(data)
+ elif calc == self.MICROCODE_C_NAME_UOP_CALC_EVEN: self.__loop_even = data
+ elif calc == self.MICROCODE_C_NAME_UOP_CALC_ODD: self.__loop_odd = data
+ else: return False
+
+ return True
+
+ def __parse_uop_cycle(self, params):
+ self.__loop_iters = int(params.group(1))
+ return True
+
+ def __parse_uop_repeat(self):
+ print(" Unrolling loop (%d iters)..." % (self.__loop_iters))
+
+ for i in range(0, self.__loop_iters):
+ if i % 2 == 0: self.__store_uop(self.__loop_even)
+ else: self.__store_uop(self.__loop_odd)
+
+ return True
+
+ def __store_uop(self, data):
+ #print("\t" + data)
+ if self.__current_piece == self.MICROCODE_PIECE_ENUM.PREPARE: self.MICROCODE_LINES_PREPARE.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_DOUBLE: self.MICROCODE_LINES_CYCLE_DOUBLE.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD: self.MICROCODE_LINES_CYCLE_ADD.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD_AT_INFINITY: self.MICROCODE_LINES_CYCLE_ADD_AT_INFINITY.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD_SAME_X_SAME_Y: self.MICROCODE_LINES_CYCLE_ADD_SAME_X_SAME_Y.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD_SAME_X: self.MICROCODE_LINES_CYCLE_ADD_SAME_X.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_ADD_REGULAR: self.MICROCODE_LINES_CYCLE_ADD_REGULAR.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_K0: self.MICROCODE_LINES_CYCLE_K0.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CYCLE_K1: self.MICROCODE_LINES_CYCLE_K1.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.INVERT_P256: self.MICROCODE_LINES_INVERT_P256.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.INVERT_P384: self.MICROCODE_LINES_INVERT_P384.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CONVERT: self.MICROCODE_LINES_CONVERT.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CONVERT_AT_INFINITY: self.MICROCODE_LINES_CONVERT_AT_INFINITY.append(data)
+ elif self.__current_piece == self.MICROCODE_PIECE_ENUM.CONVERT_REGULAR: self.MICROCODE_LINES_CONVERT_REGULAR.append(data)
+
+ def __print_parse_error(self, msg):
+ print("PARSE ERROR: %s" % (msg))
+
+ def __abort(self):
+ sys.exit("Stopped at line #%d:\n%s" % (self.__line_num, self.__next_c_line))
+
+
+# -----------------------------------------------------------------------------
+def main(filenames):
+# -----------------------------------------------------------------------------
+ parser = MICROCODE_PARSER(filenames)
+ parser.parse()
+ parser.format()
+
+# -----------------------------------------------------------------------------
+if __name__ == "__main__":
+# -----------------------------------------------------------------------------
+ main(C_FILES)
+
+#
+# End-of-File
+#
diff --git a/ecdsa_model.h b/ecdsa_model.h
deleted file mode 100644
index fc7e571..0000000
--- a/ecdsa_model.h
+++ /dev/null
@@ -1,205 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// ecdsa_model.h
-// --------------------------------------------
-// Base point scalar multiplier model for ECDSA
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-//
-// Curve Selection
-//
-// USE_CURVE == 1 -> P-256
-// USE_CURVE == 2 -> P-384
-//
-//------------------------------------------------------------------------------
-#define USE_CURVE 2
-
-
-//------------------------------------------------------------------------------
-// Model Parameters
-//------------------------------------------------------------------------------
-#if USE_CURVE == 1
-#define OPERAND_WIDTH (256) // largest supported operand width in bits
-#elif USE_CURVE == 2
-#define OPERAND_WIDTH (384) // largest supported operand width in bits
-#else
-#error USE_CURVE must be either 1 or 2!
-#endif
-
-
-//------------------------------------------------------------------------------
-// P-256 Parameters and Test Vectors
-//------------------------------------------------------------------------------
-
-/* Field Size */
-#define P_256_Q {0xffffffff, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff}
-
-/* Generic Numbers */
-#define P_256_ZERO {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}
-#define P_256_ONE {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001}
-
-/* Division Factor */
-#define P_256_DELTA {0x7fffffff, 0x80000000, 0x80000000, 0x00000000, 0x00000000, 0x80000000, 0x00000000, 0x00000000}
-
-/* Base Point */
-#define P_256_G_X {0x6b17d1f2, 0xe12c4247, 0xf8bce6e5, 0x63a440f2, 0x77037d81, 0x2deb33a0, 0xf4a13945, 0xd898c296}
-#define P_256_G_Y {0x4fe342e2, 0xfe1a7f9b, 0x8ee7eb4a, 0x7c0f9e16, 0x2bce3357, 0x6b315ece, 0xcbb64068, 0x37bf51f5}
-
-/* Doubled Base Point */
-#define P_256_H_X {0x29d05c19, 0x3da77b71, 0x0e863235, 0x38b77e1b, 0x11f904fe, 0xa42998be, 0x16bd8d74, 0x4ece7ad0}
-#define P_256_H_Y {0xb01cbd1c, 0x01e58065, 0x711814b5, 0x83f061e9, 0xd431cca9, 0x94cea131, 0x3449bf97, 0xc840ae07}
-
-/* Base Point Order */
-#define P_256_N {0xffffffff, 0x00000000, 0xffffffff, 0xffffffff, 0xbce6faad, 0xa7179e84, 0xf3b9cac2, 0xfc632551}
-
-/* Private Key */
-#define P_256_D {0x70a12c2d, 0xb16845ed, 0x56ff68cf, 0xc21a472b, 0x3f04d7d6, 0x851bf634, 0x9f2d7d5b, 0x3452b38a}
-
-/* Per-message Random Number */
-#define P_256_K {0x580ec00d, 0x85643433, 0x4cef3f71, 0xecaed496, 0x5b12ae37, 0xfa47055b, 0x1965c7b1, 0x34ee45d0}
-
-/* Public Key */
-#define P_256_Q_X {0x8101ece4, 0x7464a6ea, 0xd70cf69a, 0x6e2bd3d8, 0x8691a326, 0x2d22cba4, 0xf7635eaf, 0xf26680a8}
-#define P_256_Q_Y {0xd8a12ba6, 0x1d599235, 0xf67d9cb4, 0xd58f1783, 0xd3ca43e7, 0x8f0a5aba, 0xa6240799, 0x36c0c3a9}
-
-/* Part of Signature */
-#define P_256_R_X {0x7214bc96, 0x47160bbd, 0x39ff2f80, 0x533f5dc6, 0xddd70ddf, 0x86bb8156, 0x61e805d5, 0xd4e6f27c}
-#define P_256_R_Y {0x8b81e3e9, 0x77597110, 0xc7cf2633, 0x435b2294, 0xb7264298, 0x7defd3d4, 0x007e1cfc, 0x5df84541}
-
-
-//------------------------------------------------------------------------------
-// P-384 Parameters and Test Vectors
-//------------------------------------------------------------------------------
-
-/* Field Size */
-#define P_384_Q {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xfffffffe, 0xffffffff, 0x00000000, 0x00000000, 0xffffffff}
-
-/* Generic Numbers */
-#define P_384_ZERO {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}
-#define P_384_ONE {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001}
-
-/* Division Factor */
-#define P_384_DELTA {0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff, 0x80000000, 0x00000000, 0x80000000}
-
-/* Base Point */
-#define P_384_G_X {0xaa87ca22, 0xbe8b0537, 0x8eb1c71e, 0xf320ad74, 0x6e1d3b62, 0x8ba79b98, 0x59f741e0, 0x82542a38, 0x5502f25d, 0xbf55296c, 0x3a545e38, 0x72760ab7}
-#define P_384_G_Y {0x3617de4a, 0x96262c6f, 0x5d9e98bf, 0x9292dc29, 0xf8f41dbd, 0x289a147c, 0xe9da3113, 0xb5f0b8c0, 0x0a60b1ce, 0x1d7e819d, 0x7a431d7c, 0x90ea0e5f}
-
-/* Doubled Base Point */
-#define P_384_H_X {0xaaf06bba, 0x82e9f590, 0xe29c71c2, 0x19bea517, 0x23c5893a, 0xe8b0c8cf, 0x4c117c3e, 0xfb57ab8d, 0x55fa1b42, 0x8155ad27, 0x8b574391, 0x1b13ea8a}
-#define P_384_H_Y {0xc9e821b5, 0x69d9d390, 0xa2616740, 0x6d6d23d6, 0x070be242, 0xd765eb83, 0x1625ceec, 0x4a0f473e, 0xf59f4e30, 0xe2817e62, 0x85bce284, 0x6f15f19d}
-
-/* Base Point Order */
-#define P_384_N {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xc7634d81, 0xf4372ddf, 0x581a0db2, 0x48b0a77a, 0xecec196a, 0xccc52973}
-
-/* Private Key */
-#define P_384_D {0xc838b852, 0x53ef8dc7, 0x394fa580, 0x8a518398, 0x1c7deef5, 0xa69ba8f4, 0xf2117ffe, 0xa39cfcd9, 0x0e95f6cb, 0xc854abac, 0xab701d50, 0xc1f3cf24}
-
-/* Per-message Random Number */
-#define P_384_K {0xdc6b4403, 0x6989a196, 0xe39d1cda, 0xc000812f, 0x4bdd8b2d, 0xb41bb33a, 0xf5137258, 0x5ebd1db6, 0x3f0ce827, 0x5aa1fd45, 0xe2d2a735, 0xf8749359}
-
-/* Public Key */
-#define P_384_Q_X {0x1fbac8ee, 0xbd0cbf35, 0x640b39ef, 0xe0808dd7, 0x74debff2, 0x0a2a329e, 0x91713baf, 0x7d7f3c3e, 0x81546d88, 0x3730bee7, 0xe48678f8, 0x57b02ca0}
-#define P_384_Q_Y {0xeb213103, 0xbd68ce34, 0x3365a8a4, 0xc3d4555f, 0xa385f533, 0x0203bdd7, 0x6ffad1f3, 0xaffb9575, 0x1c132007, 0xe1b24035, 0x3cb0a4cf, 0x1693bdf9}
-
-/* Part of Signature */
-#define P_384_R_X {0xa0c27ec8, 0x93092dea, 0x1e1bd2cc, 0xfed3cf94, 0x5c8134ed, 0x0c9f8131, 0x1a0f4a05, 0x942db8db, 0xed8dd59f, 0x267471d5, 0x462aa14f, 0xe72de856}
-#define P_384_R_Y {0x85564940, 0x9815bb91, 0x424eaca5, 0xfd76c973, 0x75d575d1, 0x422ec53d, 0x343bd33b, 0x847fdf0c, 0x11569685, 0xb528ab25, 0x49301542, 0x8d7cf72b}
-
-
-//------------------------------------------------------------------------------
-// Parameter and Test Vector Selection
-//------------------------------------------------------------------------------
-#if USE_CURVE == 1
-
-#define ECDSA_Q P_256_Q
-
-#define ECDSA_ZERO P_256_ZERO
-#define ECDSA_ONE P_256_ONE
-
-#define ECDSA_DELTA P_256_DELTA
-
-#define ECDSA_G_X P_256_G_X
-#define ECDSA_G_Y P_256_G_Y
-
-#define ECDSA_H_X P_256_H_X
-#define ECDSA_H_Y P_256_H_Y
-
-#define ECDSA_N P_256_N
-#define ECDSA_D P_256_D
-#define ECDSA_K P_256_K
-
-#define ECDSA_Q_X P_256_Q_X
-#define ECDSA_Q_Y P_256_Q_Y
-
-#define ECDSA_R_X P_256_R_X
-#define ECDSA_R_Y P_256_R_Y
-
-#elif USE_CURVE == 2
-
-#define ECDSA_Q P_384_Q
-
-#define ECDSA_ZERO P_384_ZERO
-#define ECDSA_ONE P_384_ONE
-
-#define ECDSA_DELTA P_384_DELTA
-
-#define ECDSA_G_X P_384_G_X
-#define ECDSA_G_Y P_384_G_Y
-
-#define ECDSA_H_X P_384_H_X
-#define ECDSA_H_Y P_384_H_Y
-
-#define ECDSA_N P_384_N
-#define ECDSA_D P_384_D
-#define ECDSA_K P_384_K
-
-#define ECDSA_Q_X P_384_Q_X
-#define ECDSA_Q_Y P_384_Q_Y
-
-#define ECDSA_R_X P_384_R_X
-#define ECDSA_R_Y P_384_R_Y
-
-#else
-
-#error USE_CURVE must be either 1 or 2!
-
-#endif
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/ecdsa_model_fpga.cpp b/ecdsa_model_fpga.cpp
deleted file mode 100644
index 8ad0962..0000000
--- a/ecdsa_model_fpga.cpp
+++ /dev/null
@@ -1,414 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// ecdsa_model_fpga.cpp
-// --------------------------------------------
-// Base point scalar multiplier model for ECDSA
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdio.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-#include "fpga_modular.h"
-#include "fpga_curve.h"
-#include "fpga_util.h"
-
-
-//------------------------------------------------------------------------------
-// Prototypes
-//------------------------------------------------------------------------------
-void fpga_model_init ();
-bool test_base_point_multiplier (FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy);
-
-bool abuse_internal_point_adder ();
-bool abuse_internal_point_doubler ();
-
-void print_fpga_buffer (const char *s, FPGA_BUFFER *v);
-
-bool compare_fpga_buffers (FPGA_BUFFER *ax, FPGA_BUFFER *ay, FPGA_BUFFER *bx, FPGA_BUFFER *by);
-bool compare_fpga_buffers (FPGA_BUFFER *ax, FPGA_BUFFER *ay, FPGA_BUFFER *az, FPGA_BUFFER *bx, FPGA_BUFFER *by, FPGA_BUFFER *bz);
-
-
-//------------------------------------------------------------------------------
-// Locals
-//------------------------------------------------------------------------------
-static FPGA_BUFFER ecdsa_d;
-static FPGA_BUFFER ecdsa_k;
-static FPGA_BUFFER ecdsa_n;
-
-
-//------------------------------------------------------------------------------
-int main()
-//------------------------------------------------------------------------------
-{
- bool ok; // flag
-
- //
- // initialize buffers
- //
- fpga_model_init();
- fpga_modular_init();
- fpga_curve_init();
-
-
- //
- // test base point multiplier: Q = d * G
- //
- printf("Trying to derive public key from private key...\n\n");
- ok = test_base_point_multiplier(&ecdsa_d, &ecdsa_q_x, &ecdsa_q_y);
- if (!ok) return EXIT_FAILURE;
-
-
- //
- // test base point multiplier: R = k * G
- //
- printf("Trying to sign something...\n\n");
- ok = test_base_point_multiplier(&ecdsa_k, &ecdsa_r_x, &ecdsa_r_y);
- if (!ok) return EXIT_FAILURE;
-
-
- //
- // test base point multiplier: O = n * G
- //
- printf("Trying to multiply the base point by its order...\n\n");
- ok = test_base_point_multiplier(&ecdsa_n, &ecdsa_zero, &ecdsa_zero);
- if (!ok) return EXIT_FAILURE;
-
-
- //
- // try to abuse internal point doubler
- //
- ok = abuse_internal_point_doubler();
- if (!ok) return EXIT_FAILURE;
-
-
- //
- // try to abuse internal point adder
- //
- ok = abuse_internal_point_adder();
- if (!ok) return EXIT_FAILURE;
-
-
- //
- // everything went just fine
- //
- return EXIT_SUCCESS;
-}
-
-
-//------------------------------------------------------------------------------
-void fpga_model_init()
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- FPGA_BUFFER tmp_d = ECDSA_D;
- FPGA_BUFFER tmp_k = ECDSA_K;
- FPGA_BUFFER tmp_n = ECDSA_N;
-
- /* fill buffers for large multi-word integers */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- { ecdsa_d.words[w] = tmp_d.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_k.words[w] = tmp_k.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_n.words[w] = tmp_n.words[OPERAND_NUM_WORDS - (w+1)];
- }
-}
-
-
-//------------------------------------------------------------------------------
-bool test_base_point_multiplier(FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy)
-//------------------------------------------------------------------------------
-//
-// k - multiplier
-//
-// qx, qy - expected coordinates of product
-//
-// Returns true when point (rx,ry) = k * G matches the point (qx,qy).
-//
-//------------------------------------------------------------------------------
-{
- bool ok; // flag
- FPGA_BUFFER rx, ry; // result
-
- /* run the model */
- fpga_curve_scalar_multiply(k, &rx, &ry);
-
- /* handle result */
- ok = compare_fpga_buffers(qx, qy, &rx, &ry);
- if (!ok)
- { printf("\n ERROR\n\n");
- return false;
- }
- else printf("\n OK\n\n");
-
- // everything went just fine
- return true;
-}
-
-
-//------------------------------------------------------------------------------
-bool abuse_internal_point_doubler()
-//------------------------------------------------------------------------------
-//
-// This routine tries to abuse the internal curve point doubler by forcing it
-// to double point at infinity.
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- bool ok; // flag
-
- FPGA_BUFFER px, py, pz; // input
- FPGA_BUFFER qx, qy, qz; // output
-
- // set P.X and P.Y to some "random" garbage and P.Z to zero
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- { px.words[w] = ecdsa_g_x.words[w] ^ ecdsa_h_x.words[w];
- py.words[w] = ecdsa_g_y.words[w] ^ ecdsa_h_y.words[w];
- }
- fpga_buffer_copy(&ecdsa_zero, &pz);
-
- // try to double point at infinity (should produce point at infinity)
- printf("Trying to double something at infinity...\n\n");
- fpga_curve_double_jacobian(&px, &py, &pz, &qx, &qy, &qz);
-
- // handle result
- ok = compare_fpga_buffers(&ecdsa_one, &ecdsa_one, &ecdsa_zero, &qx, &qy, &qz);
- if (! ok)
- { printf("\n ERROR\n\n");
- return false;
- }
- else printf("\n OK\n\n");
-
- // everything went just fine
- return true;
-}
-
-
-//------------------------------------------------------------------------------
-bool abuse_internal_point_adder()
-//------------------------------------------------------------------------------
-//
-// This routine tries to abuse the internal curve point adder by forcing it to
-// go throgh all the possible "corner cases".
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- bool ok; // flag
-
- FPGA_BUFFER px, py, pz; // input
- FPGA_BUFFER rx, ry, rz; // output
-
- //
- // try to add point at infinity to the base point
- //
- {
- // set P.X and P.Y to some "random" garbage and P.Z to zero
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- { px.words[w] = ecdsa_g_x.words[w] ^ ecdsa_h_x.words[w];
- py.words[w] = ecdsa_g_y.words[w] ^ ecdsa_h_y.words[w];
- }
- fpga_buffer_copy(&ecdsa_zero, &pz);
-
- // run addition proceduce
- printf("Trying to add something at infinity to the base point...\n\n");
- fpga_curve_add_jacobian(&px, &py, &pz, &rx, &ry, &rz);
-
- // handle result
- ok = compare_fpga_buffers(&ecdsa_g_x, &ecdsa_g_y, &ecdsa_one, &rx, &ry, &rz);
- if (! ok)
- { printf("\n ERROR\n\n");
- return false;
- }
- else printf("\n OK\n\n");
- }
-
- //
- // try to add the base point to itself
- //
- {
- // set P to G
- fpga_buffer_copy(&ecdsa_g_x, &px);
- fpga_buffer_copy(&ecdsa_g_y, &py);
- fpga_buffer_copy(&ecdsa_one, &pz);
-
- // run addition proceduce
- printf("Trying to add the base point to itself...\n\n");
- fpga_curve_add_jacobian(&px, &py, &pz, &rx, &ry, &rz);
-
- // handle result
- ok = compare_fpga_buffers(&ecdsa_h_x, &ecdsa_h_y, &ecdsa_one, &rx, &ry, &rz);
- if (! ok)
- { printf("\n ERROR\n\n");
- return false;
- }
- else printf("\n OK\n\n");
- }
-
- //
- // try to add the base point to its opposite
- //
- {
- // set P to (G.X, -G.Y, 1)
- fpga_buffer_copy(&ecdsa_zero, &px);
- fpga_buffer_copy(&ecdsa_zero, &py);
- fpga_buffer_copy(&ecdsa_one, &pz);
-
- fpga_modular_add(&ecdsa_zero, &ecdsa_g_x, &px);
- fpga_modular_sub(&ecdsa_zero, &ecdsa_g_y, &py);
-
- // run addition proceduce
- printf("Trying to add the base point to its opposite...\n\n");
- fpga_curve_add_jacobian(&px, &py, &pz, &rx, &ry, &rz);
-
- // handle result
- ok = compare_fpga_buffers(&ecdsa_one, &ecdsa_one, &ecdsa_zero, &rx, &ry, &rz);
- if (! ok)
- { printf("\n ERROR\n\n");
- return false;
- }
- else printf("\n OK\n\n");
- }
-
- // everything went just fine
- return true;
-}
-
-
-//------------------------------------------------------------------------------
-void print_fpga_buffer(const char *s, FPGA_BUFFER *buf)
-//------------------------------------------------------------------------------
-//
-// Pretty print large multi-word integer.
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- // print header
- printf("%s", s);
-
- // print all bytes
- for (w=OPERAND_NUM_WORDS; w>0; w--)
- {
- printf("%08x", buf->words[w-1]);
-
- // insert space after every group of 4 bytes
- if (w > 1) printf(" ");
- }
-
- // print footer
- printf("\n");
-}
-
-
-//------------------------------------------------------------------------------
-bool compare_fpga_buffers(FPGA_BUFFER *ax, FPGA_BUFFER *ay, FPGA_BUFFER *bx, FPGA_BUFFER *by)
-//------------------------------------------------------------------------------
-//
-// Compare affine coordinates of two points and return true when they match.
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- // print all the values
- print_fpga_buffer(" Expected: X = ", ax);
- print_fpga_buffer(" Calculated: X = ", bx);
- printf("\n");
- print_fpga_buffer(" Expected: Y = ", ay);
- print_fpga_buffer(" Calculated: Y = ", by);
-
- // compare values
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- {
- // compare x
- if (ax->words[w] != bx->words[w]) return false;
-
- // compare y
- if (ay->words[w] != by->words[w]) return false;
- }
-
- // values are the same
- return true;
-}
-
-
-//------------------------------------------------------------------------------
-bool compare_fpga_buffers(FPGA_BUFFER *ax, FPGA_BUFFER *ay, FPGA_BUFFER *az, FPGA_BUFFER *bx, FPGA_BUFFER *by, FPGA_BUFFER *bz)
-//------------------------------------------------------------------------------
-//
-// Compare projective coordinates of two points and return true when they match.
-//
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- // print all the values
- print_fpga_buffer(" Expected: X = ", ax);
- print_fpga_buffer(" Calculated: X = ", bx);
- printf("\n");
- print_fpga_buffer(" Expected: Y = ", ay);
- print_fpga_buffer(" Calculated: Y = ", by);
- printf("\n");
- print_fpga_buffer(" Expected: Z = ", az);
- print_fpga_buffer(" Calculated: Z = ", bz);
-
- // compare values
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- {
- // compare x
- if (ax->words[w] != bx->words[w]) return false;
-
- // compare y
- if (ay->words[w] != by->words[w]) return false;
-
- // compare z
- if (az->words[w] != bz->words[w]) return false;
- }
-
- // values are the same
- return true;
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/fpga_curve.cpp b/fpga_curve.cpp
deleted file mode 100644
index 9cc8ec0..0000000
--- a/fpga_curve.cpp
+++ /dev/null
@@ -1,340 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// fpga_curve.cpp
-// ------------------------------------
-// Elliptic curve arithmetic procedures
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdint.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-#include "fpga_modular.h"
-#include "fpga_curve.h"
-#include "fpga_util.h"
-
-
-//------------------------------------------------------------------------------
-// Globals
-//------------------------------------------------------------------------------
-FPGA_BUFFER ecdsa_g_x, ecdsa_g_y;
-FPGA_BUFFER ecdsa_h_x, ecdsa_h_y;
-FPGA_BUFFER ecdsa_q_x, ecdsa_q_y;
-FPGA_BUFFER ecdsa_r_x, ecdsa_r_y;
-
-
-//------------------------------------------------------------------------------
-void fpga_curve_init()
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- FPGA_BUFFER tmp_g_x = ECDSA_G_X, tmp_g_y = ECDSA_G_Y;
- FPGA_BUFFER tmp_h_x = ECDSA_H_X, tmp_h_y = ECDSA_H_Y;
- FPGA_BUFFER tmp_q_x = ECDSA_Q_X, tmp_q_y = ECDSA_Q_Y;
- FPGA_BUFFER tmp_r_x = ECDSA_R_X, tmp_r_y = ECDSA_R_Y;
-
- /* fill buffers for large multi-word integers */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- { ecdsa_g_x.words[w] = tmp_g_x.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_g_y.words[w] = tmp_g_y.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_h_x.words[w] = tmp_h_x.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_h_y.words[w] = tmp_h_y.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_r_x.words[w] = tmp_r_x.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_r_y.words[w] = tmp_r_y.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_q_x.words[w] = tmp_q_x.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_q_y.words[w] = tmp_q_y.words[OPERAND_NUM_WORDS - (w+1)];
- }
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Elliptic curve point doubling routine.
-//
-// R(rx,ry,rz) = 2 * P(px,py,pz)
-//
-// Note, that P(px,py,pz) is supposed to be in projective Jacobian coordinates,
-// R will be in projective Jacobian coordinates.
-//
-// This routine implements algorithm 3.21 from "Guide to Elliptic Curve
-// Cryptography", the only difference is that step 6. does T1 = T2 + T2 and
-// then T2 = T2 + T1 instead of T2 = 3 * T2, because our addition is much
-// faster, than multiplication.
-//
-// Note, that this routine also handles one special case, namely when P is at
-// infinity.
-//
-// Instead of actual modular division, multiplication by pre-computed constant
-// (2^-1 mod q) is done.
-//
-// Note, that FPGA modular multiplier can't multiply a given buffer by itself,
-// this way it's impossible to do eg. fpga_modular_mul(pz, pz, &t1). To overcome
-// the problem the algorithm was modified to do fpga_buffer_copy(pz, &t1) and
-// then fpga_modular_mul(pz, &t1, &t1) instead.
-//
-// WARNING: Though this procedure always does doubling steps, it does not take
-// any active measures to keep run-time constant. The main purpose of this
-// model is to help debug Verilog code for FPGA, so *DO NOT* use is anywhere
-// near production!
-//
-//------------------------------------------------------------------------------
-void fpga_curve_double_jacobian(FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *rx, FPGA_BUFFER *ry, FPGA_BUFFER *rz)
-//------------------------------------------------------------------------------
-{
- FPGA_BUFFER t1, t2, t3; // temporary variables
-
- // check, whether P is at infinity
- bool pz_is_zero = fpga_buffer_is_zero(pz);
-
- /* 2. */ fpga_buffer_copy(pz, &t1);
- fpga_modular_mul(pz, &t1, &t1);
- /* 3. */ fpga_modular_sub(px, &t1, &t2);
- /* 4. */ fpga_modular_add(px, &t1, &t1);
- /* 5. */ fpga_modular_mul(&t1, &t2, &t2);
- /* 6. */ fpga_modular_add(&t2, &t2, &t1);
- /* */ fpga_modular_add(&t1, &t2, &t2);
- /* 7. */ fpga_modular_add(py, py, ry);
- /* 8. */ fpga_modular_mul(pz, ry, rz);
- /* 9. */ fpga_buffer_copy(ry, &t1);
- fpga_buffer_copy(ry, &t3);
- fpga_modular_mul(&t1, &t3, ry);
- /* 10. */ fpga_modular_mul(px, ry, &t3);
- /* 11. */ fpga_buffer_copy(ry, &t1);
- fpga_modular_mul(ry, &t1, &t1);
- /* 12. */ fpga_modular_mul(&t1, &ecdsa_delta, ry);
- /* 13. */ fpga_buffer_copy(&t2, &t1);
- fpga_modular_mul(&t1, &t2, rx);
- /* 14. */ fpga_modular_add(&t3, &t3, &t1);
- /* 15. */ fpga_modular_sub(rx, &t1, rx);
- /* 16. */ fpga_modular_sub(&t3, rx, &t1);
- /* 17. */ fpga_modular_mul(&t1, &t2, &t1);
- /* 18. */ fpga_modular_sub(&t1, ry, ry);
-
- // handle special case (input point is at infinity)
- if (pz_is_zero)
- { fpga_buffer_copy(&ecdsa_one, rx);
- fpga_buffer_copy(&ecdsa_one, ry);
- fpga_buffer_copy(&ecdsa_zero, rz);
- }
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Elliptic curve point addition routine.
-//
-// R(rx,ry,rz) = P(px,py,pz) + Q(qx,qy)
-//
-// Note, that P(px, py, pz) is supposed to be in projective Jacobian
-// coordinates, while Q(qx,qy) is supposed to be in affine coordinates,
-// R(rx, ry, rz) will be in projective Jacobian coordinates. Moreover, in this
-// particular implementation Q is always the base point G.
-//
-// This routine implements algorithm 3.22 from "Guide to Elliptic Curve
-// Cryptography". Differences from the original algorithm:
-//
-// 1) Step 1. is omitted, because point Q is always the base point, which is
-// not at infinity by definition.
-//
-// 2) Step 9.1 just returns the pre-computed double of the base point instead
-// of actually doubling it.
-//
-// Note, that this routine also handles three special cases:
-//
-// 1) P is at infinity
-// 2) P == Q
-// 3) P == -Q
-//
-// Note, that FPGA modular multiplier can't multiply a given buffer by itself,
-// this way it's impossible to do eg. fpga_modular_mul(pz, pz, &t1). To overcome
-// the problem the algorithm was modified to do fpga_buffer_copy(pz, &t1) and
-// then fpga_modular_mul(pz, &t1, &t1) instead.
-//
-// WARNING: This procedure does not take any active measures to keep run-time
-// constant. The main purpose of this model is to help debug Verilog code for
-// FPGA, so *DO NOT* use is anywhere near production!
-//
-//------------------------------------------------------------------------------
-void fpga_curve_add_jacobian(FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *rx, FPGA_BUFFER *ry, FPGA_BUFFER *rz)
-//------------------------------------------------------------------------------
-{
- FPGA_BUFFER t1, t2, t3, t4; // temporary variables
-
- bool pz_is_zero = fpga_buffer_is_zero(pz); // Step 2.
-
- /* 3. */ fpga_buffer_copy(pz, &t1);
- fpga_modular_mul(pz, &t1, &t1);
- /* 4. */ fpga_modular_mul(pz, &t1, &t2);
- /* 5. */ fpga_modular_mul(&t1, &ecdsa_g_x, &t1);
- /* 6. */ fpga_modular_mul(&t2, &ecdsa_g_y, &t2);
- /* 7. */ fpga_modular_sub(&t1, px, &t1);
- /* 8. */ fpga_modular_sub(&t2, py, &t2);
-
- bool t1_is_zero = fpga_buffer_is_zero(&t1); // | Step 9.
- bool t2_is_zero = fpga_buffer_is_zero(&t2); // |
-
- /* 10. */ fpga_modular_mul(pz, &t1, rz);
- /* 11. */ fpga_buffer_copy(&t1, &t3);
- fpga_modular_mul(&t1, &t3, &t3);
- /* 12. */ fpga_modular_mul(&t1, &t3, &t4);
- /* 13. */ fpga_modular_mul(px, &t3, &t3);
- /* 14. */ fpga_modular_add(&t3, &t3, &t1);
- /* 15. */ fpga_buffer_copy(&t2, rx);
- fpga_modular_mul(rx, &t2, rx);
- /* 16. */ fpga_modular_sub(rx, &t1, rx);
- /* 17. */ fpga_modular_sub(rx, &t4, rx);
- /* 18. */ fpga_modular_sub(&t3, rx, &t3);
- /* 19. */ fpga_modular_mul(&t2, &t3, &t3);
- /* 20. */ fpga_modular_mul(py, &t4, &t4);
- /* 21. */ fpga_modular_sub(&t3, &t4, ry);
-
- //
- // final selection
- //
- if (pz_is_zero) // P at infinity ?
- { fpga_buffer_copy(&ecdsa_g_x, rx);
- fpga_buffer_copy(&ecdsa_g_y, ry);
- fpga_buffer_copy(&ecdsa_one, rz);
- }
- else if (t1_is_zero) // same x for P and Q ?
- {
- fpga_buffer_copy(t2_is_zero ? &ecdsa_h_x : &ecdsa_one, rx); // | same y ? (P==Q => R=2*G) : (P==-Q => R=O)
- fpga_buffer_copy(t2_is_zero ? &ecdsa_h_y : &ecdsa_one, ry); // |
- fpga_buffer_copy(t2_is_zero ? &ecdsa_one : &ecdsa_zero, rz); // |
- }
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Conversion from projective Jacobian to affine coordinates.
-//
-// P(px,py,pz) -> Q(qx,qy)
-//
-// Note, that qx = px / Z^2 and qy = py / Z^3. Division in modular arithmetic
-// is equivalent to multiplication by the inverse value of divisor, so
-// qx = px * (pz^-1)^2 and qy = py * (pz^-1)^3.
-//
-// Note, that this procedure does *NOT* handle points at infinity correctly. It
-// can only be called from the base point multiplication routine, that
-// specifically makes sure that P is not at infinity, so pz will always be
-// non-zero value.
-//
-//------------------------------------------------------------------------------
-void fpga_curve_point_to_affine(FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *qx, FPGA_BUFFER *qy)
-//------------------------------------------------------------------------------
-{
- FPGA_BUFFER pz1; // inverse value of pz
- FPGA_BUFFER t2, t3; // intermediate values
-
- fpga_modular_inv(pz, &pz1); // pz1 = pz^-1 (mod q)
-
- fpga_modular_mul(&pz1, &pz1, &t2); // t2 = pz1 ^ 2 (mod q)
- fpga_modular_mul(&pz1, &t2, &t3); // t3 = tz1 ^ 3 (mod q)
-
- fpga_modular_mul(px, &t2, qx); // qx = px * (pz^-1)^2 (mod q)
- fpga_modular_mul(py, &t3, qy); // qy = py * (pz^-1)^3 (mod q)
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Elliptic curve base point scalar multiplication routine.
-//
-// Q(qx,qy) = k * G(px,py)
-//
-// Note, that Q is supposed to be in affine coordinates. Multiplication is done
-// using the double-and-add algorithm 3.27 from "Guide to Elliptic Curve
-// Cryptography".
-//
-// WARNING: Though this procedure always does the addition step, it only
-// updates the result when current bit of k is set. It does not take any
-// active measures to keep run-time constant. The main purpose of this model
-// is to help debug Verilog code for FPGA, so *DO NOT* use it anywhere near
-// production!
-//
-//------------------------------------------------------------------------------
-void fpga_curve_scalar_multiply(FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy)
-//------------------------------------------------------------------------------
-{
- int word_count, bit_count; // counters
-
- FPGA_BUFFER rx, ry, rz; // intermediate result
- FPGA_BUFFER tx, ty, tz; // temporary variable
-
- /* set initial value of R to point at infinity */
- fpga_buffer_copy(&ecdsa_one, &rx);
- fpga_buffer_copy(&ecdsa_one, &ry);
- fpga_buffer_copy(&ecdsa_zero, &rz);
-
- /* process bits of k left-to-right */
- for (word_count=OPERAND_NUM_WORDS; word_count>0; word_count--)
- for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
- {
- /* calculate T = 2 * R */
- fpga_curve_double_jacobian(&rx, &ry, &rz, &tx, &ty, &tz);
-
- /* always calculate R = T + P for constant-time */
- fpga_curve_add_jacobian(&tx, &ty, &tz, &rx, &ry, &rz);
-
- /* revert to the value of T before addition if the current bit of k is not set */
- if (!((k->words[word_count-1] >> (bit_count-1)) & 1))
- { fpga_buffer_copy(&tx, &rx);
- fpga_buffer_copy(&ty, &ry);
- fpga_buffer_copy(&tz, &rz);
- }
-
- }
-
- // convert result to affine coordinates anyway
- fpga_curve_point_to_affine(&rx, &ry, &rz, qx, qy);
-
- // check, that rz is non-zero (not point at infinity)
- bool rz_is_zero = fpga_buffer_is_zero(&rz);
-
- // handle special case (result is point at infinity)
- if (rz_is_zero)
- { fpga_buffer_copy(&ecdsa_zero, qx);
- fpga_buffer_copy(&ecdsa_zero, qy);
- }
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/fpga_curve.h b/fpga_curve.h
deleted file mode 100644
index c90cc16..0000000
--- a/fpga_curve.h
+++ /dev/null
@@ -1,61 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// fpga_curve.h
-// ------------------------------------
-// Elliptic curve arithmetic procedures
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Globals
-//------------------------------------------------------------------------------
-extern FPGA_BUFFER ecdsa_g_x, ecdsa_g_y;
-extern FPGA_BUFFER ecdsa_h_x, ecdsa_h_y;
-extern FPGA_BUFFER ecdsa_q_x, ecdsa_q_y;
-extern FPGA_BUFFER ecdsa_r_x, ecdsa_r_y;
-
-
-//------------------------------------------------------------------------------
-// Prototypes
-//------------------------------------------------------------------------------
-void fpga_curve_init ();
-void fpga_curve_scalar_multiply (FPGA_BUFFER *k, FPGA_BUFFER *qx, FPGA_BUFFER *qy);
-void fpga_curve_add_jacobian (FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *rx, FPGA_BUFFER *ry, FPGA_BUFFER *rz);
-void fpga_curve_double_jacobian (FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *rx, FPGA_BUFFER *ry, FPGA_BUFFER *rz);
-void fpga_curve_point_to_affine (FPGA_BUFFER *px, FPGA_BUFFER *py, FPGA_BUFFER *pz, FPGA_BUFFER *qx, FPGA_BUFFER *qy);
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/fpga_modular.cpp b/fpga_modular.cpp
deleted file mode 100644
index 9b01df0..0000000
--- a/fpga_modular.cpp
+++ /dev/null
@@ -1,838 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// fpga_modular.cpp
-// ---------------------------
-// Modular arithmetic routines
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Headers
-//------------------------------------------------------------------------------
-#include <stdint.h>
-#include "ecdsa_model.h"
-#include "fpga_lowlevel.h"
-#include "fpga_modular.h"
-
-
-//------------------------------------------------------------------------------
-// Globals
-//------------------------------------------------------------------------------
-FPGA_BUFFER ecdsa_q;
-FPGA_BUFFER ecdsa_zero;
-FPGA_BUFFER ecdsa_one;
-FPGA_BUFFER ecdsa_delta;
-
-
-//------------------------------------------------------------------------------
-void fpga_modular_init()
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- FPGA_BUFFER tmp_q = ECDSA_Q;
- FPGA_BUFFER tmp_zero = ECDSA_ZERO;
- FPGA_BUFFER tmp_one = ECDSA_ONE;
- FPGA_BUFFER tmp_delta = ECDSA_DELTA;
-
- /* fill buffers for large multi-word integers */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- { ecdsa_q.words[w] = tmp_q.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_zero.words[w] = tmp_zero.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_one.words[w] = tmp_one.words[OPERAND_NUM_WORDS - (w+1)];
- ecdsa_delta.words[w] = tmp_delta.words[OPERAND_NUM_WORDS - (w+1)];
- }
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Modular addition.
-//
-// This routine implements algorithm 3. from "Ultra High Performance ECC over
-// NIST Primes on Commercial FPGAs".
-//
-// s = (a + b) mod q
-//
-// The naive approach is like the following:
-//
-// 1. s = a + b
-// 2. if (s >= q) s -= q
-//
-// The speed-up trick is to simultaneously calculate (a + b) and (a + b - q)
-// and then select the right variant.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_add(FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *s)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- FPGA_BUFFER ab, ab_n; // intermediate buffers
- bool c_in, c_out; // carries
- bool b_in, b_out; // borrows
-
- c_in = false; // first word has no carry
- b_in = false; // first word has no borrow
-
- // run parallel addition and subtraction
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- {
- fpga_lowlevel_add32(a->words[w], b->words[w], c_in, &ab.words[w], &c_out);
- fpga_lowlevel_sub32(ab.words[w], ecdsa_q.words[w], b_in, &ab_n.words[w], &b_out);
-
- c_in = c_out; // propagate carry
- b_in = b_out; // propagate borrow
- }
-
- // now select the right buffer
-
- /*
- * We select the right variant based on borrow and carry flags after
- * addition and subtraction of the very last pair of words. Note, that
- * we only need to select the first variant (a + b) when (a + b) < q.
- * This way if we get negative number after subtraction, we discard it
- * and use the output of the adder instead. The subtractor output is
- * negative when borrow flag is set *and* carry flag is not set. When
- * both borrow and carry are set, the number is non-negative, because
- * borrow and carry cancel each other out.
- */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- s->words[w] = (b_out && !c_out) ? ab.words[w] : ab_n.words[w];
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Modular subtraction.
-//
-// This routine implements algorithm 3. from "Ultra High Performance ECC over
-// NIST Primes on Commercial FPGAs".
-//
-// d = (a - b) mod q
-//
-// The naive approach is like the following:
-//
-// 1. d = a - b
-// 2. if (a < b) d += q
-//
-// The speed-up trick is to simultaneously calculate (a - b) and (a - b + q)
-// and then select the right variant.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_sub(FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *d)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- FPGA_BUFFER ab, ab_n; // intermediate buffers
- bool c_in, c_out; // carries
- bool b_in, b_out; // borrows
-
- c_in = false; // first word has no carry
- b_in = false; // first word has no borrow
-
- // run parallel subtraction and addition
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- {
- fpga_lowlevel_sub32(a->words[w], b->words[w], b_in, &ab.words[w], &b_out);
- fpga_lowlevel_add32(ab.words[w], ecdsa_q.words[w], c_in, &ab_n.words[w], &c_out);
-
- b_in = b_out; // propagate borrow
- c_in = c_out; // propagate carry
- }
-
- // now select the right buffer
-
- /*
- * We select the right variant based on borrow flag after subtraction
- * and addition of the very last pair of words. Note, that we only
- * need to select the second variant (a - b + q) when a < b. This way
- * if we get negative number after subtraction, we discard it
- * and use the output of the adder instead. The Subtractor output is
- * negative when borrow flag is set.
- */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- d->words[w] = b_out ? ab_n.words[w] : ab.words[w];
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Modular multiplication.
-//
-// This routine implements modular multiplication algorithm from "Ultra High
-// Performance ECC over NIST Primes on Commercial FPGAs".
-//
-// p = (a * b) mod q
-//
-// The complex algorithm is split into three parts:
-//
-// 1. Calculation of partial words
-// 2. Acccumulation of partial words into full-size product
-// 3. Modular reduction of the full-size product
-//
-// See comments for corresponding helper routines for more information.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_mul(FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_BUFFER *p)
-//------------------------------------------------------------------------------
-{
- FPGA_WORD_EXTENDED si[4*OPERAND_NUM_WORDS-1]; // parts of intermediate product
- FPGA_WORD c[2*OPERAND_NUM_WORDS]; // full-size intermediate product
-
- /* multiply to get partial words */
- fpga_modular_mul_helper_multiply(a, b, si);
-
- /* accumulate partial words into full-size product */
- fpga_modular_mul_helper_accumulate(si, c);
-
- /* reduce full-size product using special routine */
- fpga_modular_mul_helper_reduce(c, p);
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Modular multiplicative inversion procedure.
-//
-// a1 = a^-1 (mod n)
-//
-// This routine implements the algorithm from "Constant Time Modular
-// Inversion" by Joppe W. Bos (http://www.joppebos.com/files/CTInversion.pdf)
-//
-// The algorithm has two phases: 1) calculation of "almost" modular inverse,
-// which is a^-1*2^k and 2) removal of redundant factor 2^k.
-//
-// The first stage has four temporary variables: r, s, u, v; they are updated
-// at every iteration. Depending on flags there can be four branches, FPGA
-// will pre-calculate all possible values in parallel and then use a multiplexor
-// to select the next value. This implementation also calculates all possible
-// outcomes. This is done for debugging purposes, *NOT* for constant run-time!
-//
-// The second part only works with s and k.
-//
-// Note, that k is a simple counter, and it can't exceed 2*OPERAND_WIDTH.
-//
-// The complex inversion algorithm uses helper routines. Note, that width of the
-// intermediate results can temporarily exceed OPERAND_WIDTH, so all the helper
-// routines process OPERAND_NUM_WORDS+1 words.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv(FPGA_BUFFER *a, FPGA_BUFFER *a1)
-{
- int i; // round counter
- int w; // word counter
- int k; // redundant power of 2
-
- /* q, 1 */
- FPGA_WORD buf_q[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_1[OPERAND_NUM_WORDS+1];
-
- /* r, s */
- FPGA_WORD buf_r[OPERAND_NUM_WORDS+1], buf_s[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_r_double[OPERAND_NUM_WORDS+1], buf_s_double[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_r_new[OPERAND_NUM_WORDS+1], buf_s_new[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_r_plus_s[OPERAND_NUM_WORDS+1], buf_s_plus_r[OPERAND_NUM_WORDS+1];
-
- /* u, v */
- FPGA_WORD buf_u[OPERAND_NUM_WORDS+1], buf_v[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_u_half[OPERAND_NUM_WORDS+1], buf_v_half[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_u_minus_v[OPERAND_NUM_WORDS+1], buf_v_minus_u[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_u_minus_v_half[OPERAND_NUM_WORDS+1], buf_v_minus_u_half[OPERAND_NUM_WORDS+1];
- FPGA_WORD buf_u_new[OPERAND_NUM_WORDS+1], buf_v_new[OPERAND_NUM_WORDS+1];
-
- /* comparison */
- int cmp_v_1, cmp_u_v;
-
- /* clear buffers */
- for (w=0; w<=OPERAND_NUM_WORDS; w++)
- buf_r[w] = 0, buf_s[w] = 0,
- buf_u[w] = 0, buf_v[w] = 0,
- buf_q[w] = 0, buf_1[w] = 0;
-
- /* initialize q, 1 */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- buf_q[w] = ecdsa_q.words[w], buf_1[w] = ecdsa_one.words[w];
-
- /* initialize r, s */
- buf_r[0] = 0, buf_s[0] = 1;
-
- /* initialize u, v */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- buf_u[w] = ecdsa_q.words[w], buf_v[w] = a->words[w];
-
- /* initialize k */
- k = 0;
-
- /* flags for the first stage */
- bool v_is_1, u_is_greater_than_v, u_is_even, v_is_even;
-
- /* first stage */
- for (i=0; i<(2*OPERAND_WIDTH); i++)
- {
- /* pre-calculate all possible values for r and s */
- fpga_modular_inv_helper_shl(buf_r, buf_r_double); // r_double = 2 * r
- fpga_modular_inv_helper_shl(buf_s, buf_s_double); // s_double = 2 * s
- fpga_modular_inv_helper_add(buf_r, buf_s, buf_r_plus_s); // r_plus_s = r + s
- fpga_modular_inv_helper_add(buf_s, buf_r, buf_s_plus_r); // s_plus_r = s + r
-
- /* pre-calculate all possible values for u and v */
- fpga_modular_inv_helper_shr(buf_u, buf_u_half); // u_half = u / 2
- fpga_modular_inv_helper_shr(buf_v, buf_v_half); // v_half = v / 2
- fpga_modular_inv_helper_sub(buf_u, buf_v, buf_u_minus_v); // u_minus_v = u - v
- fpga_modular_inv_helper_sub(buf_v, buf_u, buf_v_minus_u); // v_minus_u = v - u
- fpga_modular_inv_helper_shr(buf_u_minus_v, buf_u_minus_v_half); // u_minus_v_half = u_minus_v / 2
- fpga_modular_inv_helper_shr(buf_v_minus_u, buf_v_minus_u_half); // v_minus_u_half = v_minus_u / 2
-
- /* compare */
- fpga_modular_inv_helper_cmp(buf_v, buf_1, &cmp_v_1);
- fpga_modular_inv_helper_cmp(buf_u, buf_v, &cmp_u_v);
-
- /* assign flags */
- v_is_1 = (cmp_v_1 == 0);
- u_is_greater_than_v = (cmp_u_v > 0);
- u_is_even = !(buf_u[0] & 1);
- v_is_even = !(buf_v[0] & 1);
-
- /* select u */
- if ( u_is_even) fpga_modular_inv_helper_cpy(buf_u_new, buf_u_half);
- if (!u_is_even && v_is_even) fpga_modular_inv_helper_cpy(buf_u_new, buf_u);
- if (!u_is_even && !v_is_even) fpga_modular_inv_helper_cpy(buf_u_new, u_is_greater_than_v ? buf_u_minus_v_half : buf_u);
-
- /* select v */
- if ( u_is_even) fpga_modular_inv_helper_cpy(buf_v_new, buf_v);
- if (!u_is_even && v_is_even) fpga_modular_inv_helper_cpy(buf_v_new, buf_v_half);
- if (!u_is_even && !v_is_even) fpga_modular_inv_helper_cpy(buf_v_new, u_is_greater_than_v ? buf_v : buf_v_minus_u_half);
-
- /* select r */
- if ( u_is_even) fpga_modular_inv_helper_cpy(buf_r_new, buf_r);
- if (!u_is_even && v_is_even) fpga_modular_inv_helper_cpy(buf_r_new, buf_r_double);
- if (!u_is_even && !v_is_even) fpga_modular_inv_helper_cpy(buf_r_new, u_is_greater_than_v ? buf_r_plus_s : buf_r_double);
-
- /* select s */
- if ( u_is_even) fpga_modular_inv_helper_cpy(buf_s_new, buf_s_double);
- if (!u_is_even && v_is_even) fpga_modular_inv_helper_cpy(buf_s_new, buf_s);
- if (!u_is_even && !v_is_even) fpga_modular_inv_helper_cpy(buf_s_new, u_is_greater_than_v ? buf_s_double : buf_s_plus_r);
-
- /* update values */
- if (!v_is_1)
- { fpga_modular_inv_helper_cpy(buf_u, buf_u_new);
- fpga_modular_inv_helper_cpy(buf_v, buf_v_new);
- fpga_modular_inv_helper_cpy(buf_r, buf_r_new);
- fpga_modular_inv_helper_cpy(buf_s, buf_s_new);
- }
-
- /* update k */
- if (!v_is_1) k++;
- }
-
- //
- // Note, that to save FPGA resources, the second stage re-uses buffers
- // used in the first stage.
- //
-
- /* flags for the second stage */
- bool k_is_0, s_is_odd;
-
- /* second stage */
- for (i=0; i<(2*OPERAND_WIDTH); i++)
- {
- /* pre-calculate all possible values */
- fpga_modular_inv_helper_shr(buf_s, buf_u);
- fpga_modular_inv_helper_add(buf_s, buf_q, buf_r);
- fpga_modular_inv_helper_shr(buf_r, buf_v);
-
- //
- // assign flags
- //
- s_is_odd = buf_s[0] & 1;
- k_is_0 = (k == 0);
-
- //
- // select new values based on flags
- //
- fpga_modular_inv_helper_cpy(buf_s_new, s_is_odd ? buf_v : buf_u);
-
- /* update s */
- if (! k_is_0)
- fpga_modular_inv_helper_cpy(buf_s, buf_s_new);
-
- /* update k */
- if (! k_is_0) k--;
- }
-
- /* done, copy s into output buffer */
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- a1->words[w] = buf_s[w];
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Parallelized multiplication.
-//
-// This routine implements the algorithm in Fig. 3. from "Ultra High
-// Performance ECC over NIST Primes on Commercial FPGAs".
-//
-// Inputs a and b are split into 2*OPERAND_NUM_WORDS words of FPGA_WORD_WIDTH/2
-// bits each, because FPGA multipliers can't handle full FPGA_WORD_WIDTH-wide
-// inputs. These smaller words are multiplied by an array of 2*OPERAND_NUM_WORDS
-// multiplers and accumulated into an array of 4*OPERAND_NUM_WORDS-1 partial
-// output words si[].
-//
-// The order of loading a and b into the multipliers is a bit complicated,
-// during the first 2*OPERAND_NUM_WORDS-1 cycles one si word per cycle is
-// obtained, during the very last 2*OPERAND_NUM_WORDS'th cycle all the
-// remaining 2*OPERAND_NUM_WORDS partial words are obtained simultaneously.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_mul_helper_multiply(FPGA_BUFFER *a, FPGA_BUFFER *b, FPGA_WORD_EXTENDED *si)
-//------------------------------------------------------------------------------
-{
- int w; // counter
- int t, x; // more counters
- int j, i; // word indices
- FPGA_WORD p; // product
-
- // buffers for smaller words that multipliers can handle
- FPGA_WORD_REDUCED ai[2*OPERAND_NUM_WORDS];
- FPGA_WORD_REDUCED bj[2*OPERAND_NUM_WORDS];
-
- // split a and b into smaller words
- for (w=0; w<OPERAND_NUM_WORDS; w++)
- ai[2*w] = (FPGA_WORD_REDUCED)a->words[w], ai[2*w + 1] = (FPGA_WORD_REDUCED)(a->words[w] >> (FPGA_WORD_WIDTH / 2)),
- bj[2*w] = (FPGA_WORD_REDUCED)b->words[w], bj[2*w + 1] = (FPGA_WORD_REDUCED)(b->words[w] >> (FPGA_WORD_WIDTH / 2));
-
- // accumulators
- FPGA_WORD_EXTENDED mac[2*OPERAND_NUM_WORDS];
-
- // clear accumulators
- for (w=0; w<(2*OPERAND_NUM_WORDS); w++) mac[w] = 0;
-
- // run the crazy algorithm :)
- for (t=0; t<(2*OPERAND_NUM_WORDS); t++)
- {
- // save upper half of si[] (one word per cycle)
- if (t > 0)
- { si[4*OPERAND_NUM_WORDS - (t+1)] = mac[t];
- mac[t] = 0;
- }
-
- // update index
- j = 2*OPERAND_NUM_WORDS - (t+1);
-
- // parallel multiplication
- for (x=0; x<(2*OPERAND_NUM_WORDS); x++)
- {
- // update index
- i = t - x;
- if (i < 0) i += 2*OPERAND_NUM_WORDS;
-
- // multiply...
- fpga_lowlevel_mul16(ai[i], bj[j], &p);
-
- // ...accumulate
- mac[x] += p;
- }
-
- }
-
- // now finally save lower half of si[] (2*OPERAND_NUM_WORDS words at once)
- for (w=0; w<(2*OPERAND_NUM_WORDS); w++)
- si[w] = mac[2*OPERAND_NUM_WORDS - (w+1)];
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Accumulation of partial words into full-size product.
-//
-// This routine implements the Algorithm 4. from "Ultra High Performance ECC
-// over NIST Primes on Commercial FPGAs".
-//
-// Input words si[] are accumulated into full-size product c[].
-//
-// The algorithm is a bit tricky, there are 4*OPERAND_NUM_WORDS-1 words in
-// si[]. Complete operation takes 2*OPERAND_NUM_WORDS cycles, even words are
-// summed in full, odd words are split into two parts. During every cycle the
-// upper part of the previous word and the lower part of the following word are
-// summed too.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_mul_helper_accumulate(FPGA_WORD_EXTENDED *si, FPGA_WORD *c)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- FPGA_WORD_EXTENDED cw0, cw1; // intermediate sums
- FPGA_WORD_REDUCED cw_carry; // wide carry
-
- // clear carry
- cw_carry = 0;
-
- // execute the algorithm
- for (w=0; w<(2*OPERAND_NUM_WORDS); w++)
- {
- // handy flags
- bool w_is_first = (w == 0);
- bool w_is_last = (w == (2*OPERAND_NUM_WORDS-1));
-
- // accumulate full current even word...
- // ...and also the upper part of the previous odd word (if not the first word)
- fpga_lowlevel_add48(si[2*w], w_is_first ? 0 : si[2*w-1] >> (FPGA_WORD_WIDTH / 2), &cw0);
-
- // generate another word from "carry" part of the previous even word...
- // ...and also the lower part of the following odd word (if not the last word)
- cw1 = w_is_last ? 0 : (FPGA_WORD)(si[2*w+1] << (FPGA_WORD_WIDTH / 2));
- cw1 |= (FPGA_WORD_EXTENDED)cw_carry;
-
- // accumulate once again
- fpga_lowlevel_add48(cw0, cw1, &cw1);
-
- // store current word...
- c[w] = (FPGA_WORD)cw1;
-
- // ...and carry
- cw_carry = (FPGA_WORD_REDUCED) (cw1 >> FPGA_WORD_WIDTH);
- }
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Fast modular reduction for NIST prime P-256.
-//
-// p = c mod p256
-//
-// This routine implements the algorithm 2.29 from "Guide to Elliptic Curve
-// Cryptography".
-//
-// Output p is OPERAND_WIDTH wide (contains OPERAND_NUM_WORDS words), input c
-// on the other hand is the output of the parallelized Comba multiplier, so it
-// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS).
-//
-// To save FPGA resources, the calculation is done using only two adders and
-// one subtractor. The algorithm is split into five steps.
-//
-//------------------------------------------------------------------------------
-#if USE_CURVE == 1
-void fpga_modular_mul_helper_reduce_p256(FPGA_WORD *c, FPGA_BUFFER *p)
-{
- // "funny" words
- FPGA_BUFFER s1, s2, s3, s4, s5, s6, s7, s8, s9;
-
- // compose "funny" words out of input words
- s1.words[7] = c[ 7], s1.words[6] = c[ 6], s1.words[5] = c[ 5], s1.words[4] = c[ 4], s1.words[3] = c[ 3], s1.words[2] = c[ 2], s1.words[1] = c[ 1], s1.words[0] = c[ 0];
- s2.words[7] = c[15], s2.words[6] = c[14], s2.words[5] = c[13], s2.words[4] = c[12], s2.words[3] = c[11], s2.words[2] = 0, s2.words[1] = 0, s2.words[0] = 0;
- s3.words[7] = 0, s3.words[6] = c[15], s3.words[5] = c[14], s3.words[4] = c[13], s3.words[3] = c[12], s3.words[2] = 0, s3.words[1] = 0, s3.words[0] = 0;
- s4.words[7] = c[15], s4.words[6] = c[14], s4.words[5] = 0, s4.words[4] = 0, s4.words[3] = 0, s4.words[2] = c[10], s4.words[1] = c[ 9], s4.words[0] = c[ 8];
- s5.words[7] = c[ 8], s5.words[6] = c[13], s5.words[5] = c[15], s5.words[4] = c[14], s5.words[3] = c[13], s5.words[2] = c[11], s5.words[1] = c[10], s5.words[0] = c[ 9];
- s6.words[7] = c[10], s6.words[6] = c[ 8], s6.words[5] = 0, s6.words[4] = 0, s6.words[3] = 0, s6.words[2] = c[13], s6.words[1] = c[12], s6.words[0] = c[11];
- s7.words[7] = c[11], s7.words[6] = c[ 9], s7.words[5] = 0, s7.words[4] = 0, s7.words[3] = c[15], s7.words[2] = c[14], s7.words[1] = c[13], s7.words[0] = c[12];
- s8.words[7] = c[12], s8.words[6] = 0, s8.words[5] = c[10], s8.words[4] = c[ 9], s8.words[3] = c[ 8], s8.words[2] = c[15], s8.words[1] = c[14], s8.words[0] = c[13];
- s9.words[7] = c[13], s9.words[6] = 0, s9.words[5] = c[11], s9.words[4] = c[10], s9.words[3] = c[ 9], s9.words[2] = 0, s9.words[1] = c[15], s9.words[0] = c[14];
-
- // intermediate results
- FPGA_BUFFER sum0, sum1, difference;
-
- /* Step 1. */
- fpga_modular_add(&s2, &s2, &sum0); // sum0 = 2*s2
- fpga_modular_add(&s3, &s3, &sum1); // sum1 = 2*s3
- fpga_modular_sub(&ecdsa_zero, &s6, &difference); // difference = -s6
-
- /* Step 2. */
- fpga_modular_add(&sum0, &s1, &sum0); // sum0 = s1 + 2*s2
- fpga_modular_add(&sum1, &s4, &sum1); // sum1 = s4 + 2*s3
- fpga_modular_sub(&difference, &s7, &difference); // difference = -(s6 + s7)
-
- /* Step 3. */
- fpga_modular_add(&sum0, &s5, &sum0); // sum0 = s1 + 2*s2 + s5
- fpga_modular_add(&sum1, &ecdsa_zero, &sum1); // compulsory cycle to keep sum1 constant for next stage
- fpga_modular_sub(&difference, &s8, &difference); // difference = -(s6 + s7 + s8)
-
- /* Step 4. */
- fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + 2*s3 + s4 + s5
-// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
- fpga_modular_sub(&difference, &s9, &difference); // difference = -(s6 + s7 + s8 + s9)
-
- /* Step 5. */
- fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + 2*s3 + s4 + s5 - s6 - s7 - s8 - s9
-// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
-// fpga_modular_add(<dummy>, <dummy>, &difference); // dummy cycle, result ignored
-}
-#endif
-
-
-//------------------------------------------------------------------------------
-//
-// Fast modular reduction for NIST prime P-384.
-//
-// p = c mod p384
-//
-// This routine implements the algorithm 2.30 from "Guide to Elliptic Curve
-// Cryptography".
-//
-// Output p is OPERAND_WIDTH wide (contains OPERAND_NUM_WORDS words), input c
-// on the other hand is the output of the parallelized Comba multiplier, so it
-// is 2*OPERAND_WIDTH wide and has twice as many words (2*OPERAND_NUM_WORDS).
-//
-// To save FPGA resources, the calculation is done using only two adders and
-// one subtractor. The algorithm is split into five steps.
-//
-//------------------------------------------------------------------------------
-#if USE_CURVE == 2
-void fpga_modular_mul_helper_reduce_p384(FPGA_WORD *c, FPGA_BUFFER *p)
-{
- // "funny" words
- FPGA_BUFFER s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
-
- // compose "funny" words
- s1.words[11] = c[11], s1.words[10] = c[10], s1.words[ 9] = c[ 9], s1.words[ 8] = c[ 8], s1.words[ 7] = c[ 7], s1.words[ 6] = c[ 6], s1.words[ 5] = c[ 5], s1.words[ 4] = c[ 4], s1.words[ 3] = c[ 3], s1.words[ 2] = c[ 2], s1.words[ 1] = c[ 1], s1.words[ 0] = c[ 0];
- s2.words[11] = 0, s2.words[10] = 0, s2.words[ 9] = 0, s2.words[ 8] = 0, s2.words[ 7] = 0, s2.words[ 6] = c[23], s2.words[ 5] = c[22], s2.words[ 4] = c[21], s2.words[ 3] = 0, s2.words[ 2] = 0, s2.words[ 1] = 0, s2.words[ 0] = 0;
- s3.words[11] = c[23], s3.words[10] = c[22], s3.words[ 9] = c[21], s3.words[ 8] = c[20], s3.words[ 7] = c[19], s3.words[ 6] = c[18], s3.words[ 5] = c[17], s3.words[ 4] = c[16], s3.words[ 3] = c[15], s3.words[ 2] = c[14], s3.words[ 1] = c[13], s3.words[ 0] = c[12];
- s4.words[11] = c[20], s4.words[10] = c[19], s4.words[ 9] = c[18], s4.words[ 8] = c[17], s4.words[ 7] = c[16], s4.words[ 6] = c[15], s4.words[ 5] = c[14], s4.words[ 4] = c[13], s4.words[ 3] = c[12], s4.words[ 2] = c[23], s4.words[ 1] = c[22], s4.words[ 0] = c[21];
- s5.words[11] = c[19], s5.words[10] = c[18], s5.words[ 9] = c[17], s5.words[ 8] = c[16], s5.words[ 7] = c[15], s5.words[ 6] = c[14], s5.words[ 5] = c[13], s5.words[ 4] = c[12], s5.words[ 3] = c[20], s5.words[ 2] = 0, s5.words[ 1] = c[23], s5.words[ 0] = 0;
- s6.words[11] = 0, s6.words[10] = 0, s6.words[ 9] = 0, s6.words[ 8] = 0, s6.words[ 7] = c[23], s6.words[ 6] = c[22], s6.words[ 5] = c[21], s6.words[ 4] = c[20], s6.words[ 3] = 0, s6.words[ 2] = 0, s6.words[ 1] = 0, s6.words[ 0] = 0;
- s7.words[11] = 0, s7.words[10] = 0, s7.words[ 9] = 0, s7.words[ 8] = 0, s7.words[ 7] = 0, s7.words[ 6] = 0, s7.words[ 5] = c[23], s7.words[ 4] = c[22], s7.words[ 3] = c[21], s7.words[ 2] = 0, s7.words[ 1] = 0, s7.words[ 0] = c[20];
- s8.words[11] = c[22], s8.words[10] = c[21], s8.words[ 9] = c[20], s8.words[ 8] = c[19], s8.words[ 7] = c[18], s8.words[ 6] = c[17], s8.words[ 5] = c[16], s8.words[ 4] = c[15], s8.words[ 3] = c[14], s8.words[ 2] = c[13], s8.words[ 1] = c[12], s8.words[ 0] = c[23];
- s9.words[11] = 0, s9.words[10] = 0, s9.words[ 9] = 0, s9.words[ 8] = 0, s9.words[ 7] = 0, s9.words[ 6] = 0, s9.words[ 5] = 0, s9.words[ 4] = c[23], s9.words[ 3] = c[22], s9.words[ 2] = c[21], s9.words[ 1] = c[20], s9.words[ 0] = 0;
- s10.words[11] = 0, s10.words[10] = 0, s10.words[ 9] = 0, s10.words[ 8] = 0, s10.words[ 7] = 0, s10.words[ 6] = 0, s10.words[ 5] = 0, s10.words[ 4] = c[23], s10.words[ 3] = c[23], s10.words[ 2] = 0, s10.words[ 1] = 0, s10.words[ 0] = 0;
-
- // intermediate results
- FPGA_BUFFER sum0, sum1, difference;
-
- /* Step 1. */
- fpga_modular_add(&s1, &s3, &sum0); // sum0 = s1 + s3
- fpga_modular_add(&s2, &s2, &sum1); // sum1 = 2*s2
- fpga_modular_sub(&ecdsa_zero, &s8, &difference); // difference = -s8
-
- /* Step 2. */
- fpga_modular_add(&sum0, &s4, &sum0); // sum0 = s1 + s3 + s4
- fpga_modular_add(&sum1, &s5, &sum1); // sum1 = 2*s2 + s5
- fpga_modular_sub(&difference, &s9, &difference); // difference = -(s8 + s9)
-
- /* Step 3. */
- fpga_modular_add(&sum0, &s6, &sum0); // sum0 = s1 + s3 + s4 + s6
- fpga_modular_add(&sum1, &s7, &sum1); // sum1 = 2*s2 + s5 + s7
- fpga_modular_sub(&difference, &s10, &difference); // difference = -(s8 + s9 + s10)
-
- /* Step 4. */
- fpga_modular_add(&sum0, &sum1, &sum0); // sum0 = s1 + 2*s2 + 2*s3 + s4 + s5
-// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
- fpga_modular_sub(&difference, &ecdsa_zero, &difference); // compulsory cycle to keep difference constant for next stage
-
- /* Step 5. */
- fpga_modular_add(&sum0, &difference, p); // p = s1 + 2*s2 + s3 + s4 + s5 + s6 + s7 - s8 - s9 - s10
-// fpga_modular_add(<dummy>, <dummy>, &sum1); // dummy cycle, result ignored
-// fpga_modular_add(<dummy>, <dummy>, &difference); // dummy cycle, result ignored
-}
-#endif
-
-
-//------------------------------------------------------------------------------
-//
-// Multi-word shift to the left by 1 bit.
-//
-// y = x << 1
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv_helper_shl(FPGA_WORD *x, FPGA_WORD *y)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- FPGA_WORD carry_in, carry_out; // carries
-
- carry_in = 0; // first word has no carry
-
- // shift word-by-word
- for (w=0; w<=OPERAND_NUM_WORDS; w++)
- carry_out = x[w] >> (FPGA_WORD_WIDTH - 1), // store next carry
- y[w] = x[w] << 1, // shift
- y[w] |= carry_in, // process carry
- carry_in = carry_out; // propagate carry
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Multi-word shift to the right by 1 bit.
-//
-// y = x >> 1
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv_helper_shr(FPGA_WORD *x, FPGA_WORD *y)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- FPGA_WORD carry_in, carry_out; // carries
-
- carry_in = 0; // first word has no carry
-
- // shift word-by-word
- for (w=OPERAND_NUM_WORDS; w>=0; w--)
- carry_out = x[w], // store next carry
- y[w] = x[w] >> 1, // shift
- y[w] |= carry_in << (FPGA_WORD_WIDTH - 1), // process carry
- carry_in = carry_out; // propagate carry
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Multi-word addition.
-//
-// s = x + y
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv_helper_add(FPGA_WORD *x, FPGA_WORD *y, FPGA_WORD *s)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- bool carry_in, carry_out; // carries
-
- // lowest word has no carry
- carry_in = false;
-
- // sum a and b word-by-word
- for (w=0; w<=OPERAND_NUM_WORDS; w++)
- {
- // low-level addition
- fpga_lowlevel_add32(x[w], y[w], carry_in, &s[w], &carry_out);
-
- // propagate carry bit
- carry_in = carry_out;
- }
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Multi-word subtraction .
-//
-// d = x - y
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv_helper_sub(FPGA_WORD *x, FPGA_WORD *y, FPGA_WORD *d)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- bool borrow_in, borrow_out; // borrows
-
- // lowest word has no borrow
- borrow_in = false;
-
- // subtract b from a word-by-word
- for (w=0; w<=OPERAND_NUM_WORDS; w++)
- {
- // low-level subtraction
- fpga_lowlevel_sub32(x[w], y[w], borrow_in, &d[w], &borrow_out);
-
- // propagate borrow bit
- borrow_in = borrow_out;
- }
-
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Multi-word copy.
-//
-// dst = src
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv_helper_cpy(FPGA_WORD *dst, FPGA_WORD *src)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
-
- // copy all the words from src into dst
- for (w=0; w<=OPERAND_NUM_WORDS; w++)
- dst[w] = src[w];
-}
-
-
-//------------------------------------------------------------------------------
-//
-// Multi-word comparison.
-//
-// The return value is -1 when a<b, 0 when a=b and 1 when a>b.
-//
-//------------------------------------------------------------------------------
-void fpga_modular_inv_helper_cmp(FPGA_WORD *a, FPGA_WORD *b, int *c)
-//------------------------------------------------------------------------------
-{
- int w; // word counter
- int r, r0, ra, rb; // result
- bool borrow; // borrow
- FPGA_WORD d; // partial difference
-
- // result is unknown so far
- r = 0;
-
- // clear borrow for the very first word
- borrow = false;
-
- // compare a and b word-by-word
- for (w=OPERAND_NUM_WORDS; w>=0; w--)
- {
- // save result
- r0 = r;
-
- // subtract current words
- fpga_lowlevel_sub32(a[w], b[w], false, &d, &borrow);
-
- // analyze flags
- rb = borrow ? -1 : 0; // a[w] < b[w]
- ra = (!borrow && (d != 0)) ? 1 : 0; // a[w] > b[w]
-
- //
- // Note, that ra is either 0 or 1, rb is either 0 or -1 and they
- // can never be non-zero at the same time.
- //
- // Note, that r can only be updated if comparison result has not
- // been resolved yet. Even if we already know comparison result,
- // we continue doing dummy subtractions to keep run-time constant.
- //
-
- // update result
- r = (r == 0) ? ra + rb : r0;
- }
-
- // done
- *c = r;
-}
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/fpga_util.h b/fpga_util.h
deleted file mode 100644
index 24b2aa2..0000000
--- a/fpga_util.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// fpga_util.h
-// ---------------------
-// Utility FPGA routines
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-
-//------------------------------------------------------------------------------
-// Prototypes
-//------------------------------------------------------------------------------
-bool fpga_buffer_is_zero (FPGA_BUFFER *x);
-void fpga_buffer_copy (FPGA_BUFFER *src, FPGA_BUFFER *dst);
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/test_vectors/charlie_p256.key b/test_vectors/charlie_p256.key
new file mode 100644
index 0000000..34b5c20
--- /dev/null
+++ b/test_vectors/charlie_p256.key
@@ -0,0 +1,8 @@
+-----BEGIN EC PARAMETERS-----
+BggqhkjOPQMBBw==
+-----END EC PARAMETERS-----
+-----BEGIN EC PRIVATE KEY-----
+MHcCAQEEIFA+WK/POvMze/U0CURQErgftFSMTSsTAqYX+bHQDX3goAoGCCqGSM49
+AwEHoUQDQgAE2TSbSLDujDYTmx147cGRGyUId/t61Erhi7L4pvTcyuXgoFbotQkq
+bHKPS8iQQ/vifCYnRN+9rxeD/C4BsGB3Gw==
+-----END EC PRIVATE KEY-----
diff --git a/test_vectors/charlie_p384.key b/test_vectors/charlie_p384.key
new file mode 100644
index 0000000..8894220
--- /dev/null
+++ b/test_vectors/charlie_p384.key
@@ -0,0 +1,9 @@
+-----BEGIN EC PARAMETERS-----
+BgUrgQQAIg==
+-----END EC PARAMETERS-----
+-----BEGIN EC PRIVATE KEY-----
+MIGkAgEBBDC7SgnY5SfwYmZetNCzmh3OlNqixZNbMWOwkPB5PuNqWmKnBV1dhQ0b
+FdxlbWXYs6KgBwYFK4EEACKhZANiAAS4Aj5grkLqFGMw8sOIMJbKlhsR9d/qSh1l
+6Y5kszUn+1cibbSKUUMlHvBr3veOtXrTxmRpYlqqraNH4QM8FHS2NDqTaP8pRQG7
+1TscxJ/ZctpDnJ2oJ+IwJyDit43RT54=
+-----END EC PRIVATE KEY-----
diff --git a/test_vectors/ecdsa_test_vector_nsa.h b/test_vectors/ecdsa_test_vector_nsa.h
new file mode 100644
index 0000000..762f284
--- /dev/null
+++ b/test_vectors/ecdsa_test_vector_nsa.h
@@ -0,0 +1,56 @@
+/* Values from "Suite B Implementer's Guide to FIPS 186-3 (ECDSA)" */
+
+#define ECDSA_P256_D_NSA_INIT \
+ {0x70a12c2d, 0xb16845ed, 0x56ff68cf, 0xc21a472b, \
+ 0x3f04d7d6, 0x851bf634, 0x9f2d7d5b, 0x3452b38a}
+
+#define ECDSA_P256_QX_NSA_INIT \
+ {0x8101ece4, 0x7464a6ea, 0xd70cf69a, 0x6e2bd3d8, \
+ 0x8691a326, 0x2d22cba4, 0xf7635eaf, 0xf26680a8}
+
+#define ECDSA_P256_QY_NSA_INIT \
+ {0xd8a12ba6, 0x1d599235, 0xf67d9cb4, 0xd58f1783, \
+ 0xd3ca43e7, 0x8f0a5aba, 0xa6240799, 0x36c0c3a9}
+
+#define ECDSA_P256_K_NSA_INIT \
+ {0x580ec00d, 0x85643433, 0x4cef3f71, 0xecaed496, \
+ 0x5b12ae37, 0xfa47055b, 0x1965c7b1, 0x34ee45d0}
+
+#define ECDSA_P256_RX_NSA_INIT \
+ {0x7214bc96, 0x47160bbd, 0x39ff2f80, 0x533f5dc6, \
+ 0xddd70ddf, 0x86bb8156, 0x61e805d5, 0xd4e6f27c}
+
+#define ECDSA_P256_RY_NSA_INIT \
+ {0x8b81e3e9, 0x77597110, 0xc7cf2633, 0x435b2294, \
+ 0xb7264298, 0x7defd3d4, 0x007e1cfc, 0x5df84541}
+
+
+#define ECDSA_P384_D_NSA_INIT \
+ {0xc838b852, 0x53ef8dc7, 0x394fa580, 0x8a518398, \
+ 0x1c7deef5, 0xa69ba8f4, 0xf2117ffe, 0xa39cfcd9, \
+ 0x0e95f6cb, 0xc854abac, 0xab701d50, 0xc1f3cf24}
+
+#define ECDSA_P384_QX_NSA_INIT \
+ {0x1fbac8ee, 0xbd0cbf35, 0x640b39ef, 0xe0808dd7, \
+ 0x74debff2, 0x0a2a329e, 0x91713baf, 0x7d7f3c3e, \
+ 0x81546d88, 0x3730bee7, 0xe48678f8, 0x57b02ca0}
+
+#define ECDSA_P384_QY_NSA_INIT \
+ {0xeb213103, 0xbd68ce34, 0x3365a8a4, 0xc3d4555f, \
+ 0xa385f533, 0x0203bdd7, 0x6ffad1f3, 0xaffb9575, \
+ 0x1c132007, 0xe1b24035, 0x3cb0a4cf, 0x1693bdf9}
+
+#define ECDSA_P384_K_NSA_INIT \
+ {0xdc6b4403, 0x6989a196, 0xe39d1cda, 0xc000812f, \
+ 0x4bdd8b2d, 0xb41bb33a, 0xf5137258, 0x5ebd1db6, \
+ 0x3f0ce827, 0x5aa1fd45, 0xe2d2a735, 0xf8749359}
+
+#define ECDSA_P384_RX_NSA_INIT \
+ {0xa0c27ec8, 0x93092dea, 0x1e1bd2cc, 0xfed3cf94, \
+ 0x5c8134ed, 0x0c9f8131, 0x1a0f4a05, 0x942db8db, \
+ 0xed8dd59f, 0x267471d5, 0x462aa14f, 0xe72de856}
+
+#define ECDSA_P384_RY_NSA_INIT \
+ {0x85564940, 0x9815bb91, 0x424eaca5, 0xfd76c973, \
+ 0x75d575d1, 0x422ec53d, 0x343bd33b, 0x847fdf0c, \
+ 0x11569685, 0xb528ab25, 0x49301542, 0x8d7cf72b}
diff --git a/test_vectors/ecdsa_test_vector_randomized.h b/test_vectors/ecdsa_test_vector_randomized.h
new file mode 100644
index 0000000..1f334c2
--- /dev/null
+++ b/test_vectors/ecdsa_test_vector_randomized.h
@@ -0,0 +1,29 @@
+/* Generated automatically, do not edit. */
+
+#define ECDSA_P256_D_RANDOM_INIT \
+ {0x503e58af, 0xcf3af333, 0x7bf53409, 0x445012b8, \
+ 0x1fb4548c, 0x4d2b1302, 0xa617f9b1, 0xd00d7de0}
+
+#define ECDSA_P256_QX_RANDOM_INIT \
+ {0xd9349b48, 0xb0ee8c36, 0x139b1d78, 0xedc1911b, \
+ 0x250877fb, 0x7ad44ae1, 0x8bb2f8a6, 0xf4dccae5}
+
+#define ECDSA_P256_QY_RANDOM_INIT \
+ {0xe0a056e8, 0xb5092a6c, 0x728f4bc8, 0x9043fbe2, \
+ 0x7c262744, 0xdfbdaf17, 0x83fc2e01, 0xb060771b}
+
+#define ECDSA_P384_D_RANDOM_INIT \
+ {0xbb4a09d8, 0xe527f062, 0x665eb4d0, 0xb39a1dce, \
+ 0x94daa2c5, 0x935b3163, 0xb090f079, 0x3ee36a5a, \
+ 0x62a7055d, 0x5d850d1b, 0x15dc656d, 0x65d8b3a2}
+
+#define ECDSA_P384_QX_RANDOM_INIT \
+ {0xb8023e60, 0xae42ea14, 0x6330f2c3, 0x883096ca, \
+ 0x961b11f5, 0xdfea4a1d, 0x65e98e64, 0xb33527fb, \
+ 0x57226db4, 0x8a514325, 0x1ef06bde, 0xf78eb57a}
+
+#define ECDSA_P384_QY_RANDOM_INIT \
+ {0xd3c66469, 0x625aaaad, 0xa347e103, 0x3c1474b6, \
+ 0x343a9368, 0xff294501, 0xbbd53b1c, 0xc49fd972, \
+ 0xda439c9d, 0xa827e230, 0x2720e2b7, 0x8dd14f9e}
+
diff --git a/test_vectors/ecdsa_test_vector_randomized.vh b/test_vectors/ecdsa_test_vector_randomized.vh
new file mode 100644
index 0000000..6c5cf80
--- /dev/null
+++ b/test_vectors/ecdsa_test_vector_randomized.vh
@@ -0,0 +1,29 @@
+/* Generated automatically, do not edit. */
+
+localparam [255:0] ECDSA_P256_D_RANDOM =
+ {32'h503e58af, 32'hcf3af333, 32'h7bf53409, 32'h445012b8,
+ 32'h1fb4548c, 32'h4d2b1302, 32'ha617f9b1, 32'hd00d7de0};
+
+localparam [255:0] ECDSA_P256_QX_RANDOM =
+ {32'hd9349b48, 32'hb0ee8c36, 32'h139b1d78, 32'hedc1911b,
+ 32'h250877fb, 32'h7ad44ae1, 32'h8bb2f8a6, 32'hf4dccae5};
+
+localparam [255:0] ECDSA_P256_QY_RANDOM =
+ {32'he0a056e8, 32'hb5092a6c, 32'h728f4bc8, 32'h9043fbe2,
+ 32'h7c262744, 32'hdfbdaf17, 32'h83fc2e01, 32'hb060771b};
+
+localparam [383:0] ECDSA_P384_D_RANDOM =
+ {32'hbb4a09d8, 32'he527f062, 32'h665eb4d0, 32'hb39a1dce,
+ 32'h94daa2c5, 32'h935b3163, 32'hb090f079, 32'h3ee36a5a,
+ 32'h62a7055d, 32'h5d850d1b, 32'h15dc656d, 32'h65d8b3a2};
+
+localparam [383:0] ECDSA_P384_QX_RANDOM =
+ {32'hb8023e60, 32'hae42ea14, 32'h6330f2c3, 32'h883096ca,
+ 32'h961b11f5, 32'hdfea4a1d, 32'h65e98e64, 32'hb33527fb,
+ 32'h57226db4, 32'h8a514325, 32'h1ef06bde, 32'hf78eb57a};
+
+localparam [383:0] ECDSA_P384_QY_RANDOM =
+ {32'hd3c66469, 32'h625aaaad, 32'ha347e103, 32'h3c1474b6,
+ 32'h343a9368, 32'hff294501, 32'hbbd53b1c, 32'hc49fd972,
+ 32'hda439c9d, 32'ha827e230, 32'h2720e2b7, 32'h8dd14f9e};
+
diff --git a/test_vectors/format_random_test_vector.py b/test_vectors/format_random_test_vector.py
new file mode 100644
index 0000000..861bab5
--- /dev/null
+++ b/test_vectors/format_random_test_vector.py
@@ -0,0 +1,327 @@
+#
+# format_random_test_vector.py
+# ------------------------------------------
+# Formats test vector for ecdsa_fpga_model
+#
+# Author: Pavel Shatov
+# Copyright (c) 2017-2018, NORDUnet A/S
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# - Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# - Neither the name of the NORDUnet nor the names of its contributors may
+# be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+
+#
+USAGE = "USAGE: format_random_test_vector.py [openssl_binary]"
+#
+
+
+#
+# This script reads the test vector generated by the script
+# regenerate_random_test_vector.py and writes nicely formatted C header file
+# and Verilog include file.
+#
+
+
+#
+# imports
+#
+import sys
+import subprocess
+
+
+#
+# list of curve names of interest
+#
+CURVE_P256 = "p256"
+CURVE_P384 = "p384"
+
+
+#
+# variables
+#
+OPENSSL = ""
+
+
+#
+# format one test vector
+#
+def format_c_header(f, curve, da, qax, qay):
+
+ if curve == CURVE_P256: curve_str = "ECDSA_P256"
+ if curve == CURVE_P384: curve_str = "ECDSA_P384"
+
+ # write all numbers in vector
+ format_c_array(f, da, "#define " + curve_str + "_D_RANDOM_INIT" + " \\\n")
+ format_c_array(f, qax, "#define " + curve_str + "_QX_RANDOM_INIT" + " \\\n")
+ format_c_array(f, qay, "#define " + curve_str + "_QY_RANDOM_INIT" + " \\\n")
+
+
+#
+# format one test vector
+#
+def format_verilog_include(f, curve, da, qax, qay):
+
+ if curve == CURVE_P256: curve_str, msb_index = "ECDSA_P256", "255"
+ if curve == CURVE_P384: curve_str, msb_index = "ECDSA_P384", "383"
+
+ # write all numbers in vector
+ format_verilog_concatenation(f, da, "localparam [" + msb_index + ":0] " + curve_str + "_D_RANDOM " + " =\n")
+ format_verilog_concatenation(f, qax, "localparam [" + msb_index + ":0] " + curve_str + "_QX_RANDOM" + " =\n")
+ format_verilog_concatenation(f, qay, "localparam [" + msb_index + ":0] " + curve_str + "_QY_RANDOM" + " =\n")
+
+
+#
+# nicely format multi-word integer into C array initializer
+#
+def format_c_array(f, n, s):
+
+ # print '#define XYZ \'
+ f.write(s)
+
+ # convert number to hex string and prepend it with zeroes if necessary
+ n_hex = hex(n).lstrip("0x").rstrip("L")
+ while (len(n_hex) % 8) > 0:
+ n_hex = "0" + n_hex
+
+ # get number of 32-bit words
+ num_words = len(n_hex) // 8
+
+ # print all words in n
+ w = 0
+ while w < num_words:
+
+ n_part = ""
+
+ # add tab for every new line
+ if w == 0:
+ n_part += "\t{"
+ elif (w % 4) == 0:
+ n_part += "\t "
+
+ # add current word
+ n_part += "0x" + n_hex[8 * w : 8 * (w + 1)]
+
+ # add separator or newline
+ if (w + 1) == num_words:
+ n_part += "}\n"
+ else:
+ n_part += ", "
+ if (w % 4) == 3:
+ n_part += "\\\n"
+
+ w += 1
+
+ # write current part
+ f.write(n_part)
+
+ # write final newline
+ f.write("\n")
+
+
+def format_verilog_concatenation(f, n, s):
+
+ # print 'localparam ZZZ ='
+ f.write(s)
+
+ # convert number to hex string and prepend it with zeroes if necessary
+ n_hex = hex(n).split("0x")[1]
+ while (len(n_hex) % 8) > 0:
+ n_hex = "0" + n_hex
+
+ # get number of 32-bit words
+ num_words = len(n_hex) // 8
+
+ # print all words in n
+ w = 0
+ while w < num_words:
+
+ n_part = ""
+
+ if w == 0:
+ n_part += "\t{"
+ elif (w % 4) == 0:
+ n_part += "\t "
+
+ n_part += "32'h" + n_hex[8 * w : 8 * (w + 1)]
+
+ if (w + 1) == num_words:
+ n_part += "};\n"
+ else:
+ n_part += ", "
+ if (w % 4) == 3:
+ n_part += "\n"
+ w += 1
+
+ f.write(n_part)
+
+ f.write("\n")
+
+
+ #
+ # returns d, qx, qy, where
+ # d is private key and qx, qy is the corresponding public key
+ #
+def get_key(openssl, party, curve):
+
+ # generate private key filename
+ key_file = party + "_" + curve + ".key"
+
+ # retrieve key components using openssl
+ openssl_command = [openssl, "ec", "-in", key_file, "-noout", "-text"]
+ openssl_stdout = subprocess.check_output(openssl_command).decode("utf-8")
+ stdout_lines = openssl_stdout.splitlines()
+
+ found_priv = False
+ found_pub = False
+
+ key_priv = ""
+ key_pub = ""
+
+ # process lines looking for "priv:" and "pub:" markers
+ for line in stdout_lines:
+
+ # found private key marker?
+ if line.strip() == "priv:":
+ found_priv = True
+ found_pub = False
+ continue
+
+ # found public key marker?
+ if line.strip() == "pub:": # openssl 1.0.2g prints 'pub: ' (extra space before newline),
+ found_pub = True # so we need to compare against line.strip(), not just line
+ found_priv = False
+ continue
+
+ # found part of private key?
+ if found_priv:
+ if not line.startswith(" "):
+ found_priv = False
+ continue
+ else:
+ key_priv += line.strip()
+
+ # found part of public key?
+ if found_pub:
+ if not line.startswith(" "):
+ found_pub = False
+ continue
+ else:
+ key_pub += line.strip()
+
+ # do some cleanup and sanity checking on private key
+ # * remove extra leading zero byte if present
+ # * remove colons
+ # * check length (256 bits or 384 bits)
+ while key_priv.startswith("00"):
+ key_priv = key_priv[2:]
+
+ key_priv = key_priv.replace(":", "")
+
+ if curve == CURVE_P256 and len(key_priv) != 256 / 4: sys.exit()
+ if curve == CURVE_P384 and len(key_priv) != 384 / 4: sys.exit()
+
+ # do some cleanup and sanity checking on public key
+ # * make sure, that uncompressed form marker (0x04) is present and
+ # then remove it
+ # * remove colons
+ # * check length (2x256 or 2x384 bits)
+ if not key_pub.startswith("04"): sys.exit()
+
+ key_pub = key_pub[2:]
+ key_pub = key_pub.replace(":", "")
+
+ if curve == CURVE_P256 and len(key_pub) != 2 * 256 / 4: sys.exit()
+ if curve == CURVE_P384 and len(key_pub) != 2 * 384 / 4: sys.exit()
+
+ # split public key into parts
+ if curve == CURVE_P256:
+ key_pub_x = key_pub[ 0: 64]
+ key_pub_y = key_pub[ 64:128]
+
+ if curve == CURVE_P384:
+ key_pub_x = key_pub[ 0: 96]
+ key_pub_y = key_pub[ 96:192]
+
+ # convert from strings to integers
+ key_priv = int(key_priv, 16)
+ key_pub_x = int(key_pub_x, 16)
+ key_pub_y = int(key_pub_y, 16)
+
+ # done
+ return key_priv, key_pub_x, key_pub_y
+
+
+if __name__ == "__main__":
+
+ # detect whether user requested some specific binary
+ if len(sys.argv) == 1:
+ OPENSSL = "openssl"
+ print("Using system OpenSSL library.")
+ elif len(sys.argv) == 2:
+ OPENSSL = sys.argv[1]
+ print("Using OpenSSL binary '" + OPENSSL + "'...")
+ else:
+ print(USAGE)
+
+ if len(OPENSSL) > 0:
+
+ # list of curves to process
+ curves = [CURVE_P256, CURVE_P384]
+
+ # open output files
+ file_h = open('ecdsa_test_vector_randomized.h', 'w')
+ file_v = open('ecdsa_test_vector_randomized.vh', 'w')
+
+ # write headers
+ file_h.write("/* Generated automatically, do not edit. */\n\n")
+ file_v.write("/* Generated automatically, do not edit. */\n\n")
+
+ # process all the keys
+ for next_curve in curves:
+
+ # load keys
+ da, qax, qay = get_key(OPENSSL, "charlie", next_curve)
+
+ # format numbers and write to files
+
+ format_c_header(file_h, next_curve,
+ da, qax, qay);
+
+ format_verilog_include(file_v, next_curve,
+ da, qax, qay);
+
+ # done
+ file_h.close()
+ file_v.close()
+
+ # everything went just fine
+ print("Test vector formatted.")
+
+#
+# End of file
+#
diff --git a/test_vectors/regenerate_random_test_vector.py b/test_vectors/regenerate_random_test_vector.py
new file mode 100644
index 0000000..75cb761
--- /dev/null
+++ b/test_vectors/regenerate_random_test_vector.py
@@ -0,0 +1,91 @@
+#
+# regenerate_test_vector.py
+# -----------------------------------------------------------
+# Generates a new randomized test vector for ecdsa_fpga_model
+#
+# Author: Pavel Shatov
+# Copyright (c) 2017-2018, NORDUnet A/S
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# - Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# - Neither the name of the NORDUnet nor the names of its contributors may
+# be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+#
+USAGE = "USAGE: regenerate_test_vector.py [openssl_binary]"
+#
+
+# This script generates a test vector. The test vector contains two
+# private keys. One is for P-256, the other one is for P-384.
+#
+
+#
+# imports
+#
+import sys
+import subprocess
+
+OPENSSL = ""
+
+CURVE_P256 = "prime256v1"
+CURVE_P384 = "secp384r1"
+
+SECRET_CHARLIE_256 = "charlie_p256.key"
+SECRET_CHARLIE_384 = "charlie_p384.key"
+
+def openssl_ecparam_genkey(openssl, curve, file):
+ subprocess.call([openssl, "ecparam", "-genkey", "-name", curve, "-out", file])
+
+#
+# __main__
+#
+if __name__ == "__main__":
+
+ # detect whether user requested some specific binary
+ if len(sys.argv) == 1:
+ OPENSSL = "openssl"
+ print("Using system OpenSSL library.")
+ elif len(sys.argv) == 2:
+ OPENSSL = sys.argv[1]
+ print("Using OpenSSL binary '" + OPENSSL + "'...")
+ else:
+ print(USAGE)
+
+ if len(OPENSSL) > 0:
+
+ # generate a new private key for P-256 curve
+ openssl_ecparam_genkey(OPENSSL, CURVE_P256, SECRET_CHARLIE_256)
+
+ # generate a new private key for P-384 curve
+ openssl_ecparam_genkey(OPENSSL, CURVE_P384, SECRET_CHARLIE_384)
+
+ # done
+ print("New randomized test vector generated.")
+
+
+#
+# End of file
+#
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
More information about the Commits
mailing list