[Cryptech-Commits] [user/shatov/modexp_fpga_model] 01/03: Triple multiplier turns out to be an overkill in Verilog, started turning systolic multiplication into a separate procedure.
git at cryptech.is
git at cryptech.is
Mon Jul 10 18:13:37 UTC 2017
This is an automated email from the git hooks/post-receive script.
meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexp_fpga_model.
commit 6e36e87a88b8f96dd012158730a4300ade3608b6
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Wed Jul 5 16:01:02 2017 +0300
Triple multiplier turns out to be an overkill in Verilog, started turning
systolic multiplication into a separate procedure.
---
modexp_fpga_model.cpp | 8 +--
modexp_fpga_model_montgomery.cpp | 65 ++++++++++++++++-----
modexp_fpga_model_pe.cpp | 1 -
modexp_fpga_systolic.cpp | 118 +++++++++++++++++++++++++++++++++++++++
modexp_fpga_systolic.h | 47 ++++++++++++++++
5 files changed, 219 insertions(+), 20 deletions(-)
diff --git a/modexp_fpga_model.cpp b/modexp_fpga_model.cpp
index e1c7f4e..455980b 100644
--- a/modexp_fpga_model.cpp
+++ b/modexp_fpga_model.cpp
@@ -113,7 +113,7 @@ int main()
printf("Trying to sign 384-bit message...\n\n");
ok = test_modexp(N_384_ROM, M_384_ROM, D_384_ROM, S_384_ROM, OPERAND_NUM_WORDS_384);
if (!ok) return EXIT_FAILURE;
-
+ /*
printf("Trying to exponentiate 384-bit message with 192-bit prime P and exponent dP...\n\n");
ok = test_modexp_crt(P_384_ROM, M_384_ROM, DP_384_ROM, MP_384_ROM, OPERAND_NUM_WORDS_384 >> 1);
if (!ok) return EXIT_FAILURE;
@@ -121,11 +121,11 @@ int main()
printf("Trying to exponentiate 384-bit message with 192-bit prime Q and exponent dQ...\n\n");
ok = test_modexp_crt(Q_384_ROM, M_384_ROM, DQ_384_ROM, MQ_384_ROM, OPERAND_NUM_WORDS_384 >> 1);
if (!ok) return EXIT_FAILURE;
-
+ */
printf("Trying to sign 512-bit message...\n\n");
ok = test_modexp(N_512_ROM, M_512_ROM, D_512_ROM, S_512_ROM, OPERAND_NUM_WORDS_512);
if (!ok) return EXIT_FAILURE;
-
+ /*
printf("Trying to exponentiate 512-bit message with 256-bit prime P and exponent dP...\n\n");
ok = test_modexp_crt(P_512_ROM, M_512_ROM, DP_512_ROM, MP_512_ROM, OPERAND_NUM_WORDS_512 >> 1);
if (!ok) return EXIT_FAILURE;
@@ -133,7 +133,7 @@ int main()
printf("Trying to exponentiate 512-bit message with 256-bit prime Q and exponent dQ...\n\n");
ok = test_modexp_crt(Q_512_ROM, M_512_ROM, DQ_512_ROM, MQ_512_ROM, OPERAND_NUM_WORDS_512 >> 1);
if (!ok) return EXIT_FAILURE;
-
+ */
return EXIT_SUCCESS;
}
diff --git a/modexp_fpga_model_montgomery.cpp b/modexp_fpga_model_montgomery.cpp
index 260f498..e5237ff 100644
--- a/modexp_fpga_model_montgomery.cpp
+++ b/modexp_fpga_model_montgomery.cpp
@@ -40,6 +40,7 @@
//----------------------------------------------------------------
#include "modexp_fpga_model.h"
#include "modexp_fpga_model_pe.h"
+#include "modexp_fpga_systolic.h"
#include "modexp_fpga_model_montgomery.h"
@@ -67,18 +68,18 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
bool select_s; // flag
- FPGA_WORD t_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // accumulators
+ //FPGA_WORD t_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // accumulators
FPGA_WORD t_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
FPGA_WORD t_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
- FPGA_WORD s_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // intermediate products
+ //FPGA_WORD s_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // intermediate products
FPGA_WORD s_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
FPGA_WORD s_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
- FPGA_WORD c_in_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // input carries
+ //FPGA_WORD c_in_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // input carries
FPGA_WORD c_in_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
FPGA_WORD c_in_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
- FPGA_WORD c_out_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // output carries
+ //FPGA_WORD c_out_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // output carries
FPGA_WORD c_out_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
FPGA_WORD c_out_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; //
@@ -103,18 +104,18 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
// initialize arrays of accumulators and carries to zeroes
for (i=0; i<num_systolic_cycles; i++)
for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
- c_in_ab[i][j] = 0, c_in_q [i][j] = 0, c_in_qn[i][j] = 0,
- t_ab[i][j] = 0, t_q [i][j] = 0, t_qn[i][j] = 0;
+ /*c_in_ab[i][j] = 0,*/ c_in_q [i][j] = 0, c_in_qn[i][j] = 0,
+ /*t_ab[i][j] = 0,*/ t_q [i][j] = 0, t_qn[i][j] = 0;
// initialize 1-bit carry and borrow to zeroes too
c_in_s = 0, b_in_sn = 0;
- // simultaneously calculate AB, Q, QN, S, SN
+ multiply_systolic(A, B, AB, len, 2 * len);
+
+ /*
+ ///////////////////////
for (i = 0; i < (2 * len); i++)
{
- // multiply entire B by current word of A to get AB
- // multiply entire N_COEFF by current word of AB to get Q
- // multiply entire N by current word of Q to get QN
for (k = 0; k < num_systolic_cycles; k++)
{
// simulate how a systolic array would work
@@ -124,8 +125,6 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
// current words of B, N_COEFF, N
FPGA_WORD Bj = (j_index < len) ? B [k * SYSTOLIC_NUM_WORDS + j] : 0;
- FPGA_WORD N_COEFFj = (j_index < len) ? N_COEFF[k * SYSTOLIC_NUM_WORDS + j] : 0;
- FPGA_WORD Nj = (j_index < len) ? N [k * SYSTOLIC_NUM_WORDS + j] : 0;
// current word of A
FPGA_WORD Aj_ab = (i < len) ? A[i] : 0;
@@ -135,6 +134,45 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
// store current word of AB
if ((k == 0) && (j == 0)) AB[i] = reduce_only ? A[i] : s_ab[0][0];
+ }
+
+ // propagate carries
+ for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
+ c_in_ab[k][j] = c_out_ab[k][j];
+
+ // update accumulators
+ for (j=1; j<SYSTOLIC_NUM_WORDS; j++)
+ t_ab[k][j-1] = s_ab[k][j];
+
+ // update accumulators
+ if (k > 0)
+ t_ab[k-1][SYSTOLIC_NUM_WORDS-1] = s_ab[k][0];
+
+ }
+ }
+ */
+
+ ///////////////////////
+
+
+
+
+ // simultaneously calculate AB, Q, QN, S, SN
+ for (i = 0; i < (2 * len); i++)
+ {
+ // multiply entire B by current word of A to get AB
+ // multiply entire N_COEFF by current word of AB to get Q
+ // multiply entire N by current word of Q to get QN
+ for (k = 0; k < num_systolic_cycles; k++)
+ {
+ // simulate how a systolic array would work
+ for (j = 0; j < SYSTOLIC_NUM_WORDS; j++)
+ {
+ size_t j_index = k * SYSTOLIC_NUM_WORDS + j;
+
+ // current words of B, N_COEFF, N
+ FPGA_WORD N_COEFFj = (j_index < len) ? N_COEFF[k * SYSTOLIC_NUM_WORDS + j] : 0;
+ FPGA_WORD Nj = (j_index < len) ? N [k * SYSTOLIC_NUM_WORDS + j] : 0;
// current word of AB
FPGA_WORD Aj_q = (i < len) ? AB[i] : 0;
@@ -157,21 +195,18 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
// propagate carries
for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
- c_in_ab[k][j] = c_out_ab[k][j],
c_in_q [k][j] = c_out_q [k][j],
c_in_qn[k][j] = c_out_qn[k][j];
// update accumulators
for (j=1; j<SYSTOLIC_NUM_WORDS; j++)
{
- t_ab[k][j-1] = s_ab[k][j];
t_q [k][j-1] = s_q [k][j];
t_qn[k][j-1] = s_qn[k][j];
}
// update accumulators
if (k > 0)
- t_ab[k-1][SYSTOLIC_NUM_WORDS-1] = s_ab[k][0],
t_q [k-1][SYSTOLIC_NUM_WORDS-1] = s_q [k][0],
t_qn[k-1][SYSTOLIC_NUM_WORDS-1] = s_qn[k][0];
diff --git a/modexp_fpga_model_pe.cpp b/modexp_fpga_model_pe.cpp
index 4f58bee..2d90672 100644
--- a/modexp_fpga_model_pe.cpp
+++ b/modexp_fpga_model_pe.cpp
@@ -39,7 +39,6 @@
// Headers
//----------------------------------------------------------------
#include "modexp_fpga_model.h"
-#include "modexp_fpga_model_pe.h"
//----------------------------------------------------------------
diff --git a/modexp_fpga_systolic.cpp b/modexp_fpga_systolic.cpp
new file mode 100644
index 0000000..9dce130
--- /dev/null
+++ b/modexp_fpga_systolic.cpp
@@ -0,0 +1,118 @@
+//
+// modexp_fpga_systolic.cpp
+// ------------------------
+// Systolic Multiplier
+//
+// Authors: Pavel Shatov
+// Copyright (c) 2017, NORDUnet A/S
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+
+//----------------------------------------------------------------
+// Headers
+//----------------------------------------------------------------
+#include "modexp_fpga_model.h"
+#include "modexp_fpga_model_pe.h"
+
+
+//----------------------------------------------------------------
+void multiply_systolic(const FPGA_WORD *A, const FPGA_WORD *B, FPGA_WORD *P, size_t len_ab, size_t len_p)
+//----------------------------------------------------------------
+{
+ // counters
+ size_t i, j, k;
+
+ // index
+ size_t j_index;
+
+ // current words of A and B
+ FPGA_WORD Aj, Bj;
+
+ // number of full systolic cycles needed to multiply entire B by one word of A
+ size_t num_systolic_cycles = len_ab / SYSTOLIC_NUM_WORDS;
+ if ((num_systolic_cycles * SYSTOLIC_NUM_WORDS) < len_ab) num_systolic_cycles++;
+
+ // buffers
+ FPGA_WORD t[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // accumulator
+ FPGA_WORD s[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // intermediate product
+ FPGA_WORD c_in[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // input carries
+ FPGA_WORD c_out[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS]; // output carries
+
+ // initialize arrays of accumulators and carries to zeroes
+ for (i=0; i<num_systolic_cycles; i++)
+ for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
+ c_in[i][j] = 0, t[i][j] = 0;
+
+ // do the math
+ for (i=0; i<len_p; i++)
+ {
+ // reset word index
+ j_index = 0;
+
+ // scan chunks of A
+ for (k=0; k<num_systolic_cycles; k++)
+ {
+ // simulate how systolic array would work
+ for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
+ {
+ // current words of B and A
+ Bj = (j_index < len_ab) ? B[j_index] : 0;
+ Aj = (i < len_ab) ? A[i] : 0;
+
+ // Pj = Aj * Bj
+ pe_mul(Aj, Bj, t[k][j], c_in[k][j], &s[k][j], &c_out[k][j]);
+
+ // store current word of P
+ if ((k == 0) && (j == 0)) P[i] = s[0][0];
+
+ // update index
+ j_index++;
+ }
+
+ // propagate carries
+ for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
+ c_in[k][j] = c_out[k][j];
+
+ // update accumulator
+ for (j=1; j<SYSTOLIC_NUM_WORDS; j++)
+ t[k][j-1] = s[k][j];
+
+ // update accumulator
+ if (k > 0)
+ t[k-1][SYSTOLIC_NUM_WORDS-1] = s[k][0];
+
+ }
+ }
+}
+
+
+//----------------------------------------------------------------
+// End of file
+//----------------------------------------------------------------
diff --git a/modexp_fpga_systolic.h b/modexp_fpga_systolic.h
new file mode 100644
index 0000000..bef0d99
--- /dev/null
+++ b/modexp_fpga_systolic.h
@@ -0,0 +1,47 @@
+//
+// modexp_fpga_systolic.h
+// ----------------------
+// Systolic Multiplier
+//
+// Authors: Pavel Shatov
+// Copyright (c) 2017, NORDUnet A/S
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+
+//----------------------------------------------------------------
+// Prototypes
+//----------------------------------------------------------------
+void multiply_systolic( const FPGA_WORD *A, const FPGA_WORD *B, FPGA_WORD *P,
+ size_t len_ab, size_t len_p);
+
+
+//----------------------------------------------------------------
+// End of file
+//----------------------------------------------------------------
More information about the Commits
mailing list