[Cryptech-Commits] [user/shatov/modexp_fpga_model] 01/03: Triple multiplier turns out to be an overkill in Verilog, started turning systolic multiplication into a separate procedure.

git at cryptech.is git at cryptech.is
Mon Jul 10 18:13:37 UTC 2017


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexp_fpga_model.

commit 6e36e87a88b8f96dd012158730a4300ade3608b6
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Wed Jul 5 16:01:02 2017 +0300

    Triple multiplier turns out to be an overkill in Verilog, started turning
    systolic multiplication into a separate procedure.
---
 modexp_fpga_model.cpp            |   8 +--
 modexp_fpga_model_montgomery.cpp |  65 ++++++++++++++++-----
 modexp_fpga_model_pe.cpp         |   1 -
 modexp_fpga_systolic.cpp         | 118 +++++++++++++++++++++++++++++++++++++++
 modexp_fpga_systolic.h           |  47 ++++++++++++++++
 5 files changed, 219 insertions(+), 20 deletions(-)

diff --git a/modexp_fpga_model.cpp b/modexp_fpga_model.cpp
index e1c7f4e..455980b 100644
--- a/modexp_fpga_model.cpp
+++ b/modexp_fpga_model.cpp
@@ -113,7 +113,7 @@ int main()
 	printf("Trying to sign 384-bit message...\n\n");
 	ok = test_modexp(N_384_ROM, M_384_ROM, D_384_ROM, S_384_ROM, OPERAND_NUM_WORDS_384);
 	if (!ok) return EXIT_FAILURE;
-
+	/*
 	printf("Trying to exponentiate 384-bit message with 192-bit prime P and exponent dP...\n\n");
 	ok = test_modexp_crt(P_384_ROM, M_384_ROM, DP_384_ROM, MP_384_ROM, OPERAND_NUM_WORDS_384 >> 1);
 	if (!ok) return EXIT_FAILURE;
@@ -121,11 +121,11 @@ int main()
 	printf("Trying to exponentiate 384-bit message with 192-bit prime Q and exponent dQ...\n\n");
 	ok = test_modexp_crt(Q_384_ROM, M_384_ROM, DQ_384_ROM, MQ_384_ROM, OPERAND_NUM_WORDS_384 >> 1);
 	if (!ok) return EXIT_FAILURE;
-
+	*/
 	printf("Trying to sign 512-bit message...\n\n");
 	ok = test_modexp(N_512_ROM, M_512_ROM, D_512_ROM, S_512_ROM, OPERAND_NUM_WORDS_512);
 	if (!ok) return EXIT_FAILURE;
-
+	/*
 	printf("Trying to exponentiate 512-bit message with 256-bit prime P and exponent dP...\n\n");
 	ok = test_modexp_crt(P_512_ROM, M_512_ROM, DP_512_ROM, MP_512_ROM, OPERAND_NUM_WORDS_512 >> 1);
 	if (!ok) return EXIT_FAILURE;
@@ -133,7 +133,7 @@ int main()
 	printf("Trying to exponentiate 512-bit message with 256-bit prime Q and exponent dQ...\n\n");
 	ok = test_modexp_crt(Q_512_ROM, M_512_ROM, DQ_512_ROM, MQ_512_ROM, OPERAND_NUM_WORDS_512 >> 1);
 	if (!ok) return EXIT_FAILURE;
-
+	*/
 	return EXIT_SUCCESS;
 }
 
diff --git a/modexp_fpga_model_montgomery.cpp b/modexp_fpga_model_montgomery.cpp
index 260f498..e5237ff 100644
--- a/modexp_fpga_model_montgomery.cpp
+++ b/modexp_fpga_model_montgomery.cpp
@@ -40,6 +40,7 @@
 //----------------------------------------------------------------
 #include "modexp_fpga_model.h"
 #include "modexp_fpga_model_pe.h"
+#include "modexp_fpga_systolic.h"
 #include "modexp_fpga_model_montgomery.h"
 
 
@@ -67,18 +68,18 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
 
 	bool select_s;													// flag
 
-	FPGA_WORD t_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		// accumulators
+	//FPGA_WORD t_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		// accumulators
 	FPGA_WORD t_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		//
 	FPGA_WORD t_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		//
 
-	FPGA_WORD s_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		// intermediate products
+	//FPGA_WORD s_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		// intermediate products
 	FPGA_WORD s_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		//
 	FPGA_WORD s_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		//
 
-	FPGA_WORD c_in_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		// input carries
+	//FPGA_WORD c_in_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		// input carries
 	FPGA_WORD c_in_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		//
 	FPGA_WORD c_in_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		//
-	FPGA_WORD c_out_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];	// output carries
+	//FPGA_WORD c_out_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];	// output carries
 	FPGA_WORD c_out_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];	//
 	FPGA_WORD c_out_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];	//
 
@@ -103,18 +104,18 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
 		// initialize arrays of accumulators and carries to zeroes
 	for (i=0; i<num_systolic_cycles; i++)
 		for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
-			c_in_ab[i][j] = 0, c_in_q [i][j] = 0, c_in_qn[i][j] = 0,
-			t_ab[i][j]    = 0, t_q [i][j]    = 0, t_qn[i][j]    = 0;
+			/*c_in_ab[i][j] = 0,*/ c_in_q [i][j] = 0, c_in_qn[i][j] = 0,
+			/*t_ab[i][j]    = 0,*/ t_q [i][j]    = 0, t_qn[i][j]    = 0;
 
 		// initialize 1-bit carry and borrow to zeroes too
 	c_in_s = 0, b_in_sn = 0;
 
-		// simultaneously calculate AB, Q, QN, S, SN
+	multiply_systolic(A, B, AB, len, 2 * len);
+
+	/*
+	///////////////////////
 	for (i = 0; i < (2 * len); i++)
 	{
-			// multiply entire B by current word of A to get AB
-			// multiply entire N_COEFF by current word of AB to get Q
-			// multiply entire N by current word of Q to get QN
 		for (k = 0; k < num_systolic_cycles; k++)
 		{
 				// simulate how a systolic array would work
@@ -124,8 +125,6 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
 
 					// current words of B, N_COEFF, N
 				FPGA_WORD Bj       = (j_index < len) ? B      [k * SYSTOLIC_NUM_WORDS + j] : 0;
-				FPGA_WORD N_COEFFj = (j_index < len) ? N_COEFF[k * SYSTOLIC_NUM_WORDS + j] : 0;
-				FPGA_WORD Nj       = (j_index < len) ? N      [k * SYSTOLIC_NUM_WORDS + j] : 0;
 
 					// current word of A
 				FPGA_WORD Aj_ab = (i < len) ? A[i] : 0;
@@ -135,6 +134,45 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
 
 					// store current word of AB
 				if ((k == 0) && (j == 0)) AB[i] = reduce_only ? A[i] : s_ab[0][0];
+			}
+
+				// propagate carries
+			for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
+				c_in_ab[k][j] = c_out_ab[k][j];
+
+				// update accumulators
+			for (j=1; j<SYSTOLIC_NUM_WORDS; j++)
+				t_ab[k][j-1] = s_ab[k][j];
+			
+				// update accumulators
+			if (k > 0)
+				t_ab[k-1][SYSTOLIC_NUM_WORDS-1] = s_ab[k][0];
+
+		}
+	}
+	*/
+
+	///////////////////////
+
+
+
+
+		// simultaneously calculate AB, Q, QN, S, SN
+	for (i = 0; i < (2 * len); i++)
+	{
+			// multiply entire B by current word of A to get AB
+			// multiply entire N_COEFF by current word of AB to get Q
+			// multiply entire N by current word of Q to get QN
+		for (k = 0; k < num_systolic_cycles; k++)
+		{
+				// simulate how a systolic array would work
+			for (j = 0; j < SYSTOLIC_NUM_WORDS; j++)
+			{
+				size_t j_index = k * SYSTOLIC_NUM_WORDS + j;
+
+					// current words of B, N_COEFF, N
+				FPGA_WORD N_COEFFj = (j_index < len) ? N_COEFF[k * SYSTOLIC_NUM_WORDS + j] : 0;
+				FPGA_WORD Nj       = (j_index < len) ? N      [k * SYSTOLIC_NUM_WORDS + j] : 0;
 
 					// current word of AB
 				FPGA_WORD Aj_q = (i < len) ? AB[i] : 0;
@@ -157,21 +195,18 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
 
 				// propagate carries
 			for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
-				c_in_ab[k][j] = c_out_ab[k][j],
 				c_in_q [k][j] = c_out_q [k][j],
 				c_in_qn[k][j] = c_out_qn[k][j];
 
 				// update accumulators
 			for (j=1; j<SYSTOLIC_NUM_WORDS; j++)
 			{
-				t_ab[k][j-1] = s_ab[k][j];
 				t_q [k][j-1] = s_q [k][j];
 				t_qn[k][j-1] = s_qn[k][j];
 			}
 			
 				// update accumulators
 			if (k > 0)
-				t_ab[k-1][SYSTOLIC_NUM_WORDS-1] = s_ab[k][0],
 				t_q [k-1][SYSTOLIC_NUM_WORDS-1] = s_q [k][0],
 				t_qn[k-1][SYSTOLIC_NUM_WORDS-1] = s_qn[k][0];
 
diff --git a/modexp_fpga_model_pe.cpp b/modexp_fpga_model_pe.cpp
index 4f58bee..2d90672 100644
--- a/modexp_fpga_model_pe.cpp
+++ b/modexp_fpga_model_pe.cpp
@@ -39,7 +39,6 @@
 // Headers
 //----------------------------------------------------------------
 #include "modexp_fpga_model.h"
-#include "modexp_fpga_model_pe.h"
 
 
 //----------------------------------------------------------------
diff --git a/modexp_fpga_systolic.cpp b/modexp_fpga_systolic.cpp
new file mode 100644
index 0000000..9dce130
--- /dev/null
+++ b/modexp_fpga_systolic.cpp
@@ -0,0 +1,118 @@
+//
+// modexp_fpga_systolic.cpp
+// ------------------------
+// Systolic Multiplier
+//
+// Authors: Pavel Shatov
+// Copyright (c) 2017, NORDUnet A/S
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+
+//----------------------------------------------------------------
+// Headers
+//----------------------------------------------------------------
+#include "modexp_fpga_model.h"
+#include "modexp_fpga_model_pe.h"
+
+
+//----------------------------------------------------------------
+void multiply_systolic(const FPGA_WORD *A, const FPGA_WORD *B, FPGA_WORD *P, size_t len_ab, size_t len_p)
+//----------------------------------------------------------------
+{
+		// counters
+	size_t i, j, k;
+
+		// index
+	size_t j_index;
+		
+		// current words of A and B
+	FPGA_WORD Aj, Bj;
+
+		// number of full systolic cycles needed to multiply entire B by one word of A
+	size_t num_systolic_cycles = len_ab / SYSTOLIC_NUM_WORDS;
+	if ((num_systolic_cycles * SYSTOLIC_NUM_WORDS) < len_ab) num_systolic_cycles++;
+
+		// buffers
+	FPGA_WORD t[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		// accumulator
+	FPGA_WORD s[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		// intermediate product
+	FPGA_WORD c_in[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];	// input carries
+	FPGA_WORD c_out[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];	// output carries
+
+			// initialize arrays of accumulators and carries to zeroes
+	for (i=0; i<num_systolic_cycles; i++)
+		for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
+			c_in[i][j] = 0, t[i][j] = 0;
+
+		// do the math
+	for (i=0; i<len_p; i++)
+	{
+			// reset word index
+		j_index = 0;
+
+			// scan chunks of A
+		for (k=0; k<num_systolic_cycles; k++)
+		{
+				// simulate how systolic array would work
+			for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
+			{
+					// current words of B and A
+				Bj = (j_index < len_ab) ? B[j_index] : 0;
+				Aj = (i       < len_ab) ? A[i]       : 0;
+
+					// Pj = Aj * Bj
+				pe_mul(Aj, Bj, t[k][j], c_in[k][j], &s[k][j], &c_out[k][j]);
+
+					// store current word of P
+				if ((k == 0) && (j == 0)) P[i] = s[0][0];
+
+					// update index
+				j_index++;
+			}
+
+				// propagate carries
+			for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
+				c_in[k][j] = c_out[k][j];
+
+				// update accumulator
+			for (j=1; j<SYSTOLIC_NUM_WORDS; j++)
+				t[k][j-1] = s[k][j];
+			
+				// update accumulator
+			if (k > 0)
+				t[k-1][SYSTOLIC_NUM_WORDS-1] = s[k][0];
+
+		}
+	}
+}
+
+
+//----------------------------------------------------------------
+// End of file
+//----------------------------------------------------------------
diff --git a/modexp_fpga_systolic.h b/modexp_fpga_systolic.h
new file mode 100644
index 0000000..bef0d99
--- /dev/null
+++ b/modexp_fpga_systolic.h
@@ -0,0 +1,47 @@
+//
+// modexp_fpga_systolic.h
+// ----------------------
+// Systolic Multiplier
+//
+// Authors: Pavel Shatov
+// Copyright (c) 2017, NORDUnet A/S
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+
+
+//----------------------------------------------------------------
+// Prototypes
+//----------------------------------------------------------------
+void multiply_systolic(	const FPGA_WORD *A, const FPGA_WORD *B, FPGA_WORD *P,
+						size_t len_ab, size_t len_p);
+
+
+//----------------------------------------------------------------
+// End of file
+//----------------------------------------------------------------



More information about the Commits mailing list