[Cryptech-Commits] [user/shatov/modexp_fpga_model] 02/03: Turned systolic multiplication into a separate routine.

git at cryptech.is git at cryptech.is
Mon Jul 10 18:13:38 UTC 2017


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexp_fpga_model.

commit ee41d5837267e2d6a2e2ae27751d8d03a80e3657
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Wed Jul 5 16:24:44 2017 +0300

    Turned systolic multiplication into a separate routine.
---
 modexp_fpga_model.cpp            |   8 +-
 modexp_fpga_model.h              |   2 +-
 modexp_fpga_model_montgomery.cpp | 161 +++++----------------------------------
 3 files changed, 26 insertions(+), 145 deletions(-)

diff --git a/modexp_fpga_model.cpp b/modexp_fpga_model.cpp
index 455980b..e1c7f4e 100644
--- a/modexp_fpga_model.cpp
+++ b/modexp_fpga_model.cpp
@@ -113,7 +113,7 @@ int main()
 	printf("Trying to sign 384-bit message...\n\n");
 	ok = test_modexp(N_384_ROM, M_384_ROM, D_384_ROM, S_384_ROM, OPERAND_NUM_WORDS_384);
 	if (!ok) return EXIT_FAILURE;
-	/*
+
 	printf("Trying to exponentiate 384-bit message with 192-bit prime P and exponent dP...\n\n");
 	ok = test_modexp_crt(P_384_ROM, M_384_ROM, DP_384_ROM, MP_384_ROM, OPERAND_NUM_WORDS_384 >> 1);
 	if (!ok) return EXIT_FAILURE;
@@ -121,11 +121,11 @@ int main()
 	printf("Trying to exponentiate 384-bit message with 192-bit prime Q and exponent dQ...\n\n");
 	ok = test_modexp_crt(Q_384_ROM, M_384_ROM, DQ_384_ROM, MQ_384_ROM, OPERAND_NUM_WORDS_384 >> 1);
 	if (!ok) return EXIT_FAILURE;
-	*/
+
 	printf("Trying to sign 512-bit message...\n\n");
 	ok = test_modexp(N_512_ROM, M_512_ROM, D_512_ROM, S_512_ROM, OPERAND_NUM_WORDS_512);
 	if (!ok) return EXIT_FAILURE;
-	/*
+
 	printf("Trying to exponentiate 512-bit message with 256-bit prime P and exponent dP...\n\n");
 	ok = test_modexp_crt(P_512_ROM, M_512_ROM, DP_512_ROM, MP_512_ROM, OPERAND_NUM_WORDS_512 >> 1);
 	if (!ok) return EXIT_FAILURE;
@@ -133,7 +133,7 @@ int main()
 	printf("Trying to exponentiate 512-bit message with 256-bit prime Q and exponent dQ...\n\n");
 	ok = test_modexp_crt(Q_512_ROM, M_512_ROM, DQ_512_ROM, MQ_512_ROM, OPERAND_NUM_WORDS_512 >> 1);
 	if (!ok) return EXIT_FAILURE;
-	*/
+
 	return EXIT_SUCCESS;
 }
 
diff --git a/modexp_fpga_model.h b/modexp_fpga_model.h
index f30a41b..2a91d32 100644
--- a/modexp_fpga_model.h
+++ b/modexp_fpga_model.h
@@ -31,7 +31,7 @@
 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-//- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 
 
diff --git a/modexp_fpga_model_montgomery.cpp b/modexp_fpga_model_montgomery.cpp
index e5237ff..5bc5ba4 100644
--- a/modexp_fpga_model_montgomery.cpp
+++ b/modexp_fpga_model_montgomery.cpp
@@ -64,156 +64,37 @@ void montgomery_multiply(const FPGA_WORD *A, const FPGA_WORD *B, const FPGA_WORD
 //
 //----------------------------------------------------------------
 {
-	size_t i, j, k;													// counters
+	size_t i;												// counters
 
-	bool select_s;													// flag
+	FPGA_WORD AB[2 * MAX_OPERAND_WORDS];					// products
+	FPGA_WORD Q [    MAX_OPERAND_WORDS];					//
+	FPGA_WORD QN[2 * MAX_OPERAND_WORDS];					//
 
-	//FPGA_WORD t_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		// accumulators
-	FPGA_WORD t_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		//
-	FPGA_WORD t_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		//
+	bool select_s;											// flag
 
-	//FPGA_WORD s_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		// intermediate products
-	FPGA_WORD s_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		//
-	FPGA_WORD s_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		//
+	FPGA_WORD c_in_s;										// 1-bit carry and borrow
+	FPGA_WORD b_in_sn;										//
+	FPGA_WORD c_out_s;										//
+	FPGA_WORD b_out_sn;										//
 
-	//FPGA_WORD c_in_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		// input carries
-	FPGA_WORD c_in_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		//
-	FPGA_WORD c_in_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];		//
-	//FPGA_WORD c_out_ab[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];	// output carries
-	FPGA_WORD c_out_q [MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];	//
-	FPGA_WORD c_out_qn[MAX_SYSTOLIC_CYCLES][SYSTOLIC_NUM_WORDS];	//
+	FPGA_WORD S [2 * MAX_OPERAND_WORDS];					// final sum
+	FPGA_WORD SN[2 * MAX_OPERAND_WORDS];					// final difference
 
-	FPGA_WORD c_in_s;												// 1-bit carry and borrow
-	FPGA_WORD b_in_sn;												//
-	FPGA_WORD c_out_s;												//
-	FPGA_WORD b_out_sn;												//
+		// in reduce-only mode A is already 2*len words long, so copy it into AB as is
+	if (reduce_only)
+		for (i=0; i<(2*len); i++)
+			AB[i] = A[i];
 
-	FPGA_WORD AB[2 * MAX_OPERAND_WORDS];							// final products
-	FPGA_WORD Q [2 * MAX_OPERAND_WORDS];							//
-	FPGA_WORD QN[2 * MAX_OPERAND_WORDS];							//
+	if (!reduce_only)	multiply_systolic(A,  B,       AB, len, 2 * len);		// AB = A  * B
+						multiply_systolic(AB, N_COEFF, Q,  len,     len);		// Q  = AB * N_COEFF
+						multiply_systolic(Q,  N,       QN, len, 2 * len);		// QN = Q * N
 
-	FPGA_WORD S [2 * MAX_OPERAND_WORDS];							// final sum
-	FPGA_WORD SN[2 * MAX_OPERAND_WORDS];							// final difference
-
-		// number of full systolic cycles needed to multiply entire B by one word of A
-	size_t num_systolic_cycles = len / SYSTOLIC_NUM_WORDS;
-	
-		// adjust number of cycles
-	if ((num_systolic_cycles * SYSTOLIC_NUM_WORDS) < len) num_systolic_cycles++;
-
-		// initialize arrays of accumulators and carries to zeroes
-	for (i=0; i<num_systolic_cycles; i++)
-		for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
-			/*c_in_ab[i][j] = 0,*/ c_in_q [i][j] = 0, c_in_qn[i][j] = 0,
-			/*t_ab[i][j]    = 0,*/ t_q [i][j]    = 0, t_qn[i][j]    = 0;
-
-		// initialize 1-bit carry and borrow to zeroes too
+		// initialize 1-bit carry and borrow
 	c_in_s = 0, b_in_sn = 0;
 
-	multiply_systolic(A, B, AB, len, 2 * len);
-
-	/*
-	///////////////////////
+		// now it's time to simultaneously add and subtract
 	for (i = 0; i < (2 * len); i++)
-	{
-		for (k = 0; k < num_systolic_cycles; k++)
-		{
-				// simulate how a systolic array would work
-			for (j = 0; j < SYSTOLIC_NUM_WORDS; j++)
-			{
-				size_t j_index = k * SYSTOLIC_NUM_WORDS + j;
-
-					// current words of B, N_COEFF, N
-				FPGA_WORD Bj       = (j_index < len) ? B      [k * SYSTOLIC_NUM_WORDS + j] : 0;
-
-					// current word of A
-				FPGA_WORD Aj_ab = (i < len) ? A[i] : 0;
-
-					// AB = A * B		
-				pe_mul(Aj_ab, Bj, t_ab[k][j], c_in_ab[k][j], &s_ab[k][j], &c_out_ab[k][j]);
-
-					// store current word of AB
-				if ((k == 0) && (j == 0)) AB[i] = reduce_only ? A[i] : s_ab[0][0];
-			}
-
-				// propagate carries
-			for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
-				c_in_ab[k][j] = c_out_ab[k][j];
-
-				// update accumulators
-			for (j=1; j<SYSTOLIC_NUM_WORDS; j++)
-				t_ab[k][j-1] = s_ab[k][j];
-			
-				// update accumulators
-			if (k > 0)
-				t_ab[k-1][SYSTOLIC_NUM_WORDS-1] = s_ab[k][0];
-
-		}
-	}
-	*/
-
-	///////////////////////
-
-
-
-
-		// simultaneously calculate AB, Q, QN, S, SN
-	for (i = 0; i < (2 * len); i++)
-	{
-			// multiply entire B by current word of A to get AB
-			// multiply entire N_COEFF by current word of AB to get Q
-			// multiply entire N by current word of Q to get QN
-		for (k = 0; k < num_systolic_cycles; k++)
-		{
-				// simulate how a systolic array would work
-			for (j = 0; j < SYSTOLIC_NUM_WORDS; j++)
-			{
-				size_t j_index = k * SYSTOLIC_NUM_WORDS + j;
-
-					// current words of B, N_COEFF, N
-				FPGA_WORD N_COEFFj = (j_index < len) ? N_COEFF[k * SYSTOLIC_NUM_WORDS + j] : 0;
-				FPGA_WORD Nj       = (j_index < len) ? N      [k * SYSTOLIC_NUM_WORDS + j] : 0;
-
-					// current word of AB
-				FPGA_WORD Aj_q = (i < len) ? AB[i] : 0;
-
-					// Q = AB * N		
-				pe_mul(Aj_q, N_COEFFj, t_q[k][j], c_in_q[k][j], &s_q[k][j], &c_out_q[k][j]);
-
-					// store current word of Q
-				if ((k == 0) && (j == 0)) Q[i] = s_q[0][0];
-
-					// current word of Q
-				FPGA_WORD Aj_qn = (i < len) ? Q[i] : 0;
-
-					// QN = Q * N
-				pe_mul(Aj_qn, Nj, t_qn[k][j], c_in_qn[k][j], &s_qn[k][j], &c_out_qn[k][j]);
-
-					// store next word of QN
-				if ((k == 0) && (j == 0)) QN[i] = s_qn[0][0];
-			}
-
-				// propagate carries
-			for (j=0; j<SYSTOLIC_NUM_WORDS; j++)
-				c_in_q [k][j] = c_out_q [k][j],
-				c_in_qn[k][j] = c_out_qn[k][j];
-
-				// update accumulators
-			for (j=1; j<SYSTOLIC_NUM_WORDS; j++)
-			{
-				t_q [k][j-1] = s_q [k][j];
-				t_qn[k][j-1] = s_qn[k][j];
-			}
-			
-				// update accumulators
-			if (k > 0)
-				t_q [k-1][SYSTOLIC_NUM_WORDS-1] = s_q [k][0],
-				t_qn[k-1][SYSTOLIC_NUM_WORDS-1] = s_qn[k][0];
-
-		}
-	
-			// now it's time to simultaneously add and subtract
-
+	{	
 			// current operand words
 		FPGA_WORD QNi = QN[i];
 		FPGA_WORD Ni  = (i < len) ? 0 : N[i-len];



More information about the Commits mailing list