[Cryptech-Commits] [user/shatov/curve25519_fpga_model] 08/14: X25519-specific code (curve point multiplication)

git at cryptech.is git at cryptech.is
Mon Sep 24 18:52:52 UTC 2018


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/curve25519_fpga_model.

commit ed6437839977023ffe1eb95d87760d4f1b2c518b
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Mon Sep 24 21:38:06 2018 +0300

    X25519-specific code (curve point multiplication)
---
 x25519/x25519_fpga_curve.h             |  90 +++++++++++++
 x25519/x25519_fpga_curve_abstract.cpp  | 222 +++++++++++++++++++++++++++++++++
 x25519/x25519_fpga_curve_microcode.cpp | 208 ++++++++++++++++++++++++++++++
 3 files changed, 520 insertions(+)

diff --git a/x25519/x25519_fpga_curve.h b/x25519/x25519_fpga_curve.h
new file mode 100644
index 0000000..9f8bff9
--- /dev/null
+++ b/x25519/x25519_fpga_curve.h
@@ -0,0 +1,90 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_curve.h
+// -----------------------------------------------
+// Elliptic curve arithmetic procedures for X25519
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Curve25519 Parameters
+//------------------------------------------------------------------------------
+
+/* x-coordinate of the base point (9), most significant word first */
+#define X25519_G_X_INIT		{0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+							 0x00000000, 0x00000000, 0x00000000, 0x00000009}
+
+/* coefficient (A + 2) / 4 = 0x1DB42, most significant word first */
+#define X25519_A24_INIT		{0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+							 0x00000000, 0x00000000, 0x00000000, 0x0001DB42}
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+extern FPGA_BUFFER X25519_G_X;	// x-coordinate of the base point (filled by fpga_curve_x25519_init)
+extern FPGA_BUFFER X25519_A24;	// coefficient (A + 2) / 4 (filled by fpga_curve_x25519_init)
+
+
+//------------------------------------------------------------------------------
+// Implementation switch: USE_MICROCODE selects the microcode-based multiplier
+//------------------------------------------------------------------------------
+#ifdef USE_MICROCODE
+#define fpga_curve_x25519_scalar_multiply fpga_curve_x25519_scalar_multiply_microcode
+#else
+#define fpga_curve_x25519_scalar_multiply fpga_curve_x25519_scalar_multiply_abstract
+#endif
+
+
+//------------------------------------------------------------------------------
+// Prototypes
+//------------------------------------------------------------------------------
+void	fpga_curve_x25519_init						();	// fills X25519_G_X and X25519_A24
+
+void	fpga_curve_x25519_scalar_multiply_abstract	(const FPGA_BUFFER *P_X, const FPGA_BUFFER *K, FPGA_BUFFER *Q_X);	// Q_X = affine x-coordinate of K * P
+void	fpga_curve_x25519_scalar_multiply_microcode	(const FPGA_BUFFER *P_X, const FPGA_BUFFER *K, FPGA_BUFFER *Q_X);	// same operation expressed as microcode
+
+void	fpga_curve_x25519_ladder_step				(const FPGA_BUFFER *P_X,
+													 const FPGA_BUFFER *R0_X_in,  const FPGA_BUFFER *R0_Z_in,
+													 const FPGA_BUFFER *R1_X_in,  const FPGA_BUFFER *R1_Z_in,
+													 FPGA_BUFFER *R0_X_out, FPGA_BUFFER *R0_Z_out,
+													 FPGA_BUFFER *R1_X_out, FPGA_BUFFER *R1_Z_out);	// one Montgomery ladder step
+
+void	fpga_curve_x25519_to_affine		(const FPGA_BUFFER *P_X,
+										 const FPGA_BUFFER *P_Z,
+										 FPGA_BUFFER *Q_X);	// Q_X = P_X / P_Z
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519/x25519_fpga_curve_abstract.cpp b/x25519/x25519_fpga_curve_abstract.cpp
new file mode 100644
index 0000000..bb551df
--- /dev/null
+++ b/x25519/x25519_fpga_curve_abstract.cpp
@@ -0,0 +1,222 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_curve_abstract.cpp
+// -----------------------------------------------
+// Elliptic curve arithmetic procedures for X25519
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "x25519_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+FPGA_BUFFER X25519_G_X;		// x-coordinate of the base point
+FPGA_BUFFER X25519_A24;		// coefficient (A + 2) / 4
+
+
+//------------------------------------------------------------------------------
+void fpga_curve_x25519_init()
+//------------------------------------------------------------------------------
+{
+	int w_src, w_dst;	// source (initializer) and destination (buffer) word indices
+
+	FPGA_WORD TMP_G_X[FPGA_OPERAND_NUM_WORDS]	= X25519_G_X_INIT;
+	FPGA_WORD TMP_A24[FPGA_OPERAND_NUM_WORDS]	= X25519_A24_INIT;
+
+		/* copy the initializer words into the global buffers in reverse order, so the last initializer word lands in words[0] */
+	for (	w_src = 0, w_dst = FPGA_OPERAND_NUM_WORDS - 1;
+			w_src < FPGA_OPERAND_NUM_WORDS;
+			w_src++, w_dst--)
+	{
+		X25519_G_X.words[w_dst]	= TMP_G_X[w_src];
+		X25519_A24.words[w_dst] = TMP_A24[w_src];
+	}
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Elliptic curve point scalar multiplication routine (abstract model).
+//
+// Runs one Montgomery ladder step per bit of K, starting from the top bit of
+// the last word, then converts the (X, Z) result to the affine coordinate QX.
+//
+// The algorithm is based on Algorithm 3 from "How to (pre-)compute a ladder"
+// https://eprint.iacr.org/2017/264.pdf
+//
+//------------------------------------------------------------------------------
+void fpga_curve_x25519_scalar_multiply_abstract(const FPGA_BUFFER *PX, const FPGA_BUFFER *K, FPGA_BUFFER *QX)
+//------------------------------------------------------------------------------
+{
+	int word_count, bit_count;	// word and bit counters over K
+
+		// ladder state (R0, R1) and ladder step inputs (T0, T1), each as an (X, Z) pair
+	FPGA_BUFFER R0_X;
+	FPGA_BUFFER R0_Z;
+	FPGA_BUFFER R1_X;
+	FPGA_BUFFER R1_Z;
+
+	FPGA_BUFFER T0_X;
+	FPGA_BUFFER T0_Z;
+	FPGA_BUFFER T1_X;
+	FPGA_BUFFER T1_Z;
+
+		// initialization: R0 = (ONE, ZERO), R1 = (PX, ONE)
+	fpga_multiword_copy(&CURVE25519_ONE,  &R0_X);
+	fpga_multiword_copy(&CURVE25519_ZERO, &R0_Z);
+	fpga_multiword_copy(PX,               &R1_X);
+	fpga_multiword_copy(&CURVE25519_ONE,  &R1_Z);
+
+		// shifted word of K, its current bit, and the bit seen by the previous step
+	FPGA_WORD k_word;
+	bool k_bit, r_swap = false;
+
+		// multiply
+	for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--)
+	{
+		for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
+		{
+				// get current bit of K
+			k_word = K->words[word_count - 1] >> (bit_count - 1);
+			k_bit = (k_word & (FPGA_WORD)1) == 1;
+
+				// feed R0, R1 straight through when this bit equals the previous one, crossed over (R1, R0) otherwise
+			fpga_multiword_copy(r_swap == k_bit ? &R0_X : &R1_X, &T0_X);
+			fpga_multiword_copy(r_swap == k_bit ? &R0_Z : &R1_Z, &T0_Z);
+			fpga_multiword_copy(r_swap == k_bit ? &R1_X : &R0_X, &T1_X);
+			fpga_multiword_copy(r_swap == k_bit ? &R1_Z : &R0_Z, &T1_Z);
+
+				// remember this bit for the comparison in the next iteration
+			r_swap = k_bit;
+
+				// montgomery ladder step
+			fpga_curve_x25519_ladder_step(	PX,
+											&T0_X, &T0_Z, &T1_X, &T1_Z,
+											&R0_X, &R0_Z, &R1_X, &R1_Z);
+		}
+	}
+
+		// since the lower three bits of the private key are always ...000,
+		// no final swap is pending and the result is in R0_X, R0_Z
+
+		// now conversion to affine coordinates
+	fpga_curve_x25519_to_affine(&R0_X, &R0_Z, &T0_X);
+
+		// so far we've done everything modulo 2*P, we now need
+		// to do the final reduction modulo P; this is done by adding
+		// zero with our modular adder using the 1*P modulus:
+	fpga_modular_add(&T0_X, &CURVE25519_ZERO, QX, &CURVE25519_1P);
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Montgomery Ladder Step (every operation uses the CURVE25519_2P modulus)
+//
+// There are many papers describing Montgomery ladder, this particular
+// implementation is based on Algorithm 2 from "Fast elliptic-curve
+// cryptography on the Cell Broadband Engine" by Neil Costigan and Peter
+// Schwabe
+// https://cryptojedi.org/papers/celldh-20090107.pdf
+//
+//------------------------------------------------------------------------------
+void fpga_curve_x25519_ladder_step	(const FPGA_BUFFER *PX,
+									 const FPGA_BUFFER *R0X_in,  const FPGA_BUFFER *R0Z_in,
+									 const FPGA_BUFFER *R1X_in,  const FPGA_BUFFER *R1Z_in,
+									 FPGA_BUFFER *R0X_out, FPGA_BUFFER *R0Z_out,
+									 FPGA_BUFFER *R1X_out, FPGA_BUFFER *R1Z_out)
+//------------------------------------------------------------------------------
+{
+	FPGA_BUFFER S0, S1;			// sums X + Z of R0 and R1
+	FPGA_BUFFER D0, D1;			// differences X - Z of R0 and R1
+	FPGA_BUFFER QS0, QD0;		// squares of S0 and D0
+	FPGA_BUFFER S0D1, S1D0;		// cross products
+	FPGA_BUFFER TS, TD;			// sum and difference of the cross products
+	FPGA_BUFFER QTD;			// square of TD
+	FPGA_BUFFER T0, TA, T1;		// intermediates for R0Z_out
+
+	fpga_modular_add(R0X_in, R0Z_in,      &S0,     &CURVE25519_2P);	// S0 = R0X + R0Z
+	fpga_modular_add(R1X_in, R1Z_in,      &S1,     &CURVE25519_2P);	// S1 = R1X + R1Z
+	fpga_modular_sub(R0X_in, R0Z_in,      &D0,     &CURVE25519_2P);	// D0 = R0X - R0Z
+	fpga_modular_sub(R1X_in, R1Z_in,      &D1,     &CURVE25519_2P);	// D1 = R1X - R1Z
+	//
+	fpga_modular_mul(&S0,    &S0,         &QS0,    &CURVE25519_2P);	// QS0  = S0 * S0
+	fpga_modular_mul(&D0,    &D0,         &QD0,    &CURVE25519_2P);	// QD0  = D0 * D0
+	fpga_modular_mul(&S0,    &D1,         &S0D1,   &CURVE25519_2P);	// S0D1 = S0 * D1
+	fpga_modular_mul(&S1,    &D0,         &S1D0,   &CURVE25519_2P);	// S1D0 = S1 * D0
+	//
+	fpga_modular_add(&S1D0,  &S0D1,       &TS,     &CURVE25519_2P);	// TS = S1D0 + S0D1
+	fpga_modular_sub(&S1D0,  &S0D1,       &TD,     &CURVE25519_2P);	// TD = S1D0 - S0D1
+	//
+	fpga_modular_mul(&TD,    &TD,         &QTD,    &CURVE25519_2P);	// QTD = TD * TD
+	//
+	fpga_modular_sub(&QS0,   &QD0,        &T0,     &CURVE25519_2P);	// T0 = QS0 - QD0
+	fpga_modular_mul(&T0,    &X25519_A24, &TA,     &CURVE25519_2P);	// TA = T0 * A24
+	fpga_modular_add(&TA,    &QD0,        &T1,     &CURVE25519_2P);	// T1 = TA + QD0
+	//
+	fpga_modular_mul(&QS0,   &QD0,        R0X_out, &CURVE25519_2P);	// R0X_out = QS0 * QD0
+	fpga_modular_mul(&T0,    &T1,         R0Z_out, &CURVE25519_2P);	// R0Z_out = T0 * T1
+	fpga_modular_mul(&TS,    &TS,         R1X_out, &CURVE25519_2P);	// R1X_out = TS * TS
+	fpga_modular_mul(PX,     &QTD,        R1Z_out, &CURVE25519_2P);	// R1Z_out = PX * QTD
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Conversion to affine coordinates (computed with the CURVE25519_2P modulus).
+//
+// Q_X = P_X / P_Z = P_X * P_Z ^ -1
+//
+//------------------------------------------------------------------------------
+void	fpga_curve_x25519_to_affine		(const FPGA_BUFFER *P_X,
+										 const FPGA_BUFFER *P_Z,
+										 FPGA_BUFFER *Q_X)
+//------------------------------------------------------------------------------
+{
+	FPGA_BUFFER P_Z_1;	// P_Z ^ -1
+
+	fpga_modular_inv_abstract(P_Z, &P_Z_1, &CURVE25519_2P);	// P_Z_1 = P_Z ^ -1
+
+	fpga_modular_mul(P_X, &P_Z_1, Q_X, &CURVE25519_2P);	// Q_X = P_X * P_Z_1
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519/x25519_fpga_curve_microcode.cpp b/x25519/x25519_fpga_curve_microcode.cpp
new file mode 100644
index 0000000..d57cb63
--- /dev/null
+++ b/x25519/x25519_fpga_curve_microcode.cpp
@@ -0,0 +1,208 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_curve_microcode.cpp
+// -----------------------------------------------
+// Elliptic curve arithmetic procedures for X25519
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "x25519_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+enum X25519_UOP_OPERAND	// operand slots; BUF_LO / BUF_HI are sized by X25519_UOP_OPERAND_COUNT
+//------------------------------------------------------------------------------
+{
+	CONST_A24 = CURVE25519_UOP_OPERAND_COUNT + 1,	// NOTE(review): the "+ 1" leaves slot CURVE25519_UOP_OPERAND_COUNT unused - confirm this is intended
+
+	LADDER_R0_X,
+	LADDER_R0_Z,
+
+	LADDER_R1_X,
+	LADDER_R1_Z,
+
+	LADDER_T0_X,
+	LADDER_T0_Z,
+
+	LADDER_T1_X,
+	LADDER_T1_Z,
+
+	LADDER_S0,
+	LADDER_S1,
+
+	LADDER_D0,
+	LADDER_D1,
+
+	LADDER_QS0,
+	LADDER_QD0,
+
+	LADDER_S0D1,
+	LADDER_S1D0,
+
+	LADDER_TS,
+	LADDER_TD,
+
+	LADDER_QTD,
+
+	LADDER_T0,
+	LADDER_TA,
+	LADDER_T1,
+
+	LADDER_P_X,	// copy of the input point's x-coordinate
+
+	X25519_UOP_OPERAND_COUNT	// number of slots, used to size the storage buffers
+};
+
+
+//------------------------------------------------------------------------------
+// Storage Buffers
+//------------------------------------------------------------------------------
+static FPGA_BUFFER BUF_LO[X25519_UOP_OPERAND_COUNT];	// operand slots, presumably the BANK_LO side - confirm against the uop_* helpers
+static FPGA_BUFFER BUF_HI[X25519_UOP_OPERAND_COUNT];	// operand slots, presumably the BANK_HI side - confirm against the uop_* helpers
+
+
+//------------------------------------------------------------------------------
+//
+// Elliptic curve point scalar multiplication routine (microcode model).
+//
+// Runs one Montgomery ladder step per bit of K, starting from the top bit of
+// the last word, then converts the (X, Z) result to the affine coordinate QX.
+//
+// The algorithm is based on Algorithm 3 from "How to (pre-)compute a ladder"
+// https://eprint.iacr.org/2017/264.pdf
+//
+//------------------------------------------------------------------------------
+void fpga_curve_x25519_scalar_multiply_microcode(const FPGA_BUFFER *PX, const FPGA_BUFFER *K, FPGA_BUFFER *QX)
+//------------------------------------------------------------------------------
+{
+	bool k_bit, s;				// current bit of K and the bit seen by the previous step
+	FPGA_WORD k_word;			// current word of multiplier
+	int word_count, bit_count;	// counters
+
+		// initialize constant operands
+	fpga_multiword_copy(&CURVE25519_ZERO, &BUF_LO[CONST_ZERO]);
+	fpga_multiword_copy(&CURVE25519_ZERO, &BUF_HI[CONST_ZERO]);
+
+	fpga_multiword_copy(&CURVE25519_ONE, &BUF_LO[CONST_ONE]);
+	fpga_multiword_copy(&CURVE25519_ONE, &BUF_HI[CONST_ONE]);
+
+	fpga_multiword_copy(&X25519_A24, &BUF_LO[CONST_A24]);
+	fpga_multiword_copy(&X25519_A24, &BUF_HI[CONST_A24]);
+
+		//
+		// BEGIN MICROCODE
+		//
+
+		// initialization
+	uop_load(PX, BANK_HI, LADDER_P_X, BUF_LO, BUF_HI);
+	uop_move(BANK_HI, CONST_ONE,  CONST_ZERO, BANK_LO, LADDER_R0_X, LADDER_R0_Z, BUF_LO, BUF_HI);	// LO: R0_X, R0_Z = HI: ONE, ZERO
+	uop_move(BANK_HI, LADDER_P_X, CONST_ONE,  BANK_LO, LADDER_R1_X, LADDER_R1_Z, BUF_LO, BUF_HI);	// LO: R1_X, R1_Z = HI: P_X, ONE
+
+		// ladder
+	s = false;
+	for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--)
+	{
+		for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
+		{
+			k_word = K->words[word_count - 1] >> (bit_count - 1);	// current word, shifted so the wanted bit is bit 0
+			k_bit = (k_word & (FPGA_WORD)1) == 1;					// current bit
+
+				// inputs are all in LO: R0_X, R0_Z, R1_X, R1_Z
+
+				// swap if needed
+			if (s == k_bit)
+			{	uop_move(BANK_LO, LADDER_R0_X, LADDER_R0_Z, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BUF_LO, BUF_HI);	// HI: T0_X, T0_Z = LO: R0_X, R0_Z
+				uop_move(BANK_LO, LADDER_R1_X, LADDER_R1_Z, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BUF_LO, BUF_HI);	// HI: T1_X, T1_Z = LO: R1_X, R1_Z
+			}
+			else
+			{	uop_move(BANK_LO, LADDER_R1_X, LADDER_R1_Z, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BUF_LO, BUF_HI);	// HI: T0_X, T0_Z = LO: R1_X, R1_Z
+				uop_move(BANK_LO, LADDER_R0_X, LADDER_R0_Z, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BUF_LO, BUF_HI);	// HI: T1_X, T1_Z = LO: R0_X, R0_Z
+			}
+
+				// remember this bit for the comparison in the next iteration
+			s = k_bit;
+
+				// run step
+			uop_calc(ADD, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BANK_LO, LADDER_S0,   BUF_LO, BUF_HI, MOD_2P);	// LO: S0 = HI: T0_X + T0_Z
+			uop_calc(ADD, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BANK_LO, LADDER_S1,   BUF_LO, BUF_HI, MOD_2P);	// LO: S1 = HI: T1_X + T1_Z
+			uop_calc(SUB, BANK_HI, LADDER_T0_X, LADDER_T0_Z, BANK_LO, LADDER_D0,   BUF_LO, BUF_HI, MOD_2P);	// LO: D0 = HI: T0_X - T0_Z
+			uop_calc(SUB, BANK_HI, LADDER_T1_X, LADDER_T1_Z, BANK_LO, LADDER_D1,   BUF_LO, BUF_HI, MOD_2P);	// LO: D1 = HI: T1_X - T1_Z
+
+			uop_calc(MUL, BANK_LO, LADDER_S0,   LADDER_S0,   BANK_HI, LADDER_QS0,  BUF_LO, BUF_HI, MOD_2P);	// HI: QS0  = LO: S0 * S0
+			uop_calc(MUL, BANK_LO, LADDER_D0,   LADDER_D0,   BANK_HI, LADDER_QD0,  BUF_LO, BUF_HI, MOD_2P);	// HI: QD0  = LO: D0 * D0
+			uop_calc(MUL, BANK_LO, LADDER_S0,   LADDER_D1,   BANK_HI, LADDER_S0D1, BUF_LO, BUF_HI, MOD_2P);	// HI: S0D1 = LO: S0 * D1
+			uop_calc(MUL, BANK_LO, LADDER_S1,   LADDER_D0,   BANK_HI, LADDER_S1D0, BUF_LO, BUF_HI, MOD_2P);	// HI: S1D0 = LO: S1 * D0
+
+			uop_calc(ADD, BANK_HI, LADDER_S1D0, LADDER_S0D1, BANK_LO, LADDER_TS,   BUF_LO, BUF_HI, MOD_2P);	// LO: TS = HI: S1D0 + S0D1
+			uop_calc(SUB, BANK_HI, LADDER_S1D0, LADDER_S0D1, BANK_LO, LADDER_TD,   BUF_LO, BUF_HI, MOD_2P);	// LO: TD = HI: S1D0 - S0D1
+
+			uop_calc(MUL, BANK_LO, LADDER_TD,   LADDER_TD,   BANK_HI, LADDER_QTD,  BUF_LO, BUF_HI, MOD_2P);	// HI: QTD = LO: TD * TD
+
+			uop_calc(SUB, BANK_HI, LADDER_QS0,  LADDER_QD0,  BANK_LO, LADDER_T0,   BUF_LO, BUF_HI, MOD_2P);	// LO: T0 = HI: QS0 - QD0
+			uop_calc(MUL, BANK_LO, LADDER_T0,   CONST_A24,   BANK_HI, LADDER_TA,   BUF_LO, BUF_HI, MOD_2P);	// HI: TA = LO: T0 * A24
+			uop_calc(ADD, BANK_HI, LADDER_TA,   LADDER_QD0,  BANK_LO, LADDER_T1,   BUF_LO, BUF_HI, MOD_2P);	// LO: T1 = HI: TA + QD0
+			
+			uop_calc(MUL, BANK_HI, LADDER_QS0,  LADDER_QD0,  BANK_LO, LADDER_R0_X, BUF_LO, BUF_HI, MOD_2P);	// LO: R0_X = HI: QS0 * QD0
+			uop_calc(MUL, BANK_LO, LADDER_T0,   LADDER_T1,   BANK_HI, LADDER_R0_Z, BUF_LO, BUF_HI, MOD_2P);	// HI: R0_Z = LO: T0 * T1
+			uop_calc(MUL, BANK_LO, LADDER_TS,   LADDER_TS,   BANK_HI, LADDER_R1_X, BUF_LO, BUF_HI, MOD_2P);	// HI: R1_X = LO: TS * TS
+			uop_calc(MUL, BANK_HI, LADDER_P_X,  LADDER_QTD,  BANK_LO, LADDER_R1_Z, BUF_LO, BUF_HI, MOD_2P);	// LO: R1_Z = HI: PX * QTD
+
+			uop_move(BANK_HI, LADDER_R0_Z, LADDER_R1_X, BANK_LO, LADDER_R0_Z, LADDER_R1_X, BUF_LO, BUF_HI);	// LO: R0_Z, R1_X = HI: R0_Z, R1_X
+		}
+	}
+
+		// inversion expects its operand to be in LO: INVERT_T_1
+	uop_move(BANK_HI, LADDER_R0_Z, LADDER_R0_Z, BANK_LO, INVERT_T_1, INVERT_T_1, BUF_LO, BUF_HI);	// LO: T_1 = HI: R0_Z
+
+		// just call piece of microcode
+	fpga_modular_inv_microcode(BUF_LO, BUF_HI);
+	
+		// inversion places result in HI: INVERT_R1
+	uop_move(BANK_HI, INVERT_R1, INVERT_R1, BANK_LO, INVERT_R1, INVERT_R1, BUF_LO, BUF_HI);	// LO: R1 = HI: R1
+	uop_calc(MUL, BANK_LO, INVERT_R1, LADDER_R0_X, BANK_HI, INVERT_R2, BUF_LO, BUF_HI, MOD_2P);	// HI: R2 = LO: R1 * R0_X; NOTE(review): relies on the inversion leaving LO: R0_X intact - confirm
+
+		// finally reduce to just 1*P
+	uop_calc(ADD, BANK_HI, INVERT_R2, CONST_ZERO, BANK_LO, INVERT_R1, BUF_LO, BUF_HI, MOD_1P);	// LO: R1 = HI: R2 + ZERO, note MOD_1P (not MOD_2P) here
+
+		// store result
+	uop_stor(BUF_LO, BUF_HI, BANK_LO, INVERT_R1, QX);
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------



More information about the Commits mailing list