[Cryptech-Commits] [user/shatov/x25519_fpga_model] branch master updated: Working C model of how an FPGA would do X25519.

git at cryptech.is git at cryptech.is
Fri Jun 1 11:53:23 UTC 2018
Next message (by thread): [Cryptech-Commits] [user/shatov/x25519_fpga_model] branch master updated (43110be -> 23cc981)
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/x25519_fpga_model.

The following commit(s) were added to refs/heads/master by this push:
     new 43110be  Working C model of how an FPGA would do X25519.
43110be is described below

commit 43110befcad2db81ba1051e3e2f09bbb9406ea8d
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Fri Jun 1 14:51:50 2018 +0300

    Working C model of how an FPGA would do X25519.
---
 x25519_fpga_curve.h            |  78 ++++++
 x25519_fpga_curve_abstract.cpp | 309 ++++++++++++++++++++++++
 x25519_fpga_lowlevel.cpp       | 136 +++++++++++
 x25519_fpga_lowlevel.h         |  73 ++++++
 x25519_fpga_model.cpp          | 228 ++++++++++++++++++
 x25519_fpga_model.h            |  52 ++++
 x25519_fpga_modular.cpp        | 521 +++++++++++++++++++++++++++++++++++++++++
 x25519_fpga_modular.h          |  76 ++++++
 x25519_fpga_multiword.cpp      |  89 +++++++
 x25519_fpga_multiword.h        |  78 ++++++
 10 files changed, 1640 insertions(+)

diff --git a/x25519_fpga_curve.h b/x25519_fpga_curve.h
new file mode 100644
index 0000000..be647c5
--- /dev/null
+++ b/x25519_fpga_curve.h
@@ -0,0 +1,78 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_curve.h
+// ---------------------------------------------------
+// Elliptic curve arithmetic procedures for Curve25519
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Curve25519 Parameters
+//------------------------------------------------------------------------------
+
+/* x-coordinate of the base point */
+#define X25519_G_X_INIT		{0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+							 0x00000000, 0x00000000, 0x00000000, 0x00000009}
+
+/* coefficient (A + 2) / 4 */
+#define X25519_A24_INIT		{0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+							 0x00000000, 0x00000000, 0x00000000, 0x0001DB42}
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+extern FPGA_BUFFER X25519_G_X;	// the base point
+extern FPGA_BUFFER X25519_A24;	// coefficient (A + 2) / 4
+
+
+//------------------------------------------------------------------------------
+// Prototypes
+//------------------------------------------------------------------------------
+void	fpga_curve_init				();
+
+void	fpga_curve_scalar_multiply	(FPGA_BUFFER *P_X, FPGA_BUFFER *K, FPGA_BUFFER *Q_X);
+
+void	fpga_curve_ladder_step		(FPGA_BUFFER *P_X,
+									 FPGA_BUFFER *R0_X_in,  FPGA_BUFFER *R0_Z_in,
+									 FPGA_BUFFER *R1_X_in,  FPGA_BUFFER *R1_Z_in,
+									 FPGA_BUFFER *R0_X_out, FPGA_BUFFER *R0_Z_out,
+									 FPGA_BUFFER *R1_X_out, FPGA_BUFFER *R1_Z_out);
+
+void	fpga_curve_to_affine		(FPGA_BUFFER *P_X, FPGA_BUFFER *P_Z,
+									 FPGA_BUFFER *Q_X);
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519_fpga_curve_abstract.cpp b/x25519_fpga_curve_abstract.cpp
new file mode 100644
index 0000000..b182ca3
--- /dev/null
+++ b/x25519_fpga_curve_abstract.cpp
@@ -0,0 +1,309 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_curve_abstract.cpp
+// ---------------------------------------------------
+// Elliptic curve arithmetic procedures for Curve25519
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include <stdint.h>
+#include "x25519_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+FPGA_BUFFER X25519_G_X;		// x-coordinate of the base point
+FPGA_BUFFER X25519_A24;		// coefficient (A + 2) / 4
+
+
+//------------------------------------------------------------------------------
+void fpga_curve_init()
+//------------------------------------------------------------------------------
+{
+	int w_src, w_dst;	// word counters
+
+	FPGA_WORD TMP_G_X[FPGA_OPERAND_NUM_WORDS]	= X25519_G_X_INIT;
+	FPGA_WORD TMP_A24[FPGA_OPERAND_NUM_WORDS]	= X25519_A24_INIT;
+
+		/* fill buffers for large multi-word integers */
+	for (	w_src = 0, w_dst = FPGA_OPERAND_NUM_WORDS - 1;
+			w_src < FPGA_OPERAND_NUM_WORDS;
+			w_src++, w_dst--)
+	{
+		X25519_G_X.words[w_dst]	= TMP_G_X[w_src];
+		X25519_A24.words[w_dst] = TMP_A24[w_src];
+	}
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Elliptic curve point scalar multiplication routine.
+//
+// This uses the Montgomery ladder to do the multiplication and then
+// converts the result to affine coordinates.
+//
+// The algorithm is based on Algorithm 3 from "How to (pre-)compute a ladder"
+// https://eprint.iacr.org/2017/264.pdf
+//
+//------------------------------------------------------------------------------
+void fpga_curve_scalar_multiply(FPGA_BUFFER *PX, FPGA_BUFFER *K, FPGA_BUFFER *QX)
+//------------------------------------------------------------------------------
+{
+	int word_count, bit_count;	// counters
+
+		// temporary buffers
+	FPGA_BUFFER R0_X;
+	FPGA_BUFFER R0_Z;
+	FPGA_BUFFER R1_X;
+	FPGA_BUFFER R1_Z;
+
+	FPGA_BUFFER T0_X;
+	FPGA_BUFFER T0_Z;
+	FPGA_BUFFER T1_X;
+	FPGA_BUFFER T1_Z;
+
+		// initialization
+	fpga_multiword_copy(&X25519_ONE,  &R0_X);
+	fpga_multiword_copy(&X25519_ZERO, &R0_Z);
+	fpga_multiword_copy(PX,           &R1_X);
+	fpga_multiword_copy(&X25519_ONE,  &R1_Z);
+
+		// handy vars
+	FPGA_WORD k_word;
+	bool k_bit, r_swap = false;
+
+		// multiply
+	for (word_count=FPGA_OPERAND_NUM_WORDS; word_count>0; word_count--)
+	{
+		for (bit_count=FPGA_WORD_WIDTH; bit_count>0; bit_count--)
+		{
+				// get current bit of K
+			k_word = K->words[word_count - 1] >> (bit_count - 1);
+			k_bit = (k_word & (FPGA_WORD)1) == 1;
+
+				// we feed either R0, R1 or R1, R0 into the ladder
+			fpga_multiword_copy(r_swap == k_bit ? &R0_X : &R1_X, &T0_X);
+			fpga_multiword_copy(r_swap == k_bit ? &R0_Z : &R1_Z, &T0_Z);
+			fpga_multiword_copy(r_swap == k_bit ? &R1_X : &R0_X, &T1_X);
+			fpga_multiword_copy(r_swap == k_bit ? &R1_Z : &R0_Z, &T1_Z);
+
+				// remember whether we did swapping
+			r_swap = k_bit;
+
+				// montgomery ladder step
+			fpga_curve_ladder_step(	PX,
+									&T0_X, &T0_Z, &T1_X, &T1_Z,
+									&R0_X, &R0_Z, &R1_X, &R1_Z);
+		}
+	}
+
+		// since the lower three bits of the private key are always ...000,
+		// the result is in R0_X, R0_Z and 
+
+		// now conversion to affine coordinates
+	fpga_curve_to_affine(&R0_X, &R0_Z, &T0_X);
+
+		// so far we've done everything modulo 2*P, we now need
+		// to do final reduction modulo P, this can be done using
+		// our modular adder this way:
+	fpga_modular_add(&T0_X, &X25519_ZERO, QX, &X25519_1P);
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Montgomery Ladder Step
+//
+// There are many papers describing Montgomery ladder, this particular
+// implementation is based on Algorithm 2 from "Fast elliptic-curve
+// cryptography on the Cell Broadband Engine" by Neil Costigan and Peter
+// Schwabe
+// https://cryptojedi.org/papers/celldh-20090107.pdf
+//
+//------------------------------------------------------------------------------
+void fpga_curve_ladder_step	(FPGA_BUFFER *PX,
+							 FPGA_BUFFER *R0X_in,  FPGA_BUFFER *R0Z_in,
+							 FPGA_BUFFER *R1X_in,  FPGA_BUFFER *R1Z_in,
+							 FPGA_BUFFER *R0X_out, FPGA_BUFFER *R0Z_out,
+							 FPGA_BUFFER *R1X_out, FPGA_BUFFER *R1Z_out)
+//------------------------------------------------------------------------------
+{
+	FPGA_BUFFER S0, S1;
+	FPGA_BUFFER D0, D1;
+	FPGA_BUFFER QS0, QD0;
+	FPGA_BUFFER S0D1, S1D0;
+	FPGA_BUFFER TS, TD;
+	FPGA_BUFFER QTD;
+	FPGA_BUFFER T0, TA, T1;
+
+	fpga_modular_add(R0X_in, R0Z_in,      &S0,     &X25519_2P);
+	fpga_modular_add(R1X_in, R1Z_in,      &S1,     &X25519_2P);
+	fpga_modular_sub(R0X_in, R0Z_in,      &D0,     &X25519_2P);
+	fpga_modular_sub(R1X_in, R1Z_in,      &D1,     &X25519_2P);
+	//
+	fpga_modular_mul(&S0,    &S0,         &QS0,    &X25519_2P);
+	fpga_modular_mul(&D0,    &D0,         &QD0,    &X25519_2P);
+	fpga_modular_mul(&S0,    &D1,         &S0D1,   &X25519_2P);
+	fpga_modular_mul(&S1,    &D0,         &S1D0,   &X25519_2P);
+	//
+	fpga_modular_add(&S1D0,  &S0D1,       &TS,     &X25519_2P);
+	fpga_modular_sub(&S1D0,  &S0D1,       &TD,     &X25519_2P);
+	//
+	fpga_modular_mul(&TD,    &TD,         &QTD,    &X25519_2P);
+	//
+	fpga_modular_sub(&QS0,   &QD0,        &T0,     &X25519_2P);
+	fpga_modular_mul(&T0,    &X25519_A24, &TA,     &X25519_2P);
+	fpga_modular_add(&TA,    &QD0,        &T1,     &X25519_2P);
+	//
+	fpga_modular_mul(&QS0,   &QD0,        R0X_out, &X25519_2P);
+	fpga_modular_mul(&T0,    &T1,         R0Z_out, &X25519_2P);
+	fpga_modular_mul(&TS,    &TS,         R1X_out, &X25519_2P);
+	fpga_modular_mul(PX,     &QTD,        R1Z_out, &X25519_2P);
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Convertion to affine coordinates.
+//
+// This uses Fermat's little theorem to calculate Q_X = P_X * P_Z ^ -1.
+//
+// The corresponding addition chain which Bernstein calls "straightforward
+// sequence of 254 squarings and 11 multiplications" in his original paper was
+// taken from here:
+// https://briansmith.org/ecc-inversion-addition-chains-01
+//
+//------------------------------------------------------------------------------
+void fpga_curve_to_affine	(FPGA_BUFFER *P_X, FPGA_BUFFER *P_Z,
+							 FPGA_BUFFER *Q_X)
+//------------------------------------------------------------------------------
+{
+	int cyc_count;	// counter
+
+		// temporary variables
+	FPGA_BUFFER R1, R2;
+	FPGA_BUFFER T_1;
+	FPGA_BUFFER T_10;
+	FPGA_BUFFER T_1001;
+	FPGA_BUFFER T_1011;
+	FPGA_BUFFER T_X5;
+	FPGA_BUFFER T_X10;
+	FPGA_BUFFER T_X20;
+	FPGA_BUFFER T_X40;
+	FPGA_BUFFER T_X50;
+	FPGA_BUFFER T_X100;
+
+		// let's go
+	fpga_multiword_copy(P_Z, &T_1);
+	//
+	fpga_modular_mul(&T_1, &T_1, &T_10, &X25519_2P);
+	//
+	fpga_modular_mul(&T_10, &T_10, &R1, &X25519_2P);
+	fpga_modular_mul(&R1, &R1, &R2, &X25519_2P);
+	fpga_modular_mul(&R2, &T_1, &T_1001, &X25519_2P);
+	//
+	fpga_modular_mul(&T_1001, &T_10, &T_1011, &X25519_2P);
+	//
+	fpga_modular_mul(&T_1011, &T_1011, &R1, &X25519_2P);
+	fpga_modular_mul(&R1, &T_1001, &T_X5, &X25519_2P);
+	//
+	fpga_multiword_copy(&T_X5, &R1);
+	for (cyc_count=0; cyc_count<5; cyc_count++)
+	{	if (!(cyc_count % 2))	fpga_modular_mul(&R1, &R1, &R2, &X25519_2P);
+		else					fpga_modular_mul(&R2, &R2, &R1, &X25519_2P);
+	}
+	fpga_modular_mul(&R2, &T_X5, &T_X10, &X25519_2P);
+	//
+	fpga_multiword_copy(&T_X10, &R1);
+	for (cyc_count=0; cyc_count<10; cyc_count++)
+	{	if (!(cyc_count % 2))	fpga_modular_mul(&R1, &R1, &R2, &X25519_2P);
+		else					fpga_modular_mul(&R2, &R2, &R1, &X25519_2P);
+	}
+	fpga_modular_mul(&R1, &T_X10, &T_X20, &X25519_2P);
+	//
+	fpga_multiword_copy(&T_X20, &R1);
+	for (cyc_count=0; cyc_count<20; cyc_count++)
+	{	if (!(cyc_count % 2))	fpga_modular_mul(&R1, &R1, &R2, &X25519_2P);
+		else					fpga_modular_mul(&R2, &R2, &R1, &X25519_2P);
+	}
+	fpga_modular_mul(&R1, &T_X20, &T_X40, &X25519_2P);
+	//
+	fpga_multiword_copy(&T_X40, &R1);
+	for (cyc_count=0; cyc_count<10; cyc_count++)
+	{	if (!(cyc_count % 2))	fpga_modular_mul(&R1, &R1, &R2, &X25519_2P);
+		else					fpga_modular_mul(&R2, &R2, &R1, &X25519_2P);
+	}
+	fpga_modular_mul(&R1, &T_X10, &T_X50, &X25519_2P);
+	//
+	fpga_multiword_copy(&T_X50, &R1);
+	for (cyc_count=0; cyc_count<50; cyc_count++)
+	{	if (!(cyc_count % 2))	fpga_modular_mul(&R1, &R1, &R2, &X25519_2P);
+		else					fpga_modular_mul(&R2, &R2, &R1, &X25519_2P);
+	}
+	fpga_modular_mul(&R1, &T_X50, &T_X100, &X25519_2P);
+	//
+	fpga_multiword_copy(&T_X100, &R1);
+	for (cyc_count=0; cyc_count<100; cyc_count++)
+	{	if (!(cyc_count % 2))	fpga_modular_mul(&R1, &R1, &R2, &X25519_2P);
+		else					fpga_modular_mul(&R2, &R2, &R1, &X25519_2P);
+	}
+	//
+	fpga_modular_mul(&R1, &T_X100, &R2, &X25519_2P);
+	//
+	for (cyc_count=0; cyc_count<50; cyc_count++)
+	{	if ((cyc_count % 2))	fpga_modular_mul(&R1, &R1, &R2, &X25519_2P);	// inverse order here!
+		else					fpga_modular_mul(&R2, &R2, &R1, &X25519_2P);
+	}
+	//
+	fpga_modular_mul(&R2, &T_X50, &R1, &X25519_2P);
+	//
+	for (cyc_count=0; cyc_count<5; cyc_count++)
+	{	if (!(cyc_count % 2))	fpga_modular_mul(&R1, &R1, &R2, &X25519_2P);
+		else					fpga_modular_mul(&R2, &R2, &R1, &X25519_2P);
+	}
+	//
+	fpga_modular_mul(&R2, &T_1011, &R1, &X25519_2P);
+	//
+	fpga_modular_mul(P_X, &R1, Q_X, &X25519_2P);
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519_fpga_lowlevel.cpp b/x25519_fpga_lowlevel.cpp
new file mode 100644
index 0000000..e92c882
--- /dev/null
+++ b/x25519_fpga_lowlevel.cpp
@@ -0,0 +1,136 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_lowlevel.cpp
+// -----------------------------------
+// Models of low-level FPGA primitives
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include <stdint.h>
+#include "x25519_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+//
+// Low-level 32-bit adder with carry input and carry output.
+//
+// Carries are 1 bit wide.
+//
+// {c_out, s} = x + y + c_in
+//
+//------------------------------------------------------------------------------
+void fpga_lowlevel_add32(FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out)
+//------------------------------------------------------------------------------
+{
+		// calculate sum
+	FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x + (FPGA_WORD_EXTENDED)y;
+
+		// process carry input
+	if (c_in) r++;
+
+		// store sum
+	*s = (FPGA_WORD)r;
+
+		// shift sum to obtain carry flag
+	r >>= FPGA_WORD_WIDTH;
+
+		// store carry
+	*c_out = (r != 0);
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Low-level 32-bit subtractor with borrow input and borrow output.
+//
+// Borrows are 1 bit wide.
+//
+// {b_out, d} = x - y - b_in
+//
+void fpga_lowlevel_sub32(FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out)
+//------------------------------------------------------------------------------
+{
+		// calculate difference
+	FPGA_WORD_EXTENDED r = (FPGA_WORD_EXTENDED)x - (FPGA_WORD_EXTENDED)y;
+
+		// process borrow input
+	if (b_in) r--;
+
+		// store difference
+	*d = (FPGA_WORD)r;
+
+		// shift difference to obtain borrow flag
+	r >>= FPGA_WORD_WIDTH;
+
+		// store borrow
+	*b_out = (r != 0);
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Low-level 16x16-bit multiplier.
+//
+// Inputs are 16 bits wide, output is 32 bits wide.
+//
+// p = x * y
+//
+void fpga_lowlevel_mul16(FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p)
+//------------------------------------------------------------------------------
+{
+		// multiply and store result
+	*p = (FPGA_WORD)x * (FPGA_WORD)y;
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Low-level wide 47-bit adder without carry logic.
+//
+// s = (x + y)[47:0]
+//
+void fpga_lowlevel_add47(FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s)
+//------------------------------------------------------------------------------
+{
+		// add and store result truncated to 47 bits
+	*s = (x + y) & FPGA_MASK_ADDER47;
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519_fpga_lowlevel.h b/x25519_fpga_lowlevel.h
new file mode 100644
index 0000000..28bd48b
--- /dev/null
+++ b/x25519_fpga_lowlevel.h
@@ -0,0 +1,73 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_lowlevel.h
+// -----------------------------------
+// Models of low-level FPGA primitives
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// FPGA Pipeline Settings
+//------------------------------------------------------------------------------
+#define FPGA_WORD_WIDTH				(32)
+
+
+//------------------------------------------------------------------------------
+// Word Types (Normal 32-bit, Extended 47-bit, Reduced 16-bit)
+//------------------------------------------------------------------------------
+typedef uint32_t FPGA_WORD;
+typedef uint64_t FPGA_WORD_EXTENDED;
+typedef uint16_t FPGA_WORD_REDUCED;
+
+
+//------------------------------------------------------------------------------
+// Wide Adder Mask
+//------------------------------------------------------------------------------
+#define FPGA_MASK_ADDER47	((FPGA_WORD_EXTENDED)0x00007FFFFFFFFFFF)
+
+
+//------------------------------------------------------------------------------
+// Prototypes
+//------------------------------------------------------------------------------
+void	fpga_lowlevel_add32		(FPGA_WORD x, FPGA_WORD y, bool c_in, FPGA_WORD *s, bool *c_out);
+void	fpga_lowlevel_sub32		(FPGA_WORD x, FPGA_WORD y, bool b_in, FPGA_WORD *d, bool *b_out);
+
+void	fpga_lowlevel_mul16		(FPGA_WORD_REDUCED x, FPGA_WORD_REDUCED y, FPGA_WORD *p);
+
+void	fpga_lowlevel_add47		(FPGA_WORD_EXTENDED x, FPGA_WORD_EXTENDED y, FPGA_WORD_EXTENDED *s);
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519_fpga_model.cpp b/x25519_fpga_model.cpp
new file mode 100644
index 0000000..1f2101a
--- /dev/null
+++ b/x25519_fpga_model.cpp
@@ -0,0 +1,228 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_model.cpp
+// --------------------------------------------
+// X25519 Model
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2018, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "x25519_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Prototypes
+//------------------------------------------------------------------------------
+void	fpga_model_init			();
+
+bool	test_point_multiplier	(FPGA_BUFFER *px, FPGA_BUFFER *k, FPGA_BUFFER *qx);
+
+void	print_fpga_buffer		(const char *s, FPGA_BUFFER *v);
+bool	compare_fpga_buffers	(FPGA_BUFFER *ax, FPGA_BUFFER *bx);
+
+
+//------------------------------------------------------------------------------
+// Locals
+//------------------------------------------------------------------------------
+static FPGA_BUFFER x25519_da, x25519_db;
+static FPGA_BUFFER x25519_qa_x, x25519_qb_x;
+static FPGA_BUFFER x25519_qab_x;
+
+
+//------------------------------------------------------------------------------
+int main()
+//------------------------------------------------------------------------------
+{
+	bool	ok_a, ok_b;		// flags
+
+
+		//
+		// initialize buffers
+		//
+	fpga_multiword_init();
+	fpga_modular_init();
+	fpga_curve_init();
+	fpga_model_init();
+
+
+		//
+		// test point multiplier: QA = dA * G
+		//                        QB = dB * G
+		//
+	printf("Trying to derive public keys from private keys...\n\n");
+	ok_a = test_point_multiplier(&X25519_G_X, &x25519_da, &x25519_qa_x);
+	ok_b = test_point_multiplier(&X25519_G_X, &x25519_db, &x25519_qb_x);
+	if (!ok_a || !ok_b) return EXIT_FAILURE;
+
+
+		//
+		// test point multiplier: QAB = dA * QB
+		//						  QAB = dB * QA
+		//
+	printf("Trying to derive shared secret key...\n\n");
+	ok_a = test_point_multiplier(&x25519_qb_x, &x25519_da, &x25519_qab_x);
+	ok_b = test_point_multiplier(&x25519_qa_x, &x25519_db, &x25519_qab_x);
+	if (!ok_a || !ok_b) return EXIT_FAILURE;
+
+
+		//
+		// everything went just fine
+		//
+	return EXIT_SUCCESS;
+}
+
+
+//------------------------------------------------------------------------------
+void fpga_model_init()
+//------------------------------------------------------------------------------
+{
+	int w_src, w_dst;	// word counters
+
+	FPGA_WORD tmp_da[FPGA_OPERAND_NUM_WORDS]	= X25519_DA;
+	FPGA_WORD tmp_db[FPGA_OPERAND_NUM_WORDS]	= X25519_DB;
+
+	FPGA_WORD tmp_qa_x[FPGA_OPERAND_NUM_WORDS]	= X25519_QA_X;
+	FPGA_WORD tmp_qb_x[FPGA_OPERAND_NUM_WORDS]	= X25519_QB_X;
+
+	FPGA_WORD tmp_qab_x[FPGA_OPERAND_NUM_WORDS]	= X25519_QAB_X;
+
+		/* fill buffers for large multi-word integers */
+	for (	w_src = 0, w_dst = FPGA_OPERAND_NUM_WORDS - 1;
+			w_src < FPGA_OPERAND_NUM_WORDS;
+			w_src++, w_dst--)
+	{	
+		x25519_da.words[w_dst]	= tmp_da[w_src];
+		x25519_db.words[w_dst]	= tmp_db[w_src];
+
+		x25519_qa_x.words[w_dst] = tmp_qa_x[w_src];
+		x25519_qb_x.words[w_dst] = tmp_qb_x[w_src];
+
+		x25519_qab_x.words[w_dst] = tmp_qab_x[w_src];
+	}
+}
+
+
+//------------------------------------------------------------------------------
+bool test_point_multiplier(FPGA_BUFFER *px, FPGA_BUFFER *k, FPGA_BUFFER *qx)
+//------------------------------------------------------------------------------
+//
+// (px, ...) - multiplicand
+// k - multiplier
+//
+// (qx, ...) - expected coordinates of product
+//
+// Returns true when point (rx, ...) = k * P matches the point (qx, ...).
+//
+//------------------------------------------------------------------------------
+{
+	bool ok;			// flag
+	FPGA_BUFFER rx;		// result
+
+		/* run the model */
+	fpga_curve_scalar_multiply(px, k, &rx);
+
+		/* handle result */
+	ok = compare_fpga_buffers(qx, &rx);
+	if (!ok)
+	{	printf("\n    ERROR\n\n");
+		return false;
+	}
+	else printf("\n    OK\n\n");
+
+		// everything went just fine
+	return true;
+}
+
+
+//------------------------------------------------------------------------------
+bool compare_fpga_buffers(FPGA_BUFFER *ax, FPGA_BUFFER *bx)
+//------------------------------------------------------------------------------
+//
+// Compare affine coordinates of two points and return true when they match.
+//
+//------------------------------------------------------------------------------
+{
+	int w;	// word counter
+
+		// print all the values
+	print_fpga_buffer("  Expected:   X = ", ax);
+	print_fpga_buffer("  Calculated: X = ", bx);
+
+		// compare values
+	for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+	{
+			// compare x
+		if (ax->words[w] != bx->words[w]) return false;
+	}
+
+		// values are the same
+	return true;
+}
+
+
+//------------------------------------------------------------------------------
+void print_fpga_buffer(const char *s, FPGA_BUFFER *buf)
+//------------------------------------------------------------------------------
+//
+// Pretty print large multi-word integer.
+//
+//------------------------------------------------------------------------------
+{
+	int w;	// word counter
+
+		// print header
+	printf("%s", s);
+
+		// print all bytes
+	for (w=FPGA_OPERAND_NUM_WORDS; w>0; w--)
+	{	
+		printf("%08x", buf->words[w-1]);
+
+			// insert space after every group of 4 bytes
+		if (w > 1) printf(" ");
+	}
+
+		// print footer
+	printf("\n");
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519_fpga_model.h b/x25519_fpga_model.h
new file mode 100644
index 0000000..e378cc1
--- /dev/null
+++ b/x25519_fpga_model.h
@@ -0,0 +1,52 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_model.h
+// --------------------------------------------
+// X25519 FPGA Model
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2018, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include "test_vectors\x25519_test_vector.h"
+#include "x25519_fpga_lowlevel.h"
+#include "x25519_fpga_multiword.h"
+#include "x25519_fpga_modular.h"
+#include "x25519_fpga_curve.h"
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519_fpga_modular.cpp b/x25519_fpga_modular.cpp
new file mode 100644
index 0000000..90b96dc
--- /dev/null
+++ b/x25519_fpga_modular.cpp
@@ -0,0 +1,521 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_modular.cpp
+// ------------------------------------------
+// Modular arithmetic routines for Curve25519
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include <stdint.h>
+#include "x25519_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+FPGA_BUFFER X25519_1P;
+FPGA_BUFFER X25519_2P;
+
+
+//------------------------------------------------------------------------------
+void fpga_modular_init()
+//------------------------------------------------------------------------------
+{
+	int w_src, w_dst;	// word counters
+
+		// temporary things
+	FPGA_WORD TMP_1P[FPGA_OPERAND_NUM_WORDS]	= X25519_1P_INIT;
+	FPGA_WORD TMP_2P[FPGA_OPERAND_NUM_WORDS]	= X25519_2P_INIT;
+
+		/* fill buffers for large multi-word integers, we need to fill them in
+		 * reverse order because of the way C arrays are initialized
+		 */
+	for (	w_src = 0, w_dst = FPGA_OPERAND_NUM_WORDS - 1;
+			w_src < FPGA_OPERAND_NUM_WORDS;
+			w_src++, w_dst--)
+	{
+		X25519_1P.words[w_dst]	= TMP_1P[w_src];
+		X25519_2P.words[w_dst]	= TMP_2P[w_src];
+	}
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Modular addition.
+//
+// This routine implements algorithm 3. from "Ultra High Performance ECC over
+// NIST Primes on Commercial FPGAs".
+//
+// s = (a + b) mod q
+//
+// The naive approach is like the following:
+//
+// 1. s = a + b
+// 2. if (s >= q) s -= q
+//
+// The speed-up trick is to simultaneously calculate (a + b) and (a + b - q)
+// and then select the right variant.
+//
+//------------------------------------------------------------------------------
+void fpga_modular_add(FPGA_BUFFER *A, FPGA_BUFFER *B, FPGA_BUFFER *S, FPGA_BUFFER *N)
+//------------------------------------------------------------------------------
+{
+	int w;					// word counter
+	FPGA_BUFFER AB, AB_N;	// intermediate buffers
+	bool c_in, c_out;		// carries
+	bool b_in, b_out;		// borrows
+
+	c_in = false;			// first word has no carry
+	b_in = false;			// first word has no borrow
+	
+		// run parallel addition and subtraction
+	for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+	{
+		fpga_lowlevel_add32(A->words[w], B->words[w], c_in, &AB.words[w],   &c_out);
+		fpga_lowlevel_sub32(AB.words[w], N->words[w], b_in, &AB_N.words[w], &b_out);
+
+		c_in = c_out;	// propagate carry
+		b_in = b_out;	// propagate borrow
+	}
+
+		// now select the right buffer
+
+		/*
+		 * We select the right variant based on borrow and carry flags after
+		 * addition and subtraction of the very last pair of words. Note, that
+		 * we only need to select the first variant (a + b) when (a + b) < q.
+		 * This way if we get negative number after subtraction, we discard it
+		 * and use the output of the adder instead. The subtractor output is
+		 * negative when borrow flag is set *and* carry flag is not set. When
+		 * both borrow and carry are set, the number is non-negative, because
+		 * borrow and carry cancel each other out.
+		 */
+	for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+		S->words[w] = (b_out && !c_out) ? AB.words[w] : AB_N.words[w];
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Modular subtraction.
+//
+// This routine implements algorithm 3. from "Ultra High Performance ECC over
+// NIST Primes on Commercial FPGAs".
+//
+// d = (a - b) mod q
+//
+// The naive approach is like the following:
+//
+// 1. d = a - b
+// 2. if (a < b) d += q
+//
+// The speed-up trick is to simultaneously calculate (a - b) and (a - b + q)
+// and then select the right variant.
+//
+//------------------------------------------------------------------------------
+void fpga_modular_sub(FPGA_BUFFER *A, FPGA_BUFFER *B, FPGA_BUFFER *D, FPGA_BUFFER *N)
+//------------------------------------------------------------------------------
+{
+	int w;					// word counter
+	FPGA_BUFFER AB, AB_N;	// intermediate buffers
+	bool c_in, c_out;		// carries
+	bool b_in, b_out;		// borrows
+
+	c_in = false;			// first word has no carry
+	b_in = false;			// first word has no borrow
+	
+		// run parallel subtraction and addition
+	for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+	{
+		fpga_lowlevel_sub32(A->words[w], B->words[w], b_in, &AB.words[w],   &b_out);
+		fpga_lowlevel_add32(AB.words[w], N->words[w], c_in, &AB_N.words[w], &c_out);
+
+		b_in = b_out;	// propagate borrow
+		c_in = c_out;	// propagate carry
+	}
+
+		// now select the right buffer
+
+		/*
+		 * We select the right variant based on borrow flag after subtraction
+		 * and addition of the very last pair of words. Note, that we only
+		 * need to select the second variant (a - b + q) when a < b. This way
+		 * if we get negative number after subtraction, we discard it
+		 * and use the output of the adder instead. The Subtractor output is
+		 * negative when borrow flag is set.
+		 */
+	for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+		D->words[w] = b_out ? AB_N.words[w] : AB.words[w];
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Modular multiplication for Curve25519.
+//
+// p = (a * b) mod q
+//
+// The complex algorithm is split into three parts:
+//
+// 1. Calculation of partial words
+// 2. Acccumulation of partial words into full-size product
+// 3. Modular reduction of the full-size product
+//
+// See comments for corresponding helper routines for more information.
+//
+//------------------------------------------------------------------------------
+void fpga_modular_mul(FPGA_BUFFER *A, FPGA_BUFFER *B, FPGA_BUFFER *P, FPGA_BUFFER *N)
+//------------------------------------------------------------------------------
+{
+	FPGA_WORD_EXTENDED SI[4*FPGA_OPERAND_NUM_WORDS-1];	// parts of intermediate product
+	FPGA_WORD C[2*FPGA_OPERAND_NUM_WORDS];				// full-size intermediate product
+
+		/* multiply to get partial words */
+	fpga_modular_mul_helper_multiply(A, B, SI);
+
+		/* accumulate partial words into full-size product */
+	fpga_modular_mul_helper_accumulate(SI, C);
+
+		/* reduce full-size product using special routine */
+	fpga_modular_mul_helper_reduce(C, P, N);
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Modular reduction for Curve25519.
+//
+// Note, that this routine reduces 512-bit product modulo 2*P, i.e.
+// 2 * (2^255 -19) = 2^256 - 19. It is computationally more effective to not
+// fully reduce the result until the very end of X25519 calculation.
+//
+// See the "Special Reduction" section of "High-Performance Modular
+// Multiplication on the Cell Processor" by Joppe W. Bos for more information
+// about the math behind reduction: http://joppebos.com/files/waifi09.pdf
+//
+//------------------------------------------------------------------------------
+void fpga_modular_mul_helper_reduce(FPGA_WORD *C, FPGA_BUFFER *P, FPGA_BUFFER *N)
+//------------------------------------------------------------------------------
+{
+	int w;	// word counter
+
+		// handy vars
+	FPGA_WORD y;
+	FPGA_WORD x1_msb, x1_lsb;
+	FPGA_WORD x2_msb, x2_lsb;
+	FPGA_WORD x5_msb, x5_lsb;
+	FPGA_WORD n_word;
+
+		// S1 is 262-bit result after the first reduction attempt
+		// S2 is 257-bit result after the second reduction attempt
+	FPGA_WORD S1[FPGA_OPERAND_NUM_WORDS + 1];
+	FPGA_WORD S2[FPGA_OPERAND_NUM_WORDS + 1];
+
+		// carries during the first and the second stages
+	FPGA_WORD_REDUCED t_carry1;
+	FPGA_WORD_EXTENDED t_carry2;
+
+		// borrows for the final stage
+	bool b_in, b_out;
+
+		// temporary result of the final stage
+	FPGA_WORD S2_N[FPGA_OPERAND_NUM_WORDS + 1];
+
+		// outputs of adders
+	FPGA_WORD_EXTENDED T1[FPGA_OPERAND_NUM_WORDS + 1];
+	FPGA_WORD_EXTENDED T2[FPGA_OPERAND_NUM_WORDS + 1];
+	FPGA_WORD_EXTENDED T3[FPGA_OPERAND_NUM_WORDS + 1];
+	FPGA_WORD_EXTENDED T4[FPGA_OPERAND_NUM_WORDS + 1];
+
+		// parts of full input product
+	FPGA_BUFFER P_LO, P_HI;
+
+		// split 512-bit input C into two 256-bit parts
+	for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+	{
+		P_LO.words[w] = C[w];
+		P_HI.words[w] = C[FPGA_OPERAND_NUM_WORDS + w];
+	}
+
+		/* We need to calculate S1 = P_HI * 38 + P_LO, this is done using
+		 * additions instead of multiplications, because our low-level
+		 * multiplier can only process 16 bits at a time, while an adder
+		 * can do 47. This is done by replacing 38 with 32 + 4 + 2 = 
+		 * 2^5 + 2^2 + 2^1, this way:
+		 * S1 = P_LO + (P_HI << 5) + (P_HI << 2) + (P_HI << 1)
+		 */
+
+		/* for every word we need to calculate a sum of five values: three
+		 * shifted copies of P_HI[w], P_LO[w] and carry from the previous word.
+		 * This is done using four adders in a pipelined fashion.
+		 */
+
+	t_carry1 = 0;	// no carry in the very first word
+	for (w=0; w<(FPGA_OPERAND_NUM_WORDS+1); w++)
+	{
+			// upper parts of shifted copies of P_HI[w-i] (from the previous cycle)
+		x1_msb = (w > 0) ? (FPGA_WORD)(P_HI.words[w-1] >> (32 - 1)) : 0;
+		x2_msb = (w > 0) ? (FPGA_WORD)(P_HI.words[w-1] >> (32 - 2)) : 0;
+		x5_msb = (w > 0) ? (FPGA_WORD)(P_HI.words[w-1] >> (32 - 5)) : 0;
+
+			// lower parts of shifted copies of P_HI[w]
+		x1_lsb = (w < FPGA_OPERAND_NUM_WORDS) ? (FPGA_WORD)(P_HI.words[w] << 1) : 0;
+		x2_lsb = (w < FPGA_OPERAND_NUM_WORDS) ? (FPGA_WORD)(P_HI.words[w] << 2) : 0;
+		x5_lsb = (w < FPGA_OPERAND_NUM_WORDS) ? (FPGA_WORD)(P_HI.words[w] << 5) : 0;
+
+			// take care of uppers parts from the previous cycle
+		x1_lsb |= x1_msb;
+		x2_lsb |= x2_msb;
+		x5_lsb |= x5_msb;
+
+			// current word of P_LO
+		y = (w < FPGA_OPERAND_NUM_WORDS) ? P_LO.words[w] : 0;
+
+			// run addition
+		fpga_lowlevel_add47(x1_lsb, x2_lsb,   &T1[w]);	// T1 obtained after clock cycle w
+		fpga_lowlevel_add47(x5_lsb, y,        &T2[w]);	// T2 obtained after clock cycle w
+		fpga_lowlevel_add47(T1[w],  T2[w],    &T3[w]);	// T3 obtained after clock cycle w+1 (when T1 and T2 are available)
+		fpga_lowlevel_add47(T3[w],  t_carry1, &T4[w]);	// T4 obtained after clock cycle w+2 (when T3 is available)
+
+			// store carry
+		t_carry1 = (FPGA_WORD_REDUCED)(T4[w] >> FPGA_WORD_WIDTH);
+
+			// store word of sum
+		S1[w] = (FPGA_WORD)T4[w];
+	}
+
+		/* now repeat what we've just done once again with S1, but this time S1_HI
+		 * is 6-bit wide at most, so we can calculate S1_HI * 38 beforehand,
+		 * add it to the lowest word of S1_LO and then propagate the carry
+		 */
+
+	t_carry2 = 0;
+	t_carry2 += (FPGA_WORD_EXTENDED)S1[FPGA_OPERAND_NUM_WORDS] << 1;
+	t_carry2 += (FPGA_WORD_EXTENDED)S1[FPGA_OPERAND_NUM_WORDS] << 2;
+	t_carry2 += (FPGA_WORD_EXTENDED)S1[FPGA_OPERAND_NUM_WORDS] << 5;
+
+	for (w=0; w<(FPGA_OPERAND_NUM_WORDS+1); w++)
+	{
+			// current word of S1_LO
+		y = (w < FPGA_OPERAND_NUM_WORDS) ? S1[w] : 0;
+
+			// do addition
+		fpga_lowlevel_add47(t_carry2, y, &T1[w]);
+
+			// store carry
+		t_carry2 = (FPGA_WORD_REDUCED)(T1[w] >> FPGA_WORD_WIDTH);
+
+			// store word of sum
+		S2[w] = (FPGA_WORD)T1[w];
+	}
+
+		/* So we've ended up with 257-bit result in S2. Note that there can only be
+		 * two situations, given our modulus N is 2^256 - 38:
+		 *
+		 * a) 0 <= S < N
+		 * b) N <= S < 2*N
+		 *
+		 * This is because we've obtained S2 by adding 256-bit quantity S1_LO and
+		 * 12-bit quantity 38 * S_HI. S1_LO is at most 2^256 - 1 or N + 37, while
+		 * the 12-bit quantity is at most 4095, so the largest possible value of
+		 * S2 is N + 4132, which is obviously less than 2*N.
+		 *
+		 * What we now do is we try subtracting N from S2 to obtain S2_N, if we end up
+		 * with a negative number, we return S2 (reduction was not necessary), otherwise
+		 * we return S2_N.
+		 */
+
+	b_in = false;	// first word has no borrow
+	
+		// run parallel subtraction
+	for (w=0; w<=FPGA_OPERAND_NUM_WORDS; w++)
+	{
+			// current word of N
+		n_word = (w < FPGA_OPERAND_NUM_WORDS) ? N->words[w] : 0;
+
+			// do subtraction
+		fpga_lowlevel_sub32(S2[w], n_word, b_in, &S2_N[w], &b_out);
+
+			// propagate borrow
+		b_in = b_out;
+	}
+	
+		// copy result to output buffer
+	for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+	{
+			// if subtraction of the highest words produced carry, we ended up
+			// with a negative number and S2 must be returned, not S2_N
+		P->words[w] = b_out ? S2[w] : S2_N[w];
+	}
+
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Parallelized multiplication.
+//
+// This routine implements the algorithm in Fig. 3. from "Ultra High
+// Performance ECC over NIST Primes on Commercial FPGAs".
+//
+// Inputs A and B are split into 2*OPERAND_NUM_WORDS words of FPGA_WORD_WIDTH/2
+// bits each, because FPGA multipliers can't handle full FPGA_WORD_WIDTH-wide
+// inputs. These smaller words are multiplied by an array of 2*OPERAND_NUM_WORDS
+// multiplers and accumulated into an array of 4*OPERAND_NUM_WORDS-1 partial
+// output words SI[].
+//
+// The order of loading A and B into the multipliers is a bit complicated,
+// during the first 2*OPERAND_NUM_WORDS-1 cycles one SI word per cycle is
+// obtained, during the very last 2*OPERAND_NUM_WORDS'th cycle all the
+// remaining 2*OPERAND_NUM_WORDS partial words are obtained simultaneously.
+//
+//------------------------------------------------------------------------------
+void fpga_modular_mul_helper_multiply(FPGA_BUFFER *A, FPGA_BUFFER *B, FPGA_WORD_EXTENDED *SI)
+//------------------------------------------------------------------------------
+{
+	int w;			// counter
+	int t, x;		// more counters
+	int j, i;		// word indices
+	FPGA_WORD p;	// product
+
+		// buffers for smaller words that multipliers can handle
+	FPGA_WORD_REDUCED AI[2*FPGA_OPERAND_NUM_WORDS];
+	FPGA_WORD_REDUCED BJ[2*FPGA_OPERAND_NUM_WORDS];
+	
+		// split a and b into smaller words
+	for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+		AI[2*w] = (FPGA_WORD_REDUCED)A->words[w], AI[2*w + 1] = (FPGA_WORD_REDUCED)(A->words[w] >> (FPGA_WORD_WIDTH / 2)),
+		BJ[2*w] = (FPGA_WORD_REDUCED)B->words[w], BJ[2*w + 1] = (FPGA_WORD_REDUCED)(B->words[w] >> (FPGA_WORD_WIDTH / 2));
+
+		// accumulators
+	FPGA_WORD_EXTENDED mac[2*FPGA_OPERAND_NUM_WORDS];
+	
+		// clear accumulators
+	for (w=0; w<(2*FPGA_OPERAND_NUM_WORDS); w++) mac[w] = 0;
+
+		// run the crazy algorithm :)
+	for (t=0; t<(2*FPGA_OPERAND_NUM_WORDS); t++)
+	{
+			// save upper half of si[] (one word per cycle)
+		if (t > 0)
+		{	SI[4*FPGA_OPERAND_NUM_WORDS - (t+1)] = mac[t];
+			mac[t] = 0;
+		}
+
+			// update index
+		j = 2*FPGA_OPERAND_NUM_WORDS - (t+1);
+
+			// parallel multiplication
+		for (x=0; x<(2*FPGA_OPERAND_NUM_WORDS); x++)
+		{
+				// update index
+			i = t - x;
+			if (i < 0) i += 2*FPGA_OPERAND_NUM_WORDS;
+
+				// multiply...
+			fpga_lowlevel_mul16(AI[i], BJ[j], &p);
+
+				// ...accumulate
+			mac[x] += p;
+		}
+
+	}
+
+		// now finally save lower half of SI[] (2*OPERAND_NUM_WORDS words at once)
+	for (w=0; w<(2*FPGA_OPERAND_NUM_WORDS); w++)
+		SI[w] = mac[2*FPGA_OPERAND_NUM_WORDS - (w+1)];
+}
+
+
+//------------------------------------------------------------------------------
+//
+// Accumulation of partial words into full-size product.
+//
+// This routine implements the Algorithm 4. from "Ultra High Performance ECC
+// over NIST Primes on Commercial FPGAs".
+//
+// Input words SI[] are accumulated into full-size product C[].
+//
+// The algorithm is a bit tricky, there are 4*OPERAND_NUM_WORDS-1 words in
+// SI[]. Complete operation takes 2*OPERAND_NUM_WORDS cycles, even words are
+// summed in full, odd words are split into two parts. During every cycle the
+// upper part of the previous word and the lower part of the following word are
+// summed too.
+//
+//------------------------------------------------------------------------------
+void fpga_modular_mul_helper_accumulate(FPGA_WORD_EXTENDED *SI, FPGA_WORD *C)
+//------------------------------------------------------------------------------
+{
+	int w;							// word counter
+	FPGA_WORD_EXTENDED cw0, cw1;	// intermediate sums
+	FPGA_WORD_REDUCED  cw_carry;	// wide carry
+
+		// clear carry
+	cw_carry = 0;
+
+		// execute the algorithm
+	for (w=0; w<(2*FPGA_OPERAND_NUM_WORDS); w++)
+	{
+			// handy flags
+		bool w_is_first = (w == 0);
+		bool w_is_last  = (w == (2*FPGA_OPERAND_NUM_WORDS-1));
+
+			// accumulate full current even word...
+			// ...and also the upper part of the previous odd word (if not the first word)
+		fpga_lowlevel_add47(SI[2*w], w_is_first ? 0 : SI[2*w-1] >> (FPGA_WORD_WIDTH / 2), &cw0);
+
+			// generate another word from "carry" part of the previous even word...
+			// ...and also the lower part of the following odd word (if not the last word)
+		cw1 = w_is_last ? 0 : (FPGA_WORD)(SI[2*w+1] << (FPGA_WORD_WIDTH / 2));
+		cw1 |= (FPGA_WORD_EXTENDED)cw_carry;
+
+			// accumulate once again
+		fpga_lowlevel_add47(cw0, cw1, &cw1);
+
+			// store current word...
+		C[w] = (FPGA_WORD)cw1;
+
+			// ...and carry
+		cw_carry = (FPGA_WORD_REDUCED) (cw1 >> FPGA_WORD_WIDTH);
+	}
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519_fpga_modular.h b/x25519_fpga_modular.h
new file mode 100644
index 0000000..3faa348
--- /dev/null
+++ b/x25519_fpga_modular.h
@@ -0,0 +1,76 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_modular.h
+// ------------------------------------------
+// Modular arithmetic routines for Curve25519
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Curve25519 Parameters
+//------------------------------------------------------------------------------
+
+/* modulus (2^255 - 19) */
+#define X25519_1P_INIT		{0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
+							 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFED}
+
+/* double modulus (2^256 - 38) */
+#define X25519_2P_INIT		{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
+							 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFDA}
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+extern FPGA_BUFFER X25519_1P;
+extern FPGA_BUFFER X25519_2P;
+
+
+//------------------------------------------------------------------------------
+// Prototypes
+//------------------------------------------------------------------------------
+void	fpga_modular_init					();
+
+void	fpga_modular_add					(FPGA_BUFFER *A, FPGA_BUFFER *B, FPGA_BUFFER *S, FPGA_BUFFER *N);
+void	fpga_modular_sub					(FPGA_BUFFER *A, FPGA_BUFFER *B, FPGA_BUFFER *D, FPGA_BUFFER *N);
+
+void	fpga_modular_mul					(FPGA_BUFFER *A, FPGA_BUFFER *B, FPGA_BUFFER *P, FPGA_BUFFER *N);
+
+void	fpga_modular_mul_helper_multiply	(FPGA_BUFFER *A, FPGA_BUFFER *B, FPGA_WORD_EXTENDED *SI);
+void	fpga_modular_mul_helper_accumulate	(FPGA_WORD_EXTENDED *SI, FPGA_WORD *C);
+void	fpga_modular_mul_helper_reduce		(FPGA_WORD *C, FPGA_BUFFER *P, FPGA_BUFFER *N);
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519_fpga_multiword.cpp b/x25519_fpga_multiword.cpp
new file mode 100644
index 0000000..d105272
--- /dev/null
+++ b/x25519_fpga_multiword.cpp
@@ -0,0 +1,89 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_multiword.cpp
+// -----------------------------
+// Multi-precision FPGA routines
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Headers
+//------------------------------------------------------------------------------
+#include <stdint.h>
+#include "x25519_fpga_model.h"
+
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+FPGA_BUFFER X25519_ZERO;
+FPGA_BUFFER X25519_ONE;
+
+
+//------------------------------------------------------------------------------
+void fpga_multiword_init()
+//------------------------------------------------------------------------------
+{
+	int w;	// word counter
+
+		/* fill buffers for large multi-word integers */
+	for (	w = FPGA_OPERAND_NUM_WORDS - 1;
+			w >= 0;
+			w -= 1)
+	{
+		X25519_ZERO.words[w]	= 0;			// all words are zero
+		X25519_ONE.words[w]		= w ? 0 : 1;	// only the lowest word is 1
+	}
+}
+
+
+//------------------------------------------------------------------------------
+void fpga_multiword_copy(FPGA_BUFFER *src, FPGA_BUFFER *dst)
+//------------------------------------------------------------------------------
+//
+// Copies large multi-word integer from src into dst.
+//
+//------------------------------------------------------------------------------
+{
+	int w;	// word counter
+
+		// copy all the words from src into dst
+	for (w=0; w<FPGA_OPERAND_NUM_WORDS; w++)
+		dst->words[w] = src->words[w];
+}
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/x25519_fpga_multiword.h b/x25519_fpga_multiword.h
new file mode 100644
index 0000000..8e94893
--- /dev/null
+++ b/x25519_fpga_multiword.h
@@ -0,0 +1,78 @@
+//------------------------------------------------------------------------------
+//
+// x25519_fpga_multiword.h
+// -----------------------------
+// Multi-precision FPGA routines
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+// Curve25519 Parameters
+//------------------------------------------------------------------------------
+#define X25519_OPERAND_WIDTH	256
+
+
+//------------------------------------------------------------------------------
+// FPGA Pipeline Settings
+//------------------------------------------------------------------------------
+#define FPGA_OPERAND_NUM_WORDS		(X25519_OPERAND_WIDTH / FPGA_WORD_WIDTH)
+
+
+//------------------------------------------------------------------------------
+// Operand Data Type
+//------------------------------------------------------------------------------
+typedef struct FPGA_BUFFER
+{
+	FPGA_WORD words[FPGA_OPERAND_NUM_WORDS];
+}
+FPGA_BUFFER;
+
+
+//------------------------------------------------------------------------------
+// Globals
+//------------------------------------------------------------------------------
+extern FPGA_BUFFER X25519_ZERO;
+extern FPGA_BUFFER X25519_ONE;
+
+
+//------------------------------------------------------------------------------
+// Prototypes
+//------------------------------------------------------------------------------
+void	fpga_multiword_init		();
+void	fpga_multiword_copy		(FPGA_BUFFER *src, FPGA_BUFFER *dst);
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.
Next message (by thread): [Cryptech-Commits] [user/shatov/x25519_fpga_model] branch master updated (43110be -> 23cc981)
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the Commits mailing list