[Cryptech-Commits] [user/shatov/ModExpA7] 01/01: Ported ModExpS6 core to the new Alpha platform, hence the core now becomes ModExpA7.

git at cryptech.is git at cryptech.is
Wed Jun 1 08:02:19 UTC 2016


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/ModExpA7.

commit 2c92500715a13e017ae7e792ba8283c91a716b7d
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Wed Jun 1 10:44:54 2016 +0300

    Ported ModExpS6 core to the new Alpha platform, hence the core now becomes ModExpA7.
    
    Note, that the core takes advantage of built-in DSP slices available in 7-Series FPGAs.
    This considerably speeds up computations, because the core can operate in 32-bit-word-serial
    mode instead of just bit-serial mode. The core directly instantiates DSP slices instead of
    using IP wizard to avoid using CoreGen during console bitstream builds.
---
 bench/tb_modexpa7.v                  | 534 ++++++++++++++++++++++++++
 rtl/dsp_multiplier_a7.v              | 522 ++++++++++++++++++++++++++
 rtl/dsp_subtractor_a7.v              | 142 +++++++
 rtl/modexpa7_adder64_carry32.v       |  81 ++++
 rtl/modexpa7_buffer_core.v           | 218 +++++++++++
 rtl/modexpa7_buffer_user.v           | 197 ++++++++++
 rtl/modexpa7_modinv32.v              | 141 +++++++
 rtl/modexpa7_montgomery_coeff.v      | 425 +++++++++++++++++++++
 rtl/modexpa7_montgomery_multiplier.v | 408 ++++++++++++++++++++
 rtl/modexpa7_top.v                   | 706 +++++++++++++++++++++++++++++++++++
 rtl/modexpa7_wrapper.v               | 211 +++++++++++
 rtl/ram_1rw_1ro_readfirst.v          |  88 +++++
 12 files changed, 3673 insertions(+)

diff --git a/bench/tb_modexpa7.v b/bench/tb_modexpa7.v
new file mode 100644
index 0000000..27b8928
--- /dev/null
+++ b/bench/tb_modexpa7.v
@@ -0,0 +1,534 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module tb_modexpa7_top;
+
+
+		//
+		// Settings (forwarded to the UUT parameter of the same name)
+		//
+	localparam MAX_MODULUS_WIDTH = 256;
+	
+		
+		//
+		// Control Registers (drive the UUT's fast_public_mode, modulus_width
+		// and exponent_width inputs)
+	reg				use_public_mode;
+	reg	[ 8: 0]	modulus_width;
+	reg	[ 8: 0]	exponent_width;
+		
+		
+		//
+		// Test Vectors (128-bit): M_* messages, N_<k> moduli, D_<k> exponents,
+		// S_<msg>_<k> expected result for M_<msg> with N_<k> and D_<k>
+	localparam	[127:0]	M_TEST_128		= 128'h0001FF00544553545445535454455354;
+	localparam	[127:0]	M_ABC_128		= 128'h0001FFFFFFFF00414243414243414243;
+	localparam	[127:0]	M_XYZ_128		= 128'h0001FFFFFFFF0058595A58595A58595A;
+	
+	localparam	[127:0]	N_1_128			= 128'h56247F8A1582CD1C96ED0ECD3E60FCB1;
+	localparam	[127:0]	N_2_128			= 128'h708CC1BEA087DCA3D0999E3AC033A50B;
+	localparam	[127:0]	D_1_128			= 128'h40C9E4AEEB6CDC6D12E4526089BCB8FD;
+	localparam	[127:0]	D_2_128			= 128'h1ADFF16F74639CB28976EEC528C866C1;
+	
+	localparam	[127:0]	S_TEST_1_128	= 128'h310AB3124D4CB1DEE1CFA4694DC8BCA2;
+	localparam	[127:0]	S_TEST_2_128	= 128'h5D621FC2642AEB705FDB7B90693FE3BF;
+	localparam	[127:0]	S_ABC_1_128		= 128'h235EA2712140F90344DFE2BF74B13075;
+	localparam	[127:0]	S_XYZ_1_128		= 128'h1A501D80B1F8648DE08C1AE673E13770;
+	localparam	[127:0]	S_ABC_2_128		= 128'h2258423C6CDB0E1AA73F70CA41BC6633;
+	localparam	[127:0]	S_XYZ_2_128		= 128'h5B278DEB75C055C8DA8E2993A791F3C2;
+
+
+		//
+		// Test Vectors (256-bit), same naming scheme as the 128-bit set
+		//
+	localparam	[255:0]	M_TEST_256		= 256'h0001FF0054455354544553545445535454455354544553545445535454455354;
+	localparam	[255:0]	M_ABC_256		= 256'h0001FFFFFFFFFF00414243414243414243414243414243414243414243414243;
+	localparam	[255:0]	M_XYZ_256		= 256'h0001FFFFFFFFFF0058595A58595A58595A58595A58595A58595A58595A58595A;
+	
+	localparam	[255:0]	N_1_256			= 256'h624341A8670845B4AA39CC2C08437E2568E11441CB5461CB49162EAC2E751FC5;
+	localparam	[255:0]	N_2_256			= 256'h772F7C4A4D8C92CE59D8688897795F7208D3B1D3B5C7D83BF0E1A2A59D879A13;
+	localparam	[255:0]	D_1_256			= 256'h41D555B401F3A4DFF9FC673249070F08247EF8A52A270F283FFAA7EA65B25F01;
+	localparam	[255:0]	D_2_256			= 256'h1606E0A19AD3700E52EC7BE542995BDA26AA386DD677A74B7B2846AC44BCAD81;
+	
+	localparam	[255:0]	S_TEST_1_256	= 256'h07EFB60CF6E6F2350E8D2D5C5261F36CF0B2B71833386AC6E7958A8F39DAD331;
+	localparam	[255:0]	S_TEST_2_256	= 256'h5C18F857BF6E4D6A013F554A680329BDC14C31FF38D0EEAD1DE02CFD85337A53;
+	localparam	[255:0]	S_ABC_1_256		= 256'h2AA0A5D3B2AE1FFD23640714F652DD147B6C202905B4D273E837313FB392487C;
+	localparam	[255:0]	S_XYZ_1_256		= 256'h27850A998B867CB67146EEFF15147668D7CF6B7BD5B048D9735D5767EB6D68CB;
+	localparam	[255:0]	S_ABC_2_256		= 256'h56A0DA5F9987DF4E921A771CCC8A1D68F8DD7D9244A0DD37A4095D61F455489D;
+	localparam	[255:0]	S_XYZ_2_256		= 256'h08A7AF13E058754278EE6582626A6675D09A08C0C898E218FCDF7FFA37C9876C;
+
+
+		//
+		// Locals (bank selectors; they form the upper two bits of bus_addr)
+		//
+	localparam	[ 1: 0]	BANK_MODULUS	= 2'b00;
+	localparam	[ 1: 0]	BANK_MESSAGE	= 2'b01;
+	localparam	[ 1: 0]	BANK_EXPONENT	= 2'b10;
+	localparam	[ 1: 0]	BANK_RESULT		= 2'b11;
+	
+	
+		//
+		// Integers (addr is the word index shared by the bank_read/bank_write loops)
+		//
+	integer addr;
+	
+	
+		//
+		// Inputs, Outputs (init/ready and next/valid handshakes of the UUT)
+		//
+	reg	init;
+	reg	next;
+	wire	ready;
+	wire	valid;
+
+
+		//
+		// Bus (5-bit address, 32-bit write and read data)
+		//
+	reg				bus_cs;
+	reg				bus_we;
+	reg	[ 4: 0]	bus_addr;
+	reg	[31: 0]	bus_data_wr;
+	wire	[31: 0]	bus_data_rd;
+
+
+		//
+		// Clock (100 MHz)
+		// toggles every 5 time units, i.e. a 10 ns period at the 1 ns timescale
+	reg clk = 1'b0;
+	always #5 clk = ~clk;
+	
+
+		//
+		// UUT (all stimulus registers above are wired straight to its ports)
+		//
+	modexpa7_top #
+	(
+		.MAX_MODULUS_WIDTH	(MAX_MODULUS_WIDTH)
+	)
+	uut
+	(
+		.clk					(clk),
+		
+		.init					(init),
+		.ready				(ready),
+		
+		.next					(next),
+		.valid				(valid),
+		
+		.modulus_width		(modulus_width),
+		.exponent_width	(exponent_width),
+		
+		.fast_public_mode	(use_public_mode),
+		
+		.bus_cs				(bus_cs),
+		.bus_we				(bus_we),
+		.bus_addr			(bus_addr),
+		.bus_data_wr		(bus_data_wr),
+		.bus_data_rd		(bus_data_rd)
+	);
+
+
+		//
+		// Script
+		//
+	initial begin
+		// Main script: park every UUT input, then run both suites in each mode.
+		$display("Testbench started.");
+		use_public_mode	= 0;			// known value instead of X until t = 200
+		init = 0;
+		next = 0;
+		modulus_width	= 9'd0;		// overwritten by set_modulus_*()
+		bus_cs			= 0;
+		bus_we			= 0;
+		bus_addr			= 5'd0;
+		bus_data_wr		= 32'h00000000;
+		exponent_width	= 9'd0;		// would otherwise still be X during the first init pulse
+		#200;
+		// First pass: fast_public_mode input of the UUT held high.
+		$display("Running in public (unsafe, faster) mode...");
+		use_public_mode = 1;
+		//
+		run_tests_128();
+		run_tests_256();
+		// Second pass: same vectors with fast_public_mode held low.
+		$display("Running in private (safe, slower) mode...");
+		use_public_mode = 0;
+		//
+		run_tests_128();
+		run_tests_256();
+		// Only reached when no vector failed (a mismatch calls $finish earlier).
+		$display("All tests passed.");
+		$display("Testbench finished.");
+		//
+		$finish;
+		//
+	end
+	
+		
+		//
+		// run_tests_128()
+		//
+	task run_tests_128;
+		begin
+			// Runs six 128-bit vectors: three messages under each of two key pairs.
+			$display("Testing 128-bit mode...");
+			// TEST message with modulus N_1 / exponent D_1
+			$display("Setting modulus #1...");
+			set_modulus_128(N_1_128);
+			$display("Signing TEST message...");
+			test_vector_128(M_TEST_128, D_1_128, S_TEST_1_128);
+			// TEST message with modulus N_2 / exponent D_2
+			$display("Setting modulus #2...");
+			set_modulus_128(N_2_128);
+			$display("Signing TEST message...");
+			test_vector_128(M_TEST_128, D_2_128, S_TEST_2_128);
+			// Back to modulus N_1: two messages without reloading the modulus in between
+			$display("Setting modulus #1...");
+			set_modulus_128(N_1_128);
+			$display("Signing ABC message...");
+			test_vector_128(M_ABC_128, D_1_128, S_ABC_1_128);
+			$display("Signing XYZ message...");
+			test_vector_128(M_XYZ_128, D_1_128, S_XYZ_1_128);
+			// Same two messages with modulus N_2 / exponent D_2
+			$display("Setting modulus #2...");
+			set_modulus_128(N_2_128);
+			$display("Signing ABC message...");
+			test_vector_128(M_ABC_128, D_2_128, S_ABC_2_128);
+			$display("Signing XYZ message...");
+			test_vector_128(M_XYZ_128, D_2_128, S_XYZ_2_128);
+			//
+		end
+	endtask
+	
+	
+		//
+		// run_tests_256()
+		//
+	task run_tests_256;
+		begin
+			// Runs six 256-bit vectors: three messages under each of two key pairs.
+			$display("Testing 256-bit mode...");
+			// TEST message with modulus N_1 / exponent D_1
+			$display("Setting modulus #1...");
+			set_modulus_256(N_1_256);
+			$display("Signing TEST message...");
+			test_vector_256(M_TEST_256, D_1_256, S_TEST_1_256);
+			// TEST message with modulus N_2 / exponent D_2
+			$display("Setting modulus #2...");
+			set_modulus_256(N_2_256);
+			$display("Signing TEST message...");
+			test_vector_256(M_TEST_256, D_2_256, S_TEST_2_256);
+			// Back to modulus N_1: two messages without reloading the modulus in between
+			$display("Setting modulus #1...");
+			set_modulus_256(N_1_256);
+			$display("Signing ABC message...");
+			test_vector_256(M_ABC_256, D_1_256, S_ABC_1_256);
+			$display("Signing XYZ message...");
+			test_vector_256(M_XYZ_256, D_1_256, S_XYZ_1_256);
+			// Same two messages with modulus N_2 / exponent D_2
+			$display("Setting modulus #2...");
+			set_modulus_256(N_2_256);
+			$display("Signing ABC message...");
+			test_vector_256(M_ABC_256, D_2_256, S_ABC_2_256);
+			$display("Signing XYZ message...");
+			test_vector_256(M_XYZ_256, D_2_256, S_XYZ_2_256);
+			//		
+		end
+	endtask
+		
+		
+	
+		//
+		// set_modulus_128()
+		//
+	task set_modulus_128;
+		input	[127:0]	modulus;
+		begin
+			// Announce a 128-bit operand and load it into the modulus bank.
+			modulus_width = 9'd128;
+			bank_write_128(BANK_MODULUS, modulus);
+			#100;
+			// Hold init high until the UUT raises ready.
+			init = 1'b1;
+			wait_ready();
+			init = 1'b0;
+			// Idle for 100 time units before returning to the caller.
+			#100;
+		end
+	endtask
+	
+	
+		//
+		// set_modulus_256()
+		//
+	task set_modulus_256;
+		input	[255:0]	modulus;
+		begin
+			// Announce a 256-bit operand and load it into the modulus bank.
+			modulus_width = 9'd256;
+			bank_write_256(BANK_MODULUS, modulus);
+			#100;
+			// Hold init high until the UUT raises ready.
+			init = 1'b1;
+			wait_ready();
+			init = 1'b0;
+			// Idle for 100 time units before returning to the caller.
+			#100;
+		end
+	endtask
+	
+	
+		//
+		// test_vector_128()
+		//
+	task test_vector_128;
+		input	[127:0]	message;
+		input	[127:0]	exponent;
+		input	[127:0]	signature;
+		reg	[127:0]	result;
+		begin
+			// Load the 128-bit message and exponent into their banks.
+			exponent_width = 9'd128;
+			//
+			bank_write_128(BANK_MESSAGE, message);
+			#100;
+			bank_write_128(BANK_EXPONENT, exponent);
+			#100;
+			// Hold next high until the UUT raises valid.
+			next = 1'b1;
+			wait_valid();
+			next = 1'b0;
+			#100;
+			// Fetch the result bank and print it next to the expected value.
+			bank_read_128(BANK_RESULT, result);
+			//
+			$display("    signature: %032x", signature);
+			$display("    readback:  %032x", result);
+			// Anything other than a definite match (including X/Z bits) is a failure.
+			if ((result == signature) !== 1'b1) begin
+				$display("Got wrong S = M ** E mod N [ERROR]");
+				$finish;
+			end
+			else $display("Got correct S = M ** E mod N [OK]");
+			//
+		end
+	endtask
+      
+		
+		//
+		// test_vector_256()
+		//
+	task test_vector_256;
+		input	[255:0]	message;
+		input	[255:0]	exponent;
+		input	[255:0]	signature;
+		reg	[255:0]	result;
+		begin
+			// Load the 256-bit message and exponent into their banks.
+			exponent_width = 9'd256;
+			//
+			bank_write_256(BANK_MESSAGE, message);
+			#100;
+			bank_write_256(BANK_EXPONENT, exponent);
+			#100;
+			// Hold next high until the UUT raises valid.
+			next = 1'b1;
+			wait_valid();
+			next = 1'b0;
+			#100;
+			// Fetch the result bank and print it next to the expected value.
+			bank_read_256(BANK_RESULT, result);
+			//
+			$display("    signature: %064x", signature);
+			$display("    readback:  %064x", result);
+			// Anything other than a definite match (including X/Z bits) is a failure.
+			if ((result == signature) !== 1'b1) begin
+				$display("Got wrong S = M ** E mod N [ERROR]");
+				$finish;
+			end
+			else $display("Got correct S = M ** E mod N [OK]");
+			//
+		end
+	endtask
+	
+	
+		//
+		// bus_write_reg()
+		//
+	task bus_write_reg;
+		input	[ 1: 0]	bank;
+		input	[ 2: 0]	offset;
+		input	[31: 0]	data;
+		begin
+			// One write cycle: the 5-bit bus address is {bank, offset}.
+			bus_addr			= {bank, offset};
+			bus_data_wr		= data;
+			{bus_cs, bus_we}	= 2'b11;
+			//
+			// Keep both strobes asserted for 10 time units (one clk period).
+			#10;
+			//
+			{bus_cs, bus_we}	= 2'b00;
+			//
+			// Idle for another 10 time units before returning.
+			#10;
+			//
+		end
+	endtask
+		
+		
+		//
+		// bus_read_reg()
+		//
+	task bus_read_reg;
+		input		[ 1: 0]	bank;
+		input		[ 2: 0]	offset;
+		output	[31: 0]	data;
+		begin
+			// One read cycle: select {bank, offset} with the write strobe low.
+			bus_addr			= {bank, offset};
+			{bus_cs, bus_we}	= 2'b10;
+			//
+			// Leave chip-select asserted for 10 time units (one clk period).
+			#10;
+			//
+			// Drop chip-select, then capture the value present on bus_data_rd.
+			{bus_cs, bus_we}	= 2'b00;
+			data				= bus_data_rd;
+			//
+			#10;
+			//
+		end
+	endtask
+	
+	
+		//
+		// bank_write_128()
+		//
+	task bank_write_128;
+		input	[  1:0]	bank;
+		input	[127:0]	value;
+		reg	[ 31:0]	word;
+		begin
+			//
+			// Write the operand as four 32-bit words; offset 0 carries bits [31:0].
+			//
+			for (addr=0; addr<4; addr=addr+1) begin
+				word = value[32*addr +: 32];
+				bus_write_reg(bank, addr[2:0], word);
+			end
+			//
+		end
+	endtask
+	
+	
+		//
+		// bank_write_256()
+		//
+	task bank_write_256;
+		input	[  1:0]	bank;
+		input	[255:0]	value;
+		reg	[ 31:0]	word;
+		begin
+			//
+			// Write the operand as eight 32-bit words; offset 0 carries bits [31:0].
+			//
+			for (addr=0; addr<8; addr=addr+1) begin
+				word = value[32*addr +: 32];
+				bus_write_reg(bank, addr[2:0], word);
+			end
+			//
+		end
+	endtask
+	
+	
+		//
+		// bank_read_128()
+		//
+	task bank_read_128;
+		input		[  1:0]	bank;
+		output	[127:0]	value;
+		reg		[ 31:0]	word;
+		begin
+			// Read four 32-bit words; the word at offset 0 supplies bits [31:0].
+			for (addr=0; addr<4; addr=addr+1) begin
+				bus_read_reg(bank, addr[2:0], word);
+				value[32*addr +: 32] = word;
+			end
+			//
+		end
+	endtask
+	
+		
+		//
+		// bank_read_256()
+		//
+	task bank_read_256;
+		input		[  1:0]	bank;
+		output	[255:0]	value;
+		reg		[ 31:0]	word;
+		begin
+			// Read eight 32-bit words; the word at offset 0 supplies bits [31:0].
+			for (addr=0; addr<8; addr=addr+1) begin
+				bus_read_reg(bank, addr[2:0], word);
+				value[32*addr +: 32] = word;
+			end
+			//
+		end
+	endtask
+	
+	
+		//
+		// wait_ready()
+		//
+	task wait_ready;
+		begin
+			while (ready == 1'b0) #10;	// poll every 10 time units; an X/Z on ready also ends the wait
+		end
+	endtask
+	
+	
+		//
+		// wait_valid()
+		//
+	task wait_valid;
+		begin
+			while (valid == 1'b0) #10;	// poll every 10 time units; an X/Z on valid also ends the wait
+		end
+	endtask
+	
+	
+endmodule
+
diff --git a/rtl/dsp_multiplier_a7.v b/rtl/dsp_multiplier_a7.v
new file mode 100644
index 0000000..bb6a139
--- /dev/null
+++ b/rtl/dsp_multiplier_a7.v
@@ -0,0 +1,522 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module dsp_multiplier_a7
+	(
+		input					clk,
+		input		[31: 0]	a,
+		input		[31: 0]	b,
+		output	[63: 0]	p
+	);
+
+		// 32x32-bit multiplier assembled from four cascaded DSP48E1 slices.
+		// Split a, b into parts
+		// (17-bit low and 15-bit high parts: a = a_hi * 2^17 + a_lo, same for b)
+	wire	[16: 0]	a_lo = a[16: 0];
+	wire	[16: 0]	b_lo = b[16: 0];
+	
+	wire	[14: 0]	a_hi = a[31:17];
+	wire	[14: 0]	b_hi = b[31:17];
+
+		//
+		// Products (P outputs of DSP48E1_inst1, DSP48E1_inst3 and DSP48E1_inst4)
+		//
+	wire	[47: 0]	p_dsp1;
+	wire	[47: 0]	p_dsp3;
+	wire	[47: 0]	p_dsp4;
+
+		//
+		// Cascade p (PCOUT of one slice drives PCIN of the next slice)
+		//
+	wire	[47: 0]	p_dsp1_chain;
+	wire	[47: 0]	p_dsp2_chain;
+	wire	[47: 0]	p_dsp3_chain;
+	
+		//
+		// Cascade a (ACOUT of inst1 / inst3 drives ACIN of inst2 / inst4)
+		//
+	wire	[29: 0]	a_lo_chain;
+	wire	[29: 0]	a_hi_chain;
+	
+		//
+		// Register parts of p
+		// (one FD per bit: p[16:0] <= p_dsp1[16:0], p[33:17] <= p_dsp3[16:0])
+	genvar i;
+	generate for (i=0; i<17; i=i+1)
+		begin : FD_gen
+			//
+			FD #(.INIT (1'b0))
+			FD_inst_1
+			(
+				.C	(clk),
+				.D	(p_dsp1[i]),
+				.Q	(p[i])
+			);
+			//
+			FD #(.INIT(1'b0))
+			FD_inst_2
+			(
+				.C (clk),
+				.D	(p_dsp3[i]),
+				.Q	(p[17+i])
+			);
+			//
+		end
+	endgenerate
+
+		//
+		// Mapping (top 30 bits come straight from DSP48E1_inst4, which sets PREG = 1)
+		//
+	assign p[63:34] = p_dsp4[29:0];
+
+		// First slice of the cascade; A and B are registered once (AREG = BREG = 1).
+		// a_lo * b_lo
+		// P[16:0] feeds the p[16:0] flip-flops; PCOUT and ACOUT feed DSP48E1_inst2.
+	DSP48E1 #
+	(
+		.AREG						(1),
+		.BREG						(1),
+		.CREG						(0),
+		.DREG						(0),
+		.MREG						(0),
+		.PREG						(0),
+		.ADREG					(0),
+		
+		.INMODEREG				(0),
+		.OPMODEREG				(0),
+		.ALUMODEREG				(0),
+		.CARRYINREG				(0),
+		.CARRYINSELREG			(0),
+		
+		.ACASCREG				(1),
+		.BCASCREG				(1),
+
+		.A_INPUT					("DIRECT"),
+		.B_INPUT					("DIRECT"),
+
+		.USE_SIMD				("ONE48"),
+		.USE_DPORT				("FALSE"),
+		.USE_MULT				("MULTIPLY"),
+		.USE_PATTERN_DETECT	("NO_PATDET"),
+
+		.SEL_MASK				("MASK"),
+		.SEL_PATTERN			("PATTERN"),
+		
+		.MASK						(48'h000000000000),
+		.PATTERN					(48'h000000000000),
+		
+		.AUTORESET_PATDET		("NO_RESET")
+	)
+	DSP48E1_inst1
+	(
+		.CLK						(clk),
+		
+		.RSTA						(1'b0),
+		.RSTB						(1'b0),
+		.RSTC						(1'b0),
+		.RSTD						(1'b0),
+		.RSTM						(1'b0),
+		.RSTP						(1'b0),
+
+		.RSTCTRL					(1'b0),
+		.RSTINMODE				(1'b0),
+		.RSTALUMODE				(1'b0),
+		.RSTALLCARRYIN			(1'b0),
+
+		.CEA1						(1'b0),
+		.CEA2						(1'b1),
+		.CEB1						(1'b0),
+		.CEB2						(1'b1),
+		.CEC						(1'b0),
+		.CED						(1'b0),
+		.CEM						(1'b0),
+		.CEP						(1'b0),		
+		.CEAD						(1'b0),
+		
+		.CECTRL					(1'b0),
+		.CEINMODE				(1'b0),
+		.CEALUMODE				(1'b0),
+		.CECARRYIN				(1'b0),
+		
+		.CARRYINSEL				(3'b000),
+		.ALUMODE					(4'b0000),
+		.INMODE					(5'b00000),
+		.OPMODE					(7'b0110101),	// NOTE(review): per Xilinx UG479 this selects P = M + C (C is tied to 0) - confirm
+		
+		.A							({{13{1'b0}}, a_lo}), 
+		.B							({{ 1{1'b0}}, b_lo}),
+		.C							({48{1'b0}}),
+		.D							({25{1'b0}}),
+		.P							(p_dsp1),
+
+		.CARRYIN					(1'b0),
+		.CARRYOUT				(),
+		
+		.CARRYCASCIN			(1'b0),
+		.CARRYCASCOUT			(),
+
+		.ACIN						({30{1'b0}}),
+		.BCIN						({18{1'b0}}),
+		
+		.ACOUT					(a_lo_chain),
+		.BCOUT					(),
+		
+		.PCIN						({48{1'b0}}),
+		.PCOUT					(p_dsp1_chain),
+		
+		.MULTSIGNIN				(1'b0),
+		.MULTSIGNOUT			(),
+	
+		.PATTERNDETECT			(),
+		.PATTERNBDETECT		(),
+
+		.UNDERFLOW				(),
+		.OVERFLOW				()
+	);
+
+		// Second slice; A arrives over the cascade from DSP48E1_inst1 (a_lo_chain).
+		// a_lo * b_hi
+		// Accumulates onto p_dsp1_chain; only PCOUT is used (P is left unconnected).
+	DSP48E1 #
+	(
+		.AREG						(0),
+		.BREG						(1),
+		.CREG						(0),
+		.DREG						(0),
+		.MREG						(0),
+		.PREG						(0),
+		.ADREG					(0),
+
+		.INMODEREG				(0),
+		.OPMODEREG				(0),
+		.ALUMODEREG				(0),
+		.CARRYINREG				(0),
+		.CARRYINSELREG			(0),
+		
+		.ACASCREG				(0),
+		.BCASCREG				(1),
+		
+		.A_INPUT					("CASCADE"),
+		.B_INPUT					("DIRECT"),
+		
+		.USE_SIMD				("ONE48"),
+		.USE_DPORT				("FALSE"),
+		.USE_MULT				("MULTIPLY"),
+		.USE_PATTERN_DETECT	("NO_PATDET"),
+
+		.SEL_MASK				("MASK"),
+		.SEL_PATTERN			("PATTERN"),
+
+		.MASK						(48'h000000000000),
+		.PATTERN					(48'h000000000000),
+		
+		.AUTORESET_PATDET		("NO_RESET")
+	)
+	DSP48E1_inst2
+	(
+		.CLK						(clk),
+
+		.RSTA						(1'b0),
+		.RSTB						(1'b0),
+		.RSTC						(1'b0),
+		.RSTD						(1'b0),
+		.RSTM						(1'b0),
+		.RSTP						(1'b0),
+
+		.RSTCTRL					(1'b0),
+		.RSTINMODE				(1'b0),
+		.RSTALUMODE				(1'b0),
+		.RSTALLCARRYIN			(1'b0),
+
+		.CEA1						(1'b0),
+		.CEA2						(1'b0),
+		.CEB1						(1'b0),
+		.CEB2						(1'b1),
+		.CEC						(1'b0),
+		.CED						(1'b0),
+		.CEP						(1'b0),
+		.CEM						(1'b0),
+		.CEAD						(1'b0),
+
+		.CECTRL					(1'b0),
+		.CEINMODE				(1'b0),
+		.CEALUMODE				(1'b0),
+		.CECARRYIN				(1'b0),
+
+		.CARRYINSEL				(3'b000),
+		.ALUMODE					(4'b0000),
+		.INMODE					(5'b00000),
+		.OPMODE					(7'b1010101),	// NOTE(review): per Xilinx UG479 this selects P = M + (PCIN >> 17) - confirm
+	
+		.A							({30{1'b0}}),
+		.B							({{3{1'b0}}, b_hi}),
+		.C							({48{1'b0}}),
+		.D							({25{1'b0}}),
+		.P							(),
+
+		.CARRYIN					(1'b0),
+		.CARRYOUT				(),
+		
+		.CARRYCASCIN			(1'b0),
+		.CARRYCASCOUT			(),
+		
+		.ACIN						(a_lo_chain),
+		.BCIN						({18{1'b0}}),
+		
+		.ACOUT					(),
+		.BCOUT					(),
+		
+		.PCIN						(p_dsp1_chain),
+		.PCOUT					(p_dsp2_chain),
+		
+		.MULTSIGNIN				(1'b0),
+		.MULTSIGNOUT			(),
+
+		.PATTERNDETECT			(),
+		.PATTERNBDETECT		(),
+
+		.UNDERFLOW				(),
+		.OVERFLOW				()
+	);
+	
+		// Third slice; A and B are registered once (AREG = BREG = 1).
+		// a_hi * b_lo
+		// Adds p_dsp2_chain; P[16:0] feeds the p[33:17] flip-flops, ACOUT feeds inst4.
+	DSP48E1 #
+	(
+		.AREG						(1),
+		.BREG						(1),
+		.CREG						(0),
+		.DREG						(0),
+		.MREG						(0),
+		.PREG						(0),
+		.ADREG					(0),
+		
+		.INMODEREG				(0),
+		.OPMODEREG				(0),
+		.ALUMODEREG				(0),
+		.CARRYINREG				(0),
+		.CARRYINSELREG			(0),
+
+		.ACASCREG				(1),
+		.BCASCREG				(1),
+
+		.A_INPUT					("DIRECT"),
+		.B_INPUT					("DIRECT"),
+
+		.USE_SIMD				("ONE48"),		
+		.USE_DPORT				("FALSE"),
+		.USE_MULT				("MULTIPLY" ),
+		.USE_PATTERN_DETECT	("NO_PATDET"),
+		
+		.SEL_MASK				("MASK"),
+		.SEL_PATTERN			("PATTERN"),
+
+		.MASK						(48'h000000000000),
+		.PATTERN					(48'h000000000000),
+		
+		.AUTORESET_PATDET		("NO_RESET")
+	)
+	DSP48E1_inst3
+	(
+		.CLK						(clk),
+		
+		.RSTA						(1'b0),
+		.RSTB						(1'b0),
+		.RSTC						(1'b0),
+		.RSTD						(1'b0),
+		.RSTM						(1'b0),
+		.RSTP						(1'b0),
+
+		.RSTCTRL					(1'b0),
+		.RSTINMODE				(1'b0),
+		.RSTALUMODE				(1'b0),
+		.RSTALLCARRYIN			(1'b0),
+
+		.CEA1						(1'b0),
+		.CEA2						(1'b1),
+		.CEB1						(1'b0),
+		.CEB2						(1'b1),
+		.CEC						(1'b0),
+		.CED						(1'b0),
+		.CEM						(1'b0),
+		.CEP						(1'b0),
+		.CEAD						(1'b0),
+
+		.CECTRL					(1'b0),
+		.CEINMODE				(1'b0),
+		.CEALUMODE				(1'b0),
+		.CECARRYIN				(1'b0),
+
+		.CARRYINSEL				(3'b000),
+		.ALUMODE					(4'b0000),
+		.INMODE					(5'b00000),
+		.OPMODE					(7'b0010101),	// NOTE(review): per Xilinx UG479 this selects P = M + PCIN (no shift) - confirm
+
+		.A							({{15{1'b0}}, a_hi}),
+		.B							({{ 1{1'b0}}, b_lo}),
+		.C							({48{1'b0}}),
+		.D							({25{1'b0}}),
+		.P							(p_dsp3),
+		
+		.CARRYIN					(1'b0),
+		.CARRYOUT				(),
+		
+		.CARRYCASCIN			(1'b0),		
+		.CARRYCASCOUT			(),
+
+		.ACIN						({30{1'b0}}),
+		.BCIN						({18{1'b0}}),
+		
+		.ACOUT					(a_hi_chain),
+		.BCOUT					(),
+		
+		.PCIN						(p_dsp2_chain),
+		.PCOUT					(p_dsp3_chain),
+
+		.MULTSIGNIN				(1'b0),
+		.MULTSIGNOUT			(),
+		
+		.PATTERNDETECT			(),
+		.PATTERNBDETECT		(),
+
+		.UNDERFLOW				(),
+		.OVERFLOW				()
+	);
+
+		// Last slice; A arrives over the cascade from DSP48E1_inst3 (a_hi_chain).
+		// a_hi * b_hi
+		// Adds p_dsp3_chain; the P register is enabled here and P[29:0] drives p[63:34].
+	DSP48E1 #
+	(
+		.AREG						(0),
+		.BREG						(1),
+		.CREG						(0),
+		.DREG						(0),
+		.MREG						(0),
+		.PREG						(1),
+		.ADREG					(0),
+
+		.INMODEREG				(0),
+		.OPMODEREG				(0),		
+		.ALUMODEREG				(0),
+		.CARRYINREG				(0),
+		.CARRYINSELREG			(0),
+
+		.ACASCREG				(0),
+		.BCASCREG				(1),
+
+		.A_INPUT					("CASCADE"),		
+		.B_INPUT					("DIRECT"),
+
+		.USE_SIMD				("ONE48"),		
+		.USE_DPORT				("FALSE"),
+		.USE_MULT				("MULTIPLY"),
+		.USE_PATTERN_DETECT	("NO_PATDET"),
+		
+		.SEL_MASK				("MASK"),
+		.SEL_PATTERN			("PATTERN"),
+
+		.MASK						(48'h000000000000),
+		.PATTERN					(48'h000000000000),
+		
+		.AUTORESET_PATDET		("NO_RESET")
+	)
+	DSP48E1_inst4
+	(
+		.CLK						(clk),
+		
+		.RSTA						(1'b0),
+		.RSTB						(1'b0),
+		.RSTC						(1'b0),
+		.RSTD						(1'b0),
+		.RSTM						(1'b0),
+		.RSTP						(1'b0),
+
+		.RSTCTRL					(1'b0),
+		.RSTINMODE				(1'b0),
+		.RSTALUMODE				(1'b0),
+		.RSTALLCARRYIN			(1'b0),
+	
+		.CEA1						(1'b0),
+		.CEA2						(1'b0),
+		.CEB1						(1'b0),
+		.CEB2						(1'b1),
+		.CEC						(1'b0),
+		.CED						(1'b0),
+		.CEM						(1'b0),
+		.CEP						(1'b1),
+		.CEAD						(1'b0),	
+		
+		.CECTRL					(1'b0),
+		.CEINMODE				(1'b0),			
+		.CEALUMODE				(1'b0),
+		.CECARRYIN				(1'b0),
+		
+		.CARRYINSEL				(3'b000),
+		.ALUMODE					(4'b0000),
+		.INMODE					(5'b00000),
+		.OPMODE					(7'b1010101),	// NOTE(review): per Xilinx UG479 this selects P = M + (PCIN >> 17) - confirm
+		
+		.A							({30{1'b0}}),
+		.B							({{3{1'b0}}, b_hi}),
+		.C							({48{1'b0}}),
+		.D							({25{1'b0}}),
+		.P							(p_dsp4),
+		
+		.CARRYIN					(1'b0),
+		.CARRYOUT				(),
+		
+		.CARRYCASCIN			(1'b0),
+		.CARRYCASCOUT			(),
+		
+		.ACIN						(a_hi_chain),
+		.BCIN						({18{1'b0}}),
+
+		.ACOUT					(),
+		.BCOUT					(),		
+		
+		.PCIN						(p_dsp3_chain),
+		.PCOUT					(),
+
+		.MULTSIGNIN				(1'b0),
+		.MULTSIGNOUT			(),
+		
+		.PATTERNDETECT			(),
+		.PATTERNBDETECT		(),
+
+		.UNDERFLOW				(),
+		.OVERFLOW				()
+	);
+	
+
+endmodule
diff --git a/rtl/dsp_subtractor_a7.v b/rtl/dsp_subtractor_a7.v
new file mode 100644
index 0000000..00c2f95
--- /dev/null
+++ b/rtl/dsp_subtractor_a7.v
@@ -0,0 +1,142 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module dsp_subtractor_a7
+	(
+	input		[31: 0]	a,
+	input		[31: 0]	b,
+	output	[31: 0]	s,
+	input					c_in,
+	output				c_out
+	);
+
+	wire	[47: 0]	p;				// full 48-bit result of the DSP48E1 slice below
+	assign s = p[31:0];			// low 32 bits form the result word
+	assign c_out = p[32];		// bit 32 of the 48-bit result is exported as c_out
+
+	DSP48E1 #
+	(
+		.AREG						(0),	// every pipeline register is disabled: the slice is combinational
+		.BREG						(0),
+		.CREG						(0),
+		.DREG						(0),
+		.MREG						(0),
+		.PREG						(0),
+		.ADREG					(0),
+		.ACASCREG				(0),
+		.BCASCREG				(0),
+		.ALUMODEREG				(0),
+		.INMODEREG				(0),
+		.OPMODEREG				(0),
+		.CARRYINREG				(0),
+		.CARRYINSELREG			(0),
+
+		.A_INPUT					("DIRECT"),
+		.B_INPUT					("DIRECT"),
+		.USE_DPORT				("FALSE"),
+		.USE_MULT				("NONE"),
+		.USE_SIMD				("ONE48"),
+
+		.USE_PATTERN_DETECT	("NO_PATDET"),
+		.SEL_PATTERN			("PATTERN"),
+		.SEL_MASK				("MASK"),
+		.PATTERN					(48'h000000000000),
+		.MASK						(48'h3fffffffffff),
+		.AUTORESET_PATDET		("NO_RESET")
+	)
+	DSP48E1_inst
+	(
+		.CLK					(1'b0),	// tied off: no register in this slice is enabled
+
+		.RSTA					(1'b0),
+		.RSTB					(1'b0),
+		.RSTC					(1'b0),
+		.RSTD					(1'b0),
+		.RSTM					(1'b0),
+		.RSTP					(1'b0),
+
+		.RSTCTRL				(1'b0),
+		.RSTINMODE			(1'b0),
+		.RSTALUMODE			(1'b0),
+		.RSTALLCARRYIN		(1'b0),
+
+		.CEA1					(1'b0),
+		.CEA2					(1'b0),
+		.CEB1					(1'b0),
+		.CEB2					(1'b0),
+		.CEC					(1'b0),
+		.CED					(1'b0),
+		.CEM					(1'b0),
+		.CEP					(1'b0),
+		.CEAD					(1'b0),
+		.CEALUMODE			(1'b0),
+		.CEINMODE			(1'b0),
+
+		.CECTRL				(1'b0),
+		.CECARRYIN			(1'b0),
+
+		.A						({{16{1'b0}}, b[31:18]}),	// upper 14 bits of b, zero-extended to 30 bits
+		.B						(b[17: 0]),						// lower 18 bits of b
+		.C						({{16{1'b0}}, a[31:0]}),	// a, zero-extended to 48 bits
+		.D						(25'd0),
+		.P						(p),
+
+		.CARRYIN				(c_in),
+		.CARRYOUT			(),
+		.CARRYINSEL			(3'b000),
+
+		.CARRYCASCIN		(1'b0),
+		.CARRYCASCOUT		(),
+
+		.PATTERNDETECT		(),
+		.PATTERNBDETECT	(),
+
+		.OPMODE				(7'b0110011),	// NOTE(review): per Xilinx UG479, X = A:B, Y = 0, Z = C - confirm
+		.ALUMODE				(4'b0011),		// NOTE(review): per Xilinx UG479, P = Z - (X + Y + CIN), i.e. a - b - c_in - confirm
+		.INMODE				(5'b00000),
+
+		.MULTSIGNIN			(1'b0),
+		.MULTSIGNOUT		(),
+
+		.UNDERFLOW			(),
+		.OVERFLOW			(),
+
+		.ACIN					(30'd0),
+		.BCIN					(18'd0),
+		.PCIN					(48'd0),
+
+		.ACOUT				(),
+		.BCOUT				(),
+		.PCOUT				()
+  );
+ 
+endmodule
diff --git a/rtl/modexpa7_adder64_carry32.v b/rtl/modexpa7_adder64_carry32.v
new file mode 100644
index 0000000..093f660
--- /dev/null
+++ b/rtl/modexpa7_adder64_carry32.v
@@ -0,0 +1,81 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexpa7_adder64_carry32
+  (
+   input wire          clk,
+   input wire [31: 0]  t,
+   input wire [31: 0]  x,
+   input wire [31: 0]  y,
+   output wire [31: 0] s,
+   input wire [31: 0]  c_in,
+   output wire [31: 0] c_out
+   );
+   // Multiply-accumulate step: {c_out, s} = x * y + c_in + t.
+   // The sum always fits in 64 bits: (2^32-1)^2 + 2*(2^32-1) = 2^64-1.
+   //
+   // Multiplier (product comes from the clocked dsp_multiplier_a7 instance)
+   //
+   wire [63: 0]        multiplier_out;
+
+   dsp_multiplier_a7 dsp_multiplier
+     (
+      .clk             (clk),
+      .a               (x),
+      .b               (y),
+      .p               (multiplier_out)
+      );
+
+
+   //
+   // Carry and T (zero-extended to the 64-bit width of the product)
+   //
+   wire [63: 0]        t_ext = {{32{1'b0}}, t};
+   wire [63: 0]        c_ext = {{32{1'b0}}, c_in};
+
+
+   //
+   // Sum (uses the explicitly extended operands; t and c_in are added combinationally)
+   //
+   wire [63: 0]        sum = multiplier_out + c_ext + t_ext;
+
+
+   //
+   // Output (low word is the result, high word is the carry for the next step)
+   //
+   assign s = sum[31: 0];
+   assign c_out = sum[63:32];
+
+
+endmodule
diff --git a/rtl/modexpa7_buffer_core.v b/rtl/modexpa7_buffer_core.v
new file mode 100644
index 0000000..a48686e
--- /dev/null
+++ b/rtl/modexpa7_buffer_core.v
@@ -0,0 +1,218 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+// Internal working memories of the core. Six instances of
+// ram_1rw_1ro_readfirst, all 32 bits wide and addressed with
+// OPERAND_ADDR_WIDTH+1 bits. Module ports named rw_* connect to the "a"
+// side of a memory (address, write strobe, data in, data out), ports
+// named ro_* connect to its "b" side (address, data out):
+//
+//   coeff - Montgomery coefficient
+//   mm    - powers of message
+//   nn    - extended modulus (the "a" side data output is left open)
+//   y     - output (no ro_* ports; the "b" side is tied off)
+//   r     - result of multiplication
+//   t     - temporary buffer
+module modexpa7_buffer_core
+  #(parameter OPERAND_ADDR_WIDTH      =  5)     // 1024 / 32 = 32 -> 5 bits
+   (
+    input wire                        clk,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_coeff_bram_addr,
+    input wire                        rw_coeff_bram_wr,
+    input wire [31:0]                 rw_coeff_bram_in,
+    output wire [31:0]                rw_coeff_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_mm_bram_addr,
+    input wire                        rw_mm_bram_wr,
+    input wire [31:0]                 rw_mm_bram_in,
+    output wire [31:0]                rw_mm_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_nn_bram_addr,
+    input wire                        rw_nn_bram_wr,
+    input wire [31:0]                 rw_nn_bram_in,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_y_bram_addr,
+    input wire                        rw_y_bram_wr,
+    input wire [31:0]                 rw_y_bram_in,
+    output wire [31:0]                rw_y_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_r_bram_addr,
+    input wire                        rw_r_bram_wr,
+    input wire [31:0]                 rw_r_bram_in,
+    output wire [31:0]                rw_r_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_t_bram_addr,
+    input wire                        rw_t_bram_wr,
+    input wire [31:0]                 rw_t_bram_in,
+    output wire [31:0]                rw_t_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_coeff_bram_addr,
+    output wire [31:0]                ro_coeff_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_mm_bram_addr,
+    output wire [31:0]                ro_mm_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_nn_bram_addr,
+    output wire [31:0]                ro_nn_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_r_bram_addr,
+    output wire [31:0]                ro_r_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_t_bram_addr,
+    output wire [31:0]                ro_t_bram_out
+    );
+
+
+   //
+   // Geometry shared by all six memories
+   //
+   localparam WORD_WIDTH     = 32;
+   localparam BRAM_ADDR_BITS = OPERAND_ADDR_WIDTH + 1;
+
+   //
+   // mem_coeff: Montgomery coefficient
+   //
+   ram_1rw_1ro_readfirst
+     #(.MEM_WIDTH     (WORD_WIDTH),
+       .MEM_ADDR_BITS (BRAM_ADDR_BITS))
+   mem_coeff
+     (.clk    (clk),
+      // read-only side
+      .b_addr (ro_coeff_bram_addr),
+      .b_out  (ro_coeff_bram_out),
+
+      // read/write side
+      .a_addr (rw_coeff_bram_addr),
+      .a_wr   (rw_coeff_bram_wr),
+      .a_in   (rw_coeff_bram_in),
+      .a_out  (rw_coeff_bram_out));
+
+
+   //
+   // mem_mm: powers of message
+   //
+   ram_1rw_1ro_readfirst
+     #(.MEM_WIDTH     (WORD_WIDTH),
+       .MEM_ADDR_BITS (BRAM_ADDR_BITS))
+   mem_mm
+     (.clk    (clk),
+      // read-only side
+      .b_addr (ro_mm_bram_addr),
+      .b_out  (ro_mm_bram_out),
+
+      // read/write side
+      .a_addr (rw_mm_bram_addr),
+      .a_wr   (rw_mm_bram_wr),
+      .a_in   (rw_mm_bram_in),
+      .a_out  (rw_mm_bram_out));
+
+
+   //
+   // mem_nn: extended modulus
+   //
+   ram_1rw_1ro_readfirst
+     #(.MEM_WIDTH     (WORD_WIDTH),
+       .MEM_ADDR_BITS (BRAM_ADDR_BITS))
+   mem_nn
+     (.clk    (clk),
+      // read-only side
+      .b_addr (ro_nn_bram_addr),
+      .b_out  (ro_nn_bram_out),
+
+      // read/write side (data output not used by this module)
+      .a_addr (rw_nn_bram_addr),
+      .a_wr   (rw_nn_bram_wr),
+      .a_in   (rw_nn_bram_in),
+      .a_out  ());
+
+
+   //
+   // mem_y: output
+   //
+   ram_1rw_1ro_readfirst
+     #(.MEM_WIDTH     (WORD_WIDTH),
+       .MEM_ADDR_BITS (BRAM_ADDR_BITS))
+   mem_y
+     (.clk    (clk),
+      // read-only side (unused: address held at zero, output open)
+      .b_addr ({BRAM_ADDR_BITS{1'b0}}),
+      .b_out  (),
+
+      // read/write side
+      .a_addr (rw_y_bram_addr),
+      .a_wr   (rw_y_bram_wr),
+      .a_in   (rw_y_bram_in),
+      .a_out  (rw_y_bram_out));
+
+
+   //
+   // mem_r: result of multiplication
+   //
+   ram_1rw_1ro_readfirst
+     #(.MEM_WIDTH     (WORD_WIDTH),
+       .MEM_ADDR_BITS (BRAM_ADDR_BITS))
+   mem_r
+     (.clk    (clk),
+      // read-only side
+      .b_addr (ro_r_bram_addr),
+      .b_out  (ro_r_bram_out),
+
+      // read/write side
+      .a_addr (rw_r_bram_addr),
+      .a_wr   (rw_r_bram_wr),
+      .a_in   (rw_r_bram_in),
+      .a_out  (rw_r_bram_out));
+
+
+   //
+   // mem_t: temporary buffer
+   //
+   ram_1rw_1ro_readfirst
+     #(.MEM_WIDTH     (WORD_WIDTH),
+       .MEM_ADDR_BITS (BRAM_ADDR_BITS))
+   mem_t
+     (.clk    (clk),
+      // read-only side
+      .b_addr (ro_t_bram_addr),
+      .b_out  (ro_t_bram_out),
+
+      // read/write side
+      .a_addr (rw_t_bram_addr),
+      .a_wr   (rw_t_bram_wr),
+      .a_in   (rw_t_bram_in),
+      .a_out  (rw_t_bram_out));
+
+
+endmodule
diff --git a/rtl/modexpa7_buffer_user.v b/rtl/modexpa7_buffer_user.v
new file mode 100644
index 0000000..abb772b
--- /dev/null
+++ b/rtl/modexpa7_buffer_user.v
@@ -0,0 +1,197 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+// Operand memories visible on the system bus. Four instances of
+// ram_1rw_1ro_readfirst (modulus, message, exponent, result), each 32 bits
+// wide and addressed with OPERAND_ADDR_WIDTH bits. The two upper bits of
+// bus_addr pick the bank, the remaining bits pick the word. The bus side
+// writes the three operand banks and can read back all four; the core
+// side reads the operands and writes the result. bus_data_rd is taken
+// from the bank that was addressed on the previous clock edge.
+module modexpa7_buffer_user
+  #(parameter OPERAND_ADDR_WIDTH        =  5)     // 1024 / 32 = 32 -> 5 bits
+   (
+    input wire                          clk,
+
+    input wire                          bus_cs,
+    input wire                          bus_we,
+    input wire [OPERAND_ADDR_WIDTH+1:0] bus_addr,   // ADDR_WIDTH_TOTAL bits: {bank, word}
+    input wire [31:0]                   bus_data_wr,
+    output wire [31:0]                  bus_data_rd,
+
+    input wire [OPERAND_ADDR_WIDTH-1:0] ro_modulus_bram_addr,
+    output wire [31:0]                  ro_modulus_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH-1:0] ro_message_bram_addr,
+    output wire [31:0]                  ro_message_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH-1:0] ro_exponent_bram_addr,
+    output wire [31:0]                  ro_exponent_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH-1:0] rw_result_bram_addr,
+    input wire                          rw_result_bram_wr,
+    input wire [31:0]                   rw_result_bram_in
+    );
+
+   //
+   // Locals (the port list spells the bus address width out because a
+   // localparam may not be referenced before it is declared)
+   localparam   ADDR_WIDTH_TOTAL                = OPERAND_ADDR_WIDTH + 2;
+
+   localparam   [1: 0]  BUS_ADDR_BANK_MODULUS   = 2'b00;
+   localparam   [1: 0]  BUS_ADDR_BANK_MESSAGE   = 2'b01;
+   localparam   [1: 0]  BUS_ADDR_BANK_EXPONENT  = 2'b10;
+   localparam   [1: 0]  BUS_ADDR_BANK_RESULT    = 2'b11;
+
+   //
+   // Address Decoder
+   //
+   wire [OPERAND_ADDR_WIDTH-1:0]        bus_addr_operand_word = bus_addr[OPERAND_ADDR_WIDTH-1:0];
+   wire [                  1:0]         bus_addr_operand_bank = bus_addr[ADDR_WIDTH_TOTAL-1:ADDR_WIDTH_TOTAL-2];
+
+   //
+   // Modulus Memory ("a" side: bus read/write, "b" side: core read)
+   //
+   wire [31: 0]                         bus_data_rd_modulus;
+
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH)
+      )
+   mem_modulus
+     (
+      .clk              (clk),
+
+      .a_addr           (bus_addr_operand_word),
+      .a_wr             (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MODULUS)),
+      .a_in             (bus_data_wr),
+      .a_out            (bus_data_rd_modulus),
+
+      .b_addr           (ro_modulus_bram_addr),
+      .b_out            (ro_modulus_bram_out)
+      );
+
+   //
+   // Message Memory ("a" side: bus read/write, "b" side: core read)
+   //
+   wire [31: 0]         bus_data_rd_message;
+
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH)
+      )
+   mem_message
+     (
+      .clk              (clk),
+
+      .a_addr           (bus_addr_operand_word),
+      .a_wr             (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MESSAGE)),
+      .a_in             (bus_data_wr),
+      .a_out            (bus_data_rd_message),
+
+      .b_addr           (ro_message_bram_addr),
+      .b_out            (ro_message_bram_out)
+      );
+
+   //
+   // Exponent Memory ("a" side: bus read/write, "b" side: core read)
+   //
+   wire [31: 0]         bus_data_rd_exponent;
+
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH)
+      )
+   mem_exponent
+     (
+      .clk              (clk),
+
+      .a_addr           (bus_addr_operand_word),
+      .a_wr             (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_EXPONENT)),
+      .a_in             (bus_data_wr),
+      .a_out            (bus_data_rd_exponent),
+
+      .b_addr           (ro_exponent_bram_addr),
+      .b_out            (ro_exponent_bram_out)
+      );
+
+   //
+   // Result Memory ("a" side: core write, "b" side: bus read only)
+   //
+   wire [31: 0]         bus_data_rd_result;
+
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH)
+      )
+   mem_result
+     (
+      .clk              (clk),
+
+      .a_addr           (rw_result_bram_addr),
+      .a_wr             (rw_result_bram_wr),
+      .a_in             (rw_result_bram_in),
+      .a_out            (),
+
+      .b_addr           (bus_addr_operand_word),
+      .b_out            (bus_data_rd_result)
+      );
+
+   //
+   // Output Selector
+   //
+   // Bank select delayed by one clock (non-blocking, so the mux below
+   // never races the register update). Presumably this matches the read
+   // latency of ram_1rw_1ro_readfirst - TODO confirm.
+   reg [1: 0]           bus_addr_operand_bank_prev;
+   always @(posedge clk) bus_addr_operand_bank_prev <= bus_addr_operand_bank;
+
+   reg [31: 0]          bus_data_rd_mux;
+   assign bus_data_rd = bus_data_rd_mux;
+
+   always @(*)
+     case (bus_addr_operand_bank_prev)
+       BUS_ADDR_BANK_MODULUS:   bus_data_rd_mux = bus_data_rd_modulus;
+       BUS_ADDR_BANK_MESSAGE:   bus_data_rd_mux = bus_data_rd_message;
+       BUS_ADDR_BANK_EXPONENT:  bus_data_rd_mux = bus_data_rd_exponent;
+       BUS_ADDR_BANK_RESULT:    bus_data_rd_mux = bus_data_rd_result;
+       // all four 2-bit encodings are listed above; only an X/Z select lands here
+       default:                 bus_data_rd_mux = {32{1'bX}};
+     endcase
+
+endmodule
diff --git a/rtl/modexpa7_modinv32.v b/rtl/modexpa7_modinv32.v
new file mode 100644
index 0000000..cb47746
--- /dev/null
+++ b/rtl/modexpa7_modinv32.v
@@ -0,0 +1,141 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+// Bit-serial search for n0_modinv: in step k (1..32) bit k-1 of the result
+// is set whenever the low k bits of the multiplier output (fed with n0 and
+// the current result) differ from 1. For odd n0 this gives
+// n0 * n0_modinv = 1 (mod 2^32), assuming the product is valid within the
+// four clocks between updates (dsp_multiplier_a7 latency - TODO confirm).
+// A rising edge on ena starts a run while rdy is low; rdy goes high when
+// the run ends and low again once ena has been released.
+module modexpa7_modinv32
+  (
+   input wire          clk,
+
+   input wire          ena,
+   output wire         rdy,
+
+   input wire [31: 0]  n0,
+   output wire [31: 0] n0_modinv
+   );
+
+   //
+   // Trigger (high during the first clock in which ena is seen high)
+   //
+   reg                 ena_dly = 1'b0;
+   wire                ena_trig = ena && !ena_dly;
+   always @(posedge clk) ena_dly <= ena;
+
+   //
+   // Ready Register
+   //
+   reg                 rdy_reg = 1'b0;
+   assign rdy = rdy_reg;
+
+   //
+   // Counter (0 = idle; bits [1:0] = phase within a step, bits [7:2] = step)
+   //
+   reg [7: 0]          cnt = 8'd0;
+   wire [7: 0]         cnt_zero = 8'd0;
+   wire [7: 0]         cnt_last = 8'd132;
+   wire [7: 0]         cnt_next = cnt + 1'b1;
+   wire [1: 0]         cnt_phase = cnt[1:0];
+   wire [5: 0]         cnt_cycle = cnt[7:2];
+
+   always @(posedge clk)
+     // leave idle only on a fresh trigger while no result is pending
+     if (cnt == cnt_zero) cnt <= (!rdy_reg && ena_trig) ? cnt_next : cnt_zero;
+     else cnt <= (cnt == cnt_last) ? cnt_zero : cnt_next;
+
+   //
+   // Enable / Ready Logic
+   //
+   always @(posedge clk)
+     // set at the end of a run, cleared in idle after ena has dropped
+     if (cnt == cnt_last) rdy_reg <= 1'b1;
+     else if ((cnt == cnt_zero) && (rdy_reg && !ena)) rdy_reg <= 1'b0;
+
+   //
+   // Output, Mask and Power Registers
+   // (declared here so that every use below follows its declaration)
+   //
+   reg [31: 0]         n0_modinv_reg;
+   reg [30: 0]         mask_reg;
+   reg [31: 0]         power_reg;
+   assign n0_modinv = n0_modinv_reg;
+
+   //
+   // Multiplier (only the low bits selected by {mask_reg, 1'b1} are examined)
+   //
+   (* KEEP="TRUE" *)
+   wire [63: 0]        multiplier_out;
+   wire [31: 0]        multiplier_out_masked = multiplier_out[31: 0] & {mask_reg, 1'b1};
+
+   dsp_multiplier_a7 dsp_multiplier
+     (
+      .clk              (clk),
+      .a                (n0),
+      .b                (n0_modinv_reg),
+      .p                (multiplier_out)
+      );
+
+   //
+   // Mask and Power Update (once per step, in phase 1)
+   //
+   always @(posedge clk)
+     //
+     if (cnt_phase == 2'd1) begin
+        //
+        if (cnt_cycle == 6'd0) begin
+           // step 0: restart with an empty mask and bit weight 1
+           mask_reg <= 31'd0;
+           power_reg <= 32'd1;
+           //
+           n0_modinv_reg <= 32'd0;
+           //
+        end else begin
+           // steps 1..32: widen the mask by one bit, double the bit weight
+           mask_reg <= { mask_reg[29:0], 1'b1};
+           power_reg <= {power_reg[30:0], 1'b0};
+           // masked product is not 1 yet -> set the current result bit
+           if (multiplier_out_masked != 32'd1)
+             //
+             n0_modinv_reg <= n0_modinv_reg + power_reg;
+           //
+        end
+        //
+     end
+
+
+endmodule
diff --git a/rtl/modexpa7_montgomery_coeff.v b/rtl/modexpa7_montgomery_coeff.v
new file mode 100644
index 0000000..009fd99
--- /dev/null
+++ b/rtl/modexpa7_montgomery_coeff.v
@@ -0,0 +1,425 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+// Montgomery coefficient calculator. Starting from the value 1, the FSM
+// repeats round_count_last+1 rounds of "shift the coefficient left by one
+// bit, then subtract the modulus if the coefficient is not smaller"; the
+// working value lives in the coeff BRAM (one word more than the modulus).
+// During INIT the modulus is also copied into the NN BRAM (top word zero)
+// and a modular-inversion unit is started through modinv_ena / modinv_n0.
+// rdy is raised in FINAL once modinv_rdy is seen and is dropped again in
+// IDLE after ena has been released.
+module modexpa7_montgomery_coeff
+  #(parameter MODULUS_NUM_BITS          = 11,           // 1024 -> 11 bits
+    parameter OPERAND_ADDR_WIDTH        =  5)           // 1024 / 32 = 32 -> 5 bits
+   (
+    input wire                           clk,
+
+    input wire                           ena,
+    output wire                          rdy,
+
+    input wire [MODULUS_NUM_BITS-1:0]    modulus_width,
+
+    output wire [OPERAND_ADDR_WIDTH :0]  coeff_bram_addr,
+    output wire                          coeff_bram_wr,
+    output wire [31:0]                   coeff_bram_in,
+    input wire [31:0]                    coeff_bram_out,
+
+    output wire [OPERAND_ADDR_WIDTH :0]  nn_bram_addr,
+    output wire                          nn_bram_wr,
+    output wire [31:0]                   nn_bram_in,
+
+    output wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr,
+    input wire [31:0]                    modulus_bram_out,
+
+    output wire [31:0]                   modinv_n0,
+    output wire                          modinv_ena,
+    input wire                           modinv_rdy
+    );
+
+   //
+   // Locals (all-zero constants of the matching widths)
+   //
+   localparam   [  MODULUS_NUM_BITS  :0] round_count_zero       = {1'b0, {MODULUS_NUM_BITS{1'b0}}};
+   localparam   [OPERAND_ADDR_WIDTH  :0] coeff_bram_addr_zero   = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+   localparam   [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}};
+
+
+   //
+   // FSM (state encodings; the *_READ states are followed by the state
+   // that consumes the BRAM output)
+   localparam FSM_STATE_IDLE            = 6'd0;
+
+   localparam FSM_STATE_INIT            = 6'd10;
+
+   localparam FSM_STATE_SHIFT_READ      = 6'd21;
+   localparam FSM_STATE_SHIFT_WRITE     = 6'd22;
+
+   localparam FSM_STATE_COMPARE_READ    = 6'd31;
+   localparam FSM_STATE_COMPARE_COMPARE = 6'd32;
+
+   localparam FSM_STATE_SUBTRACT_READ   = 6'd41;
+   localparam FSM_STATE_SUBTRACT_WRITE  = 6'd42;
+
+   localparam FSM_STATE_ROUND           = 6'd50;
+
+   localparam FSM_STATE_FINAL           = 6'd60;
+
+   reg [5: 0]                            fsm_state = FSM_STATE_IDLE;
+
+   //
+   // Trigger (high during the first clock in which ena is seen high)
+   //
+   reg                                   ena_dly = 1'b0;
+
+   wire                                  ena_trig = ena && !ena_dly;
+
+   always @(posedge clk) ena_dly <= ena;
+
+
+   //
+   // Ready Register
+   //
+   reg                                   rdy_reg = 1'b0;
+
+   assign rdy = rdy_reg;
+
+   //
+   // ModInv Control (enable and operand driven towards the inversion unit)
+   //
+   reg                                   modinv_ena_reg = 1'b0;
+   reg [31: 0]                           modinv_n0_reg;
+
+   assign modinv_ena = modinv_ena_reg;
+   assign modinv_n0 = modinv_n0_reg;
+
+
+   //
+   // Enable / Ready Logic
+   //
+   always @(posedge clk)
+     //
+     if (fsm_state == FSM_STATE_FINAL) begin
+        // finished only when the inversion unit reports ready as well
+        if (modinv_rdy) rdy_reg <= 1'b1;
+        //
+     end else if (fsm_state == FSM_STATE_IDLE) begin
+        // acknowledge: rdy falls once the caller has released ena
+        if (rdy_reg && !ena) rdy_reg <= 1'b0;
+        //
+     end
+
+
+   //
+   // Flags (bit carried between words while shifting / subtracting)
+   //
+   reg  reg_shift_carry = 1'b0;
+   reg  reg_subtractor_borrow = 1'b0;
+
+   //
+   // Round Counter (last value is 2 * modulus_width + 63, truncated to
+   // the counter width; one round is counted per pass through ROUND)
+   reg [MODULUS_NUM_BITS:0] round_count         = round_count_zero;
+   wire [MODULUS_NUM_BITS:0] round_count_last   = {modulus_width, 1'b0} + 6'd63;
+   wire [MODULUS_NUM_BITS:0] round_count_next   = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;
+
+
+   //
+   // Modulus BRAM Interface
+   //
+   reg [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_reg = modulus_bram_addr_zero;
+
+   assign modulus_bram_addr = modulus_bram_addr_reg;
+
+   //
+   // Coeff BRAM Interface
+   //
+   reg [OPERAND_ADDR_WIDTH:0]   coeff_bram_addr_reg     = coeff_bram_addr_zero;
+   reg                          coeff_bram_wr_reg               = 1'b0;
+
+   assign coeff_bram_addr = coeff_bram_addr_reg;
+   assign coeff_bram_wr = coeff_bram_wr_reg;
+
+
+   //
+   // NN BRAM Interface (only written, and only during INIT)
+   //
+   reg [OPERAND_ADDR_WIDTH:0]   nn_bram_addr_reg        = coeff_bram_addr_zero;
+   reg                          nn_bram_wr_reg          = 1'b0;
+
+   assign nn_bram_addr = nn_bram_addr_reg;
+   assign nn_bram_wr = nn_bram_wr_reg;
+
+   //
+   // Hardware Subtractor (coefficient word minus modulus word; the borrow
+   // input is forced to zero while comparing, chained while subtracting)
+   wire [31: 0]                 subtractor_out;
+   wire                         subtractor_out_nonzero = |subtractor_out;
+   wire                         subtractor_borrow_out;
+   wire                         subtractor_borrow_in;
+
+   assign subtractor_borrow_in = (fsm_state == FSM_STATE_COMPARE_COMPARE) ? 1'b0 : reg_subtractor_borrow;
+
+   dsp_subtractor_a7 dsp_subtractor
+     (
+      .a                (coeff_bram_out),
+      .b                (modulus_bram_out),
+      .s                (subtractor_out),
+      .c_in             (subtractor_borrow_in),
+      .c_out            (subtractor_borrow_out)
+      );
+
+
+   //
+   // Handy Wires (coeff_bram_addr_last is the extra top word of the
+   // coefficient; the modulus ends one word below it)
+   wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb                                              = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH];
+
+   wire [OPERAND_ADDR_WIDTH  :0] coeff_bram_addr_last           = {modulus_width_msb, 1'b0};
+   wire [OPERAND_ADDR_WIDTH  :0] coeff_bram_addr_next_or_zero   = (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_zero;
+   wire [OPERAND_ADDR_WIDTH  :0] coeff_bram_addr_next_or_last   = (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_last;
+   wire [OPERAND_ADDR_WIDTH  :0] coeff_bram_addr_prev_or_zero   = (coeff_bram_addr_reg > coeff_bram_addr_zero) ? coeff_bram_addr_reg - 1'b1 : coeff_bram_addr_zero;
+
+   wire [OPERAND_ADDR_WIDTH  :0] modulus_bram_addr_last_ext     = coeff_bram_addr_last - 1'b1;
+
+   wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_last         = modulus_bram_addr_last_ext[OPERAND_ADDR_WIDTH-1:0];
+   wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_next_or_zero = (modulus_bram_addr_reg < modulus_bram_addr_last) ? modulus_bram_addr_reg + 1'b1 : modulus_bram_addr_zero;
+   wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_prev_or_zero = (modulus_bram_addr_reg > modulus_bram_addr_zero) ? modulus_bram_addr_reg - 1'b1 : modulus_bram_addr_zero;
+
+   //
+   // Coeff BRAM Input Logic (write data depends on the current state)
+   //
+   reg [31: 0]                   coeff_bram_in_mux;
+
+   assign coeff_bram_in = coeff_bram_in_mux;
+
+   always @(*)
+     //
+     case (fsm_state)
+
+       FSM_STATE_INIT:
+         // initial coefficient is 1: word 0 gets 1, all other words 0
+         if (coeff_bram_addr_reg == coeff_bram_addr_zero) coeff_bram_in_mux = 32'h00000001;
+         else coeff_bram_in_mux = 32'h00000000;
+
+       FSM_STATE_SHIFT_WRITE:
+         // one-bit left shift, LSB filled with the carry of the word below
+         coeff_bram_in_mux = {coeff_bram_out[30:0], reg_shift_carry};
+
+       FSM_STATE_SUBTRACT_WRITE:
+         // the extra top word is written as zero after a subtraction
+         if (coeff_bram_addr_reg == coeff_bram_addr_last) coeff_bram_in_mux = 32'h00000000;
+         else coeff_bram_in_mux = subtractor_out;
+
+       default:
+         // no write takes place in the remaining states
+         coeff_bram_in_mux = {32{1'bX}};
+
+     endcase
+
+
+   //
+   // NN BRAM Input Logic
+   //
+   reg [31: 0]                   nn_bram_in_mux;
+
+   assign nn_bram_in = nn_bram_in_mux;
+
+   always @(*)
+     //
+     case (fsm_state)
+
+       FSM_STATE_INIT:
+         // copy of the modulus, with a zero in the extra top word
+         if (coeff_bram_addr_reg == coeff_bram_addr_last) nn_bram_in_mux = {32{1'b0}};
+         else nn_bram_in_mux = modulus_bram_out;
+
+         default:
+           //
+           nn_bram_in_mux       = {32{1'bX}};
+
+      endcase
+
+
+   //
+   // Comparison Functions (evaluated word by word from the top word down;
+   // neither flag set means "equal so far, look at the next lower word")
+   reg                           compare_greater_or_equal;
+   reg                           compare_less_than;
+
+   wire                          compare_done = compare_greater_or_equal | compare_less_than;
+
+   always @(*)
+     // top word: only bit 0 is examined, a set bit decides "greater"
+     if (coeff_bram_addr_reg == coeff_bram_addr_last) compare_greater_or_equal = coeff_bram_out[0];
+   // lowest word: no borrow means greater or equal
+     else if (coeff_bram_addr_reg == coeff_bram_addr_zero) compare_greater_or_equal = !subtractor_borrow_out;
+   // words in between: decided only when strictly greater
+     else compare_greater_or_equal = !subtractor_borrow_out && subtractor_out_nonzero;
+
+   always @(*)
+     // the top word can never decide "less"
+     if (coeff_bram_addr_reg == coeff_bram_addr_last) compare_less_than = 1'b0;
+   // any other word: a borrow means the coefficient word is smaller
+     else compare_less_than = subtractor_borrow_out;
+
+
+   //
+   // Main Logic (address, write strobe and flag registers per state)
+   //
+   always @(posedge clk)
+     //
+     case (fsm_state)
+
+       FSM_STATE_INIT: begin
+          // write strobes stay active until the last coeff word is reached
+          coeff_bram_wr_reg <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0;
+          coeff_bram_addr_reg <= coeff_bram_wr_reg ? coeff_bram_addr_next_or_zero : coeff_bram_addr_zero;
+          // NN BRAM is written in lock-step with the coeff BRAM
+          nn_bram_wr_reg <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0;
+          nn_bram_addr_reg <= coeff_bram_wr_reg ? coeff_bram_addr_next_or_zero : coeff_bram_addr_zero;
+          // first INIT cycle (write strobe still low): start the inversion
+          if (!coeff_bram_wr_reg) begin
+             // presumably modulus_bram_out holds modulus word 0 here - TODO confirm
+             modinv_ena_reg <= 1'b1;
+             modinv_n0_reg <= modulus_bram_out;
+             //
+          end
+          // modulus read address starts advancing in the first INIT cycle
+          if (modulus_bram_addr_reg == modulus_bram_addr_zero) begin
+             //
+             if (!coeff_bram_wr_reg)
+               //
+               modulus_bram_addr_reg <= modulus_bram_addr_next_or_zero;
+             //
+          end else begin
+             //
+             modulus_bram_addr_reg <= modulus_bram_addr_next_or_zero;
+             //
+          end
+          //
+       end
+
+       FSM_STATE_SHIFT_READ: begin
+          // write strobe is active during the following SHIFT_WRITE state
+          coeff_bram_wr_reg <= 1'b1;
+          // no carry into the lowest word
+          if (coeff_bram_addr_reg == coeff_bram_addr_zero)
+            //
+            reg_shift_carry <= 1'b0;
+          //
+       end
+
+       FSM_STATE_SHIFT_WRITE: begin
+          // address stops at the top word, where the comparison starts
+          coeff_bram_wr_reg <= 1'b0;
+          coeff_bram_addr_reg <= coeff_bram_addr_next_or_last;
+          // MSB of this word becomes the LSB of the next one
+          reg_shift_carry <= coeff_bram_out[31];
+          //
+       end
+
+       FSM_STATE_COMPARE_COMPARE: begin
+          // step down one word, or rewind to word 0 once decided
+          coeff_bram_addr_reg <= compare_done ? coeff_bram_addr_zero : coeff_bram_addr_prev_or_zero;
+          // after the coeff top word the modulus starts at its own top word
+          modulus_bram_addr_reg <= compare_done ? modulus_bram_addr_zero : ((coeff_bram_addr_reg == coeff_bram_addr_last) ? modulus_bram_addr_last : modulus_bram_addr_prev_or_zero);
+          //
+       end
+
+       FSM_STATE_SUBTRACT_READ: begin
+          // write strobe is active during the following SUBTRACT_WRITE state
+          coeff_bram_wr_reg <= 1'b1;
+          // no borrow into the lowest word
+          if (coeff_bram_addr_reg == coeff_bram_addr_zero)
+            //
+            reg_subtractor_borrow <= 1'b0;
+          //
+       end
+
+       FSM_STATE_SUBTRACT_WRITE: begin
+          // advance both addresses; both wrap to zero after the top word
+          coeff_bram_wr_reg <= 1'b0;
+          coeff_bram_addr_reg <= coeff_bram_addr_next_or_zero;
+          //
+          modulus_bram_addr_reg <= (coeff_bram_addr_reg == coeff_bram_addr_last) ? modulus_bram_addr_zero : modulus_bram_addr_next_or_zero;
+          // borrow is chained into the next word
+          reg_subtractor_borrow <= subtractor_borrow_out;
+          //
+       end
+
+       FSM_STATE_ROUND: begin
+          // count the round (wraps to zero after the last one)
+          round_count <= round_count_next;
+          //
+       end
+
+       FSM_STATE_FINAL: begin
+          // release the inversion unit once it has reported ready
+          if (modinv_rdy) modinv_ena_reg <= 1'b0;
+          //
+       end
+
+     endcase
+
+
+   //
+   // FSM Transition Logic
+   //
+   always @(posedge clk)
+     //
+     case (fsm_state)
+
+       FSM_STATE_IDLE:          fsm_state <= (!rdy_reg && !modinv_rdy && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE;
+
+       FSM_STATE_SHIFT_READ:    fsm_state <= FSM_STATE_SHIFT_WRITE;
+       FSM_STATE_COMPARE_READ:  fsm_state <= FSM_STATE_COMPARE_COMPARE;
+       FSM_STATE_SUBTRACT_READ: fsm_state <= FSM_STATE_SUBTRACT_WRITE;
+
+       FSM_STATE_INIT:          fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_INIT          : FSM_STATE_SHIFT_READ;
+       FSM_STATE_SHIFT_WRITE:   fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SHIFT_READ    : FSM_STATE_COMPARE_READ;
+       FSM_STATE_SUBTRACT_WRITE: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND;
+
+       FSM_STATE_ROUND:         fsm_state <= (round_count < round_count_last) ? FSM_STATE_SHIFT_READ : FSM_STATE_FINAL;
+
+       FSM_STATE_COMPARE_COMPARE: fsm_state <= compare_done ? (compare_greater_or_equal ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND) : FSM_STATE_COMPARE_READ;
+
+       FSM_STATE_FINAL:         fsm_state <= modinv_rdy ? FSM_STATE_IDLE : FSM_STATE_FINAL;
+
+       default:                 fsm_state <= FSM_STATE_IDLE;
+
+     endcase
+
+
+endmodule
diff --git a/rtl/modexpa7_montgomery_multiplier.v b/rtl/modexpa7_montgomery_multiplier.v
new file mode 100644
index 0000000..6637231
--- /dev/null
+++ b/rtl/modexpa7_montgomery_multiplier.v
@@ -0,0 +1,408 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexpa7_montgomery_multiplier
+  #(parameter OPERAND_NUM_BITS          = 11,           // 1024 -> 11 bits
+    parameter OPERAND_ADDR_WIDTH        =  5)           // 1024 / 32 = 32 -> 5 bits
+   (
+    input wire                          clk,
+
+    input wire                          ena,
+    output wire                         rdy,
+
+    input wire [OPERAND_NUM_BITS-1:0]   operand_width,
+
+    output wire [OPERAND_ADDR_WIDTH :0] x_bram_addr,
+    input wire [31:0]                   x_bram_out,
+
+    output wire [OPERAND_ADDR_WIDTH :0] y_bram_addr,
+    input wire [31:0]                   y_bram_out,
+
+    output wire [OPERAND_ADDR_WIDTH :0] n_bram_addr,
+    input wire [31:0]                   n_bram_out,
+
+    output wire [OPERAND_ADDR_WIDTH :0] z_bram_addr,
+    output wire                         z_bram_wr,
+    output wire [31:0]                  z_bram_in,
+    input wire [31:0]                   z_bram_out,
+
+    input wire [31:0]                   n0_modinv
+    );
+
+
+   //
+   // Locals
+   //
+   localparam   [OPERAND_ADDR_WIDTH:0]  round_count_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+   localparam   [OPERAND_ADDR_WIDTH:0]  bram_addr_zero   = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+
+
+   //
+   // FSM
+   //
+   localparam FSM_STATE_IDLE            = 6'd0;
+
+   localparam FSM_STATE_INIT            = 6'd10;
+
+   localparam FSM_STATE_MUL_XY_CALC     = 6'd21;
+   localparam FSM_STATE_MUL_XY_PIPELINE = 6'd22;
+   localparam FSM_STATE_MUL_XY_REGISTER = 6'd23;
+   localparam FSM_STATE_MUL_XY_WRITE    = 6'd24;
+
+   localparam FSM_STATE_MAGIC_CALC      = 6'd31;
+   localparam FSM_STATE_MAGIC_PIPELINE  = 6'd32;
+   localparam FSM_STATE_MAGIC_REGISTER  = 6'd33;
+
+   localparam FSM_STATE_MUL_MN_CALC     = 6'd41;
+   localparam FSM_STATE_MUL_MN_PIPELINE = 6'd42;
+   localparam FSM_STATE_MUL_MN_REGISTER = 6'd43;
+   localparam FSM_STATE_MUL_MN_WRITE    = 6'd44;
+
+   localparam FSM_STATE_SHIFT           = 6'd50;
+
+   localparam FSM_STATE_ROUND           = 6'd55;
+
+   localparam FSM_STATE_FINAL           = 6'd60;
+
+   reg [5: 0]                           fsm_state = FSM_STATE_IDLE;
+
+
+   //
+   // Trigger
+   //
+   reg                                  ena_dly = 1'b0;
+   always @(posedge clk) ena_dly <= ena;
+   wire                                 ena_trig = (ena == 1'b1) && (ena_dly == 1'b0);
+
+
+   //
+   // Ready Register
+   //
+   reg                                  rdy_reg = 1'b0;
+   assign rdy = rdy_reg;
+
+
+   //
+   // Enable / Ready Logic
+   //
+   always @(posedge clk)
+     // rdy handshake: rdy_reg is set while the FSM is in FINAL and cleared in IDLE once ena is low.
+     if (fsm_state == FSM_STATE_FINAL) begin
+        //
+        rdy_reg <= 1'b1;
+        //
+     end else if (fsm_state == FSM_STATE_IDLE) begin
+        // rdy_reg stays high for as long as ena is still asserted
+        if (rdy_reg && !ena) rdy_reg <= 1'b0;
+        //
+     end
+
+
+   //
+   // X, Y, N BRAM Interface
+   //
+   reg [OPERAND_ADDR_WIDTH:0]   x_bram_addr_reg = bram_addr_zero;
+   reg [OPERAND_ADDR_WIDTH:0]   y_bram_addr_reg = bram_addr_zero;
+   reg [OPERAND_ADDR_WIDTH:0]   n_bram_addr_reg = bram_addr_zero;
+
+   assign x_bram_addr = x_bram_addr_reg;
+   assign y_bram_addr = y_bram_addr_reg;
+   assign n_bram_addr = n_bram_addr_reg;
+
+
+   //
+   // Z BRAM Interface
+   //
+   reg [OPERAND_ADDR_WIDTH:0]   z_bram_addr_reg = bram_addr_zero;
+   reg                          z_bram_wr_reg           = 1'b0;
+   reg [                31:0]   z_bram_in_mux;
+
+   assign z_bram_addr = z_bram_addr_reg;
+   assign z_bram_wr = z_bram_wr_reg;
+   assign z_bram_in = z_bram_in_mux;
+
+
+   //
+   // Handy Wires
+   //
+   wire [OPERAND_ADDR_WIDTH-1:0] operand_width_msb = operand_width[OPERAND_NUM_BITS-1:OPERAND_NUM_BITS-OPERAND_ADDR_WIDTH];
+
+   wire [OPERAND_ADDR_WIDTH  :0] bram_addr_last = {operand_width_msb, 1'b1};    // +1
+
+
+   //
+   // Hardware Multiplier (X * Y)
+   //
+   reg [31: 0]                   multiplier_xy_carry_in;
+   wire [31: 0]                  multiplier_xy_out;
+   wire [31: 0]                  multiplier_xy_carry_out;
+
+   modexpa7_adder64_carry32 dsp_multiplier_xy
+     (
+      .clk      (clk),
+      .t        (/*(z_bram_addr_reg < bram_addr_last) ? */z_bram_out/* : {32{1'b0}}*/),
+      .x        (/*(z_bram_addr_reg < bram_addr_last) ? */x_bram_out/* : {32{1'b0}}*/),
+      .y        (/*(z_bram_addr_reg < bram_addr_last) ? */y_bram_out/* : {32{1'b0}}*/),
+      .s        (multiplier_xy_out),
+      .c_in     (multiplier_xy_carry_in),
+      .c_out    (multiplier_xy_carry_out)
+      );
+
+
+   //
+   // Hardware Multiplier (Magic)
+   //
+	(* KEEP="TRUE" *)
+   wire [63: 0]                  multiplier_magic_out;
+   reg [31: 0]                   magic_value_reg;
+
+   dsp_multiplier_a7 dsp_multiplier_magic
+     (
+      .clk      (clk),
+      .a        (z_bram_out),
+      .b        (n0_modinv),
+      .p        (multiplier_magic_out)
+      );
+
+
+   //
+   // Hardware Multiplier (M * N)
+   //
+   reg [31: 0]                   multiplier_mn_carry_in;
+   wire [31: 0]                  multiplier_mn_out;
+   wire [31: 0]                  multiplier_mn_carry_out;
+
+   modexpa7_adder64_carry32 dsp_multiplier_mn
+     (
+      .clk      (clk),
+      .t        (z_bram_out),
+      .x        (magic_value_reg),
+      .y        (/*(z_bram_addr_reg < bram_addr_last) ? */n_bram_out/* : {32{1'b0}}*/),
+      .s        (multiplier_mn_out),
+      .c_in     (multiplier_mn_carry_in),
+      .c_out    (multiplier_mn_carry_out)
+      );
+
+
+   //
+   // Z BRAM Input Selector
+   //
+   always @(*)
+     // Combinational mux selecting the word presented on z_bram_in for the current FSM state.
+     case (fsm_state)
+
+       FSM_STATE_INIT:
+         // INIT drives all-zero data
+         z_bram_in_mux  = {32{1'b0}};
+
+       FSM_STATE_MUL_XY_WRITE:
+         // below bram_addr_last: sum word from the X*Y unit; otherwise: the stored carry register
+         if (z_bram_addr_reg < bram_addr_last)  z_bram_in_mux   = multiplier_xy_out;
+         else                                                                                           z_bram_in_mux   = multiplier_xy_carry_in;
+
+       FSM_STATE_MUL_MN_WRITE:
+         // below bram_addr_last: sum word from the M*N unit; otherwise: stored carry plus the word read from Z
+         if (z_bram_addr_reg < bram_addr_last)  z_bram_in_mux   = multiplier_mn_out;
+         else                                                                                           z_bram_in_mux   = multiplier_mn_carry_in + z_bram_out;
+
+       FSM_STATE_SHIFT:
+         // feed the Z read data straight back to the Z write port
+         z_bram_in_mux  = z_bram_out;
+
+       default:
+         // all other states: value is X (don't-care)
+         z_bram_in_mux  = {32{1'bX}};
+
+     endcase
+
+
+   //
+   // Handy Functions
+   //
+   function     [OPERAND_ADDR_WIDTH:0]  bram_addr_next_or_zero; // increment; wraps to bram_addr_zero
+      input [OPERAND_ADDR_WIDTH:0] bram_addr;                   // current address
+      begin
+         bram_addr_next_or_zero = (bram_addr < bram_addr_last) ? bram_addr + 1'b1 : bram_addr_zero; // reads the module-level bram_addr_last wire
+      end
+   endfunction
+
+   function     [OPERAND_ADDR_WIDTH:0]  bram_addr_next_or_last; // increment; saturates at bram_addr_last
+      input     [OPERAND_ADDR_WIDTH:0]  bram_addr;              // current address
+      begin
+         bram_addr_next_or_last = (bram_addr < bram_addr_last) ? bram_addr + 1'b1 : bram_addr_last; // never wraps back to zero
+      end
+   endfunction
+
+   function     [OPERAND_ADDR_WIDTH:0]  bram_addr_prev_or_zero; // decrement; holds at bram_addr_zero
+      input     [OPERAND_ADDR_WIDTH:0]  bram_addr;              // current address
+      begin
+         bram_addr_prev_or_zero = (bram_addr > bram_addr_zero) ? bram_addr - 1'b1 : bram_addr_zero; // no underflow below zero
+      end
+   endfunction
+
+
+   //
+   // Round Counter
+   //
+   reg  [OPERAND_ADDR_WIDTH:0]  round_count                     = round_count_zero;
+   wire [OPERAND_ADDR_WIDTH:0]  round_count_last        = {operand_width_msb, 1'b0};
+   wire [OPERAND_ADDR_WIDTH:0]  round_count_next        = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;
+
+
+   //
+   // Main Logic
+   //
+   always @(posedge clk)
+     // Registered address / write-enable / carry control; each case item updates only the registers that state owns.
+     case (fsm_state)
+
+       FSM_STATE_INIT: begin
+          // write enable is set while the Z address is below bram_addr_last; the address steps once write enable is set
+          z_bram_wr_reg         <= (z_bram_addr_reg < bram_addr_last) ? 1'b1 : 1'b0;
+          z_bram_addr_reg       <= z_bram_wr_reg ? bram_addr_next_or_zero(z_bram_addr_reg) : bram_addr_zero;
+          //
+       end
+
+       FSM_STATE_MUL_XY_CALC: begin
+          //
+          if (z_bram_addr_reg == bram_addr_zero) begin
+             // Z address is zero: clear the X*Y carry register
+             multiplier_xy_carry_in <= {32{1'b0}};
+             //
+          end
+          //
+       end
+
+       FSM_STATE_MUL_XY_REGISTER: begin
+          // raise write enable for the MUL_XY_WRITE state that follows
+          z_bram_wr_reg <= 1'b1;
+          //
+       end
+
+       FSM_STATE_MUL_XY_WRITE: begin
+          // drop write enable and step the Z address (wraps to zero after bram_addr_last)
+          z_bram_wr_reg         <= 1'b0;
+          z_bram_addr_reg       <= bram_addr_next_or_zero(z_bram_addr_reg);
+          // X address steps and wraps the same way
+          x_bram_addr_reg       <= bram_addr_next_or_zero(x_bram_addr_reg);
+          // latch the unit's carry output into the carry register
+          multiplier_xy_carry_in <= multiplier_xy_carry_out;
+          //
+       end
+
+       FSM_STATE_MUL_MN_CALC: begin
+          //
+          if (z_bram_addr_reg == bram_addr_zero) begin
+             // Z address is zero: clear the M*N carry register ...
+             multiplier_mn_carry_in <= {32{1'b0}};
+             // ... and capture the low 32 bits of the magic product (only inside this if)
+             magic_value_reg <= multiplier_magic_out[31:0];
+             //
+          end
+          //
+       end
+
+       FSM_STATE_MUL_MN_REGISTER: begin
+          // raise write enable for the MUL_MN_WRITE state that follows
+          z_bram_wr_reg <= 1'b1;
+          //
+       end
+
+       FSM_STATE_MUL_MN_WRITE: begin
+          // drop write enable; here the Z address saturates at bram_addr_last instead of wrapping
+          z_bram_wr_reg         <= 1'b0;
+          z_bram_addr_reg       <= bram_addr_next_or_last(z_bram_addr_reg);
+          // N address steps and wraps to zero
+          n_bram_addr_reg       <= bram_addr_next_or_zero(n_bram_addr_reg);
+          // latch the unit's carry output into the carry register
+          multiplier_mn_carry_in <= multiplier_mn_carry_out;
+          //
+       end
+
+       FSM_STATE_SHIFT: begin
+          // write enable goes high when it is low, and low again once the Z address is zero
+          if (z_bram_wr_reg == 1'b0)                  z_bram_wr_reg <= 1'b1;
+          else if (z_bram_addr_reg == bram_addr_zero) z_bram_wr_reg <= 1'b0;
+          // Z address counts down and holds at zero
+          z_bram_addr_reg       <= bram_addr_prev_or_zero(z_bram_addr_reg);
+          //
+       end
+
+       FSM_STATE_ROUND: begin
+          // step the Y address for the next round, or reset it to zero once round_count has reached round_count_last
+          y_bram_addr_reg       <= (round_count < round_count_last) ? bram_addr_next_or_zero(y_bram_addr_reg) : bram_addr_zero;
+          //
+          round_count <= round_count_next;
+          //
+       end
+
+     endcase
+
+
+   //
+   // FSM Transition Logic
+   //
+   always @(posedge clk)
+     // Next-state selection: every case item assigns fsm_state on each clock edge.
+     case (fsm_state)
+       // leave IDLE only on ena_trig (ena high, ena_dly low) while rdy_reg is low
+       FSM_STATE_IDLE:            fsm_state <= (!rdy_reg && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE;
+
+       FSM_STATE_INIT:            fsm_state <= (z_bram_addr < bram_addr_last  ) ? FSM_STATE_INIT        : FSM_STATE_MUL_XY_CALC; // z_bram_addr is the output wire driven by z_bram_addr_reg
+       FSM_STATE_ROUND:           fsm_state <= (round_count < round_count_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_FINAL;       // another round, or finish
+
+       FSM_STATE_MUL_XY_CALC:     fsm_state <= FSM_STATE_MUL_XY_PIPELINE; // CALC -> PIPELINE -> REGISTER are unconditional one-cycle steps
+       FSM_STATE_MAGIC_CALC:      fsm_state <= FSM_STATE_MAGIC_PIPELINE;
+       FSM_STATE_MUL_MN_CALC:     fsm_state <= FSM_STATE_MUL_MN_PIPELINE;
+
+       FSM_STATE_MUL_XY_PIPELINE: fsm_state <= FSM_STATE_MUL_XY_REGISTER;
+       FSM_STATE_MAGIC_PIPELINE:  fsm_state <= FSM_STATE_MAGIC_REGISTER;
+       FSM_STATE_MUL_MN_PIPELINE: fsm_state <= FSM_STATE_MUL_MN_REGISTER;
+
+       FSM_STATE_MUL_XY_REGISTER: fsm_state <= FSM_STATE_MUL_XY_WRITE;
+       FSM_STATE_MAGIC_REGISTER:  fsm_state <= FSM_STATE_MUL_MN_CALC;     // the magic sequence has no WRITE state
+       FSM_STATE_MUL_MN_REGISTER: fsm_state <= FSM_STATE_MUL_MN_WRITE;
+
+       FSM_STATE_MUL_XY_WRITE:    fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_MAGIC_CALC; // loop per word until the Z address reaches bram_addr_last
+       FSM_STATE_MUL_MN_WRITE:    fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_MN_CALC : FSM_STATE_SHIFT;
+       FSM_STATE_SHIFT:           fsm_state <= (z_bram_addr > bram_addr_zero) ? FSM_STATE_SHIFT       : FSM_STATE_ROUND;      // loop until the Z address is back at zero
+
+       FSM_STATE_FINAL:           fsm_state <= FSM_STATE_IDLE; // FINAL lasts exactly one cycle
+
+       default:                   fsm_state <= FSM_STATE_IDLE; // any unlisted encoding falls back to IDLE
+
+     endcase
+
+
+endmodule
diff --git a/rtl/modexpa7_top.v b/rtl/modexpa7_top.v
new file mode 100644
index 0000000..6c5a922
--- /dev/null
+++ b/rtl/modexpa7_top.v
@@ -0,0 +1,706 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexpa7_top
+  #(parameter MAX_MODULUS_WIDTH = 1024)
+  (
+   input wire                        clk,
+
+   input wire                        init,
+   output wire                       ready,
+
+   input wire                        next,
+   output wire                       valid,
+
+   input wire [MODULUS_NUM_BITS-1:0] modulus_width,
+   input wire [MODULUS_NUM_BITS-1:0] exponent_width,
+
+   input wire                        fast_public_mode,
+
+   input wire                        bus_cs,
+   input wire                        bus_we,
+   input wire [ADDR_WIDTH_TOTAL-1:0] bus_addr,
+   input wire [31:0]                 bus_data_wr,
+   output wire [31:0]                bus_data_rd
+   );
+
+
+   //
+   // modexpa7_clog2(): ceil(log2(value)) for value >= 1; returns 0 when value <= 1
+   //
+   function     integer modexpa7_clog2;
+      input     integer              value;   // overwritten below and used as the working copy
+      integer                        ret;     // counts the right shifts performed
+      begin
+         value = value - 1;                   // largest index that must be representable
+         for (ret = 0; value > 0; ret = ret + 1)
+           value = value >> 1;                // one shift per required bit
+         modexpa7_clog2 = ret;
+      end
+   endfunction
+
+
+   //
+   // Locals
+   //
+   localparam   OPERAND_ADDR_WIDTH = modexpa7_clog2(MAX_MODULUS_WIDTH / 32);
+   localparam   MODULUS_NUM_BITS   = modexpa7_clog2(MAX_MODULUS_WIDTH + 1);
+   localparam   ADDR_WIDTH_TOTAL   = OPERAND_ADDR_WIDTH + 2;
+
+   localparam   [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}};
+   localparam   [OPERAND_ADDR_WIDTH  :0] bram_core_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+
+   localparam   [   MODULUS_NUM_BITS:0] round_count_zero = {1'b0, {MODULUS_NUM_BITS{1'b0}}};
+
+
+   //
+   // User Memory
+   //
+   wire [OPERAND_ADDR_WIDTH-1:0]        ro_modulus_bram_addr;
+   wire [                 31:0]         ro_modulus_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH-1:0]         ro_message_bram_addr    = bram_user_addr_zero;
+   wire [                 31:0]         ro_message_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH-1:0]         ro_exponent_bram_addr   = bram_user_addr_zero;
+   wire [                 31:0]         ro_exponent_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH-1:0]         rw_result_bram_addr     = bram_user_addr_zero;
+   wire [                 31:0]         rw_result_bram_out;
+   reg                                  rw_result_bram_wr       = 1'b0;
+   wire [                 31:0]         rw_result_bram_in;
+
+   modexpa7_buffer_user #
+     (
+      .OPERAND_ADDR_WIDTH       (OPERAND_ADDR_WIDTH)
+      )
+   mem_user
+     (
+      .clk                      (clk),
+
+      .bus_cs                   (bus_cs),
+      .bus_we                   (bus_we),
+      .bus_addr                 (bus_addr),
+      .bus_data_wr              (bus_data_wr),
+      .bus_data_rd              (bus_data_rd),
+
+      .ro_modulus_bram_addr     (ro_modulus_bram_addr),
+      .ro_modulus_bram_out      (ro_modulus_bram_out),
+
+      .ro_message_bram_addr     (ro_message_bram_addr),
+      .ro_message_bram_out      (ro_message_bram_out),
+
+      .ro_exponent_bram_addr    (ro_exponent_bram_addr),
+      .ro_exponent_bram_out     (ro_exponent_bram_out),
+
+      .rw_result_bram_addr      (rw_result_bram_addr),
+      .rw_result_bram_wr        (rw_result_bram_wr),
+      .rw_result_bram_in        (rw_result_bram_in)
+      );
+
+
+   //
+   // Core (Internal) Memory
+   //
+   wire [OPERAND_ADDR_WIDTH:0]          rw_coeff_bram_addr;
+   wire                                 rw_coeff_bram_wr;
+   wire [               31:0]           rw_coeff_bram_in;
+   wire [               31:0]           rw_coeff_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH:0]           rw_mm_bram_addr         = bram_core_addr_zero;
+   reg                                  rw_mm_bram_wr           = 1'b0;
+   reg [               31:0]            rw_mm_bram_in;
+   wire [               31:0]           rw_mm_bram_out;
+
+   wire [OPERAND_ADDR_WIDTH:0]          rw_nn_bram_addr;
+   wire                                 rw_nn_bram_wr;
+   wire [               31:0]           rw_nn_bram_in;
+
+   reg [OPERAND_ADDR_WIDTH:0]           rw_y_bram_addr          = bram_core_addr_zero;
+   reg                                  rw_y_bram_wr            = 1'b0;
+   reg [              31:0]             rw_y_bram_in;
+   wire [              31:0]            rw_y_bram_out;
+
+   wire [OPERAND_ADDR_WIDTH:0]          rw_r_bram_addr;
+   wire                                 rw_r_bram_wr;
+   wire [              31:0]            rw_r_bram_in;
+   wire [              31:0]            rw_r_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH:0]           rw_t_bram_addr          = bram_core_addr_zero;
+   reg                                  rw_t_bram_wr            = 1'b0;
+   reg [              31:0]             rw_t_bram_in;
+   wire [              31:0]            rw_t_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH:0]           ro_coeff_bram_addr      = bram_core_addr_zero;
+   wire [               31:0]           ro_coeff_bram_out;
+
+   wire [OPERAND_ADDR_WIDTH:0]          ro_mm_bram_addr;
+   wire [               31:0]           ro_mm_bram_out;
+
+   wire [OPERAND_ADDR_WIDTH:0]          ro_nn_bram_addr;
+   wire [               31:0]           ro_nn_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH:0]           ro_r_bram_addr          = bram_core_addr_zero;
+   wire [               31:0]           ro_r_bram_out;
+
+   wire [OPERAND_ADDR_WIDTH:0]          ro_t_bram_addr;
+   wire [              31:0]            ro_t_bram_out;
+
+   modexpa7_buffer_core #
+     (
+      .OPERAND_ADDR_WIDTH       (OPERAND_ADDR_WIDTH)
+      )
+   mem_core
+     (
+      .clk                      (clk),
+
+      .rw_coeff_bram_addr       (rw_coeff_bram_addr),
+      .rw_coeff_bram_wr         (rw_coeff_bram_wr),
+      .rw_coeff_bram_in         (rw_coeff_bram_in),
+      .rw_coeff_bram_out        (rw_coeff_bram_out),
+
+      .rw_mm_bram_addr          (rw_mm_bram_addr),
+      .rw_mm_bram_wr            (rw_mm_bram_wr),
+      .rw_mm_bram_in            (rw_mm_bram_in),
+      .rw_mm_bram_out           (rw_mm_bram_out),
+
+      .rw_nn_bram_addr          (rw_nn_bram_addr),
+      .rw_nn_bram_wr            (rw_nn_bram_wr),
+      .rw_nn_bram_in            (rw_nn_bram_in),
+
+      .rw_y_bram_addr           (rw_y_bram_addr),
+      .rw_y_bram_wr             (rw_y_bram_wr),
+      .rw_y_bram_in             (rw_y_bram_in),
+      .rw_y_bram_out            (rw_y_bram_out),
+
+      .rw_r_bram_addr           (rw_r_bram_addr),
+      .rw_r_bram_wr             (rw_r_bram_wr),
+      .rw_r_bram_in             (rw_r_bram_in),
+      .rw_r_bram_out            (rw_r_bram_out),
+
+      .rw_t_bram_addr           (rw_t_bram_addr),
+      .rw_t_bram_wr             (rw_t_bram_wr),
+      .rw_t_bram_in             (rw_t_bram_in),
+      .rw_t_bram_out            (rw_t_bram_out),
+
+      .ro_coeff_bram_addr       (ro_coeff_bram_addr),
+      .ro_coeff_bram_out        (ro_coeff_bram_out),
+
+      .ro_mm_bram_addr          (ro_mm_bram_addr),
+      .ro_mm_bram_out           (ro_mm_bram_out),
+
+      .ro_nn_bram_addr          (ro_nn_bram_addr),
+      .ro_nn_bram_out           (ro_nn_bram_out),
+
+      .ro_r_bram_addr           (ro_r_bram_addr),
+      .ro_r_bram_out            (ro_r_bram_out),
+
+      .ro_t_bram_addr           (ro_t_bram_addr),
+      .ro_t_bram_out            (ro_t_bram_out)
+      );
+
+
+   //
+   // Small 32-bit ModInv Core
+   //
+   wire                                 modinv_ena;
+   wire                                 modinv_rdy;
+
+   wire [31: 0]                         modinv_n0;
+   wire [31: 0]                         modinv_n0_negative = ~modinv_n0 + 1'b1;
+   wire [31: 0]                         modinv_n0_modinv;
+
+   modexpa7_modinv32 core_modinv32
+     (
+      .clk                      (clk),
+
+      .ena                      (modinv_ena),
+      .rdy                      (modinv_rdy),
+
+      .n0                       (modinv_n0_negative),
+      .n0_modinv                (modinv_n0_modinv)
+      );
+
+
+   //
+   // Montgomery Coefficient Calculator
+   //
+   modexpa7_montgomery_coeff #
+     (
+      .MODULUS_NUM_BITS         (MODULUS_NUM_BITS),
+      .OPERAND_ADDR_WIDTH       (OPERAND_ADDR_WIDTH)
+      )
+   core_montgomery_coeff
+     (
+      .clk                      (clk),
+
+      .ena                      (init),
+      .rdy                      (ready),
+
+      .modulus_width            (modulus_width),
+
+      .coeff_bram_addr          (rw_coeff_bram_addr),
+      .coeff_bram_wr            (rw_coeff_bram_wr),
+      .coeff_bram_in            (rw_coeff_bram_in),
+      .coeff_bram_out           (rw_coeff_bram_out),
+
+      .nn_bram_addr             (rw_nn_bram_addr),
+      .nn_bram_wr               (rw_nn_bram_wr),
+      .nn_bram_in               (rw_nn_bram_in),
+
+      .modulus_bram_addr        (ro_modulus_bram_addr),
+      .modulus_bram_out         (ro_modulus_bram_out),
+
+      .modinv_n0                (modinv_n0),
+      .modinv_ena               (modinv_ena),
+      .modinv_rdy               (modinv_rdy)
+      );
+
+
+   //
+   // Montgomery Multiplier
+   //
+   reg                                  mul_ena = 1'b0;
+   wire                                 mul_rdy;
+
+   modexpa7_montgomery_multiplier #
+     (
+      .OPERAND_NUM_BITS         (MODULUS_NUM_BITS),
+      .OPERAND_ADDR_WIDTH       (OPERAND_ADDR_WIDTH)
+      )
+   core_montgomery_multiplier
+     (
+      .clk                      (clk),
+
+      .ena                      (mul_ena),
+      .rdy                      (mul_rdy),
+
+      .operand_width            (modulus_width),
+
+      .x_bram_addr              (ro_t_bram_addr),
+      .x_bram_out               (ro_t_bram_out),
+
+      .y_bram_addr              (ro_mm_bram_addr),
+      .y_bram_out               (ro_mm_bram_out),
+
+      .n_bram_addr              (ro_nn_bram_addr),
+      .n_bram_out               (ro_nn_bram_out),
+
+      .z_bram_addr              (rw_r_bram_addr),
+      .z_bram_wr                (rw_r_bram_wr),
+      .z_bram_in                (rw_r_bram_in),
+      .z_bram_out               (rw_r_bram_out),
+
+      .n0_modinv                (modinv_n0_modinv)
+      );
+
+
+   //
+   // FSM
+   //
+   localparam FSM_STATE_IDLE            = 6'd0;
+
+   localparam FSM_STATE_INIT_LOAD       = 6'd11;
+   localparam FSM_STATE_INIT_WAIT       = 6'd12;
+   localparam FSM_STATE_INIT_UNLOAD     = 6'd13;
+
+   localparam FSM_STATE_READ_EI         = 6'd20;
+
+   localparam FSM_STATE_ROUND_BEGIN     = 6'd25;
+
+   localparam FSM_STATE_MULTIPLY_LOAD   = 6'd31;
+   localparam FSM_STATE_MULTIPLY_WAIT   = 6'd32;
+   localparam FSM_STATE_MULTIPLY_UNLOAD = 6'd33;
+
+   localparam FSM_STATE_SQUARE_LOAD     = 6'd41;
+   localparam FSM_STATE_SQUARE_WAIT     = 6'd42;
+   localparam FSM_STATE_SQUARE_UNLOAD   = 6'd43;
+
+   localparam FSM_STATE_ROUND_END       = 6'd50;
+
+   localparam FSM_STATE_FINAL           = 6'd60;
+
+   reg [5: 0]                           fsm_state = FSM_STATE_IDLE;
+
+
+   //
+   // Trigger
+   //
+   reg                                  next_dly = 1'b0;
+   always @(posedge clk) next_dly <= next;
+   wire                                 next_trig = (next == 1'b1) && (next_dly == 1'b0);
+
+
+   //
+   // Valid Register
+   //
+   reg                                  valid_reg = 1'b0;
+   assign valid = valid_reg;
+
+
+   //
+   // Next / Valid Logic
+   //
+   always @(posedge clk)
+     // valid handshake: valid_reg is set while the FSM is in FINAL and cleared in IDLE once next is low.
+     if (fsm_state == FSM_STATE_FINAL) begin
+        //
+        valid_reg <= 1'b1;
+        //
+     end else if (fsm_state == FSM_STATE_IDLE) begin
+        // valid_reg stays high for as long as next is still asserted
+        if (valid_reg && !next) valid_reg <= 1'b0;
+        //
+     end
+
+
+   //
+   // Exponent Bit Counter
+   //
+   reg  [4: 0]  ei_bit_count = 5'd0;
+   wire         ei_bit = ro_exponent_bram_out[ei_bit_count];
+
+
+   //
+   // Round Counter
+   //
+   reg [MODULUS_NUM_BITS:0] round_count         = round_count_zero;
+   wire [MODULUS_NUM_BITS:0] round_count_last   = exponent_width - 1'b1;
+   wire [MODULUS_NUM_BITS:0] round_count_next   = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;
+
+
+   //
+   // Handy Wires
+   //
+   wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH];
+
+   wire [OPERAND_ADDR_WIDTH  :0] bram_core_addr_last = {modulus_width_msb, 1'b0};
+
+   wire [OPERAND_ADDR_WIDTH  :0] bram_user_addr_last_ext = bram_core_addr_last - 1'b1;
+   wire [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_last = bram_user_addr_last_ext[OPERAND_ADDR_WIDTH-1:0];
+
+
+   //
+   // Handy Functions
+   //
+   function     [OPERAND_ADDR_WIDTH:0]  bram_core_addr_next_or_zero; // increment; wraps to bram_core_addr_zero
+      input [OPERAND_ADDR_WIDTH:0] bram_core_addr;                   // current core-memory address
+      begin
+         bram_core_addr_next_or_zero = (bram_core_addr < bram_core_addr_last) ? bram_core_addr + 1'b1 : bram_core_addr_zero; // reads the module-level bram_core_addr_last wire
+      end
+   endfunction
+
+   function     [OPERAND_ADDR_WIDTH-1:0]        bram_user_addr_next_or_zero; // increment; wraps to bram_user_addr_zero
+      input     [OPERAND_ADDR_WIDTH-1:0]        bram_user_addr;              // current user-memory address (one bit narrower than core addresses)
+      begin
+         bram_user_addr_next_or_zero = (bram_user_addr < bram_user_addr_last) ? bram_user_addr + 1'b1 : bram_user_addr_zero; // reads the module-level bram_user_addr_last wire
+      end
+   endfunction
+
+
+   //
+   // Result BRAM Input
+   //
+   assign rw_result_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out;
+
+
+   //
+   // MM BRAM Input Selector
+   //
+   always @(*)
+     // Combinational mux selecting the word presented on rw_mm_bram_in for the current FSM state.
+     case (fsm_state)
+
+       FSM_STATE_INIT_LOAD:
+         // message word while the MM address is below bram_core_addr_last, zero otherwise
+         rw_mm_bram_in = (rw_mm_bram_addr < bram_core_addr_last) ? ro_message_bram_out : {32{1'b0}};
+
+       FSM_STATE_INIT_UNLOAD:
+         // data from the read-only port of R
+         rw_mm_bram_in = ro_r_bram_out;
+
+       FSM_STATE_SQUARE_UNLOAD:
+         // same source as INIT_UNLOAD
+         rw_mm_bram_in = ro_r_bram_out;
+
+       default:
+         // all other states: value is X (don't-care)
+         rw_mm_bram_in  = {32{1'bX}};
+
+     endcase
+
+
+   //
+   // Y BRAM Input Selector
+   //
+   always @(*)
+                         // Combinational mux selecting the word presented on rw_y_bram_in for the current FSM state.
+     case (fsm_state)
+
+       FSM_STATE_INIT_LOAD:
+         // 1 when the MM address is zero, 0 otherwise (note: keyed on rw_mm_bram_addr, not rw_y_bram_addr)
+         rw_y_bram_in = (rw_mm_bram_addr == bram_core_addr_zero) ? 32'h00000001 : 32'h00000000;
+
+       FSM_STATE_MULTIPLY_UNLOAD:
+         // ei_bit picks R (read-only port) over T (read data of the read/write port)
+         rw_y_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out; // RW!
+
+       default:
+         // all other states: value is X (don't-care)
+         rw_y_bram_in   = {32{1'bX}};
+
+     endcase
+
+
+   //
+   // T BRAM Input Selector
+   //
+   always @(*)
+                        // Combinational mux selecting the word presented on rw_t_bram_in for the current FSM state.
+     case (fsm_state)
+
+       FSM_STATE_INIT_LOAD:
+         // data from the read-only port of the coefficient memory
+         rw_t_bram_in = ro_coeff_bram_out;
+
+       FSM_STATE_MULTIPLY_LOAD:
+         // read data of the Y read/write port
+         rw_t_bram_in = rw_y_bram_out;
+
+       FSM_STATE_SQUARE_LOAD:
+         // read data of the MM read/write port
+         rw_t_bram_in = rw_mm_bram_out;
+
+       default:
+         // all other states: value is X (don't-care)
+         rw_t_bram_in   = {32{1'bX}};
+
+     endcase
+
+
+   //
+   // Main Logic
+   //
+   always @(posedge clk)
+     // Per-state control of BRAM addresses/write strobes, the multiplier enable and the round counters.
+     case (fsm_state)
+
+       FSM_STATE_INIT_LOAD: begin
+          // MM, Y and T write strobes are all derived from the MM address: high while it is below bram_core_addr_last
+          rw_mm_bram_wr         <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          rw_y_bram_wr          <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          rw_t_bram_wr          <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          // all three write addresses follow the MM address while the MM strobe is high, otherwise they are set to zero
+          rw_mm_bram_addr       <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+          rw_y_bram_addr        <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+          rw_t_bram_addr        <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+          // read address: once non-zero it keeps stepping; at zero it only steps while the write strobe is low
+          if (ro_coeff_bram_addr > bram_core_addr_zero) ro_coeff_bram_addr <= bram_core_addr_next_or_zero(ro_coeff_bram_addr);
+          else ro_coeff_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_coeff_bram_addr);
+          // same scheme for the message read address (presumably lets reads lead writes to cover BRAM latency -- TODO confirm)
+          if (ro_message_bram_addr > bram_user_addr_zero) ro_message_bram_addr <= bram_user_addr_next_or_zero(ro_message_bram_addr);
+          else ro_message_bram_addr <= rw_mm_bram_wr ? bram_user_addr_zero : bram_user_addr_next_or_zero(ro_message_bram_addr);
+          //
+       end
+
+       FSM_STATE_INIT_WAIT: begin
+          // raise mul_ena when it is low; once high, hold it until mul_rdy is seen, then drop it
+          if (mul_ena)  mul_ena <= mul_rdy ? 1'b0 : 1'b1;
+          else          mul_ena <= 1'b1;
+          //
+       end
+
+       FSM_STATE_INIT_UNLOAD: begin
+          // MM write strobe is high while the MM address is below bram_core_addr_last
+          rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          // MM address steps while the strobe is high, otherwise it is set to zero
+          rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+          // ro_r read address: steps once non-zero; at zero it only steps while the MM strobe is low
+          if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr);
+          else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
+          //
+       end
+
+       FSM_STATE_MULTIPLY_LOAD: begin
+          // T write strobe is high while the T address is below bram_core_addr_last
+          rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          // T address steps while the strobe is high, otherwise it is set to zero
+          rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero;
+          // Y address is stepped here without touching rw_y_bram_wr, gated by the T strobe when it is at zero
+          if (rw_y_bram_addr > bram_core_addr_zero) rw_y_bram_addr <= bram_core_addr_next_or_zero(rw_y_bram_addr);
+          else rw_y_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_y_bram_addr);
+          //
+       end
+
+       FSM_STATE_MULTIPLY_WAIT: begin
+          // raise mul_ena when it is low; once high, hold it until mul_rdy is seen, then drop it
+          if (mul_ena)  mul_ena <= mul_rdy ? 1'b0 : 1'b1;
+          else          mul_ena <= 1'b1;
+          //
+       end
+
+       FSM_STATE_MULTIPLY_UNLOAD: begin
+          // Y write strobe is high while the Y address is below bram_core_addr_last
+          rw_y_bram_wr <= (rw_y_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          // Y address steps while the strobe is high, otherwise it is set to zero
+          rw_y_bram_addr <= rw_y_bram_wr ? bram_core_addr_next_or_zero(rw_y_bram_addr) : bram_core_addr_zero;
+          // ei_bit selects which second address is stepped during the unload
+          if (ei_bit) begin
+             // ei_bit set: step the ro_r address
+             if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr);
+             else ro_r_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
+             //
+          end else begin
+             // ei_bit clear: step the T address instead
+             if (rw_t_bram_addr > bram_core_addr_zero) rw_t_bram_addr <= bram_core_addr_next_or_zero(rw_t_bram_addr);
+             else rw_t_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_t_bram_addr);
+             //
+          end
+          // only when round_count equals round_count_last: also drive the result BRAM write port
+          if (round_count == round_count_last) begin
+             //
+             if (rw_result_bram_addr == bram_user_addr_zero) begin
+                // result address still at zero: its start is tied to the Y strobe and the Y address
+                if (rw_y_bram_wr) begin
+                   // Y address at zero: write and step the result address; otherwise keep it at zero with the strobe low
+                   rw_result_bram_wr <= (rw_y_bram_addr > bram_core_addr_zero) ? 1'b0 : 1'b1;
+                   rw_result_bram_addr <= (rw_y_bram_addr > bram_core_addr_zero) ? bram_user_addr_zero : bram_user_addr_next_or_zero(rw_result_bram_addr);
+                   //
+                end else begin
+                   // Y strobe low: assert the result strobe and keep the result address at zero
+                   rw_result_bram_wr <= 1'b1;
+                   rw_result_bram_addr <= bram_user_addr_zero;
+                   //
+                end
+                //
+             end else begin
+                // result address already running: strobe stays high until bram_user_addr_last is reached
+                rw_result_bram_wr <= (rw_result_bram_addr < bram_user_addr_last) ? 1'b1 : 1'b0;
+                rw_result_bram_addr <= bram_user_addr_next_or_zero(rw_result_bram_addr);
+                //
+             end
+             //
+          end
+          //
+       end
+
+       FSM_STATE_SQUARE_LOAD: begin
+          // T write strobe is high while the T address is below bram_core_addr_last
+          rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          // T address steps while the strobe is high, otherwise it is set to zero
+          rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero;
+          // MM address is stepped here without touching rw_mm_bram_wr, gated by the T strobe when it is at zero
+          if (rw_mm_bram_addr > bram_core_addr_zero) rw_mm_bram_addr <= bram_core_addr_next_or_zero(rw_mm_bram_addr);
+          else rw_mm_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_mm_bram_addr);
+          //
+       end
+
+       FSM_STATE_SQUARE_WAIT: begin
+          // raise mul_ena when it is low; once high, hold it until mul_rdy is seen, then drop it
+          if (mul_ena)  mul_ena <= mul_rdy ? 1'b0 : 1'b1;
+          else          mul_ena <= 1'b1;
+          //
+       end
+
+       FSM_STATE_SQUARE_UNLOAD: begin
+          // MM write strobe is high while the MM address is below bram_core_addr_last
+          rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          // MM address steps while the strobe is high, otherwise it is set to zero
+          rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+          // ro_r read address: steps once non-zero; at zero it only steps while the MM strobe is low
+          if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr);
+          else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
+          //
+       end
+
+       FSM_STATE_ROUND_END: begin
+          // round counter takes the value of round_count_next
+          round_count <= round_count_next;
+          //
+          if (round_count < round_count_last) begin
+             // more rounds to go: move on to the next exponent bit
+             ei_bit_count <= ei_bit_count + 1'b1;
+             // after bit 31 of the current word, step the exponent read address
+             if (ei_bit_count == 5'd31)
+               //
+               ro_exponent_bram_addr <= bram_user_addr_next_or_zero(ro_exponent_bram_addr);
+             //
+          end else begin
+             // last round: rewind the bit counter
+             ei_bit_count <= 5'd0;
+             // and return the exponent read address to zero
+             ro_exponent_bram_addr <= bram_user_addr_zero;
+             //
+          end
+          //
+       end
+
+     endcase
+
+
+   //
+   // FSM Transition Logic
+   //
+   always @(posedge clk)
+     // Next-state selection; each two-way choice tests the complementary condition with swapped arms.
+     case (fsm_state)
+
+       FSM_STATE_IDLE:          fsm_state <= (valid_reg || !next_trig) ? FSM_STATE_IDLE : FSM_STATE_INIT_LOAD;
+       // init phase: load -> wait for mul_rdy -> unload
+       FSM_STATE_INIT_LOAD:     fsm_state <= (rw_y_bram_addr >= bram_core_addr_last) ? FSM_STATE_INIT_WAIT : FSM_STATE_INIT_LOAD;
+       FSM_STATE_INIT_WAIT:     fsm_state <= !mul_rdy ? FSM_STATE_INIT_WAIT : FSM_STATE_INIT_UNLOAD;
+       FSM_STATE_INIT_UNLOAD:   fsm_state <= (rw_mm_bram_addr >= bram_core_addr_last) ? FSM_STATE_READ_EI : FSM_STATE_INIT_UNLOAD;
+       // start of every round
+       FSM_STATE_READ_EI:       fsm_state <= FSM_STATE_ROUND_BEGIN;
+       // the multiply phase is skipped only for a zero exponent bit in fast public mode before the last round
+       FSM_STATE_ROUND_BEGIN:   fsm_state <= (ei_bit || !fast_public_mode || (round_count >= round_count_last)) ? FSM_STATE_MULTIPLY_LOAD : FSM_STATE_SQUARE_LOAD;
+       // multiply phase: load -> wait for mul_rdy -> unload
+       FSM_STATE_MULTIPLY_LOAD: fsm_state <= (rw_t_bram_addr >= bram_core_addr_last) ? FSM_STATE_MULTIPLY_WAIT : FSM_STATE_MULTIPLY_LOAD;
+       FSM_STATE_MULTIPLY_WAIT: fsm_state <= !mul_rdy ? FSM_STATE_MULTIPLY_WAIT : FSM_STATE_MULTIPLY_UNLOAD;
+       FSM_STATE_MULTIPLY_UNLOAD: fsm_state <= (rw_y_bram_addr >= bram_core_addr_last) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_MULTIPLY_UNLOAD;
+       // square phase: load -> wait for mul_rdy -> unload
+       FSM_STATE_SQUARE_LOAD:   fsm_state <= (rw_t_bram_addr >= bram_core_addr_last) ? FSM_STATE_SQUARE_WAIT : FSM_STATE_SQUARE_LOAD;
+       FSM_STATE_SQUARE_WAIT:   fsm_state <= !mul_rdy ? FSM_STATE_SQUARE_WAIT : FSM_STATE_SQUARE_UNLOAD;
+       FSM_STATE_SQUARE_UNLOAD: fsm_state <= (rw_mm_bram_addr >= bram_core_addr_last) ? FSM_STATE_ROUND_END : FSM_STATE_SQUARE_UNLOAD;
+       // loop back for the next round or finish
+       FSM_STATE_ROUND_END:     fsm_state <= (round_count >= round_count_last) ? FSM_STATE_FINAL : FSM_STATE_READ_EI;
+
+       FSM_STATE_FINAL:         fsm_state <= FSM_STATE_IDLE;
+       // any unlisted encoding falls back to idle
+       default:                 fsm_state <= FSM_STATE_IDLE;
+
+     endcase
+
+
+endmodule
diff --git a/rtl/modexpa7_wrapper.v b/rtl/modexpa7_wrapper.v
new file mode 100644
index 0000000..271cb20
--- /dev/null
+++ b/rtl/modexpa7_wrapper.v
@@ -0,0 +1,211 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module modexpa7_wrapper
+  (
+   input wire          clk,
+   input wire          reset_n,
+   // bus strobes: cs qualifies an access; we = 1 writes, we = 0 reads
+   input wire          cs,
+   input wire          we,
+   // 10-bit word address, 32-bit write and read data
+   input wire [9: 0]   address,
+   input wire [31: 0]  write_data,
+   output wire [31: 0] read_data
+   );
+   // Bus wrapper for modexpa7_top: address[9] picks wrapper registers (0) or core memory (1).
+
+   //
+   // Address Decoder
+   //
+   localparam ADDR_MSB_REGS     = 1'b0;
+   localparam ADDR_MSB_CORE     = 1'b1;
+   wire                address_msb = address[9];
+   wire [8: 0]         address_lsb = address[8:0];
+
+
+   //
+   // Output Mux
+   //
+   // read data coming back from the core's bus port (bus_data_rd)
+   wire [31: 0]        read_data_core;
+
+
+   //
+   // Register Map (9-bit offsets, decoded when address[9] = 0) and Constants
+   //
+   localparam ADDR_NAME0        = 9'h000;
+   localparam ADDR_NAME1        = 9'h001;
+   localparam ADDR_VERSION      = 9'h002;
+
+   localparam ADDR_CONTROL      = 9'h008;               // {next, init}
+   localparam ADDR_STATUS       = 9'h009;               // {valid, ready}
+   localparam ADDR_MODE         = 9'h010;               // 0 = slow secure, 1 = fast unsafe (public)
+   localparam ADDR_MODULUS_BITS = 9'h011;               //
+   localparam ADDR_EXPONENT_BITS = 9'h012;              //
+   localparam ADDR_GPIO_REG     = 9'h020;               //
+
+   localparam CONTROL_INIT_BIT  = 0;
+   localparam CONTROL_NEXT_BIT  = 1;
+
+   localparam STATUS_READY_BIT  = 0;
+   localparam STATUS_VALID_BIT  = 1;
+
+   localparam CORE_NAME0        = 32'h6D6F6465; // "mode"
+   localparam CORE_NAME1        = 32'h78706137; // "xpa7"
+   localparam CORE_VERSION      = 32'h302E3130; // "0.10"
+
+
+   //
+   // Registers
+   //
+   reg [1: 0]          reg_control;
+   reg                 reg_mode;
+   reg [12: 0]         reg_modulus_width;
+   reg [12: 0]         reg_exponent_width;
+   reg [31: 0]         reg_gpio;
+
+
+   //
+   // Wires
+   //
+   wire [1: 0]         reg_status;
+
+
+   //
+   // ModExpA7
+   //
+   modexpa7_top #
+     (
+      .MAX_MODULUS_WIDTH        (4096)
+      )
+   modexpa7_core
+     (
+      .clk                      (clk),
+
+      .init                     (reg_control[CONTROL_INIT_BIT]),
+      .ready                    (reg_status[STATUS_READY_BIT]),
+      .next                     (reg_control[CONTROL_NEXT_BIT]),
+      .valid                    (reg_status[STATUS_VALID_BIT]),
+
+      .modulus_width            (reg_modulus_width),
+      .exponent_width           (reg_exponent_width),
+
+      .fast_public_mode         (reg_mode),
+
+      .bus_cs                   (cs && (address_msb == ADDR_MSB_CORE)),
+      .bus_we                   (we),
+      .bus_addr                 (address_lsb),
+      .bus_data_wr              (write_data),
+      .bus_data_rd              (read_data_core)
+      );
+
+
+   //
+   // Read Latch
+   //
+   reg [31: 0]         tmp_read_data;
+
+
+   //
+   // Read/Write Interface
+   //
+   always @(posedge clk)
+     // synchronous, active-low reset; otherwise serve register accesses (cs set, address[9] = 0)
+     if (!reset_n) begin
+        // reg_gpio and tmp_read_data are not touched by reset
+        reg_control             <= 2'b00;
+        reg_mode                <= 1'b0;
+        reg_modulus_width       <= 13'd1024;
+        reg_exponent_width      <= 13'd1024;
+        //
+     end else if (cs && (address_msb == ADDR_MSB_REGS)) begin
+        //
+        if (we) begin
+           //
+           // Write Handler
+           // (writes to other offsets are ignored)
+           case (address_lsb)
+             //
+             ADDR_CONTROL:      reg_control             <= write_data[1: 0];
+             ADDR_MODE:         reg_mode                <= write_data[0];
+             ADDR_MODULUS_BITS: reg_modulus_width       <= write_data[12: 0];
+             ADDR_EXPONENT_BITS: reg_exponent_width     <= write_data[12: 0];
+             ADDR_GPIO_REG:     reg_gpio                <= write_data;
+             //
+           endcase
+           //
+        end else begin
+           //
+           // Read Handler
+           // (decodes the same 9-bit offset as the write handler; address[9] is 0 in this branch)
+           case (address_lsb)
+             //
+             ADDR_NAME0:        tmp_read_data <= CORE_NAME0;
+             ADDR_NAME1:        tmp_read_data <= CORE_NAME1;
+             ADDR_VERSION:      tmp_read_data <= CORE_VERSION;
+             ADDR_CONTROL:      tmp_read_data <= {{30{1'b0}}, reg_control};
+             ADDR_STATUS:       tmp_read_data <= {{30{1'b0}}, reg_status};
+             ADDR_MODE:         tmp_read_data <= {{31{1'b0}}, reg_mode};
+             ADDR_MODULUS_BITS: tmp_read_data <= {{19{1'b0}}, reg_modulus_width};
+             ADDR_EXPONENT_BITS: tmp_read_data <= {{19{1'b0}}, reg_exponent_width};
+             ADDR_GPIO_REG:     tmp_read_data <= reg_gpio;
+             // unmapped offsets read as zero
+             default:           tmp_read_data <= 32'h00000000;
+             //
+           endcase
+           //
+        end
+        //
+     end
+
+
+   //
+   // Register / Core Memory Selector
+   // (non-blocking assignment: address_msb_last is a flip-flop that is read by the mux below)
+   reg address_msb_last;
+   always @(posedge clk) address_msb_last <= address_msb;
+
+   reg [31: 0] read_data_mux;
+   assign read_data = read_data_mux;
+
+   always @(*)
+     // select by the previous cycle's address[9]; presumably lines up with the registered read data (TODO confirm)
+     case (address_msb_last)
+       //
+       ADDR_MSB_REGS:           read_data_mux = tmp_read_data;
+       ADDR_MSB_CORE:           read_data_mux = read_data_core;
+       //
+     endcase
+
+
+endmodule
diff --git a/rtl/ram_1rw_1ro_readfirst.v b/rtl/ram_1rw_1ro_readfirst.v
new file mode 100644
index 0000000..25b708f
--- /dev/null
+++ b/rtl/ram_1rw_1ro_readfirst.v
@@ -0,0 +1,88 @@
+//======================================================================
+//
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module ram_1rw_1ro_readfirst
+  #(parameter MEM_WIDTH            = 32,
+    parameter MEM_ADDR_BITS        = 8)
+   (
+    input wire                     clk,
+    // port A (read/write)
+    input wire [MEM_ADDR_BITS-1:0] a_addr,
+    input wire                     a_wr,
+    input wire [MEM_WIDTH-1:0]     a_in,
+    output wire [MEM_WIDTH-1:0]    a_out,
+    // port B (read only)
+    input wire [MEM_ADDR_BITS-1:0] b_addr,
+    output wire [MEM_WIDTH-1:0]    b_out
+    );
+
+
+   //
+   // Storage array: 2**MEM_ADDR_BITS words, MEM_WIDTH bits each.
+   // The RAM_STYLE attribute asks the synthesis tool for block RAM.
+   //
+   (* RAM_STYLE="BLOCK" *)
+   reg [MEM_WIDTH-1:0]             bram[0:(2**MEM_ADDR_BITS)-1];
+
+
+   //
+   // Port A: registered read plus optional write of the same word.
+   // Both assignments are non-blocking, so a_out gets the word stored
+   // before this clock edge even when a_wr is set (read-first).
+   //
+   reg [MEM_WIDTH-1:0]             a_out_reg;
+   assign a_out = a_out_reg;
+
+   always @(posedge clk) begin
+      //
+      if (a_wr) bram[a_addr] <= a_in;
+      a_out_reg <= bram[a_addr];
+      //
+   end
+
+
+   //
+   // Port B: registered read only.
+   //
+   reg [MEM_WIDTH-1:0]             b_out_reg;
+   assign b_out = b_out_reg;
+
+   always @(posedge clk) begin
+      //
+      b_out_reg <= bram[b_addr];
+      //
+   end
+
+
+endmodule



More information about the Commits mailing list