[Cryptech-Commits] [core/math/modexps6] 03/03: reformat, add copyright

git at cryptech.is git at cryptech.is
Mon Nov 16 21:40:23 UTC 2015


This is an automated email from the git hooks/post-receive script.

paul at psgd.org pushed a commit to branch master
in repository core/math/modexps6.

commit eb3702ee1f2be0316ebac1883b479cff699470e4
Author: Paul Selkirk <paul at psgd.org>
Date:   Mon Nov 16 15:33:59 2015 -0500

    reformat, add copyright
---
 src/rtl/modexps6_adder64_carry32.v       |  166 ++--
 src/rtl/modexps6_buffer_core.v           |  420 ++++-----
 src/rtl/modexps6_buffer_user.v           |  382 ++++----
 src/rtl/modexps6_modinv32.v              |  256 +++---
 src/rtl/modexps6_montgomery_coeff.v      |  835 +++++++++---------
 src/rtl/modexps6_montgomery_multiplier.v |  799 ++++++++---------
 src/rtl/modexps6_top.v                   | 1402 +++++++++++++++---------------
 src/rtl/modexps6_wrapper.v               |  396 +++++----
 src/rtl/ram_1rw_1ro_readfirst.v          |  149 ++--
 9 files changed, 2483 insertions(+), 2322 deletions(-)

diff --git a/src/rtl/modexps6_adder64_carry32.v b/src/rtl/modexps6_adder64_carry32.v
index 87869d1..fb71b45 100644
--- a/src/rtl/modexps6_adder64_carry32.v
+++ b/src/rtl/modexps6_adder64_carry32.v
@@ -1,70 +1,96 @@
-`timescale 1ns / 1ps
-
-module modexps6_adder64_carry32
-	(
-		clk, t, x, y, s, c_in, c_out
-	);
-	
-	
-		//
-		// Ports
-		//
-	input		wire				clk;
-	input		wire	[31: 0]	t;
-	input		wire	[31: 0]	x;
-	input		wire	[31: 0]	y;
-	output	wire	[31: 0]	s;
-	input		wire	[31: 0]	c_in;
-	output	wire	[31: 0]	c_out;
-	
-	
-		//
-		// Multiplier
-		//
-	wire	[63: 0]	multiplier_out;
-	
-	multiplier_s6 dsp_multiplier
-	(
-		.clk	(clk),
-		.a		(x),
-		.b		(y),
-		.p		(multiplier_out)
-	);
-	
-	
-		//
-		// Carry and T
-		//
-	wire	[63: 0]	t_ext = {{32{1'b0}}, t};
-	wire	[63: 0]	c_ext = {{32{1'b0}}, c_in};
-	
-	
-		//
-		// Sum
-		//
-	wire	[63: 0]	sum = multiplier_out + c_in + t;
-		
-		
-		//
-		// Output
-		//
-	assign s = sum[31: 0];
-	assign c_out = sum[63:32];
-		
-	/*
-	reg	[31: 0]	s_reg;
-	reg	[31: 0]	c_out_reg;
-	
-	assign s = s_reg;
-	assign c_out = c_out_reg;
-	
-	always @(posedge clk) begin
-		//
-		s_reg			<= sum[31: 0];
-		c_out_reg	<= sum[63:32];
-		//
-	end
-	*/
-	
-	
-endmodule
+//======================================================================
+//
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexps6_adder64_carry32
+  (
+   input wire          clk,
+   input wire [31: 0]  t,
+   input wire [31: 0]  x,
+   input wire [31: 0]  y,
+   output wire [31: 0] s,
+   input wire [31: 0]  c_in,
+   output wire [31: 0] c_out
+   );
+
+   // {c_out, s} = multiplier output for (x, y) + c_in + t, split into 32-bit halves.
+   //
+   // Multiplier (multiplier_s6 is defined elsewhere; presumably p = a * b - TODO confirm)
+   //
+   wire [63: 0]        multiplier_out;
+
+   multiplier_s6 dsp_multiplier
+     (
+      .clk             (clk),
+      .a               (x),
+      .b               (y),
+      .p               (multiplier_out)
+      );
+
+
+   //
+   // Carry and T, explicitly zero-extended to the 64-bit width of the sum
+   //
+   wire [63: 0]        t_ext = {{32{1'b0}}, t};
+   wire [63: 0]        c_ext = {{32{1'b0}}, c_in};
+
+
+   //
+   // Sum (uses the extended operands so every term is visibly 64 bits wide)
+   //
+   wire [63: 0]        sum = multiplier_out + c_ext + t_ext;
+
+
+   //
+   // Output: low word is the result, high word is the carry for the next word
+   //
+   assign s = sum[31: 0];
+   assign c_out = sum[63:32];
+
+   /* Disabled alternative: registered outputs instead of combinational ones.
+    reg [31: 0] s_reg;
+    reg [31: 0] c_out_reg;
+
+    assign s = s_reg;
+    assign c_out = c_out_reg;
+
+    always @(posedge clk) begin
+      //
+      s_reg            <= sum[31: 0];
+      c_out_reg        <= sum[63:32];
+      //
+    end
+    */
+
+
+endmodule
diff --git a/src/rtl/modexps6_buffer_core.v b/src/rtl/modexps6_buffer_core.v
index 86a6a4d..a468154 100644
--- a/src/rtl/modexps6_buffer_core.v
+++ b/src/rtl/modexps6_buffer_core.v
@@ -1,202 +1,218 @@
-`timescale 1ns / 1ps
-
-module modexps6_buffer_core
-	(
-		clk,
-		rw_coeff_bram_addr, rw_coeff_bram_wr, rw_coeff_bram_in, rw_coeff_bram_out, ro_coeff_bram_addr, ro_coeff_bram_out,
-		rw_mm_bram_addr,    rw_mm_bram_wr,    rw_mm_bram_in,    rw_mm_bram_out,    ro_mm_bram_addr,    ro_mm_bram_out,
-		rw_nn_bram_addr,    rw_nn_bram_wr,    rw_nn_bram_in,                       ro_nn_bram_addr,    ro_nn_bram_out,
-		rw_y_bram_addr,     rw_y_bram_wr,     rw_y_bram_in,     rw_y_bram_out,
-		rw_r_bram_addr,     rw_r_bram_wr,     rw_r_bram_in,     rw_r_bram_out,     ro_r_bram_addr,     ro_r_bram_out,
-		rw_t_bram_addr,     rw_t_bram_wr,     rw_t_bram_in,     rw_t_bram_out,     ro_t_bram_addr,     ro_t_bram_out
-	);
-
-		//
-		// Parameters
-		//
-	parameter OPERAND_ADDR_WIDTH	=  5;		// 1024 / 32 = 32 -> 5 bits
-	
-	
-		//
-		// Ports
-		//
-	input		wire									clk;
-	
-	input		wire	[OPERAND_ADDR_WIDTH:0]	rw_coeff_bram_addr;
-	input		wire									rw_coeff_bram_wr;
-	input		wire	[                31:0]	rw_coeff_bram_in;
-	output	wire	[                31:0]	rw_coeff_bram_out;
-	
-	input		wire	[OPERAND_ADDR_WIDTH:0]	rw_mm_bram_addr;
-	input		wire									rw_mm_bram_wr;
-	input		wire	[                31:0]	rw_mm_bram_in;
-	output	wire	[                31:0]	rw_mm_bram_out;	
-	
-	input		wire	[OPERAND_ADDR_WIDTH:0]	rw_nn_bram_addr;
-	input		wire									rw_nn_bram_wr;
-	input		wire	[                31:0]	rw_nn_bram_in;
-	
-	input		wire	[OPERAND_ADDR_WIDTH:0]	rw_y_bram_addr;
-	input		wire									rw_y_bram_wr;
-	input		wire	[                31:0]	rw_y_bram_in;
-	output	wire	[                31:0]	rw_y_bram_out;
-	
-	input		wire	[OPERAND_ADDR_WIDTH:0]	rw_r_bram_addr;
-	input		wire									rw_r_bram_wr;
-	input		wire	[                31:0]	rw_r_bram_in;
-	output	wire	[                31:0]	rw_r_bram_out;
-	
-	input		wire	[OPERAND_ADDR_WIDTH:0]	rw_t_bram_addr;
-	input		wire									rw_t_bram_wr;
-	input		wire	[                31:0]	rw_t_bram_in;
-	output	wire	[                31:0]	rw_t_bram_out;
-	
-	input		wire	[OPERAND_ADDR_WIDTH:0]	ro_coeff_bram_addr;
-	output	wire	[                31:0]	ro_coeff_bram_out;
-	
-	input		wire	[OPERAND_ADDR_WIDTH:0]	ro_mm_bram_addr;
-	output	wire	[                31:0]	ro_mm_bram_out;
-	
-	input		wire	[OPERAND_ADDR_WIDTH:0]	ro_nn_bram_addr;
-	output	wire	[                31:0]	ro_nn_bram_out;
-	
-	input		wire	[OPERAND_ADDR_WIDTH:0]	ro_r_bram_addr;
-	output	wire	[                31:0]	ro_r_bram_out;
-	
-	input		wire	[OPERAND_ADDR_WIDTH:0]	ro_t_bram_addr;
-	output	wire	[                31:0]	ro_t_bram_out;
-	
-	
-		//
-		// Montgomery Coefficient
-		//
-	ram_1rw_1ro_readfirst #
-	(
-		.MEM_WIDTH		(32),
-		.MEM_ADDR_BITS	(OPERAND_ADDR_WIDTH+1)
-	)
-	mem_coeff
-	(
-		.clk		(clk),
-		
-		.a_addr	(rw_coeff_bram_addr),
-		.a_wr		(rw_coeff_bram_wr),
-		.a_in		(rw_coeff_bram_in),
-		.a_out	(rw_coeff_bram_out),
-		
-		.b_addr	(ro_coeff_bram_addr),
-		.b_out	(ro_coeff_bram_out)
-	);
-	
-	
-		//
-		// Powers of Message
-		//
-	ram_1rw_1ro_readfirst #
-	(
-		.MEM_WIDTH		(32),
-		.MEM_ADDR_BITS	(OPERAND_ADDR_WIDTH+1)
-	)
-	mem_mm
-	(
-		.clk		(clk),
-		
-		.a_addr	(rw_mm_bram_addr),
-		.a_wr		(rw_mm_bram_wr),
-		.a_in		(rw_mm_bram_in),
-		.a_out	(rw_mm_bram_out),
-		
-		.b_addr	(ro_mm_bram_addr),
-		.b_out	(ro_mm_bram_out)
-	);
-	
-	
-		//
-		// Extended Modulus
-		//
-	ram_1rw_1ro_readfirst #
-	(
-		.MEM_WIDTH		(32),
-		.MEM_ADDR_BITS	(OPERAND_ADDR_WIDTH+1)
-	)
-	mem_nn
-	(
-		.clk		(clk),
-		
-		.a_addr	(rw_nn_bram_addr),
-		.a_wr		(rw_nn_bram_wr),
-		.a_in		(rw_nn_bram_in),
-		.a_out	(),
-		
-		.b_addr	(ro_nn_bram_addr),
-		.b_out	(ro_nn_bram_out)
-	);
-	
-	
-		//
-		// Output
-		//
-	ram_1rw_1ro_readfirst #
-	(
-		.MEM_WIDTH		(32),
-		.MEM_ADDR_BITS	(OPERAND_ADDR_WIDTH+1)
-	)
-	mem_y
-	(
-		.clk		(clk),
-		
-		.a_addr	(rw_y_bram_addr),
-		.a_wr		(rw_y_bram_wr),
-		.a_in		(rw_y_bram_in),
-		.a_out	(rw_y_bram_out),
-		
-		.b_addr	({(OPERAND_ADDR_WIDTH+1){1'b0}}),
-		.b_out	()
-	);
-		
-		
-		//
-		// Result of Multiplication
-		//
-	ram_1rw_1ro_readfirst #
-	(
-		.MEM_WIDTH		(32),
-		.MEM_ADDR_BITS	(OPERAND_ADDR_WIDTH+1)
-	)
-	mem_r
-	(
-		.clk		(clk),
-		
-		.a_addr	(rw_r_bram_addr),
-		.a_wr		(rw_r_bram_wr),
-		.a_in		(rw_r_bram_in),
-		.a_out	(rw_r_bram_out),
-		
-		.b_addr	(ro_r_bram_addr),
-		.b_out	(ro_r_bram_out)
-	);
-	
-	
-		//
-		// Temporary Buffer
-		//
-	ram_1rw_1ro_readfirst #
-	(
-		.MEM_WIDTH		(32),
-		.MEM_ADDR_BITS	(OPERAND_ADDR_WIDTH+1)
-	)
-	mem_t
-	(
-		.clk		(clk),
-		
-		.a_addr	(rw_t_bram_addr),
-		.a_wr		(rw_t_bram_wr),
-		.a_in		(rw_t_bram_in),
-		.a_out	(rw_t_bram_out),
-		
-		.b_addr	(ro_t_bram_addr),
-		.b_out	(ro_t_bram_out)
-	);
-	
-	
-endmodule
+//======================================================================
+//
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexps6_buffer_core
+  #(parameter OPERAND_ADDR_WIDTH      =  5)     // 1024 / 32 = 32 -> 5 bits
+   (
+    input wire                        clk,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_coeff_bram_addr,
+    input wire                        rw_coeff_bram_wr,
+    input wire [31:0]                 rw_coeff_bram_in,
+    output wire [31:0]                rw_coeff_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_mm_bram_addr,
+    input wire                        rw_mm_bram_wr,
+    input wire [31:0]                 rw_mm_bram_in,
+    output wire [31:0]                rw_mm_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_nn_bram_addr,
+    input wire                        rw_nn_bram_wr,
+    input wire [31:0]                 rw_nn_bram_in,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_y_bram_addr,
+    input wire                        rw_y_bram_wr,
+    input wire [31:0]                 rw_y_bram_in,
+    output wire [31:0]                rw_y_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_r_bram_addr,
+    input wire                        rw_r_bram_wr,
+    input wire [31:0]                 rw_r_bram_in,
+    output wire [31:0]                rw_r_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_t_bram_addr,
+    input wire                        rw_t_bram_wr,
+    input wire [31:0]                 rw_t_bram_in,
+    output wire [31:0]                rw_t_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_coeff_bram_addr,
+    output wire [31:0]                ro_coeff_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_mm_bram_addr,
+    output wire [31:0]                ro_mm_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_nn_bram_addr,
+    output wire [31:0]                ro_nn_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_r_bram_addr,
+    output wire [31:0]                ro_r_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_t_bram_addr,
+    output wire [31:0]                ro_t_bram_out
+    );
+
+
+   //
+   // Montgomery Coefficient (port A read/write, port B read-only)
+   //
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH+1)
+      )
+   mem_coeff
+     (
+      .clk              (clk),
+
+      .a_addr           (rw_coeff_bram_addr),
+      .a_wr             (rw_coeff_bram_wr),
+      .a_in             (rw_coeff_bram_in),
+      .a_out            (rw_coeff_bram_out),
+
+      .b_addr           (ro_coeff_bram_addr),
+      .b_out            (ro_coeff_bram_out)
+      );
+
+
+   //
+   // Powers of Message (port A read/write, port B read-only)
+   //
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH+1)
+      )
+   mem_mm
+     (
+      .clk              (clk),
+
+      .a_addr           (rw_mm_bram_addr),
+      .a_wr             (rw_mm_bram_wr),
+      .a_in             (rw_mm_bram_in),
+      .a_out            (rw_mm_bram_out),
+
+      .b_addr           (ro_mm_bram_addr),
+      .b_out            (ro_mm_bram_out)
+      );
+
+
+   //
+   // Extended Modulus (port A is write-only here: a_out is left unconnected)
+   //
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH+1)
+      )
+   mem_nn
+     (
+      .clk              (clk),
+
+      .a_addr           (rw_nn_bram_addr),
+      .a_wr             (rw_nn_bram_wr),
+      .a_in             (rw_nn_bram_in),
+      .a_out            (),
+
+      .b_addr           (ro_nn_bram_addr),
+      .b_out            (ro_nn_bram_out)
+      );
+
+
+   //
+   // Output (only port A is used: b_addr is tied to zero, b_out is unconnected)
+   //
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH+1)
+      )
+   mem_y
+     (
+      .clk              (clk),
+
+      .a_addr           (rw_y_bram_addr),
+      .a_wr             (rw_y_bram_wr),
+      .a_in             (rw_y_bram_in),
+      .a_out            (rw_y_bram_out),
+
+      .b_addr           ({(OPERAND_ADDR_WIDTH+1){1'b0}}),
+      .b_out            ()
+      );
+
+
+   //
+   // Result of Multiplication (port A read/write, port B read-only)
+   //
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH+1)
+      )
+   mem_r
+     (
+      .clk              (clk),
+
+      .a_addr           (rw_r_bram_addr),
+      .a_wr             (rw_r_bram_wr),
+      .a_in             (rw_r_bram_in),
+      .a_out            (rw_r_bram_out),
+
+      .b_addr           (ro_r_bram_addr),
+      .b_out            (ro_r_bram_out)
+      );
+
+
+   //
+   // Temporary Buffer (port A read/write, port B read-only)
+   //
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH+1)
+      )
+   mem_t
+     (
+      .clk              (clk),
+
+      .a_addr           (rw_t_bram_addr),
+      .a_wr             (rw_t_bram_wr),
+      .a_in             (rw_t_bram_in),
+      .a_out            (rw_t_bram_out),
+
+      .b_addr           (ro_t_bram_addr),
+      .b_out            (ro_t_bram_out)
+      );
+
+
+endmodule
diff --git a/src/rtl/modexps6_buffer_user.v b/src/rtl/modexps6_buffer_user.v
index 6072fc9..fde6105 100644
--- a/src/rtl/modexps6_buffer_user.v
+++ b/src/rtl/modexps6_buffer_user.v
@@ -1,185 +1,197 @@
-`timescale 1ns / 1ps
-
-module modexps6_buffer_user
-	(
-		clk,
-		
-		bus_cs, bus_we,
-		bus_addr, bus_data_wr, bus_data_rd,
-		
-		ro_modulus_bram_addr,  ro_modulus_bram_out,
-		ro_message_bram_addr,  ro_message_bram_out,
-		ro_exponent_bram_addr, ro_exponent_bram_out,
-		rw_result_bram_addr,
-		rw_result_bram_wr,     rw_result_bram_in
-	);
-	
-	
-		//
-		// Parameters
-		//
-	parameter OPERAND_ADDR_WIDTH	=  5;		// 1024 / 32 = 32 -> 5 bits
-	
-	
-		//
-		// Locals
-		//
-	localparam	ADDR_WIDTH_TOTAL	= OPERAND_ADDR_WIDTH + 2;
-	
-	localparam	[ 1: 0]	BUS_ADDR_BANK_MODULUS	= 2'b00;
-	localparam	[ 1: 0]	BUS_ADDR_BANK_MESSAGE	= 2'b01;
-	localparam	[ 1: 0]	BUS_ADDR_BANK_EXPONENT	= 2'b10;
-	localparam	[ 1: 0]	BUS_ADDR_BANK_RESULT		= 2'b11;
-	
-		//
-		// Ports
-		//
-	input		wire										clk;
-		
-	input		wire										bus_cs;
-	input		wire										bus_we;
-	input		wire	[  ADDR_WIDTH_TOTAL-1:0]	bus_addr;
-	input		wire	[                  31:0]	bus_data_wr;
-	output	wire	[                  31:0]	bus_data_rd;
-		
-	input		wire	[OPERAND_ADDR_WIDTH-1:0]	ro_modulus_bram_addr;
-	output	wire	[                  31:0]	ro_modulus_bram_out;
-	
-	input		wire	[OPERAND_ADDR_WIDTH-1:0]	ro_message_bram_addr;
-	output	wire	[                  31:0]	ro_message_bram_out;
-		
-	input		wire	[OPERAND_ADDR_WIDTH-1:0]	ro_exponent_bram_addr;
-	output	wire	[                  31:0]	ro_exponent_bram_out;
-	
-	input		wire	[OPERAND_ADDR_WIDTH-1:0]	rw_result_bram_addr;
-	input		wire										rw_result_bram_wr;
-	input		wire	[                  31:0]	rw_result_bram_in;
-		
-		
-		//
-		// Address Decoder
-		//
-	wire	[OPERAND_ADDR_WIDTH-1:0]	bus_addr_operand_word = bus_addr[OPERAND_ADDR_WIDTH-1:0];
-	wire	[                   1:0]	bus_addr_operand_bank = bus_addr[ADDR_WIDTH_TOTAL-1:ADDR_WIDTH_TOTAL-2];
-	
-
-		//
-		// Modulus Memory
-		//
-	wire	[31: 0]	bus_data_rd_modulus;
-	
-	ram_1rw_1ro_readfirst #
-	(
-		.MEM_WIDTH		(32),
-		.MEM_ADDR_BITS	(OPERAND_ADDR_WIDTH)
-	)
-	mem_modulus
-	(
-		.clk		(clk),
-		
-		.a_addr	(bus_addr_operand_word),
-		.a_wr		(bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MODULUS)),
-		.a_in		(bus_data_wr),
-		.a_out	(bus_data_rd_modulus),
-		
-		.b_addr	(ro_modulus_bram_addr),
-		.b_out	(ro_modulus_bram_out)
-	);
-	
-	
-		//
-		// Message Memory
-		//
-	wire	[31: 0]	bus_data_rd_message;
-	
-	ram_1rw_1ro_readfirst #
-	(
-		.MEM_WIDTH		(32),
-		.MEM_ADDR_BITS	(OPERAND_ADDR_WIDTH)
-	)
-	mem_message
-	(
-		.clk		(clk),
-		
-		.a_addr	(bus_addr_operand_word),
-		.a_wr		(bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MESSAGE)),
-		.a_in		(bus_data_wr),
-		.a_out	(bus_data_rd_message),
-		
-		.b_addr	(ro_message_bram_addr),
-		.b_out	(ro_message_bram_out)
-	);
-	
-	
-		//
-		// Exponent Memory
-		//
-	wire	[31: 0]	bus_data_rd_exponent;
-	
-	ram_1rw_1ro_readfirst #
-	(
-		.MEM_WIDTH		(32),
-		.MEM_ADDR_BITS	(OPERAND_ADDR_WIDTH)
-	)
-	mem_exponent
-	(
-		.clk		(clk),
-		
-		.a_addr	(bus_addr_operand_word),
-		.a_wr		(bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_EXPONENT)),
-		.a_in		(bus_data_wr),
-		.a_out	(bus_data_rd_exponent),
-		
-		.b_addr	(ro_exponent_bram_addr),
-		.b_out	(ro_exponent_bram_out)
-	);
-	
-	
-		//
-		// Result Memory
-		//
-	wire	[31: 0]	bus_data_rd_result;
-	
-	ram_1rw_1ro_readfirst #
-	(
-		.MEM_WIDTH		(32),
-		.MEM_ADDR_BITS	(OPERAND_ADDR_WIDTH)
-	)
-	mem_result
-	(
-		.clk		(clk),
-		
-		.a_addr	(rw_result_bram_addr),
-		.a_wr		(rw_result_bram_wr),
-		.a_in		(rw_result_bram_in),
-		.a_out	(),
-		
-		.b_addr	(bus_addr_operand_word),
-		.b_out	(bus_data_rd_result)
-	);
-	
-	
-		//
-		// Output Selector
-		//
-	reg	[ 1: 0]	bus_addr_operand_bank_prev;
-	always @(posedge clk) bus_addr_operand_bank_prev = bus_addr_operand_bank;
-	
-	reg	[31: 0]	bus_data_rd_mux;
-	assign bus_data_rd = bus_data_rd_mux;	
-	
-	always @(*)
-		//
-		case (bus_addr_operand_bank_prev)
-			//
-			BUS_ADDR_BANK_MODULUS:		bus_data_rd_mux = bus_data_rd_modulus;
-			BUS_ADDR_BANK_MESSAGE:		bus_data_rd_mux = bus_data_rd_message;
-			BUS_ADDR_BANK_EXPONENT:		bus_data_rd_mux = bus_data_rd_exponent;
-			BUS_ADDR_BANK_RESULT:		bus_data_rd_mux = bus_data_rd_result;
-			//
-			default:							bus_data_rd_mux = {32{1'bX}};
-			//
-		endcase
-
-	
-endmodule
+//======================================================================
+//
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexps6_buffer_user
+  #(parameter OPERAND_ADDR_WIDTH        =  5)     // 1024 / 32 = 32 -> 5 bits
+   (
+    input wire                          clk,
+
+    input wire                          bus_cs,
+    input wire                          bus_we,
+    input wire [OPERAND_ADDR_WIDTH+1:0] bus_addr,
+    input wire [31:0]                   bus_data_wr,
+    output wire [31:0]                  bus_data_rd,
+
+    input wire [OPERAND_ADDR_WIDTH-1:0] ro_modulus_bram_addr,
+    output wire [31:0]                  ro_modulus_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH-1:0] ro_message_bram_addr,
+    output wire [31:0]                  ro_message_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH-1:0] ro_exponent_bram_addr,
+    output wire [31:0]                  ro_exponent_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH-1:0] rw_result_bram_addr,
+    input wire                          rw_result_bram_wr,
+    input wire [31:0]                   rw_result_bram_in
+    );
+
+   // Bus-facing operand storage: four 32-bit memories selected by the top two bus_addr bits.
+   //
+   // Locals (ADDR_WIDTH_TOTAL equals the bus_addr port width, OPERAND_ADDR_WIDTH + 2)
+   //
+   localparam   ADDR_WIDTH_TOTAL                = OPERAND_ADDR_WIDTH + 2;
+
+   localparam   [1: 0]  BUS_ADDR_BANK_MODULUS   = 2'b00;
+   localparam   [1: 0]  BUS_ADDR_BANK_MESSAGE   = 2'b01;
+   localparam   [1: 0]  BUS_ADDR_BANK_EXPONENT  = 2'b10;
+   localparam   [1: 0]  BUS_ADDR_BANK_RESULT    = 2'b11;
+
+   //
+   // Address Decoder: low bits pick the word, the two top bits pick the bank
+   //
+   wire [OPERAND_ADDR_WIDTH-1:0]        bus_addr_operand_word = bus_addr[OPERAND_ADDR_WIDTH-1:0];
+   wire [                  1:0]         bus_addr_operand_bank = bus_addr[ADDR_WIDTH_TOTAL-1:ADDR_WIDTH_TOTAL-2];
+
+
+   //
+   // Modulus Memory (port A: bus read/write, port B: read-only for the core)
+   //
+   wire [31: 0]                         bus_data_rd_modulus;
+
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH)
+      )
+   mem_modulus
+     (
+      .clk              (clk),
+
+      .a_addr           (bus_addr_operand_word),
+      .a_wr             (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MODULUS)),
+      .a_in             (bus_data_wr),
+      .a_out            (bus_data_rd_modulus),
+
+      .b_addr           (ro_modulus_bram_addr),
+      .b_out            (ro_modulus_bram_out)
+      );
+
+
+   //
+   // Message Memory (port A: bus read/write, port B: read-only for the core)
+   //
+   wire [31: 0]         bus_data_rd_message;
+
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH)
+      )
+   mem_message
+     (
+      .clk              (clk),
+
+      .a_addr           (bus_addr_operand_word),
+      .a_wr             (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MESSAGE)),
+      .a_in             (bus_data_wr),
+      .a_out            (bus_data_rd_message),
+
+      .b_addr           (ro_message_bram_addr),
+      .b_out            (ro_message_bram_out)
+      );
+
+
+   //
+   // Exponent Memory (port A: bus read/write, port B: read-only for the core)
+   //
+   wire [31: 0]         bus_data_rd_exponent;
+
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH)
+      )
+   mem_exponent
+     (
+      .clk              (clk),
+
+      .a_addr           (bus_addr_operand_word),
+      .a_wr             (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_EXPONENT)),
+      .a_in             (bus_data_wr),
+      .a_out            (bus_data_rd_exponent),
+
+      .b_addr           (ro_exponent_bram_addr),
+      .b_out            (ro_exponent_bram_out)
+      );
+
+
+   //
+   // Result Memory (port A: written by the core, port B: read by the bus; no bus writes)
+   //
+   wire [31: 0]         bus_data_rd_result;
+
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH        (32),
+      .MEM_ADDR_BITS    (OPERAND_ADDR_WIDTH)
+      )
+   mem_result
+     (
+      .clk              (clk),
+
+      .a_addr           (rw_result_bram_addr),
+      .a_wr             (rw_result_bram_wr),
+      .a_in             (rw_result_bram_in),
+      .a_out            (),
+
+      .b_addr           (bus_addr_operand_word),
+      .b_out            (bus_data_rd_result)
+      );
+
+
+   //
+   // Output Selector: the bank is delayed one clock before it drives the read mux
+   //
+   reg [1: 0]           bus_addr_operand_bank_prev;
+   always @(posedge clk) bus_addr_operand_bank_prev <= bus_addr_operand_bank;
+
+   reg [31: 0]          bus_data_rd_mux;
+   assign bus_data_rd = bus_data_rd_mux;
+
+   always @(*)
+     // presumably the RAM read data lags the address by one clock - TODO confirm in RAM model
+     case (bus_addr_operand_bank_prev)
+       //
+       BUS_ADDR_BANK_MODULUS:   bus_data_rd_mux = bus_data_rd_modulus;
+       BUS_ADDR_BANK_MESSAGE:   bus_data_rd_mux = bus_data_rd_message;
+       BUS_ADDR_BANK_EXPONENT:  bus_data_rd_mux = bus_data_rd_exponent;
+       BUS_ADDR_BANK_RESULT:    bus_data_rd_mux = bus_data_rd_result;
+       // only reachable in simulation when the bank register holds X/Z bits
+       default:                 bus_data_rd_mux = {32{1'bX}};
+       //
+     endcase
+
+
+endmodule
diff --git a/src/rtl/modexps6_modinv32.v b/src/rtl/modexps6_modinv32.v
index dc08b7b..916a17e 100644
--- a/src/rtl/modexps6_modinv32.v
+++ b/src/rtl/modexps6_modinv32.v
@@ -1,116 +1,140 @@
-`timescale 1ns / 1ps
-
-module modexps6_modinv32
-	(
-		clk,
-		ena, rdy,
-		n0, n0_modinv
-	);
-	
-	
-		//
-		// Ports
-		//
-	input		wire				clk;
-	
-	input		wire				ena;
-	output	wire				rdy;
-	
-	input		wire	[31: 0]	n0;
-	output	wire	[31: 0]	n0_modinv;	
-
-
-		//
-		// Trigger
-		//
-	reg ena_dly = 1'b0;
-	wire ena_trig = ena && !ena_dly;
-	always @(posedge clk) ena_dly <= ena;
-
-
-		//
-		// Ready Register
-		//
-	reg rdy_reg = 1'b0;
-	assign rdy = rdy_reg;
-	
-	
-		//
-		// Counter
-		//
-	reg	[ 7: 0]	cnt		= 8'd0;
-	wire	[ 7: 0]	cnt_zero = 8'd0;
-	wire	[ 7: 0]	cnt_last	= 8'd132;
-	wire	[ 7: 0]	cnt_next	= cnt + 1'b1;
-	wire	[ 1: 0]	cnt_phase = cnt[1:0];
-	wire	[ 5: 0]	cnt_cycle = cnt[7:2];
-	
-	always @(posedge clk)
-		//
-		if (cnt == cnt_zero)	cnt <= (!rdy_reg && ena_trig) ? cnt_next : cnt_zero;
-		else cnt <= (cnt == cnt_last) ? cnt_zero : cnt_next;
-		
-	
-		//
-		// Enable / Ready Logic
-		//
-	always @(posedge clk)
-		//
-		if (cnt == cnt_last) rdy_reg <= 1'b1;
-		else if ((cnt == cnt_zero) && (rdy_reg && !ena)) rdy_reg <= 1'b0;
-		
-		
-		//
-		// Output Register
-		//
-	reg	[31: 0]	n0_modinv_reg;
-	assign n0_modinv = n0_modinv_reg;
-	
-	
-		//
-		// Multiplier
-		//
-	wire	[63: 0]	multiplier_out;
-	wire	[31: 0]	multiplier_out_masked = multiplier_out[31: 0] & {mask_reg, 1'b1};
-	
-	multiplier_s6 dsp_multiplier
-	(
-		.clk	(clk),
-		.a		(n0),
-		.b		(n0_modinv_reg),
-		.p		(multiplier_out)
-	);
-	
-	
-		//
-		// Mask and Power
-		//
-	reg	[30: 0]	mask_reg;
-	reg	[31: 0]	power_reg;
-	
-	always @(posedge clk)
-		//
-		if (cnt_phase == 2'd1) begin
-			//
-			if (cnt_cycle == 6'd0) begin
-				//
-				mask_reg			<= 31'd0;
-				power_reg		<= 32'd1;
-				//
-				n0_modinv_reg	<= 32'd0;
-				//
-			end else begin
-				//
-				mask_reg			<= { mask_reg[29:0], 1'b1};
-				power_reg		<= {power_reg[30:0], 1'b0};
-				//
-				if (multiplier_out_masked != 32'd1)
-					//
-					n0_modinv_reg <= n0_modinv_reg + power_reg;
-				//
-			end
-			//
-		end
-			
-	
-endmodule
+//======================================================================
+//
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexps6_modinv32
+  (
+   input wire          clk,
+
+   input wire          ena,
+   output wire         rdy,
+
+   input wire [31: 0]  n0,
+   output wire [31: 0] n0_modinv
+   );
+
+   // Builds n0_modinv bit by bit; presumably the inverse of odd n0 mod 2^32 - TODO confirm.
+   //
+   // Trigger: one-clock pulse on the rising edge of ena
+   //
+   reg                 ena_dly = 1'b0;
+   wire                ena_trig = ena && !ena_dly;
+   always @(posedge clk) ena_dly <= ena;
+
+
+   //
+   // Ready Register
+   //
+   reg                 rdy_reg = 1'b0;
+   assign rdy = rdy_reg;
+
+
+   //
+   // Counter: cnt[1:0] is the phase within a cycle, cnt[7:2] is the cycle index
+   //
+   reg [7: 0]          cnt = 8'd0;
+   wire [7: 0]         cnt_zero = 8'd0;
+   wire [7: 0]         cnt_last = 8'd132;
+   wire [7: 0]         cnt_next = cnt + 1'b1;
+   wire [1: 0]         cnt_phase = cnt[1:0];
+   wire [5: 0]         cnt_cycle = cnt[7:2];
+
+   always @(posedge clk)
+     // idle at zero until ena rises while rdy is low, then count to cnt_last and wrap
+     if (cnt == cnt_zero) cnt <= (!rdy_reg && ena_trig) ? cnt_next : cnt_zero;
+     else cnt <= (cnt == cnt_last) ? cnt_zero : cnt_next;
+
+
+   //
+   // Enable / Ready Logic
+   //
+   always @(posedge clk)
+     // rdy rises when the counter hits cnt_last and falls once ena is low while idle
+     if (cnt == cnt_last) rdy_reg <= 1'b1;
+     else if ((cnt == cnt_zero) && (rdy_reg && !ena)) rdy_reg <= 1'b0;
+
+
+   //
+   // Output Register
+   //
+   reg [31: 0]         n0_modinv_reg;
+   assign n0_modinv = n0_modinv_reg;
+
+
+   //
+   // Mask and Power (declared before the multiplier logic that reads mask_reg)
+   //
+   reg [30: 0]         mask_reg;
+   reg [31: 0]         power_reg;
+
+   //
+   // Multiplier
+   //
+   wire [63: 0]        multiplier_out;
+   wire [31: 0]        multiplier_out_masked = multiplier_out[31: 0] & {mask_reg, 1'b1};
+
+   multiplier_s6 dsp_multiplier
+     (
+      .clk              (clk),
+      .a                (n0),
+      .b                (n0_modinv_reg),
+      .p                (multiplier_out)
+      );
+
+   // Mask / power / result update: one step per counter cycle, taken in phase 1
+   always @(posedge clk)
+     //
+     if (cnt_phase == 2'd1) begin
+        //
+        if (cnt_cycle == 6'd0) begin
+           // first cycle: reset the mask, the power of two and the result accumulator
+           mask_reg <= 31'd0;
+           power_reg <= 32'd1;
+           //
+           n0_modinv_reg <= 32'd0;
+           //
+        end else begin
+           // later cycles: widen the mask by one bit and move to the next power of two
+           mask_reg <= { mask_reg[29:0], 1'b1};
+           power_reg <= {power_reg[30:0], 1'b0};
+           //
+           if (multiplier_out_masked != 32'd1)
+             // masked low bits of the multiplier output are not 1: add the current power
+             n0_modinv_reg <= n0_modinv_reg + power_reg;
+           //
+        end
+        //
+     end
+
+
+endmodule
diff --git a/src/rtl/modexps6_montgomery_coeff.v b/src/rtl/modexps6_montgomery_coeff.v
index c3ceeee..fc0a365 100644
--- a/src/rtl/modexps6_montgomery_coeff.v
+++ b/src/rtl/modexps6_montgomery_coeff.v
@@ -1,410 +1,425 @@
-`timescale 1ns / 1ps
-
-module modexps6_montgomery_coeff
-	(
-		clk,
-		ena, rdy,
-		modulus_width,
-		coeff_bram_addr, coeff_bram_wr, coeff_bram_in, coeff_bram_out,
-		nn_bram_addr,    nn_bram_wr,    nn_bram_in,
-		modulus_bram_addr, modulus_bram_out,
-		modinv_n0, modinv_ena, modinv_rdy
-	);
-
-
-		//
-		// Parameters
-		//
-	parameter MODULUS_NUM_BITS		= 11;		// 1024 -> 11 bits
-	parameter OPERAND_ADDR_WIDTH	=  5;		// 1024 / 32 = 32 -> 5 bits
-	
-	
-		//
-		// Locals
-		//
-	localparam	[  MODULUS_NUM_BITS  :0]	round_count_zero			= {1'b0, {MODULUS_NUM_BITS{1'b0}}};
-	localparam	[OPERAND_ADDR_WIDTH  :0]	coeff_bram_addr_zero		= {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
-	localparam	[OPERAND_ADDR_WIDTH-1:0]	modulus_bram_addr_zero	= {OPERAND_ADDR_WIDTH{1'b0}};
-	
-	
-		//
-		// Ports
-		//
-	input		wire										clk;
-	
-	input		wire										ena;
-	output	wire										rdy;
-	
-	input		wire	[  MODULUS_NUM_BITS-1:0]	modulus_width;
-	
-	output	wire	[OPERAND_ADDR_WIDTH  :0]	coeff_bram_addr;
-	output	wire										coeff_bram_wr;
-	output	wire	[                  31:0]	coeff_bram_in;
-	input		wire	[                  31:0]	coeff_bram_out;
-	
-	output	wire	[OPERAND_ADDR_WIDTH  :0]	nn_bram_addr;
-	output	wire										nn_bram_wr;
-	output	wire	[                  31:0]	nn_bram_in;
-	
-	output	wire	[OPERAND_ADDR_WIDTH-1:0]	modulus_bram_addr;
-	input		wire	[                  31:0]	modulus_bram_out;
-	
-	output	wire	[                  31:0]	modinv_n0;
-	output	wire										modinv_ena;
-	input		wire										modinv_rdy;
-	
-
-		//
-		// FSM
-		//
-	localparam FSM_STATE_IDLE					= 6'd0;
-	
-	localparam FSM_STATE_INIT					= 6'd10;
-	
-	localparam FSM_STATE_SHIFT_READ			= 6'd21;
-	localparam FSM_STATE_SHIFT_WRITE			= 6'd22;
-
-	localparam FSM_STATE_COMPARE_READ		= 6'd31;
-	localparam FSM_STATE_COMPARE_COMPARE	= 6'd32;
-	
-	localparam FSM_STATE_SUBTRACT_READ		= 6'd41;
-	localparam FSM_STATE_SUBTRACT_WRITE		= 6'd42;
-	
-	localparam FSM_STATE_ROUND					= 6'd50;
-	
-	localparam FSM_STATE_FINAL					= 6'd60;
-	
-	reg	[ 5: 0]	fsm_state = FSM_STATE_IDLE;
-	
-	
-		//
-		// Trigger
-		//
-	reg ena_dly = 1'b0;
-	
-	wire ena_trig = ena && !ena_dly;
-	
-	always @(posedge clk) ena_dly <= ena;
-	
-	
-		//
-		// Ready Register
-		//
-	reg rdy_reg = 1'b0;
-	
-	assign rdy = rdy_reg;
-	
-	
-		//
-		// ModInv Control
-		//
-	reg				modinv_ena_reg = 1'b0;
-	reg	[31: 0]	modinv_n0_reg;
-	
-	assign modinv_ena = modinv_ena_reg;
-	assign modinv_n0 = modinv_n0_reg;
-	
-	
-		//
-		// Enable / Ready Logic
-		//
-	always @(posedge clk)
-		//
-		if (fsm_state == FSM_STATE_FINAL) begin
-			//
-			if (modinv_rdy) rdy_reg <= 1'b1;
-			//
-		end else if (fsm_state == FSM_STATE_IDLE) begin
-			//
-			if (rdy_reg && !ena) rdy_reg <= 1'b0;
-			//
-		end
-		
-	
-		//
-		// Flags
-		//
-	reg	reg_shift_carry = 1'b0;
-	reg	reg_subtractor_borrow = 1'b0;
-	
-	
-		//
-		// Round Counter
-		//
-	reg	[MODULUS_NUM_BITS:0]	round_count			= round_count_zero;
-	wire	[MODULUS_NUM_BITS:0]	round_count_last	= {modulus_width, 1'b0} + 6'd63;
-	wire	[MODULUS_NUM_BITS:0]	round_count_next	= (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;
-	
-
-		//
-		// Modulus BRAM Interface
-		//
-	reg	[OPERAND_ADDR_WIDTH-1:0]	modulus_bram_addr_reg = modulus_bram_addr_zero;
-	
-	assign modulus_bram_addr = modulus_bram_addr_reg;	
-
-	
-		//
-		// Coeff BRAM Interface
-		//
-	reg	[OPERAND_ADDR_WIDTH:0]	coeff_bram_addr_reg	= coeff_bram_addr_zero;
-	reg									coeff_bram_wr_reg		= 1'b0;
-	
-	assign coeff_bram_addr = coeff_bram_addr_reg;
-	assign coeff_bram_wr = coeff_bram_wr_reg;
-	
-	
-		//
-		// NN BRAM Interface
-		//
-	reg	[OPERAND_ADDR_WIDTH:0]	nn_bram_addr_reg	= coeff_bram_addr_zero;
-	reg									nn_bram_wr_reg		= 1'b0;
-	
-	assign nn_bram_addr = nn_bram_addr_reg;
-	assign nn_bram_wr = nn_bram_wr_reg;	
-
-
-		//
-		// Hardware Subtractor
-		//
-	wire	[31: 0]	subtractor_out;
-	wire				subtractor_out_nonzero = |subtractor_out;
-	wire				subtractor_borrow_out;
-	wire				subtractor_borrow_in;
-	
-	assign subtractor_borrow_in = (fsm_state == FSM_STATE_COMPARE_COMPARE) ? 1'b0 : reg_subtractor_borrow;
-	
-	subtractor_s6 dsp_subtractor
-	(
-		.a			(coeff_bram_out),
-		.b			(modulus_bram_out),
-		.s			(subtractor_out),
-		.c_in		(subtractor_borrow_in),
-		.c_out	(subtractor_borrow_out)
-	);
-	
-	
-		//
-		// Handy Wires
-		//
-	wire	[OPERAND_ADDR_WIDTH-1:0]	modulus_width_msb						= modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH];
-
-	wire	[OPERAND_ADDR_WIDTH  :0]	coeff_bram_addr_last					= {modulus_width_msb, 1'b0};
-	wire	[OPERAND_ADDR_WIDTH  :0]	coeff_bram_addr_next_or_zero		= (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_zero;
-	wire	[OPERAND_ADDR_WIDTH  :0]	coeff_bram_addr_next_or_last		= (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_last;
-	wire	[OPERAND_ADDR_WIDTH  :0]	coeff_bram_addr_prev_or_zero		= (coeff_bram_addr_reg > coeff_bram_addr_zero) ? coeff_bram_addr_reg - 1'b1 : coeff_bram_addr_zero;
-	
-	wire	[OPERAND_ADDR_WIDTH  :0]	modulus_bram_addr_last_ext			= coeff_bram_addr_last - 1'b1;
-	
-	wire	[OPERAND_ADDR_WIDTH-1:0]	modulus_bram_addr_last				= modulus_bram_addr_last_ext[OPERAND_ADDR_WIDTH-1:0];
-	wire	[OPERAND_ADDR_WIDTH-1:0]	modulus_bram_addr_next_or_zero	= (modulus_bram_addr_reg < modulus_bram_addr_last) ? modulus_bram_addr_reg + 1'b1 : modulus_bram_addr_zero;
-	wire	[OPERAND_ADDR_WIDTH-1:0]	modulus_bram_addr_prev_or_zero	= (modulus_bram_addr_reg > modulus_bram_addr_zero) ? modulus_bram_addr_reg - 1'b1 : modulus_bram_addr_zero;
-
-	
-		//
-		// Coeff BRAM Input Logic
-		//
-	reg	[31: 0]	coeff_bram_in_mux;
-	
-	assign coeff_bram_in = coeff_bram_in_mux;
-	
-	always @(*)
-		//
-		case (fsm_state)
-
-			FSM_STATE_INIT:
-				//
-				if (coeff_bram_addr_reg == coeff_bram_addr_zero)	coeff_bram_in_mux = 32'h00000001;
-				else																coeff_bram_in_mux = 32'h00000000;
-			
-			FSM_STATE_SHIFT_WRITE:
-				//
-				coeff_bram_in_mux = {coeff_bram_out[30:0], reg_shift_carry};
-			
-			FSM_STATE_SUBTRACT_WRITE:
-				//
-				if (coeff_bram_addr_reg == coeff_bram_addr_last)	coeff_bram_in_mux = 32'h00000000;
-				else																coeff_bram_in_mux = subtractor_out;
-			
-			default:
-				//
-				coeff_bram_in_mux	= {32{1'bX}};
-
-		endcase
-		
-		
-		//
-		// NN BRAM Input Logic
-		//
-	reg	[31: 0]	nn_bram_in_mux;
-	
-	assign nn_bram_in = nn_bram_in_mux;
-	
-	always @(*)
-		//
-		case (fsm_state)
-
-			FSM_STATE_INIT:
-				//
-				if (coeff_bram_addr_reg == coeff_bram_addr_last)	nn_bram_in_mux = {32{1'b0}};
-				else																nn_bram_in_mux = modulus_bram_out;
-
-			default:
-				//
-				nn_bram_in_mux	= {32{1'bX}};
-
-		endcase		
-
-
-		//
-		// Comparison Functions
-		//
-	reg	compare_greater_or_equal;
-	reg	compare_less_than;
-
-	wire compare_done = compare_greater_or_equal | compare_less_than;
-
-	always @(*)
-		//
-		if (coeff_bram_addr_reg == coeff_bram_addr_last)			compare_greater_or_equal = coeff_bram_out[0];
-		//
-		else if (coeff_bram_addr_reg == coeff_bram_addr_zero)		compare_greater_or_equal = !subtractor_borrow_out;
-		//
-		else																		compare_greater_or_equal = !subtractor_borrow_out && subtractor_out_nonzero;		
-	
-	always @(*)
-		//
-		if (coeff_bram_addr_reg == coeff_bram_addr_last)			compare_less_than = 1'b0;
-		//
-		else																		compare_less_than = subtractor_borrow_out;
-
-	
-	
-		//
-		// Main Logic
-		//
-	always @(posedge clk)
-		//
-		case (fsm_state)
-			
-			FSM_STATE_INIT: begin
-				//
-				coeff_bram_wr_reg		<= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0;
-				coeff_bram_addr_reg	<= coeff_bram_wr_reg ? coeff_bram_addr_next_or_zero : coeff_bram_addr_zero;
-				//
-				nn_bram_wr_reg			<= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0;
-				nn_bram_addr_reg		<= coeff_bram_wr_reg ? coeff_bram_addr_next_or_zero : coeff_bram_addr_zero;
-				//
-				if (!coeff_bram_wr_reg) begin
-					//
-					modinv_ena_reg		<= 1'b1;
-					modinv_n0_reg		<= modulus_bram_out;
-					//
-				end
-				//
-				if (modulus_bram_addr_reg == modulus_bram_addr_zero) begin
-					//
-					if (!coeff_bram_wr_reg)
-						//
-						modulus_bram_addr_reg	<= modulus_bram_addr_next_or_zero;
-					//
-				end else begin
-					//
-					modulus_bram_addr_reg	<= modulus_bram_addr_next_or_zero;
-					//
-				end
-				//
-			end
-			
-			FSM_STATE_SHIFT_READ: begin
-				//
-				coeff_bram_wr_reg <= 1'b1;
-				//
-				if (coeff_bram_addr_reg == coeff_bram_addr_zero)
-					//
-					reg_shift_carry <= 1'b0;
-				//
-			end
-			
-			FSM_STATE_SHIFT_WRITE: begin
-				//
-				coeff_bram_wr_reg			<= 1'b0;
-				coeff_bram_addr_reg		<= coeff_bram_addr_next_or_last;
-				//
-				reg_shift_carry			<= coeff_bram_out[31];
-				//
-			end
-			
-			FSM_STATE_COMPARE_COMPARE: begin
-				//
-				coeff_bram_addr_reg		<= compare_done ? coeff_bram_addr_zero : coeff_bram_addr_prev_or_zero;
-				//
-				modulus_bram_addr_reg	<= compare_done ? modulus_bram_addr_zero : ((coeff_bram_addr_reg == coeff_bram_addr_last) ? modulus_bram_addr_last : modulus_bram_addr_prev_or_zero);
-				//
-			end
-			
-			FSM_STATE_SUBTRACT_READ: begin
-				//
-				coeff_bram_wr_reg <= 1'b1;
-				//
-				if (coeff_bram_addr_reg == coeff_bram_addr_zero)
-					//
-					reg_subtractor_borrow <= 1'b0;
-				//
-			end
-			
-			FSM_STATE_SUBTRACT_WRITE: begin
-				//
-				coeff_bram_wr_reg			<= 1'b0;
-				coeff_bram_addr_reg		<= coeff_bram_addr_next_or_zero;
-				//
-				modulus_bram_addr_reg	<= (coeff_bram_addr_reg == coeff_bram_addr_last) ? modulus_bram_addr_zero : modulus_bram_addr_next_or_zero;
-				//
-				reg_subtractor_borrow	<= subtractor_borrow_out;
-				//
-			end			
-			
-			FSM_STATE_ROUND: begin
-				//
-				round_count <= round_count_next;
-				//
-			end
-			
-			FSM_STATE_FINAL: begin
-				//
-				if (modinv_rdy) modinv_ena_reg <= 1'b0;
-				//
-			end
-			
-		endcase
-		
-		
-		//
-		// FSM Transition Logic
-		//
-	always @(posedge clk)
-		//
-		case (fsm_state)
-			
-			FSM_STATE_IDLE:					fsm_state <= (!rdy_reg && !modinv_rdy && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE;
-			
-			FSM_STATE_SHIFT_READ:			fsm_state <= FSM_STATE_SHIFT_WRITE;
-			FSM_STATE_COMPARE_READ:			fsm_state <= FSM_STATE_COMPARE_COMPARE;
-			FSM_STATE_SUBTRACT_READ:		fsm_state <= FSM_STATE_SUBTRACT_WRITE;
-
-			FSM_STATE_INIT:					fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_INIT          : FSM_STATE_SHIFT_READ;
-			FSM_STATE_SHIFT_WRITE:			fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SHIFT_READ    : FSM_STATE_COMPARE_READ;
-			FSM_STATE_SUBTRACT_WRITE:		fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND;
-			
-			FSM_STATE_ROUND:					fsm_state <= (round_count < round_count_last) ? FSM_STATE_SHIFT_READ : FSM_STATE_FINAL;
-			
-			FSM_STATE_COMPARE_COMPARE:		fsm_state <= compare_done ? (compare_greater_or_equal ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND) : FSM_STATE_COMPARE_READ;
-
-			FSM_STATE_FINAL:					fsm_state <= modinv_rdy ? FSM_STATE_IDLE : FSM_STATE_FINAL;
-			
-			default:								fsm_state <= FSM_STATE_IDLE;
-			
-		endcase
-		
-	
-endmodule
+//======================================================================
+//
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexps6_montgomery_coeff   // shift/compare/subtract engine; overview follows the port list
+  #(parameter MODULUS_NUM_BITS          = 11,           // 1024 -> 11 bits
+    parameter OPERAND_ADDR_WIDTH        =  5)           // 1024 / 32 = 32 -> 5 bits
+   (
+    input wire                           clk,    // every register here updates on posedge clk; there is no reset port
+
+    input wire                           ena,    // a rising edge (ena_trig) starts a run from IDLE
+    output wire                          rdy,    // set once FINAL sees modinv_rdy, cleared in IDLE after ena drops
+
+    input wire [MODULUS_NUM_BITS-1:0]    modulus_width,    // upper bits give the word count; the full value sets the round count
+
+    output wire [OPERAND_ADDR_WIDTH :0]  coeff_bram_addr,    // one bit wider than modulus_bram_addr
+    output wire                          coeff_bram_wr,
+    output wire [31:0]                   coeff_bram_in,
+    input wire [31:0]                    coeff_bram_out,
+
+    output wire [OPERAND_ADDR_WIDTH :0]  nn_bram_addr,    // NN buffer is only written, and only during INIT
+    output wire                          nn_bram_wr,
+    output wire [31:0]                   nn_bram_in,
+
+    output wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr,    // read-only: no write enable or data input exists for it
+    input wire [31:0]                    modulus_bram_out,
+
+    output wire [31:0]                   modinv_n0,    // modulus word latched in INIT (presumably for the modinv32 core; confirm in top level)
+    output wire                          modinv_ena,    // raised in INIT, dropped in FINAL when modinv_rdy is seen
+    input wire                           modinv_rdy    // must be low to start a run and high to finish one
+    );
+   // Overview: INIT writes the value 1 into the coeff buffer and copies the modulus into NN. Each round then
+   // doubles coeff (SHIFT), compares it with the modulus (COMPARE) and subtracts the modulus if coeff >= modulus.
+   //
+   // Locals
+   // (zero constants sized to match the round counter and the two address registers)
+   localparam   [  MODULUS_NUM_BITS  :0] round_count_zero       = {1'b0, {MODULUS_NUM_BITS{1'b0}}};
+   localparam   [OPERAND_ADDR_WIDTH  :0] coeff_bram_addr_zero   = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+   localparam   [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}};
+
+
+   //
+   // FSM
+   // (state codes are sparse; each *_READ state is followed one clock later by its partner state)
+   localparam FSM_STATE_IDLE            = 6'd0;    // waiting for a start edge
+
+   localparam FSM_STATE_INIT            = 6'd10;    // fill the coeff and NN buffers
+
+   localparam FSM_STATE_SHIFT_READ      = 6'd21;    // coeff <<= 1, one word per READ/WRITE pair
+   localparam FSM_STATE_SHIFT_WRITE     = 6'd22;
+
+   localparam FSM_STATE_COMPARE_READ    = 6'd31;    // coeff vs. modulus, walking down from the top word
+   localparam FSM_STATE_COMPARE_COMPARE = 6'd32;
+
+   localparam FSM_STATE_SUBTRACT_READ   = 6'd41;    // coeff -= modulus, walking up from word zero
+   localparam FSM_STATE_SUBTRACT_WRITE  = 6'd42;
+
+   localparam FSM_STATE_ROUND           = 6'd50;    // advance round_count, then loop or finish
+
+   localparam FSM_STATE_FINAL           = 6'd60;    // wait for modinv_rdy
+
+   reg [5: 0]                            fsm_state = FSM_STATE_IDLE;
+
+
+   //
+   // Trigger
+   // (ena_dly is ena delayed by one clock, so ena_trig is high only on the cycle where ena rises)
+   reg                                   ena_dly = 1'b0;
+
+   wire                                  ena_trig = ena && !ena_dly;
+
+   always @(posedge clk) ena_dly <= ena;
+
+
+   //
+   // Ready Register
+   //
+   reg                                   rdy_reg = 1'b0;
+
+   assign rdy = rdy_reg;
+
+
+   //
+   // ModInv Control
+   //
+   reg                                   modinv_ena_reg = 1'b0;
+   reg [31: 0]                           modinv_n0_reg;    // no initial value; first written in INIT
+
+   assign modinv_ena = modinv_ena_reg;
+   assign modinv_n0 = modinv_n0_reg;
+
+
+   //
+   // Enable / Ready Logic
+   //
+   always @(posedge clk)
+     // rdy is only touched in FINAL (set) and IDLE (clear); it holds its value in every other state
+     if (fsm_state == FSM_STATE_FINAL) begin
+        // done is reported only once the modinv side also reports ready
+        if (modinv_rdy) rdy_reg <= 1'b1;
+        //
+     end else if (fsm_state == FSM_STATE_IDLE) begin
+        // rdy stays high until the requester lowers ena
+        if (rdy_reg && !ena) rdy_reg <= 1'b0;
+        //
+     end
+
+
+   //
+   // Flags
+   // (carry/borrow passed from one 32-bit word to the next while shifting or subtracting)
+   reg  reg_shift_carry = 1'b0;
+   reg  reg_subtractor_borrow = 1'b0;
+
+
+   //
+   // Round Counter
+   // (last = 2 * modulus_width + 63, so counting from zero gives 2 * modulus_width + 64 rounds)
+   reg [MODULUS_NUM_BITS:0] round_count         = round_count_zero;
+   wire [MODULUS_NUM_BITS:0] round_count_last   = {modulus_width, 1'b0} + 6'd63;
+   wire [MODULUS_NUM_BITS:0] round_count_next   = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;
+
+
+   //
+   // Modulus BRAM Interface
+   //
+   reg [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_reg = modulus_bram_addr_zero;
+
+   assign modulus_bram_addr = modulus_bram_addr_reg;
+
+
+   //
+   // Coeff BRAM Interface
+   //
+   reg [OPERAND_ADDR_WIDTH:0]   coeff_bram_addr_reg     = coeff_bram_addr_zero;
+   reg                          coeff_bram_wr_reg               = 1'b0;
+
+   assign coeff_bram_addr = coeff_bram_addr_reg;
+   assign coeff_bram_wr = coeff_bram_wr_reg;
+
+
+   //
+   // NN BRAM Interface
+   //
+   reg [OPERAND_ADDR_WIDTH:0]   nn_bram_addr_reg        = coeff_bram_addr_zero;
+   reg                          nn_bram_wr_reg          = 1'b0;
+
+   assign nn_bram_addr = nn_bram_addr_reg;
+   assign nn_bram_wr = nn_bram_wr_reg;
+
+
+   //
+   // Hardware Subtractor
+   // (subtractor_s6 is defined outside this file; presumably s = a - b - c_in with c_out as borrow - TODO confirm)
+   wire [31: 0]                 subtractor_out;
+   wire                         subtractor_out_nonzero = |subtractor_out;
+   wire                         subtractor_borrow_out;
+   wire                         subtractor_borrow_in;
+   // comparisons are done word by word with no incoming borrow; subtraction chains the stored borrow
+   assign subtractor_borrow_in = (fsm_state == FSM_STATE_COMPARE_COMPARE) ? 1'b0 : reg_subtractor_borrow;
+
+   subtractor_s6 dsp_subtractor
+     (
+      .a                (coeff_bram_out),
+      .b                (modulus_bram_out),
+      .s                (subtractor_out),
+      .c_in             (subtractor_borrow_in),
+      .c_out            (subtractor_borrow_out)
+      );
+
+
+   //
+   // Handy Wires
+   // (coeff_bram_addr_last is twice the top OPERAND_ADDR_WIDTH bits of modulus_width; the modulus uses one word fewer)
+   wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb                                              = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH];
+
+   wire [OPERAND_ADDR_WIDTH  :0] coeff_bram_addr_last           = {modulus_width_msb, 1'b0};
+   wire [OPERAND_ADDR_WIDTH  :0] coeff_bram_addr_next_or_zero   = (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_zero;
+   wire [OPERAND_ADDR_WIDTH  :0] coeff_bram_addr_next_or_last   = (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_last;
+   wire [OPERAND_ADDR_WIDTH  :0] coeff_bram_addr_prev_or_zero   = (coeff_bram_addr_reg > coeff_bram_addr_zero) ? coeff_bram_addr_reg - 1'b1 : coeff_bram_addr_zero;
+
+   wire [OPERAND_ADDR_WIDTH  :0] modulus_bram_addr_last_ext     = coeff_bram_addr_last - 1'b1;
+
+   wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_last         = modulus_bram_addr_last_ext[OPERAND_ADDR_WIDTH-1:0];
+   wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_next_or_zero = (modulus_bram_addr_reg < modulus_bram_addr_last) ? modulus_bram_addr_reg + 1'b1 : modulus_bram_addr_zero;
+   wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_prev_or_zero = (modulus_bram_addr_reg > modulus_bram_addr_zero) ? modulus_bram_addr_reg - 1'b1 : modulus_bram_addr_zero;
+
+
+   //
+   // Coeff BRAM Input Logic
+   // (combinational write-data mux; only the three states that write coeff drive a defined value)
+   reg [31: 0]                   coeff_bram_in_mux;
+
+   assign coeff_bram_in = coeff_bram_in_mux;
+
+   always @(*)
+     //
+     case (fsm_state)
+
+       FSM_STATE_INIT:
+         // initial coefficient is the multi-word value 1: word zero gets 1, every other word gets 0
+         if (coeff_bram_addr_reg == coeff_bram_addr_zero) coeff_bram_in_mux = 32'h00000001;
+         else coeff_bram_in_mux = 32'h00000000;
+
+       FSM_STATE_SHIFT_WRITE:
+         // shift the word left by one bit, pulling in the bit shifted out of the previous word
+         coeff_bram_in_mux = {coeff_bram_out[30:0], reg_shift_carry};
+
+       FSM_STATE_SUBTRACT_WRITE:
+         // the top coeff word is written as zero; all lower words take the subtractor result
+         if (coeff_bram_addr_reg == coeff_bram_addr_last) coeff_bram_in_mux = 32'h00000000;
+         else coeff_bram_in_mux = subtractor_out;
+
+       default:
+         // don't-care in states that do not write the coeff buffer
+         coeff_bram_in_mux = {32{1'bX}};
+
+     endcase
+
+
+   //
+   // NN BRAM Input Logic
+   // (combinational write-data mux; NN is only written during INIT)
+   reg [31: 0]                   nn_bram_in_mux;
+
+   assign nn_bram_in = nn_bram_in_mux;
+
+   always @(*)
+     //
+     case (fsm_state)
+
+       FSM_STATE_INIT:
+         // NN gets the modulus words as read, with a zero written at the extra top address
+         if (coeff_bram_addr_reg == coeff_bram_addr_last) nn_bram_in_mux = {32{1'b0}};
+         else nn_bram_in_mux = modulus_bram_out;
+
+         default:
+           // don't-care outside INIT
+           nn_bram_in_mux       = {32{1'bX}};
+
+      endcase
+
+
+   //
+   // Comparison Functions
+   // (per-word verdicts; compare_done means the current word already decides coeff >= modulus or coeff < modulus)
+   reg                           compare_greater_or_equal;
+   reg                           compare_less_than;
+
+   wire                          compare_done = compare_greater_or_equal | compare_less_than;
+
+   always @(*)
+     // top coeff word: bit 0 alone decides (presumably because the modulus has no word at this index - TODO confirm)
+     if (coeff_bram_addr_reg == coeff_bram_addr_last) compare_greater_or_equal = coeff_bram_out[0];
+     // word zero: no borrow means greater or equal, so equality counts here
+     else if (coeff_bram_addr_reg == coeff_bram_addr_zero) compare_greater_or_equal = !subtractor_borrow_out;
+     // middle words: only a strictly greater word decides; equal words leave both flags low
+     else compare_greater_or_equal = !subtractor_borrow_out && subtractor_out_nonzero;
+
+   always @(*)
+     // the top coeff word can never report "less than"
+     if (coeff_bram_addr_reg == coeff_bram_addr_last) compare_less_than = 1'b0;
+     // any other word is smaller exactly when the subtraction borrows
+     else compare_less_than = subtractor_borrow_out;
+
+
+
+   //
+   // Main Logic
+   // (datapath registers per state; IDLE and COMPARE_READ have no entry, so all registers hold there)
+   always @(posedge clk)
+     //
+     case (fsm_state)
+
+       FSM_STATE_INIT: begin
+          // write enable is set while the address is below the last word; the address only advances once it is set
+          coeff_bram_wr_reg <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0;
+          coeff_bram_addr_reg <= coeff_bram_wr_reg ? coeff_bram_addr_next_or_zero : coeff_bram_addr_zero;
+          // the NN buffer is driven in lockstep, using the same conditions as the coeff buffer
+          nn_bram_wr_reg <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0;
+          nn_bram_addr_reg <= coeff_bram_wr_reg ? coeff_bram_addr_next_or_zero : coeff_bram_addr_zero;
+          // while the write enable is still low: raise modinv_ena and latch the modulus word currently read
+          if (!coeff_bram_wr_reg) begin
+             //
+             modinv_ena_reg <= 1'b1;
+             modinv_n0_reg <= modulus_bram_out;
+             //
+          end
+          // from address zero the modulus pointer only moves while the write enable is low ...
+          if (modulus_bram_addr_reg == modulus_bram_addr_zero) begin
+             //
+             if (!coeff_bram_wr_reg)
+               //
+               modulus_bram_addr_reg <= modulus_bram_addr_next_or_zero;
+             //
+          end else begin
+             // ... from any other address it always advances, wrapping to zero after modulus_bram_addr_last
+             modulus_bram_addr_reg <= modulus_bram_addr_next_or_zero;
+             //
+          end
+          //
+       end
+
+       FSM_STATE_SHIFT_READ: begin
+          // arm the write enable so it is high during the following SHIFT_WRITE cycle
+          coeff_bram_wr_reg <= 1'b1;
+          // word zero shifts in a zero bit
+          if (coeff_bram_addr_reg == coeff_bram_addr_zero)
+            //
+            reg_shift_carry <= 1'b0;
+          //
+       end
+
+       FSM_STATE_SHIFT_WRITE: begin
+          // step to the next word, staying on the last one so COMPARE starts from the top
+          coeff_bram_wr_reg <= 1'b0;
+          coeff_bram_addr_reg <= coeff_bram_addr_next_or_last;
+          // remember the bit shifted out of this word for the next word
+          reg_shift_carry <= coeff_bram_out[31];
+          //
+       end
+
+       FSM_STATE_COMPARE_COMPARE: begin
+          // verdict reached: rewind the pointer to zero; otherwise step down one word
+          coeff_bram_addr_reg <= compare_done ? coeff_bram_addr_zero : coeff_bram_addr_prev_or_zero;
+          // when leaving the top coeff word, the modulus pointer is loaded with its own last address
+          modulus_bram_addr_reg <= compare_done ? modulus_bram_addr_zero : ((coeff_bram_addr_reg == coeff_bram_addr_last) ? modulus_bram_addr_last : modulus_bram_addr_prev_or_zero);
+          //
+       end
+
+       FSM_STATE_SUBTRACT_READ: begin
+          // arm the write enable so it is high during the following SUBTRACT_WRITE cycle
+          coeff_bram_wr_reg <= 1'b1;
+          // word zero starts the chain with no borrow
+          if (coeff_bram_addr_reg == coeff_bram_addr_zero)
+            //
+            reg_subtractor_borrow <= 1'b0;
+          //
+       end
+
+       FSM_STATE_SUBTRACT_WRITE: begin
+          // coeff pointer advances and wraps to zero after the last word
+          coeff_bram_wr_reg <= 1'b0;
+          coeff_bram_addr_reg <= coeff_bram_addr_next_or_zero;
+          // modulus pointer follows, and is forced to zero on the final coeff word
+          modulus_bram_addr_reg <= (coeff_bram_addr_reg == coeff_bram_addr_last) ? modulus_bram_addr_zero : modulus_bram_addr_next_or_zero;
+          // carry this word's borrow into the next word
+          reg_subtractor_borrow <= subtractor_borrow_out;
+          //
+       end
+
+       FSM_STATE_ROUND: begin
+          // round_count_next wraps to zero after round_count_last, leaving the counter ready for the next run
+          round_count <= round_count_next;
+          //
+       end
+
+       FSM_STATE_FINAL: begin
+          // drop the modinv request once it reports ready
+          if (modinv_rdy) modinv_ena_reg <= 1'b0;
+          //
+       end
+
+     endcase
+
+
+   //
+   // FSM Transition Logic
+   // (next-state selection only; datapath updates for each state are in Main Logic above)
+   always @(posedge clk)
+     // a run starts only on an ena rising edge while both rdy and modinv_rdy are low
+     case (fsm_state)
+
+       FSM_STATE_IDLE:          fsm_state <= (!rdy_reg && !modinv_rdy && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE;
+       // each *_READ state lasts exactly one clock
+       FSM_STATE_SHIFT_READ:    fsm_state <= FSM_STATE_SHIFT_WRITE;
+       FSM_STATE_COMPARE_READ:  fsm_state <= FSM_STATE_COMPARE_COMPARE;
+       FSM_STATE_SUBTRACT_READ: fsm_state <= FSM_STATE_SUBTRACT_WRITE;
+       // word loops repeat until the coeff pointer has reached its last address
+       FSM_STATE_INIT:          fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_INIT          : FSM_STATE_SHIFT_READ;
+       FSM_STATE_SHIFT_WRITE:   fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SHIFT_READ    : FSM_STATE_COMPARE_READ;
+       FSM_STATE_SUBTRACT_WRITE: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND;
+       // another round while the counter is below its last value, otherwise finish
+       FSM_STATE_ROUND:         fsm_state <= (round_count < round_count_last) ? FSM_STATE_SHIFT_READ : FSM_STATE_FINAL;
+       // greater-or-equal subtracts, less-than skips straight to ROUND, undecided reads the next lower word
+       FSM_STATE_COMPARE_COMPARE: fsm_state <= compare_done ? (compare_greater_or_equal ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND) : FSM_STATE_COMPARE_READ;
+       // stay in FINAL until the modinv side reports ready
+       FSM_STATE_FINAL:         fsm_state <= modinv_rdy ? FSM_STATE_IDLE : FSM_STATE_FINAL;
+       // any unused state code falls back to IDLE
+       default:                 fsm_state <= FSM_STATE_IDLE;
+
+     endcase
+
+
+endmodule
diff --git a/src/rtl/modexps6_montgomery_multiplier.v b/src/rtl/modexps6_montgomery_multiplier.v
index f22f93d..14f32f8 100644
--- a/src/rtl/modexps6_montgomery_multiplier.v
+++ b/src/rtl/modexps6_montgomery_multiplier.v
@@ -1,392 +1,407 @@
-`timescale 1ns / 1ps
-
-module modexps6_montgomery_multiplier
-	(
-		clk,
-		ena, rdy,
-		operand_width,
-		x_bram_addr, x_bram_out,
-		y_bram_addr, y_bram_out,
-		n_bram_addr, n_bram_out,
-		z_bram_addr, z_bram_wr, z_bram_in, z_bram_out,
-		n0_modinv
-	);
-	
-		//
-		// Parameters
-		//
-	parameter OPERAND_NUM_BITS		= 11;		// 1024 -> 11 bits
-	parameter OPERAND_ADDR_WIDTH	=  5;		// 1024 / 32 = 32 -> 5 bits		
-	
-	
-		//
-		// Locals
-		//
-	localparam	[OPERAND_ADDR_WIDTH:0]	round_count_zero	= {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
-	localparam	[OPERAND_ADDR_WIDTH:0]	bram_addr_zero		= {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};	
-	
-	
-		//
-		// Ports
-		//
-	input		wire										clk;
-	
-	input		wire										ena;
-	output	wire										rdy;
-	
-	input		wire	[  OPERAND_NUM_BITS-1:0]	operand_width;
-	
-	output	wire	[OPERAND_ADDR_WIDTH  :0]	x_bram_addr;
-	input		wire	[                  31:0]	x_bram_out;
-	
-	output	wire	[OPERAND_ADDR_WIDTH  :0]	y_bram_addr;
-	input		wire	[                  31:0]	y_bram_out;
-	
-	output	wire	[OPERAND_ADDR_WIDTH  :0]	n_bram_addr;
-	input		wire	[                  31:0]	n_bram_out;
-	
-	output	wire	[OPERAND_ADDR_WIDTH  :0]	z_bram_addr;
-	output	wire										z_bram_wr;
-	output	wire	[                  31:0]	z_bram_in;
-	input		wire	[                  31:0]	z_bram_out;
-	
-	input		wire	[                  31:0]	n0_modinv;
-	
-	
-		//
-		// FSM
-		//
-	localparam FSM_STATE_IDLE					= 6'd0;
-	
-	localparam FSM_STATE_INIT					= 6'd10;
-	
-	localparam FSM_STATE_MUL_XY_CALC			= 6'd21;
-	localparam FSM_STATE_MUL_XY_PIPELINE	= 6'd22;
-	localparam FSM_STATE_MUL_XY_REGISTER	= 6'd23;
-	localparam FSM_STATE_MUL_XY_WRITE		= 6'd24;
-	
-	localparam FSM_STATE_MAGIC_CALC			= 6'd31;
-	localparam FSM_STATE_MAGIC_PIPELINE		= 6'd32;
-	localparam FSM_STATE_MAGIC_REGISTER		= 6'd33;
-	
-	localparam FSM_STATE_MUL_MN_CALC			= 6'd41;
-	localparam FSM_STATE_MUL_MN_PIPELINE	= 6'd42;
-	localparam FSM_STATE_MUL_MN_REGISTER	= 6'd43;
-	localparam FSM_STATE_MUL_MN_WRITE		= 6'd44;
-	
-	localparam FSM_STATE_SHIFT					= 6'd50;
-	
-	localparam FSM_STATE_ROUND					= 6'd55;
-	
-	localparam FSM_STATE_FINAL					= 6'd60;
-	
-	reg	[ 5: 0]	fsm_state = FSM_STATE_IDLE;
-	
-	
-		//
-		// Trigger
-		//
-	reg ena_dly = 1'b0;
-	always @(posedge clk) ena_dly <= ena;
-	wire ena_trig = (ena == 1'b1) && (ena_dly == 1'b0);
-
-	
-		//
-		// Ready Register
-		//
-	reg rdy_reg = 1'b0;
-	assign rdy = rdy_reg;
-	
-	
-		//
-		// Enable / Ready Logic
-		//
-	always @(posedge clk)
-		//
-		if (fsm_state == FSM_STATE_FINAL) begin
-			//
-			rdy_reg <= 1'b1;
-			//
-		end else if (fsm_state == FSM_STATE_IDLE) begin
-			//
-			if (rdy_reg && !ena) rdy_reg <= 1'b0;
-			//
-		end
-		
-		
-		//
-		// X, Y, N BRAM Interface
-		//
-	reg	[OPERAND_ADDR_WIDTH:0]	x_bram_addr_reg = bram_addr_zero;
-	reg	[OPERAND_ADDR_WIDTH:0]	y_bram_addr_reg = bram_addr_zero;
-	reg	[OPERAND_ADDR_WIDTH:0]	n_bram_addr_reg = bram_addr_zero;
-	
-	assign x_bram_addr = x_bram_addr_reg;
-	assign y_bram_addr = y_bram_addr_reg;
-	assign n_bram_addr = n_bram_addr_reg;
-	
-	
-		//
-		// Z BRAM Interface
-		//
-	reg	[OPERAND_ADDR_WIDTH:0]	z_bram_addr_reg	= bram_addr_zero;
-	reg									z_bram_wr_reg		= 1'b0;
-	reg	[                 31:0]	z_bram_in_mux;
-	
-	assign z_bram_addr = z_bram_addr_reg;
-	assign z_bram_wr = z_bram_wr_reg;
-	assign z_bram_in = z_bram_in_mux;
-	
-			
-		//
-		// Handy Wires
-		//
-	wire	[OPERAND_ADDR_WIDTH-1:0]	operand_width_msb = operand_width[OPERAND_NUM_BITS-1:OPERAND_NUM_BITS-OPERAND_ADDR_WIDTH];
-	
-	wire	[OPERAND_ADDR_WIDTH  :0]	bram_addr_last				= {operand_width_msb, 1'b1};	// +1
-		
-
-		//
-		// Hardware Multiplier (X * Y)
-		//
-	reg	[31: 0]	multiplier_xy_carry_in;
-	wire	[31: 0]	multiplier_xy_out;
-	wire	[31: 0]	multiplier_xy_carry_out;
-	
-	modexps6_adder64_carry32 dsp_multiplier_xy
-	(
-		.clk		(clk),
-		.t			(/*(z_bram_addr_reg < bram_addr_last) ? */z_bram_out/* : {32{1'b0}}*/),
-		.x			(/*(z_bram_addr_reg < bram_addr_last) ? */x_bram_out/* : {32{1'b0}}*/),
-		.y			(/*(z_bram_addr_reg < bram_addr_last) ? */y_bram_out/* : {32{1'b0}}*/),
-		.s			(multiplier_xy_out),
-		.c_in		(multiplier_xy_carry_in),
-		.c_out	(multiplier_xy_carry_out)
-	);
-	
-	
-		//
-		// Hardware Multiplier (Magic)
-		//
-	wire	[63: 0]	multiplier_magic_out;
-	reg	[31: 0]	magic_value_reg;
-	
-	multiplier_s6 dsp_multiplier_magic
-	(
-		.clk	(clk),
-		.a		(z_bram_out),
-		.b		(n0_modinv),
-		.p		(multiplier_magic_out)
-	);
-	
-	
-		//
-		// Hardware Multiplier (M * N)
-		//
-	reg	[31: 0]	multiplier_mn_carry_in;
-	wire	[31: 0]	multiplier_mn_out;
-	wire	[31: 0]	multiplier_mn_carry_out;
-	
-	modexps6_adder64_carry32 dsp_multiplier_mn
-	(
-		.clk		(clk),
-		.t			(z_bram_out),
-		.x			(magic_value_reg),
-		.y			(/*(z_bram_addr_reg < bram_addr_last) ? */n_bram_out/* : {32{1'b0}}*/),
-		.s			(multiplier_mn_out),
-		.c_in		(multiplier_mn_carry_in),
-		.c_out	(multiplier_mn_carry_out)
-	);
-	
-	
-		//
-		// Z BRAM Input Selector
-		//
-	always @(*)
-		//
-		case (fsm_state)
-		
-			FSM_STATE_INIT:
-				//
-				z_bram_in_mux	= {32{1'b0}};
-				
-			FSM_STATE_MUL_XY_WRITE:
-				//
-				if (z_bram_addr_reg < bram_addr_last)	z_bram_in_mux	= multiplier_xy_out;
-				else												z_bram_in_mux	= multiplier_xy_carry_in;
-				
-			FSM_STATE_MUL_MN_WRITE:
-				//
-				if (z_bram_addr_reg < bram_addr_last)	z_bram_in_mux	= multiplier_mn_out;
-				else												z_bram_in_mux	= multiplier_mn_carry_in + z_bram_out;
-				
-			FSM_STATE_SHIFT:
-				//
-				z_bram_in_mux	= z_bram_out;
-		
-			default:
-				//
-				z_bram_in_mux	= {32{1'bX}};
-				
-		endcase
-
-	
-		//
-		// Handy Functions
-		//
-	function	[OPERAND_ADDR_WIDTH:0]	bram_addr_next_or_zero;
-		input	[OPERAND_ADDR_WIDTH:0]	bram_addr;
-		begin
-			bram_addr_next_or_zero = (bram_addr < bram_addr_last) ? bram_addr + 1'b1 : bram_addr_zero;
-		end
-	endfunction
-	
-	function	[OPERAND_ADDR_WIDTH:0]	bram_addr_next_or_last;
-		input	[OPERAND_ADDR_WIDTH:0]	bram_addr;
-		begin
-			bram_addr_next_or_last = (bram_addr < bram_addr_last) ? bram_addr + 1'b1 : bram_addr_last;
-		end
-	endfunction
-	
-	function	[OPERAND_ADDR_WIDTH:0]	bram_addr_prev_or_zero;
-		input	[OPERAND_ADDR_WIDTH:0]	bram_addr;
-		begin
-			bram_addr_prev_or_zero = (bram_addr > bram_addr_zero) ? bram_addr - 1'b1 : bram_addr_zero;
-		end
-	endfunction
-	
-	
-		//
-		// Round Counter
-		//
-	reg	[OPERAND_ADDR_WIDTH:0]	round_count			= round_count_zero;
-	wire	[OPERAND_ADDR_WIDTH:0]	round_count_last	= {operand_width_msb, 1'b0};
-	wire	[OPERAND_ADDR_WIDTH:0]	round_count_next	= (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;
-	
-	
-		//
-		// Main Logic
-		//
-	always @(posedge clk)
-		//
-		case (fsm_state)
-
-			FSM_STATE_INIT: begin
-				//
-				z_bram_wr_reg		<= (z_bram_addr_reg < bram_addr_last) ? 1'b1 : 1'b0;
-				z_bram_addr_reg	<= z_bram_wr_reg ? bram_addr_next_or_zero(z_bram_addr_reg) : bram_addr_zero;
-				//
-			end
-			
-			FSM_STATE_MUL_XY_CALC: begin
-				//
-				if (z_bram_addr_reg == bram_addr_zero) begin
-					//
-					multiplier_xy_carry_in <= {32{1'b0}};
-					//
-				end
-				//
-			end
-			
-			FSM_STATE_MUL_XY_REGISTER: begin
-				//
-				z_bram_wr_reg <= 1'b1;
-				//
-			end
-			
-			FSM_STATE_MUL_XY_WRITE: begin
-				//
-				z_bram_wr_reg			<= 1'b0;
-				z_bram_addr_reg		<= bram_addr_next_or_zero(z_bram_addr_reg);
-				//
-				x_bram_addr_reg		<= bram_addr_next_or_zero(x_bram_addr_reg);
-				//
-				multiplier_xy_carry_in	<= multiplier_xy_carry_out;
-				//
-			end
-			
-			FSM_STATE_MUL_MN_CALC: begin
-				//
-				if (z_bram_addr_reg == bram_addr_zero) begin
-					//
-					multiplier_mn_carry_in <= {32{1'b0}};
-					//
-					magic_value_reg <= multiplier_magic_out[31:0];
-					//
-				end
-				//
-			end
-			
-			FSM_STATE_MUL_MN_REGISTER: begin
-				//
-				z_bram_wr_reg <= 1'b1;
-				//
-			end
-			
-			FSM_STATE_MUL_MN_WRITE: begin
-				//
-				z_bram_wr_reg			<= 1'b0;
-				z_bram_addr_reg		<= bram_addr_next_or_last(z_bram_addr_reg);
-				//
-				n_bram_addr_reg		<= bram_addr_next_or_zero(n_bram_addr_reg);
-				//
-				multiplier_mn_carry_in	<= multiplier_mn_carry_out;
-				//
-			end
-			
-			FSM_STATE_SHIFT: begin
-				//
-				if (z_bram_wr_reg == 1'b0)							z_bram_wr_reg <= 1'b1;
-				else if (z_bram_addr_reg == bram_addr_zero)	z_bram_wr_reg <= 1'b0;
-				
-				z_bram_addr_reg	<= bram_addr_prev_or_zero(z_bram_addr_reg);
-				//
-			end
-			
-			FSM_STATE_ROUND: begin
-				//
-				y_bram_addr_reg	<= (round_count < round_count_last) ? bram_addr_next_or_zero(y_bram_addr_reg) : bram_addr_zero;
-				//
-				round_count <= round_count_next;
-				//
-			end
-
-		endcase
-	
-	
-		//
-		// FSM Transition Logic
-		//
-	always @(posedge clk)
-		//
-		case (fsm_state)
-			//
-			FSM_STATE_IDLE:					fsm_state <= (!rdy_reg && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE;
-			
-			FSM_STATE_INIT:					fsm_state <= (z_bram_addr < bram_addr_last  ) ? FSM_STATE_INIT        : FSM_STATE_MUL_XY_CALC;
-			FSM_STATE_ROUND:					fsm_state <= (round_count < round_count_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_FINAL;
-
-			FSM_STATE_MUL_XY_CALC:			fsm_state <= FSM_STATE_MUL_XY_PIPELINE;
-			FSM_STATE_MAGIC_CALC:			fsm_state <= FSM_STATE_MAGIC_PIPELINE;
-			FSM_STATE_MUL_MN_CALC:			fsm_state <= FSM_STATE_MUL_MN_PIPELINE;
-			
-			FSM_STATE_MUL_XY_PIPELINE:		fsm_state <= FSM_STATE_MUL_XY_REGISTER;
-			FSM_STATE_MAGIC_PIPELINE:		fsm_state <= FSM_STATE_MAGIC_REGISTER;
-			FSM_STATE_MUL_MN_PIPELINE:		fsm_state <= FSM_STATE_MUL_MN_REGISTER;
-			
-			FSM_STATE_MUL_XY_REGISTER:		fsm_state <= FSM_STATE_MUL_XY_WRITE;
-			FSM_STATE_MAGIC_REGISTER:		fsm_state <= FSM_STATE_MUL_MN_CALC;
-			FSM_STATE_MUL_MN_REGISTER:		fsm_state <= FSM_STATE_MUL_MN_WRITE;
-			
-			FSM_STATE_MUL_XY_WRITE:			fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_MAGIC_CALC;
-			FSM_STATE_MUL_MN_WRITE:			fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_MN_CALC : FSM_STATE_SHIFT;
-			FSM_STATE_SHIFT:					fsm_state <= (z_bram_addr > bram_addr_zero) ? FSM_STATE_SHIFT       : FSM_STATE_ROUND;
-			
-			FSM_STATE_FINAL:					fsm_state <= FSM_STATE_IDLE;
-			
-			default:								fsm_state <= FSM_STATE_IDLE;
-
-		endcase
-
-
-endmodule
+//======================================================================
+//
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexps6_montgomery_multiplier  // 32-bit word-serial multiplier FSM working on external X/Y/N/Z BRAMs
+  #(parameter OPERAND_NUM_BITS          = 11,           // 1024 -> 11 bits
+    parameter OPERAND_ADDR_WIDTH        =  5)           // 1024 / 32 = 32 -> 5 bits
+   (
+    input wire                          clk,
+    // Handshake: a rising edge on ena while the FSM is idle and rdy is low starts a run;
+    input wire                          ena,            // rdy is set when the FSM passes FSM_STATE_FINAL
+    output wire                         rdy,            // and cleared in idle once ena is low again
+    // Only the top OPERAND_ADDR_WIDTH bits are used (last BRAM address, round count).
+    input wire [OPERAND_NUM_BITS-1:0]   operand_width,
+    // X operand: read address out, 32-bit word in (x input of the X*Y multiplier).
+    output wire [OPERAND_ADDR_WIDTH :0] x_bram_addr,
+    input wire [31:0]                   x_bram_out,
+    // Y operand: read address out, 32-bit word in (y input of the X*Y multiplier).
+    output wire [OPERAND_ADDR_WIDTH :0] y_bram_addr,
+    input wire [31:0]                   y_bram_out,
+    // N operand: read address out, 32-bit word in (y input of the M*N multiplier).
+    output wire [OPERAND_ADDR_WIDTH :0] n_bram_addr,
+    input wire [31:0]                   n_bram_out,
+    // Z buffer: read/write accumulator; one address, write strobe, write data and read data.
+    output wire [OPERAND_ADDR_WIDTH :0] z_bram_addr,
+    output wire                         z_bram_wr,
+    output wire [31:0]                  z_bram_in,
+    input wire [31:0]                   z_bram_out,
+    // Multiplied by z_bram_out; the low 32 bits of the product become magic_value_reg.
+    input wire [31:0]                   n0_modinv
+    );
+
+
+   //
+   // Locals
+   //
+   localparam   [OPERAND_ADDR_WIDTH:0]  round_count_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+   localparam   [OPERAND_ADDR_WIDTH:0]  bram_addr_zero   = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+
+
+   //
+   // FSM
+   //
+   localparam FSM_STATE_IDLE            = 6'd0;
+
+   localparam FSM_STATE_INIT            = 6'd10;
+
+   localparam FSM_STATE_MUL_XY_CALC     = 6'd21;
+   localparam FSM_STATE_MUL_XY_PIPELINE = 6'd22;
+   localparam FSM_STATE_MUL_XY_REGISTER = 6'd23;
+   localparam FSM_STATE_MUL_XY_WRITE    = 6'd24;
+
+   localparam FSM_STATE_MAGIC_CALC      = 6'd31;
+   localparam FSM_STATE_MAGIC_PIPELINE  = 6'd32;
+   localparam FSM_STATE_MAGIC_REGISTER  = 6'd33;
+
+   localparam FSM_STATE_MUL_MN_CALC     = 6'd41;
+   localparam FSM_STATE_MUL_MN_PIPELINE = 6'd42;
+   localparam FSM_STATE_MUL_MN_REGISTER = 6'd43;
+   localparam FSM_STATE_MUL_MN_WRITE    = 6'd44;
+
+   localparam FSM_STATE_SHIFT           = 6'd50;
+
+   localparam FSM_STATE_ROUND           = 6'd55;
+
+   localparam FSM_STATE_FINAL           = 6'd60;
+
+   reg [5: 0]                           fsm_state = FSM_STATE_IDLE;
+
+
+   //
+   // Trigger
+   //
+   reg                                  ena_dly = 1'b0;
+   always @(posedge clk) ena_dly <= ena;
+   wire                                 ena_trig = (ena == 1'b1) && (ena_dly == 1'b0);
+
+
+   //
+   // Ready Register
+   //
+   reg                                  rdy_reg = 1'b0;
+   assign rdy = rdy_reg;
+
+
+   //
+   // Enable / Ready Logic
+   //
+   always @(posedge clk)
+     // rdy handshake register: FINAL takes priority over IDLE handling.
+     if (fsm_state == FSM_STATE_FINAL) begin
+        // Run finished: raise rdy (visible from the cycle after FSM_STATE_FINAL).
+        rdy_reg <= 1'b1;
+        //
+     end else if (fsm_state == FSM_STATE_IDLE) begin
+        // Hold rdy until ena is deasserted; a new run cannot start while rdy is high.
+        if (rdy_reg && !ena) rdy_reg <= 1'b0;
+        //
+     end
+
+
+   //
+   // X, Y, N BRAM Interface
+   //
+   reg [OPERAND_ADDR_WIDTH:0]   x_bram_addr_reg = bram_addr_zero;
+   reg [OPERAND_ADDR_WIDTH:0]   y_bram_addr_reg = bram_addr_zero;
+   reg [OPERAND_ADDR_WIDTH:0]   n_bram_addr_reg = bram_addr_zero;
+
+   assign x_bram_addr = x_bram_addr_reg;
+   assign y_bram_addr = y_bram_addr_reg;
+   assign n_bram_addr = n_bram_addr_reg;
+
+
+   //
+   // Z BRAM Interface
+   //
+   reg [OPERAND_ADDR_WIDTH:0]   z_bram_addr_reg = bram_addr_zero;
+   reg                          z_bram_wr_reg           = 1'b0;
+   reg [                31:0]   z_bram_in_mux;
+
+   assign z_bram_addr = z_bram_addr_reg;
+   assign z_bram_wr = z_bram_wr_reg;
+   assign z_bram_in = z_bram_in_mux;
+
+
+   //
+   // Handy Wires
+   //
+   wire [OPERAND_ADDR_WIDTH-1:0] operand_width_msb = operand_width[OPERAND_NUM_BITS-1:OPERAND_NUM_BITS-OPERAND_ADDR_WIDTH];
+   // Highest word address used: 2 * operand_width_msb + 1 (33 for operand_width = 1024 with the default parameters).
+   wire [OPERAND_ADDR_WIDTH  :0] bram_addr_last = {operand_width_msb, 1'b1};    // {msb, 1'b0} + 1
+
+
+   //
+   // Hardware Multiplier (X * Y)
+   //
+   reg [31: 0]                   multiplier_xy_carry_in;    // carry latched from the previous word (main logic)
+   wire [31: 0]                  multiplier_xy_out;
+   wire [31: 0]                  multiplier_xy_carry_out;
+   // Per the old adder source in this patch, s = low word of (x * y + c_in + t); c_out presumably the high word -- confirm.
+   modexps6_adder64_carry32 dsp_multiplier_xy
+     (
+      .clk      (clk),
+      .t        (/*(z_bram_addr_reg < bram_addr_last) ? */z_bram_out/* : {32{1'b0}}*/),
+      .x        (/*(z_bram_addr_reg < bram_addr_last) ? */x_bram_out/* : {32{1'b0}}*/),
+      .y        (/*(z_bram_addr_reg < bram_addr_last) ? */y_bram_out/* : {32{1'b0}}*/),
+      .s        (multiplier_xy_out),       // written to Z in FSM_STATE_MUL_XY_WRITE below the last address
+      .c_in     (multiplier_xy_carry_in),
+      .c_out    (multiplier_xy_carry_out)  // latched into multiplier_xy_carry_in in FSM_STATE_MUL_XY_WRITE
+      );
+
+
+   //
+   // Hardware Multiplier (Magic)
+   //
+   wire [63: 0]                  multiplier_magic_out;
+   reg [31: 0]                   magic_value_reg;           // low 32 bits of the product, latched in FSM_STATE_MUL_MN_CALC
+   // Clocked multiplier core; the MAGIC_CALC/PIPELINE/REGISTER states presumably cover its latency -- confirm against multiplier_s6.
+   multiplier_s6 dsp_multiplier_magic
+     (
+      .clk      (clk),
+      .a        (z_bram_out),              // presumably Z word 0 (Z address wraps to zero after the X*Y pass) -- confirm
+      .b        (n0_modinv),
+      .p        (multiplier_magic_out)
+      );
+
+
+   //
+   // Hardware Multiplier (M * N)
+   //
+   reg [31: 0]                   multiplier_mn_carry_in;    // carry latched from the previous word (main logic)
+   wire [31: 0]                  multiplier_mn_out;
+   wire [31: 0]                  multiplier_mn_carry_out;
+   // Same adder block as the X*Y path; here it accumulates magic_value_reg * n_bram_out onto the Z word.
+   modexps6_adder64_carry32 dsp_multiplier_mn
+     (
+      .clk      (clk),
+      .t        (z_bram_out),
+      .x        (magic_value_reg),
+      .y        (/*(z_bram_addr_reg < bram_addr_last) ? */n_bram_out/* : {32{1'b0}}*/),
+      .s        (multiplier_mn_out),       // written to Z in FSM_STATE_MUL_MN_WRITE below the last address
+      .c_in     (multiplier_mn_carry_in),
+      .c_out    (multiplier_mn_carry_out)  // latched into multiplier_mn_carry_in in FSM_STATE_MUL_MN_WRITE
+      );
+
+
+   //
+   // Z BRAM Input Selector
+   //
+   always @(*)
+     // Combinational mux: selects the word driven onto z_bram_in for the current FSM state.
+     case (fsm_state)
+
+       FSM_STATE_INIT:
+         // Clearing pass: every write stores zero.
+         z_bram_in_mux  = {32{1'b0}};
+
+       FSM_STATE_MUL_XY_WRITE:
+         // Below the last address store the adder sum; at the last address store the pending carry.
+         if (z_bram_addr_reg < bram_addr_last)  z_bram_in_mux   = multiplier_xy_out;
+         else                                   z_bram_in_mux   = multiplier_xy_carry_in;
+
+       FSM_STATE_MUL_MN_WRITE:
+         // Same split, but at the last address the pending carry is added to the word already stored there.
+         if (z_bram_addr_reg < bram_addr_last)  z_bram_in_mux   = multiplier_mn_out;
+         else                                   z_bram_in_mux   = multiplier_mn_carry_in + z_bram_out;
+
+       FSM_STATE_SHIFT:
+         // Feed the read data straight back to the write port; the address stepping is done in the main logic.
+         z_bram_in_mux  = z_bram_out;
+
+       default:
+         // No write is requested in the remaining states: don't-care.
+         z_bram_in_mux  = {32{1'bX}};
+
+     endcase
+
+
+   //
+   // Handy Functions
+   //
+   function [OPERAND_ADDR_WIDTH:0] bram_addr_next_or_zero;  // following word address, wrapping to zero once bram_addr_last is reached
+      input [OPERAND_ADDR_WIDTH:0] bram_addr;
+      begin
+         bram_addr_next_or_zero = (bram_addr >= bram_addr_last) ? bram_addr_zero : bram_addr + 1'b1;
+      end
+   endfunction
+
+   function [OPERAND_ADDR_WIDTH:0] bram_addr_next_or_last;  // following word address, saturating at bram_addr_last
+      input [OPERAND_ADDR_WIDTH:0] bram_addr;
+      begin
+         bram_addr_next_or_last = (bram_addr >= bram_addr_last) ? bram_addr_last : bram_addr + 1'b1;
+      end
+   endfunction
+
+   function [OPERAND_ADDR_WIDTH:0] bram_addr_prev_or_zero;  // preceding word address, saturating at zero
+      input [OPERAND_ADDR_WIDTH:0] bram_addr;
+      begin
+         bram_addr_prev_or_zero = (bram_addr <= bram_addr_zero) ? bram_addr_zero : bram_addr - 1'b1;
+      end
+   endfunction
+
+
+   //
+   // Round Counter
+   //
+   reg  [OPERAND_ADDR_WIDTH:0]  round_count                     = round_count_zero;
+   wire [OPERAND_ADDR_WIDTH:0]  round_count_last        = {operand_width_msb, 1'b0};
+   wire [OPERAND_ADDR_WIDTH:0]  round_count_next        = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;
+
+
+   //
+   // Main Logic
+   //
+   always @(posedge clk)
+     // Datapath registers per FSM state: BRAM addresses, Z write strobe, carries, magic value, round counter.
+     case (fsm_state)
+
+       FSM_STATE_INIT: begin
+          // Clearing pass over Z: strobe stays high while below the last address; address only advances once the strobe is set.
+          z_bram_wr_reg         <= (z_bram_addr_reg < bram_addr_last) ? 1'b1 : 1'b0;
+          z_bram_addr_reg       <= z_bram_wr_reg ? bram_addr_next_or_zero(z_bram_addr_reg) : bram_addr_zero;
+          //
+       end
+
+       FSM_STATE_MUL_XY_CALC: begin
+          // Z address zero marks the first word of an X*Y pass: start without carry.
+          if (z_bram_addr_reg == bram_addr_zero) begin
+             //
+             multiplier_xy_carry_in <= {32{1'b0}};
+             //
+          end
+          //
+       end
+
+       FSM_STATE_MUL_XY_REGISTER: begin
+          // Raise the strobe so that it is active during the following FSM_STATE_MUL_XY_WRITE cycle.
+          z_bram_wr_reg <= 1'b1;
+          //
+       end
+
+       FSM_STATE_MUL_XY_WRITE: begin
+          // Word done: drop the strobe, step Z and X (both wrap to zero after the last address), keep the carry.
+          z_bram_wr_reg         <= 1'b0;
+          z_bram_addr_reg       <= bram_addr_next_or_zero(z_bram_addr_reg);
+          //
+          x_bram_addr_reg       <= bram_addr_next_or_zero(x_bram_addr_reg);
+          //
+          multiplier_xy_carry_in <= multiplier_xy_carry_out;
+          //
+       end
+
+       FSM_STATE_MUL_MN_CALC: begin
+          // First word of an M*N pass: start without carry and latch the low half of the magic product.
+          if (z_bram_addr_reg == bram_addr_zero) begin
+             //
+             multiplier_mn_carry_in <= {32{1'b0}};
+             //
+             magic_value_reg <= multiplier_magic_out[31:0];
+             //
+          end
+          //
+       end
+
+       FSM_STATE_MUL_MN_REGISTER: begin
+          // Raise the strobe so that it is active during the following FSM_STATE_MUL_MN_WRITE cycle.
+          z_bram_wr_reg <= 1'b1;
+          //
+       end
+
+       FSM_STATE_MUL_MN_WRITE: begin
+          // Word done: Z saturates at the last address (FSM_STATE_SHIFT walks down from there); N wraps to zero.
+          z_bram_wr_reg         <= 1'b0;
+          z_bram_addr_reg       <= bram_addr_next_or_last(z_bram_addr_reg);
+          //
+          n_bram_addr_reg       <= bram_addr_next_or_zero(n_bram_addr_reg);
+          //
+          multiplier_mn_carry_in <= multiplier_mn_carry_out;
+          //
+       end
+
+       FSM_STATE_SHIFT: begin
+          // Strobe goes high after the first cycle and is dropped once address zero is reached; address steps down to zero.
+          if (z_bram_wr_reg == 1'b0)                  z_bram_wr_reg <= 1'b1;
+          else if (z_bram_addr_reg == bram_addr_zero) z_bram_wr_reg <= 1'b0;
+
+          z_bram_addr_reg       <= bram_addr_prev_or_zero(z_bram_addr_reg);
+          //
+       end
+
+       FSM_STATE_ROUND: begin
+          // Next Y word for the next round, or back to zero after the final round; round counter wraps likewise.
+          y_bram_addr_reg       <= (round_count < round_count_last) ? bram_addr_next_or_zero(y_bram_addr_reg) : bram_addr_zero;
+          //
+          round_count <= round_count_next;
+          //
+       end
+
+     endcase
+
+
+   //
+   // FSM Transition Logic
+   //
+   always @(posedge clk)
+     // Next-state register. Order of a run: INIT, then per round XY pass -> MAGIC -> MN pass -> SHIFT -> ROUND, finally FINAL.
+     case (fsm_state)
+       // Start only on a rising edge of ena and only after the previous rdy has been cleared.
+       FSM_STATE_IDLE:            fsm_state <= (!rdy_reg && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE;
+       // INIT loops until Z reaches the last address; ROUND either starts another round or finishes.
+       FSM_STATE_INIT:            fsm_state <= (z_bram_addr < bram_addr_last  ) ? FSM_STATE_INIT        : FSM_STATE_MUL_XY_CALC;
+       FSM_STATE_ROUND:           fsm_state <= (round_count < round_count_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_FINAL;
+       // Each multiplier operation takes three unconditional steps: CALC -> PIPELINE -> REGISTER.
+       FSM_STATE_MUL_XY_CALC:     fsm_state <= FSM_STATE_MUL_XY_PIPELINE;
+       FSM_STATE_MAGIC_CALC:      fsm_state <= FSM_STATE_MAGIC_PIPELINE;
+       FSM_STATE_MUL_MN_CALC:     fsm_state <= FSM_STATE_MUL_MN_PIPELINE;
+
+       FSM_STATE_MUL_XY_PIPELINE: fsm_state <= FSM_STATE_MUL_XY_REGISTER;
+       FSM_STATE_MAGIC_PIPELINE:  fsm_state <= FSM_STATE_MAGIC_REGISTER;
+       FSM_STATE_MUL_MN_PIPELINE: fsm_state <= FSM_STATE_MUL_MN_REGISTER;
+       // The magic value needs no write-back, so its REGISTER step leads directly into the M*N pass.
+       FSM_STATE_MUL_XY_REGISTER: fsm_state <= FSM_STATE_MUL_XY_WRITE;
+       FSM_STATE_MAGIC_REGISTER:  fsm_state <= FSM_STATE_MUL_MN_CALC;
+       FSM_STATE_MUL_MN_REGISTER: fsm_state <= FSM_STATE_MUL_MN_WRITE;
+       // WRITE states repeat their pass until Z reaches the last address; SHIFT repeats until Z is back at zero.
+       FSM_STATE_MUL_XY_WRITE:    fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_MAGIC_CALC;
+       FSM_STATE_MUL_MN_WRITE:    fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_MN_CALC : FSM_STATE_SHIFT;
+       FSM_STATE_SHIFT:           fsm_state <= (z_bram_addr > bram_addr_zero) ? FSM_STATE_SHIFT       : FSM_STATE_ROUND;
+       // FINAL lasts one cycle (rdy is raised from it by the ready logic).
+       FSM_STATE_FINAL:           fsm_state <= FSM_STATE_IDLE;
+       // Any unlisted encoding falls back to idle.
+       default:                   fsm_state <= FSM_STATE_IDLE;
+
+     endcase
+
+
+endmodule
diff --git a/src/rtl/modexps6_top.v b/src/rtl/modexps6_top.v
index 29845f8..d3c65a5 100644
--- a/src/rtl/modexps6_top.v
+++ b/src/rtl/modexps6_top.v
@@ -1,696 +1,706 @@
-`timescale 1ns / 1ps
-
-module modexps6_top
-	(
-		clk,
-		
-		init, ready,
-		next, valid,
-		
-		modulus_width,
-		exponent_width,
-		
-		fast_public_mode,
-		
-		bus_cs, bus_we,
-		bus_addr, bus_data_wr, bus_data_rd
-	);
-	
-	
-		//
-		// Parameters
-		//
-	parameter	MAX_MODULUS_WIDTH		= 1024;
-	
-	
-		//
-		// modexps6_clog2()
-		//
-	function	integer modexps6_clog2;
-		input	integer value;
-		integer ret;
-		begin
-			value = value - 1;
-			for (ret = 0; value > 0; ret = ret + 1)
-				value = value >> 1;
-			modexps6_clog2 = ret;
-		end
-	endfunction
-
-
-		//
-		// Locals
-		//
-	localparam	OPERAND_ADDR_WIDTH	= modexps6_clog2(MAX_MODULUS_WIDTH / 32);
-	localparam	MODULUS_NUM_BITS		= modexps6_clog2(MAX_MODULUS_WIDTH + 1);
-	localparam	ADDR_WIDTH_TOTAL		= OPERAND_ADDR_WIDTH + 2;
-	
-	localparam	[OPERAND_ADDR_WIDTH-1:0]	bram_user_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}};
-	localparam	[OPERAND_ADDR_WIDTH  :0]	bram_core_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
-	
-	localparam	[    MODULUS_NUM_BITS:0]	round_count_zero = {1'b0, {MODULUS_NUM_BITS{1'b0}}};
-		
-		
-		//
-		// Ports
-		//
-	input		wire									clk;
-	
-	input		wire									init;
-	output	wire									ready;
-	
-	input		wire									next;
-	output	wire									valid;
-	
-	input		wire	[MODULUS_NUM_BITS-1:0]	modulus_width;
-	input		wire	[MODULUS_NUM_BITS-1:0]	exponent_width;
-
-	input		wire									fast_public_mode;
-	
-	input		wire									bus_cs;
-	input		wire									bus_we;
-	input		wire	[ADDR_WIDTH_TOTAL-1:0]	bus_addr;
-	input		wire	[                31:0]	bus_data_wr;
-	output	wire	[                31:0]	bus_data_rd;
-	
-		
-		//
-		// User Memory
-		//
-	wire	[OPERAND_ADDR_WIDTH-1:0]	ro_modulus_bram_addr;
-	wire	[                  31:0]	ro_modulus_bram_out;
-	
-	reg	[OPERAND_ADDR_WIDTH-1:0]	ro_message_bram_addr		= bram_user_addr_zero;
-	wire	[                  31:0]	ro_message_bram_out;
-		
-	reg	[OPERAND_ADDR_WIDTH-1:0]	ro_exponent_bram_addr	= bram_user_addr_zero;
-	wire	[                  31:0]	ro_exponent_bram_out;
-	
-	reg	[OPERAND_ADDR_WIDTH-1:0]	rw_result_bram_addr 		= bram_user_addr_zero;
-	wire	[                  31:0]	rw_result_bram_out;
-	reg										rw_result_bram_wr			= 1'b0;
-	wire	[                  31:0]	rw_result_bram_in;
-	
-	modexps6_buffer_user #
-	(
-		.OPERAND_ADDR_WIDTH		(OPERAND_ADDR_WIDTH)
-	)
-	mem_user
-	(
-		.clk							(clk),
-		
-		.bus_cs						(bus_cs),
-		.bus_we						(bus_we),
-		.bus_addr					(bus_addr),
-		.bus_data_wr				(bus_data_wr),
-		.bus_data_rd				(bus_data_rd),
-		
-		.ro_modulus_bram_addr	(ro_modulus_bram_addr),
-		.ro_modulus_bram_out		(ro_modulus_bram_out),
-		
-		.ro_message_bram_addr	(ro_message_bram_addr),
-		.ro_message_bram_out		(ro_message_bram_out),
-		
-		.ro_exponent_bram_addr	(ro_exponent_bram_addr),
-		.ro_exponent_bram_out	(ro_exponent_bram_out),
-		
-		.rw_result_bram_addr		(rw_result_bram_addr),
-		.rw_result_bram_wr		(rw_result_bram_wr),
-		.rw_result_bram_in		(rw_result_bram_in)
-	);
-	
-	
-		//
-		// Core (Internal) Memory
-		//
-	wire	[OPERAND_ADDR_WIDTH:0]	rw_coeff_bram_addr;
-	wire									rw_coeff_bram_wr;
-	wire	[                31:0]	rw_coeff_bram_in;
-	wire	[                31:0]	rw_coeff_bram_out;
-	
-	reg	[OPERAND_ADDR_WIDTH:0]	rw_mm_bram_addr		= bram_core_addr_zero;
-	reg									rw_mm_bram_wr			= 1'b0;
-	reg	[                31:0]	rw_mm_bram_in;
-	wire	[                31:0]	rw_mm_bram_out;
-	
-	wire	[OPERAND_ADDR_WIDTH:0]	rw_nn_bram_addr;
-	wire									rw_nn_bram_wr;
-	wire	[                31:0]	rw_nn_bram_in;
-	
-	reg	[OPERAND_ADDR_WIDTH:0]	rw_y_bram_addr			= bram_core_addr_zero;
-	reg									rw_y_bram_wr			= 1'b0;
-	reg	[               31:0]	rw_y_bram_in;
-	wire	[               31:0]	rw_y_bram_out;
-	
-	wire	[OPERAND_ADDR_WIDTH:0]	rw_r_bram_addr;
-	wire									rw_r_bram_wr;
-	wire	[               31:0]	rw_r_bram_in;
-	wire	[               31:0]	rw_r_bram_out;
-
-	reg	[OPERAND_ADDR_WIDTH:0]	rw_t_bram_addr			= bram_core_addr_zero;
-	reg									rw_t_bram_wr			= 1'b0;
-	reg	[               31:0]	rw_t_bram_in;
-	wire	[               31:0]	rw_t_bram_out;
-	
-	reg	[OPERAND_ADDR_WIDTH:0]	ro_coeff_bram_addr	= bram_core_addr_zero;
-	wire	[                31:0]	ro_coeff_bram_out;
-	
-	wire	[OPERAND_ADDR_WIDTH:0]	ro_mm_bram_addr;
-	wire	[                31:0]	ro_mm_bram_out;
-	
-	wire	[OPERAND_ADDR_WIDTH:0]	ro_nn_bram_addr;
-	wire	[                31:0]	ro_nn_bram_out;		
-	
-	reg	[OPERAND_ADDR_WIDTH:0]	ro_r_bram_addr			= bram_core_addr_zero;
-	wire	[                31:0]	ro_r_bram_out;
-
-	wire	[OPERAND_ADDR_WIDTH:0]	ro_t_bram_addr;
-	wire	[               31:0]	ro_t_bram_out;
-	
-	modexps6_buffer_core #
-	(
-		.OPERAND_ADDR_WIDTH	(OPERAND_ADDR_WIDTH)
-	)
-	mem_core
-	(
-		.clk						(clk),
-		
-		.rw_coeff_bram_addr	(rw_coeff_bram_addr),
-		.rw_coeff_bram_wr		(rw_coeff_bram_wr),
-		.rw_coeff_bram_in		(rw_coeff_bram_in),
-		.rw_coeff_bram_out	(rw_coeff_bram_out),
-		
-		.rw_mm_bram_addr		(rw_mm_bram_addr),
-		.rw_mm_bram_wr			(rw_mm_bram_wr),
-		.rw_mm_bram_in			(rw_mm_bram_in),
-		.rw_mm_bram_out		(rw_mm_bram_out),
-		
-		.rw_nn_bram_addr		(rw_nn_bram_addr),
-		.rw_nn_bram_wr			(rw_nn_bram_wr),
-		.rw_nn_bram_in			(rw_nn_bram_in),
-
-		.rw_y_bram_addr		(rw_y_bram_addr),
-		.rw_y_bram_wr			(rw_y_bram_wr),
-		.rw_y_bram_in			(rw_y_bram_in),
-		.rw_y_bram_out			(rw_y_bram_out),
-		
-		.rw_r_bram_addr		(rw_r_bram_addr),
-		.rw_r_bram_wr			(rw_r_bram_wr),
-		.rw_r_bram_in			(rw_r_bram_in),
-		.rw_r_bram_out			(rw_r_bram_out),
-		
-		.rw_t_bram_addr		(rw_t_bram_addr),
-		.rw_t_bram_wr			(rw_t_bram_wr),
-		.rw_t_bram_in			(rw_t_bram_in),
-		.rw_t_bram_out			(rw_t_bram_out),
-
-		.ro_coeff_bram_addr	(ro_coeff_bram_addr),
-		.ro_coeff_bram_out	(ro_coeff_bram_out),		
-		
-		.ro_mm_bram_addr		(ro_mm_bram_addr),
-		.ro_mm_bram_out		(ro_mm_bram_out),
-		
-		.ro_nn_bram_addr		(ro_nn_bram_addr),
-		.ro_nn_bram_out		(ro_nn_bram_out),
-		
-		.ro_r_bram_addr		(ro_r_bram_addr),
-		.ro_r_bram_out			(ro_r_bram_out),
-		
-		.ro_t_bram_addr		(ro_t_bram_addr),
-		.ro_t_bram_out			(ro_t_bram_out)
-	);
-	
-	
-		//
-		// Small 32-bit ModInv Core
-		//
-	wire				modinv_ena;
-	wire				modinv_rdy;
-
-	wire	[31: 0]	modinv_n0;
-	wire	[31: 0]	modinv_n0_negative = ~modinv_n0 + 1'b1;
-	wire	[31: 0]	modinv_n0_modinv;
-	
-	modexps6_modinv32 core_modinv32
-	(
-		.clk			(clk),
-		
-		.ena			(modinv_ena),
-		.rdy			(modinv_rdy),
-		
-		.n0			(modinv_n0_negative),
-		.n0_modinv	(modinv_n0_modinv)
-	);
-	
-	
-		//
-		// Montgomery Coefficient Calculator
-		//
-	modexps6_montgomery_coeff #
-	(
-		.MODULUS_NUM_BITS		(MODULUS_NUM_BITS),
-		.OPERAND_ADDR_WIDTH	(OPERAND_ADDR_WIDTH)
-	)
-	core_montgomery_coeff
-	(
-		.clk						(clk),
-		
-		.ena						(init),
-		.rdy						(ready),
-		
-		.modulus_width			(modulus_width),
-		
-		.coeff_bram_addr		(rw_coeff_bram_addr),
-		.coeff_bram_wr			(rw_coeff_bram_wr),
-		.coeff_bram_in			(rw_coeff_bram_in),
-		.coeff_bram_out		(rw_coeff_bram_out),
-		
-		.nn_bram_addr			(rw_nn_bram_addr),
-		.nn_bram_wr				(rw_nn_bram_wr),
-		.nn_bram_in				(rw_nn_bram_in),
-	
-		.modulus_bram_addr	(ro_modulus_bram_addr),
-		.modulus_bram_out		(ro_modulus_bram_out),
-		
-		.modinv_n0				(modinv_n0),
-		.modinv_ena				(modinv_ena),
-		.modinv_rdy				(modinv_rdy)
-	);
-	
-	
-		//
-		// Montgomery Multiplier
-		//
-	reg	mul_ena = 1'b0;
-	wire	mul_rdy;
-	
-	modexps6_montgomery_multiplier #
-	(
-		.OPERAND_NUM_BITS		(MODULUS_NUM_BITS),
-		.OPERAND_ADDR_WIDTH	(OPERAND_ADDR_WIDTH)
-	)
-	core_montgomery_multiplier
-	(
-		.clk					(clk),
-		
-		.ena					(mul_ena),
-		.rdy					(mul_rdy),
-		
-		.operand_width		(modulus_width),
-		
-		.x_bram_addr		(ro_t_bram_addr),
-		.x_bram_out			(ro_t_bram_out),
-		
-		.y_bram_addr		(ro_mm_bram_addr),
-		.y_bram_out			(ro_mm_bram_out),
-		
-		.n_bram_addr		(ro_nn_bram_addr),
-		.n_bram_out			(ro_nn_bram_out),
-		
-		.z_bram_addr		(rw_r_bram_addr),
-		.z_bram_wr			(rw_r_bram_wr),
-		.z_bram_in			(rw_r_bram_in),
-		.z_bram_out			(rw_r_bram_out),
-		
-		.n0_modinv			(modinv_n0_modinv)
-	);
-
-
-		//
-		// FSM
-		//
-	localparam FSM_STATE_IDLE					= 6'd0;
-	
-	localparam FSM_STATE_INIT_LOAD			= 6'd11;
-	localparam FSM_STATE_INIT_WAIT			= 6'd12;
-	localparam FSM_STATE_INIT_UNLOAD			= 6'd13;
-	
-	localparam FSM_STATE_READ_EI				= 6'd20;
-	
-	localparam FSM_STATE_ROUND_BEGIN			= 6'd25;
-	
-	localparam FSM_STATE_MULTIPLY_LOAD		= 6'd31;
-	localparam FSM_STATE_MULTIPLY_WAIT		= 6'd32;
-	localparam FSM_STATE_MULTIPLY_UNLOAD	= 6'd33;
-
-	localparam FSM_STATE_SQUARE_LOAD			= 6'd41;
-	localparam FSM_STATE_SQUARE_WAIT			= 6'd42;
-	localparam FSM_STATE_SQUARE_UNLOAD		= 6'd43;
-	
-	localparam FSM_STATE_ROUND_END			= 6'd50;
-	
-	localparam FSM_STATE_FINAL					= 6'd60;
-	
-	reg	[ 5: 0]	fsm_state = FSM_STATE_IDLE;
-	
-	
-		//
-		// Trigger
-		//
-	reg next_dly = 1'b0;
-	always @(posedge clk) next_dly <= next;
-	wire next_trig = (next == 1'b1) && (next_dly == 1'b0);
-	
-	
-		//
-		// Valid Register
-		//
-	reg valid_reg = 1'b0;
-	assign valid = valid_reg;
-	
-	
-		//
-		// Next/ Valid Logic
-		//
-	always @(posedge clk)
-		//
-		if (fsm_state == FSM_STATE_FINAL) begin
-			//
-			valid_reg <= 1'b1;
-			//
-		end else if (fsm_state == FSM_STATE_IDLE) begin
-			//
-			if (valid_reg && !next) valid_reg <= 1'b0;
-			//
-		end
-		
-		
-		//
-		// Exponent Bit Counter
-		//
-	reg	[ 4: 0]	ei_bit_count = 5'd0;
-	wire				ei_bit = ro_exponent_bram_out[ei_bit_count];
-		
-		
-		//
-		// Round Counter
-		//
-	reg	[MODULUS_NUM_BITS:0]	round_count			= round_count_zero;
-	wire	[MODULUS_NUM_BITS:0]	round_count_last	= exponent_width - 1'b1;
-	wire	[MODULUS_NUM_BITS:0]	round_count_next	= (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;
-	
-	
-		//
-		// Handy Wires
-		//
-	wire	[OPERAND_ADDR_WIDTH-1:0]	modulus_width_msb = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH];
-	
-	wire	[OPERAND_ADDR_WIDTH  :0]	bram_core_addr_last	= {modulus_width_msb, 1'b0};
-	
-	wire	[OPERAND_ADDR_WIDTH  :0]	bram_user_addr_last_ext		= bram_core_addr_last - 1'b1;
-	wire	[OPERAND_ADDR_WIDTH-1:0]	bram_user_addr_last			= bram_user_addr_last_ext[OPERAND_ADDR_WIDTH-1:0];
-		
-		
-		//
-		// Handy Functions
-		//
-	function	[OPERAND_ADDR_WIDTH:0]	bram_core_addr_next_or_zero;
-		input	[OPERAND_ADDR_WIDTH:0]	bram_core_addr;
-		begin
-			bram_core_addr_next_or_zero = (bram_core_addr < bram_core_addr_last) ? bram_core_addr + 1'b1 : bram_core_addr_zero;
-		end
-	endfunction
-	
-	function	[OPERAND_ADDR_WIDTH-1:0]	bram_user_addr_next_or_zero;
-		input	[OPERAND_ADDR_WIDTH-1:0]	bram_user_addr;
-		begin
-			bram_user_addr_next_or_zero = (bram_user_addr < bram_user_addr_last) ? bram_user_addr + 1'b1 : bram_user_addr_zero;
-		end
-	endfunction
-	
-	
-		//
-		// Result BRAM Input
-		//
-	assign rw_result_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out;
-	
-	
-		//
-		// MM BRAM Input Selector
-		//
-	always @(*)
-		//
-		case (fsm_state)
-			
-			FSM_STATE_INIT_LOAD:
-				//
-				rw_mm_bram_in = (rw_mm_bram_addr < bram_core_addr_last) ? ro_message_bram_out : {32{1'b0}};
-			
-			FSM_STATE_INIT_UNLOAD:
-				//
-				rw_mm_bram_in = ro_r_bram_out;
-				
-			FSM_STATE_SQUARE_UNLOAD:
-				//
-				rw_mm_bram_in = ro_r_bram_out;
-			
-			default:
-				//
-				rw_mm_bram_in	= {32{1'bX}};
-			
-		endcase
-		
-		
-		//
-		// Y BRAM Input Selector
-		//
-	always @(*)
-		//
-		case (fsm_state)
-		
-			FSM_STATE_INIT_LOAD:
-				//
-				rw_y_bram_in = (rw_mm_bram_addr == bram_core_addr_zero) ? 32'h00000001 : 32'h00000000;
-				
-			FSM_STATE_MULTIPLY_UNLOAD:
-				//
-				rw_y_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out; // RW!
-		
-			default:
-				//
-				rw_y_bram_in	= {32{1'bX}};
-				
-		endcase
-		
-		
-		//
-		// T BRAM Input Selector
-		//
-	always @(*)
-		//
-		case (fsm_state)
-		
-			FSM_STATE_INIT_LOAD:
-				//
-				rw_t_bram_in = ro_coeff_bram_out;
-				
-			FSM_STATE_MULTIPLY_LOAD:
-				//
-				rw_t_bram_in = rw_y_bram_out;
-				
-			FSM_STATE_SQUARE_LOAD:
-				//
-				rw_t_bram_in = rw_mm_bram_out;
-		
-			default:
-				//
-				rw_t_bram_in	= {32{1'bX}};
-				
-		endcase
-	
-	
-		//
-		// Main Logic
-		//
-	always @(posedge clk)
-		//
-		case (fsm_state)
-			
-			FSM_STATE_INIT_LOAD: begin
-				//
-				rw_mm_bram_wr		<= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
-				rw_y_bram_wr		<= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
-				rw_t_bram_wr		<= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
-				//
-				rw_mm_bram_addr	<= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
-				rw_y_bram_addr		<= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
-				rw_t_bram_addr		<= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
-				//
-				if (ro_coeff_bram_addr > bram_core_addr_zero) ro_coeff_bram_addr	<= bram_core_addr_next_or_zero(ro_coeff_bram_addr);
-				else ro_coeff_bram_addr	<= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_coeff_bram_addr);
-				//
-				if (ro_message_bram_addr > bram_user_addr_zero) ro_message_bram_addr	<= bram_user_addr_next_or_zero(ro_message_bram_addr);
-				else ro_message_bram_addr <= rw_mm_bram_wr ? bram_user_addr_zero : bram_user_addr_next_or_zero(ro_message_bram_addr);
-				//
-			end
-			
-			FSM_STATE_INIT_WAIT: begin
-				//
-				if (mul_ena)	mul_ena	<= mul_rdy ? 1'b0 : 1'b1;
-				else				mul_ena	<= 1'b1;
-				//
-			end
-			
-			FSM_STATE_INIT_UNLOAD: begin
-				//
-				rw_mm_bram_wr		<= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
-				//
-				rw_mm_bram_addr	<= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
-				//
-				if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr	<= bram_core_addr_next_or_zero(ro_r_bram_addr);
-				else ro_r_bram_addr	<= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
-				//
-			end
-			
-			FSM_STATE_MULTIPLY_LOAD: begin
-				//
-				rw_t_bram_wr		<= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
-				//
-				rw_t_bram_addr		<= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero;
-				//
-				if (rw_y_bram_addr > bram_core_addr_zero) rw_y_bram_addr	<= bram_core_addr_next_or_zero(rw_y_bram_addr);
-				else rw_y_bram_addr	<= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_y_bram_addr);				
-				//
-			end
-			
-			FSM_STATE_MULTIPLY_WAIT: begin
-				//
-				if (mul_ena)	mul_ena	<= mul_rdy ? 1'b0 : 1'b1;
-				else				mul_ena	<= 1'b1;				
-				//
-			end
-			
-			FSM_STATE_MULTIPLY_UNLOAD: begin
-				//
-				rw_y_bram_wr		<= (rw_y_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
-				//
-				rw_y_bram_addr		<= rw_y_bram_wr ? bram_core_addr_next_or_zero(rw_y_bram_addr) : bram_core_addr_zero;
-				//
-				if (ei_bit) begin
-					//
-					if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr	<= bram_core_addr_next_or_zero(ro_r_bram_addr);
-					else ro_r_bram_addr	<= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);				
-					//
-				end else begin
-					//
-					if (rw_t_bram_addr > bram_core_addr_zero) rw_t_bram_addr	<= bram_core_addr_next_or_zero(rw_t_bram_addr);
-					else rw_t_bram_addr	<= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_t_bram_addr);				
-					//
-				end
-				//
-				if (round_count == round_count_last) begin
-					//
-					if (rw_result_bram_addr == bram_user_addr_zero) begin
-						//
-						if (rw_y_bram_wr) begin
-							//
-							rw_result_bram_wr		<= (rw_y_bram_addr > bram_core_addr_zero) ? 1'b0 : 1'b1;
-							rw_result_bram_addr	<= (rw_y_bram_addr > bram_core_addr_zero) ? bram_user_addr_zero : bram_user_addr_next_or_zero(rw_result_bram_addr);
-							//
-						end else begin
-							//
-							rw_result_bram_wr		<= 1'b1;
-							rw_result_bram_addr	<= bram_user_addr_zero;
-							//
-						end
-						//
-					end else begin
-						//
-						rw_result_bram_wr		<= (rw_result_bram_addr < bram_user_addr_last) ? 1'b1 : 1'b0;
-						rw_result_bram_addr	<= bram_user_addr_next_or_zero(rw_result_bram_addr);
-						//
-					end
-					//
-				end
-				//
-			end
-			
-			FSM_STATE_SQUARE_LOAD: begin
-				//
-				rw_t_bram_wr		<= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
-				//
-				rw_t_bram_addr		<= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero;
-				//
-				if (rw_mm_bram_addr > bram_core_addr_zero) rw_mm_bram_addr	<= bram_core_addr_next_or_zero(rw_mm_bram_addr);
-				else rw_mm_bram_addr	<= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_mm_bram_addr);								
-				//
-			end
-			
-			FSM_STATE_SQUARE_WAIT: begin
-				//
-				if (mul_ena)	mul_ena	<= mul_rdy ? 1'b0 : 1'b1;
-				else				mul_ena	<= 1'b1;								
-				//
-			end
-			
-			FSM_STATE_SQUARE_UNLOAD: begin
-				//
-				rw_mm_bram_wr		<= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
-				//
-				rw_mm_bram_addr	<= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
-				//
-				if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr	<= bram_core_addr_next_or_zero(ro_r_bram_addr);
-				else ro_r_bram_addr	<= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);				
-				//
-			end
-			
-			FSM_STATE_ROUND_END: begin
-				//
-				round_count <= round_count_next;
-				//
-				if (round_count < round_count_last) begin
-					//
-					ei_bit_count <= ei_bit_count + 1'b1;
-					//
-					if (ei_bit_count == 5'd31)
-						//
-						ro_exponent_bram_addr <= bram_user_addr_next_or_zero(ro_exponent_bram_addr);
-					//
-				end else begin
-					//
-					ei_bit_count <= 5'd0;
-					//
-					ro_exponent_bram_addr <= bram_user_addr_zero;
-					//
-				end
-				//
-			end
-			
-		endcase
-		
-	
-		//
-		// FSM Transition Logic
-		//
-	always @(posedge clk)
-		//
-		case (fsm_state)
-			
-			FSM_STATE_IDLE:					fsm_state <= (!valid_reg && next_trig) ? FSM_STATE_INIT_LOAD : FSM_STATE_IDLE;
-			
-			FSM_STATE_INIT_LOAD:				fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_LOAD : FSM_STATE_INIT_WAIT;
-			FSM_STATE_INIT_WAIT:				fsm_state <= mul_rdy ? FSM_STATE_INIT_UNLOAD : FSM_STATE_INIT_WAIT;
-			FSM_STATE_INIT_UNLOAD:			fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_UNLOAD : FSM_STATE_READ_EI;
-			
-			FSM_STATE_READ_EI:				fsm_state <= FSM_STATE_ROUND_BEGIN;
-			
-			FSM_STATE_ROUND_BEGIN:			fsm_state <= (!ei_bit && fast_public_mode && (round_count < round_count_last)) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_MULTIPLY_LOAD;
-			
-			FSM_STATE_MULTIPLY_LOAD:		fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_LOAD : FSM_STATE_MULTIPLY_WAIT;
-			FSM_STATE_MULTIPLY_WAIT:		fsm_state <= mul_rdy ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_MULTIPLY_WAIT;
-			FSM_STATE_MULTIPLY_UNLOAD:		fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_SQUARE_LOAD;
-			
-			FSM_STATE_SQUARE_LOAD:			fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_SQUARE_WAIT;
-			FSM_STATE_SQUARE_WAIT:			fsm_state <= mul_rdy ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_SQUARE_WAIT;
-			FSM_STATE_SQUARE_UNLOAD:		fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_ROUND_END;
-			
-			FSM_STATE_ROUND_END:				fsm_state <= (round_count < round_count_last) ? FSM_STATE_READ_EI : FSM_STATE_FINAL;
-			
-			FSM_STATE_FINAL:					fsm_state <= FSM_STATE_IDLE;
-			
-			default:								fsm_state <= FSM_STATE_IDLE;
-			
-		endcase
-	
-	
-endmodule
+//======================================================================
+//
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexps6_top   // top level: operand buffers, coefficient calculator, multiplier and sequencing FSM
+  #(parameter MAX_MODULUS_WIDTH = 1024)   // sizes the address and width vectors derived in the body
+  (
+   input wire                        clk,
+
+   input wire                        init,    // enable input of the coefficient calculator
+   output wire                       ready,   // ready output of the coefficient calculator
+
+   input wire                        next,    // a 0 -> 1 transition starts the FSM (see next_trig)
+   output wire                       valid,   // mirrors valid_reg
+
+   input wire [MODULUS_NUM_BITS-1:0] modulus_width,    // fed to coefficient calculator and multiplier
+   input wire [MODULUS_NUM_BITS-1:0] exponent_width,   // number of rounds is derived from this
+
+   input wire                        fast_public_mode, // lets the FSM skip the multiply for zero exponent bits
+
+   input wire                        bus_cs,       // bus_* signals are passed straight to mem_user
+   input wire                        bus_we,
+   input wire [ADDR_WIDTH_TOTAL-1:0] bus_addr,
+   input wire [31:0]                 bus_data_wr,
+   output wire [31:0]                bus_data_rd
+   );
+
+
+   // Constant function used to size address and width vectors.
+   // modexps6_clog2(): number of right shifts needed to bring (value - 1) to zero,
+   // i.e. ceil(log2(value)) for value >= 1; the loop does not run for value <= 1, giving 0.
+   function     integer modexps6_clog2;
+      input     integer              value;
+      integer                        ret;
+      begin
+         value = value - 1;                        // so that exact powers of two do not round up
+         for (ret = 0; value > 0; ret = ret + 1)   // count the significant bits of (value - 1)
+           value = value >> 1;
+         modexps6_clog2 = ret;
+      end
+   endfunction
+
+
+   // Locals: derived widths and all-zero address / counter constants.
+   // NOTE(review): MODULUS_NUM_BITS and ADDR_WIDTH_TOTAL are used by the port list above,
+   // i.e. before they are declared here -- confirm every target tool accepts that ordering.
+   localparam   OPERAND_ADDR_WIDTH = modexps6_clog2(MAX_MODULUS_WIDTH / 32);   // address bits for MAX_MODULUS_WIDTH/32 words
+   localparam   MODULUS_NUM_BITS   = modexps6_clog2(MAX_MODULUS_WIDTH + 1);    // bits to hold values 0..MAX_MODULUS_WIDTH
+   localparam   ADDR_WIDTH_TOTAL   = OPERAND_ADDR_WIDTH + 2;                   // width of bus_addr
+   // User-buffer addresses are OPERAND_ADDR_WIDTH bits wide; core-buffer addresses carry one extra bit.
+   localparam   [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}};
+   localparam   [OPERAND_ADDR_WIDTH  :0] bram_core_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+   // The round counter is one bit wider than the width inputs.
+   localparam   [   MODULUS_NUM_BITS:0] round_count_zero = {1'b0, {MODULUS_NUM_BITS{1'b0}}};
+
+
+   //
+   // User Memory: buffers reachable from the bus_* port (modexps6_buffer_user)
+   //
+   wire [OPERAND_ADDR_WIDTH-1:0]        ro_modulus_bram_addr;   // driven by core_montgomery_coeff
+   wire [                 31:0]         ro_modulus_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH-1:0]         ro_message_bram_addr    = bram_user_addr_zero;   // stepped in INIT_LOAD
+   wire [                 31:0]         ro_message_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH-1:0]         ro_exponent_bram_addr   = bram_user_addr_zero;   // stepped in ROUND_END
+   wire [                 31:0]         ro_exponent_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH-1:0]         rw_result_bram_addr     = bram_user_addr_zero;   // stepped in MULTIPLY_UNLOAD
+   wire [                 31:0]         rw_result_bram_out;   // NOTE(review): never connected or read in this module
+   reg                                  rw_result_bram_wr       = 1'b0;
+   wire [                 31:0]         rw_result_bram_in;
+
+   modexps6_buffer_user #
+     (
+      .OPERAND_ADDR_WIDTH       (OPERAND_ADDR_WIDTH)
+      )
+   mem_user
+     (
+      .clk                      (clk),
+
+      .bus_cs                   (bus_cs),
+      .bus_we                   (bus_we),
+      .bus_addr                 (bus_addr),
+      .bus_data_wr              (bus_data_wr),
+      .bus_data_rd              (bus_data_rd),
+
+      .ro_modulus_bram_addr     (ro_modulus_bram_addr),
+      .ro_modulus_bram_out      (ro_modulus_bram_out),
+
+      .ro_message_bram_addr     (ro_message_bram_addr),
+      .ro_message_bram_out      (ro_message_bram_out),
+
+      .ro_exponent_bram_addr    (ro_exponent_bram_addr),
+      .ro_exponent_bram_out     (ro_exponent_bram_out),
+
+      .rw_result_bram_addr      (rw_result_bram_addr),
+      .rw_result_bram_wr        (rw_result_bram_wr),
+      .rw_result_bram_in        (rw_result_bram_in)
+      );
+
+
+   // Core (Internal) Memory: working buffers (modexps6_buffer_core).
+   // "reg" ports below are sequenced by this module's main logic;
+   // "wire" address/write ports are driven by the coefficient calculator or the multiplier.
+   wire [OPERAND_ADDR_WIDTH:0]          rw_coeff_bram_addr;   // rw_coeff_*: core_montgomery_coeff
+   wire                                 rw_coeff_bram_wr;
+   wire [               31:0]           rw_coeff_bram_in;
+   wire [               31:0]           rw_coeff_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH:0]           rw_mm_bram_addr         = bram_core_addr_zero;
+   reg                                  rw_mm_bram_wr           = 1'b0;
+   reg [               31:0]            rw_mm_bram_in;   // combinational, see MM BRAM Input Selector
+   wire [               31:0]           rw_mm_bram_out;
+
+   wire [OPERAND_ADDR_WIDTH:0]          rw_nn_bram_addr;   // rw_nn_*: core_montgomery_coeff
+   wire                                 rw_nn_bram_wr;
+   wire [               31:0]           rw_nn_bram_in;
+
+   reg [OPERAND_ADDR_WIDTH:0]           rw_y_bram_addr          = bram_core_addr_zero;
+   reg                                  rw_y_bram_wr            = 1'b0;
+   reg [              31:0]             rw_y_bram_in;   // combinational, see Y BRAM Input Selector
+   wire [              31:0]            rw_y_bram_out;
+
+   wire [OPERAND_ADDR_WIDTH:0]          rw_r_bram_addr;   // rw_r_*: z port of core_montgomery_multiplier
+   wire                                 rw_r_bram_wr;
+   wire [              31:0]            rw_r_bram_in;
+   wire [              31:0]            rw_r_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH:0]           rw_t_bram_addr          = bram_core_addr_zero;
+   reg                                  rw_t_bram_wr            = 1'b0;
+   reg [              31:0]             rw_t_bram_in;   // combinational, see T BRAM Input Selector
+   wire [              31:0]            rw_t_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH:0]           ro_coeff_bram_addr      = bram_core_addr_zero;
+   wire [               31:0]           ro_coeff_bram_out;
+
+   wire [OPERAND_ADDR_WIDTH:0]          ro_mm_bram_addr;   // ro_mm_*: y port of core_montgomery_multiplier
+   wire [               31:0]           ro_mm_bram_out;
+
+   wire [OPERAND_ADDR_WIDTH:0]          ro_nn_bram_addr;   // ro_nn_*: n port of core_montgomery_multiplier
+   wire [               31:0]           ro_nn_bram_out;
+
+   reg [OPERAND_ADDR_WIDTH:0]           ro_r_bram_addr          = bram_core_addr_zero;
+   wire [               31:0]           ro_r_bram_out;
+
+   wire [OPERAND_ADDR_WIDTH:0]          ro_t_bram_addr;   // ro_t_*: x port of core_montgomery_multiplier
+   wire [              31:0]            ro_t_bram_out;
+
+   modexps6_buffer_core #
+     (
+      .OPERAND_ADDR_WIDTH       (OPERAND_ADDR_WIDTH)
+      )
+   mem_core
+     (
+      .clk                      (clk),
+
+      .rw_coeff_bram_addr       (rw_coeff_bram_addr),
+      .rw_coeff_bram_wr         (rw_coeff_bram_wr),
+      .rw_coeff_bram_in         (rw_coeff_bram_in),
+      .rw_coeff_bram_out        (rw_coeff_bram_out),
+
+      .rw_mm_bram_addr          (rw_mm_bram_addr),
+      .rw_mm_bram_wr            (rw_mm_bram_wr),
+      .rw_mm_bram_in            (rw_mm_bram_in),
+      .rw_mm_bram_out           (rw_mm_bram_out),
+
+      .rw_nn_bram_addr          (rw_nn_bram_addr),
+      .rw_nn_bram_wr            (rw_nn_bram_wr),
+      .rw_nn_bram_in            (rw_nn_bram_in),
+
+      .rw_y_bram_addr           (rw_y_bram_addr),
+      .rw_y_bram_wr             (rw_y_bram_wr),
+      .rw_y_bram_in             (rw_y_bram_in),
+      .rw_y_bram_out            (rw_y_bram_out),
+
+      .rw_r_bram_addr           (rw_r_bram_addr),
+      .rw_r_bram_wr             (rw_r_bram_wr),
+      .rw_r_bram_in             (rw_r_bram_in),
+      .rw_r_bram_out            (rw_r_bram_out),
+
+      .rw_t_bram_addr           (rw_t_bram_addr),
+      .rw_t_bram_wr             (rw_t_bram_wr),
+      .rw_t_bram_in             (rw_t_bram_in),
+      .rw_t_bram_out            (rw_t_bram_out),
+
+      .ro_coeff_bram_addr       (ro_coeff_bram_addr),
+      .ro_coeff_bram_out        (ro_coeff_bram_out),
+
+      .ro_mm_bram_addr          (ro_mm_bram_addr),
+      .ro_mm_bram_out           (ro_mm_bram_out),
+
+      .ro_nn_bram_addr          (ro_nn_bram_addr),
+      .ro_nn_bram_out           (ro_nn_bram_out),
+
+      .ro_r_bram_addr           (ro_r_bram_addr),
+      .ro_r_bram_out            (ro_r_bram_out),
+
+      .ro_t_bram_addr           (ro_t_bram_addr),
+      .ro_t_bram_out            (ro_t_bram_out)
+      );
+
+
+   // Small 32-bit ModInv Core.
+   // It is started and fed by the coefficient calculator (modinv_ena / modinv_n0);
+   // its result, modinv_n0_modinv, goes to the multiplier's n0_modinv port.
+   wire                                 modinv_ena;
+   wire                                 modinv_rdy;
+
+   wire [31: 0]                         modinv_n0;
+   wire [31: 0]                         modinv_n0_negative = ~modinv_n0 + 1'b1;   // two's complement negation of modinv_n0
+   wire [31: 0]                         modinv_n0_modinv;
+
+   modexps6_modinv32 core_modinv32
+     (
+      .clk                      (clk),
+
+      .ena                      (modinv_ena),
+      .rdy                      (modinv_rdy),
+
+      .n0                       (modinv_n0_negative),   // the inverse is taken of the negated word
+      .n0_modinv                (modinv_n0_modinv)
+      );
+
+
+   // Montgomery Coefficient Calculator.
+   // Controlled directly by the module-level init / ready handshake. It reads the modulus
+   // from user memory, writes the coeff and nn core buffers, and drives the 32-bit modinv core.
+   modexps6_montgomery_coeff #
+     (
+      .MODULUS_NUM_BITS         (MODULUS_NUM_BITS),
+      .OPERAND_ADDR_WIDTH       (OPERAND_ADDR_WIDTH)
+      )
+   core_montgomery_coeff
+     (
+      .clk                      (clk),
+
+      .ena                      (init),
+      .rdy                      (ready),
+
+      .modulus_width            (modulus_width),
+
+      .coeff_bram_addr          (rw_coeff_bram_addr),
+      .coeff_bram_wr            (rw_coeff_bram_wr),
+      .coeff_bram_in            (rw_coeff_bram_in),
+      .coeff_bram_out           (rw_coeff_bram_out),
+
+      .nn_bram_addr             (rw_nn_bram_addr),
+      .nn_bram_wr               (rw_nn_bram_wr),
+      .nn_bram_in               (rw_nn_bram_in),
+
+      .modulus_bram_addr        (ro_modulus_bram_addr),
+      .modulus_bram_out         (ro_modulus_bram_out),
+
+      .modinv_n0                (modinv_n0),
+      .modinv_ena               (modinv_ena),
+      .modinv_rdy               (modinv_rdy)
+      );
+
+
+   // Montgomery Multiplier.
+   // Operands come from the T (x), MM (y) and NN (n) core buffers; the result (z) goes to
+   // the R buffer. mul_ena is raised by the main logic in the *_WAIT states until mul_rdy is seen.
+   reg                                  mul_ena = 1'b0;
+   wire                                 mul_rdy;
+
+   modexps6_montgomery_multiplier #
+     (
+      .OPERAND_NUM_BITS         (MODULUS_NUM_BITS),
+      .OPERAND_ADDR_WIDTH       (OPERAND_ADDR_WIDTH)
+      )
+   core_montgomery_multiplier
+     (
+      .clk                      (clk),
+
+      .ena                      (mul_ena),
+      .rdy                      (mul_rdy),
+
+      .operand_width            (modulus_width),
+
+      .x_bram_addr              (ro_t_bram_addr),
+      .x_bram_out               (ro_t_bram_out),
+
+      .y_bram_addr              (ro_mm_bram_addr),
+      .y_bram_out               (ro_mm_bram_out),
+
+      .n_bram_addr              (ro_nn_bram_addr),
+      .n_bram_out               (ro_nn_bram_out),
+
+      .z_bram_addr              (rw_r_bram_addr),
+      .z_bram_wr                (rw_r_bram_wr),
+      .z_bram_in                (rw_r_bram_in),
+      .z_bram_out               (rw_r_bram_out),
+
+      .n0_modinv                (modinv_n0_modinv)
+      );
+
+
+   //
+   // FSM: 6-bit state encodings; each LOAD / WAIT / UNLOAD triple wraps one multiplier run
+   //
+   localparam FSM_STATE_IDLE            = 6'd0;    // waiting for a rising edge on next
+   // INIT_*: fill MM, Y and T, run the multiplier once, copy R back into MM
+   localparam FSM_STATE_INIT_LOAD       = 6'd11;
+   localparam FSM_STATE_INIT_WAIT       = 6'd12;
+   localparam FSM_STATE_INIT_UNLOAD     = 6'd13;
+
+   localparam FSM_STATE_READ_EI         = 6'd20;   // single-cycle state before ROUND_BEGIN
+
+   localparam FSM_STATE_ROUND_BEGIN     = 6'd25;   // chooses between MULTIPLY_LOAD and SQUARE_LOAD
+   // MULTIPLY_*: copy Y into T, run the multiplier, write Y from R or T depending on ei_bit
+   localparam FSM_STATE_MULTIPLY_LOAD   = 6'd31;
+   localparam FSM_STATE_MULTIPLY_WAIT   = 6'd32;
+   localparam FSM_STATE_MULTIPLY_UNLOAD = 6'd33;
+   // SQUARE_*: copy MM into T, run the multiplier, copy R back into MM
+   localparam FSM_STATE_SQUARE_LOAD     = 6'd41;
+   localparam FSM_STATE_SQUARE_WAIT     = 6'd42;
+   localparam FSM_STATE_SQUARE_UNLOAD   = 6'd43;
+
+   localparam FSM_STATE_ROUND_END       = 6'd50;   // advances round and exponent-bit counters
+
+   localparam FSM_STATE_FINAL           = 6'd60;   // sets valid_reg, then returns to IDLE
+
+   reg [5: 0]                           fsm_state = FSM_STATE_IDLE;
+
+
+   //
+   // Trigger: rising-edge detector on the next input
+   //
+   reg                                  next_dly = 1'b0;   // next delayed by one clock
+   always @(posedge clk) next_dly <= next;
+   wire                                 next_trig = (next == 1'b1) && (next_dly == 1'b0);   // high when next is 1 and was 0 one clock earlier
+
+
+   //
+   // Valid Register: drives the valid output
+   //
+   reg                                  valid_reg = 1'b0;
+   assign valid = valid_reg;
+
+
+   // Next / Valid Logic.
+   // valid_reg is set when the FSM passes through FINAL and cleared in IDLE once next is low.
+   // In every other state it holds its value.
+   always @(posedge clk)
+     //
+     if (fsm_state == FSM_STATE_FINAL) begin
+        // operation finished
+        valid_reg <= 1'b1;
+        //
+     end else if (fsm_state == FSM_STATE_IDLE) begin
+        // the IDLE transition requires !valid_reg, so next must go low before a new start
+        if (valid_reg && !next) valid_reg <= 1'b0;
+        //
+     end
+
+
+   //
+   // Exponent Bit Counter: selects one bit of the current 32-bit exponent word
+   //
+   reg  [4: 0]  ei_bit_count = 5'd0;   // advanced (or reset) in FSM_STATE_ROUND_END
+   wire         ei_bit = ro_exponent_bram_out[ei_bit_count];   // current exponent bit
+
+
+   // Round Counter: counts from zero up to exponent_width - 1.
+   // The operands of round_count_last are extended to MODULUS_NUM_BITS+1 bits before the
+   // subtraction, so exponent_width == 0 yields an all-ones value in the wider vector.
+   reg [MODULUS_NUM_BITS:0] round_count         = round_count_zero;
+   wire [MODULUS_NUM_BITS:0] round_count_last   = exponent_width - 1'b1;
+   wire [MODULUS_NUM_BITS:0] round_count_next   = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;   // wraps to zero after the last round
+
+
+   // Handy Wires: last valid buffer addresses for the current modulus_width.
+   // modulus_width_msb is the upper OPERAND_ADDR_WIDTH bits of modulus_width
+   // (bits [10:6] for the default MAX_MODULUS_WIDTH = 1024).
+   wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH];
+   // last core address = modulus_width_msb * 2
+   wire [OPERAND_ADDR_WIDTH  :0] bram_core_addr_last = {modulus_width_msb, 1'b0};
+   // last user address = last core address - 1, truncated to the user address width
+   wire [OPERAND_ADDR_WIDTH  :0] bram_user_addr_last_ext = bram_core_addr_last - 1'b1;
+   wire [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_last = bram_user_addr_last_ext[OPERAND_ADDR_WIDTH-1:0];
+
+
+   // Handy Functions: address incrementers that wrap to zero.
+   // bram_core_addr_next_or_zero(): returns addr + 1 while addr is below bram_core_addr_last,
+   // otherwise zero. Reads the module-level wire bram_core_addr_last as well as its argument.
+   function     [OPERAND_ADDR_WIDTH:0]  bram_core_addr_next_or_zero;
+      input [OPERAND_ADDR_WIDTH:0] bram_core_addr;
+      begin
+         bram_core_addr_next_or_zero = (bram_core_addr < bram_core_addr_last) ? bram_core_addr + 1'b1 : bram_core_addr_zero;
+      end
+   endfunction
+
+   function     [OPERAND_ADDR_WIDTH-1:0]        bram_user_addr_next_or_zero;   // user-buffer counterpart of the core-address incrementer
+      input     [OPERAND_ADDR_WIDTH-1:0]        bram_user_addr;
+      begin   // addr + 1 while below the module-level wire bram_user_addr_last, otherwise zero
+         bram_user_addr_next_or_zero = (bram_user_addr < bram_user_addr_last) ? bram_user_addr + 1'b1 : bram_user_addr_zero;
+      end
+   endfunction
+
+
+   // Result BRAM Input: same source selection as the Y buffer write in MULTIPLY_UNLOAD.
+   // The current exponent bit picks the R buffer output (bit set)
+   // or the T buffer output (bit clear).
+   assign rw_result_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out;
+
+
+   //
+   // MM BRAM Input Selector (combinational): write data for the MM buffer per FSM state
+   //
+   always @(*)
+     //
+     case (fsm_state)
+
+       FSM_STATE_INIT_LOAD:
+         // message word, or zero once the write address has reached bram_core_addr_last
+         rw_mm_bram_in = (rw_mm_bram_addr < bram_core_addr_last) ? ro_message_bram_out : {32{1'b0}};
+
+       FSM_STATE_INIT_UNLOAD:
+         // R buffer word copied back into MM
+         rw_mm_bram_in = ro_r_bram_out;
+
+       FSM_STATE_SQUARE_UNLOAD:
+         // R buffer word copied back into MM
+         rw_mm_bram_in = ro_r_bram_out;
+
+       default:
+         // don't care: the main logic only sets rw_mm_bram_wr in the three states above
+         rw_mm_bram_in  = {32{1'bX}};
+
+     endcase
+
+
+   //
+   // Y BRAM Input Selector (combinational): write data for the Y buffer per FSM state
+   //
+   always @(*)
+                         //
+     case (fsm_state)
+
+       FSM_STATE_INIT_LOAD:
+         // Y is initialised to the multi-word value 1: word 0 is 1, all other words are 0
+         rw_y_bram_in = (rw_mm_bram_addr == bram_core_addr_zero) ? 32'h00000001 : 32'h00000000;
+
+       FSM_STATE_MULTIPLY_UNLOAD:
+         // exponent bit set: take R; clear: take T, which MULTIPLY_LOAD filled from Y
+         rw_y_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out; // RW!
+
+       default:
+         // don't care: the main logic only sets rw_y_bram_wr in the two states above
+         rw_y_bram_in   = {32{1'bX}};
+
+     endcase
+
+
+   //
+   // T BRAM Input Selector (combinational): write data for the T buffer per FSM state
+   //
+   always @(*)
+                        //
+     case (fsm_state)
+
+       FSM_STATE_INIT_LOAD:
+         // T is loaded from the coeff buffer
+         rw_t_bram_in = ro_coeff_bram_out;
+
+       FSM_STATE_MULTIPLY_LOAD:
+         // T is loaded from Y
+         rw_t_bram_in = rw_y_bram_out;
+
+       FSM_STATE_SQUARE_LOAD:
+         // T is loaded from MM
+         rw_t_bram_in = rw_mm_bram_out;
+
+       default:
+         // don't care: the main logic only sets rw_t_bram_wr in the three states above
+         rw_t_bram_in   = {32{1'bX}};
+
+     endcase
+
+
+   // Main Logic: per-state sequencing of buffer addresses, write strobes, mul_ena and counters.
+   // Copy pattern used by every LOAD/UNLOAD state: the write address restarts at zero while its
+   // strobe is low; the source read address starts stepping one cycle earlier than the write address.
+   always @(posedge clk)
+     // states without an entry here (IDLE, READ_EI, ROUND_BEGIN, FINAL) leave all registers unchanged
+     case (fsm_state)
+
+       FSM_STATE_INIT_LOAD: begin
+          // MM, Y and T are written in lock-step; all three follow rw_mm_bram_addr
+          rw_mm_bram_wr         <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          rw_y_bram_wr          <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          rw_t_bram_wr          <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          //
+          rw_mm_bram_addr       <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+          rw_y_bram_addr        <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+          rw_t_bram_addr        <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+          // coeff read address: keeps stepping once non-zero; at zero it steps only while the strobe is low
+          if (ro_coeff_bram_addr > bram_core_addr_zero) ro_coeff_bram_addr <= bram_core_addr_next_or_zero(ro_coeff_bram_addr);
+          else ro_coeff_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_coeff_bram_addr);
+          // message read address: same scheme, user-buffer address range
+          if (ro_message_bram_addr > bram_user_addr_zero) ro_message_bram_addr <= bram_user_addr_next_or_zero(ro_message_bram_addr);
+          else ro_message_bram_addr <= rw_mm_bram_wr ? bram_user_addr_zero : bram_user_addr_next_or_zero(ro_message_bram_addr);
+          //
+       end
+
+       FSM_STATE_INIT_WAIT: begin
+          // raise mul_ena, then drop it in the cycle mul_rdy is observed
+          if (mul_ena)  mul_ena <= mul_rdy ? 1'b0 : 1'b1;
+          else          mul_ena <= 1'b1;
+          //
+       end
+
+       FSM_STATE_INIT_UNLOAD: begin
+          // copy R into MM (write data chosen by the MM input selector)
+          rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          //
+          rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+          // R read address
+          if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr);
+          else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
+          //
+       end
+
+       FSM_STATE_MULTIPLY_LOAD: begin
+          // copy Y into T (write data chosen by the T input selector)
+          rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          //
+          rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero;
+          // rw_y_bram_addr serves as the read address here (rw_y_bram_wr is not touched in this state)
+          if (rw_y_bram_addr > bram_core_addr_zero) rw_y_bram_addr <= bram_core_addr_next_or_zero(rw_y_bram_addr);
+          else rw_y_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_y_bram_addr);
+          //
+       end
+
+       FSM_STATE_MULTIPLY_WAIT: begin
+          // raise mul_ena, then drop it in the cycle mul_rdy is observed
+          if (mul_ena)  mul_ena <= mul_rdy ? 1'b0 : 1'b1;
+          else          mul_ena <= 1'b1;
+          //
+       end
+
+       FSM_STATE_MULTIPLY_UNLOAD: begin
+          // write Y from R (ei_bit set) or from T (ei_bit clear); data chosen by the Y input selector
+          rw_y_bram_wr <= (rw_y_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          //
+          rw_y_bram_addr <= rw_y_bram_wr ? bram_core_addr_next_or_zero(rw_y_bram_addr) : bram_core_addr_zero;
+          // step only the read address of the buffer that is actually being copied
+          if (ei_bit) begin
+             // source is R
+             if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr);
+             else ro_r_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
+             //
+          end else begin
+             // source is T
+             if (rw_t_bram_addr > bram_core_addr_zero) rw_t_bram_addr <= bram_core_addr_next_or_zero(rw_t_bram_addr);
+             else rw_t_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_t_bram_addr);
+             //
+          end
+          // last round only: the same data stream is also written to the user result buffer
+          if (round_count == round_count_last) begin
+             //
+             if (rw_result_bram_addr == bram_user_addr_zero) begin
+                // result address still at zero
+                if (rw_y_bram_wr) begin
+                   // write and advance only while the Y write address is still zero
+                   rw_result_bram_wr <= (rw_y_bram_addr > bram_core_addr_zero) ? 1'b0 : 1'b1;
+                   rw_result_bram_addr <= (rw_y_bram_addr > bram_core_addr_zero) ? bram_user_addr_zero : bram_user_addr_next_or_zero(rw_result_bram_addr);
+                   //
+                end else begin
+                   // Y strobe still low: assert the result strobe and hold address zero
+                   rw_result_bram_wr <= 1'b1;
+                   rw_result_bram_addr <= bram_user_addr_zero;
+                   //
+                end
+                //
+             end else begin
+                // result address non-zero: keep writing until it reaches bram_user_addr_last
+                rw_result_bram_wr <= (rw_result_bram_addr < bram_user_addr_last) ? 1'b1 : 1'b0;
+                rw_result_bram_addr <= bram_user_addr_next_or_zero(rw_result_bram_addr);
+                //
+             end
+             //
+          end
+          //
+       end
+
+       FSM_STATE_SQUARE_LOAD: begin
+          // copy MM into T (write data chosen by the T input selector)
+          rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          //
+          rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero;
+          // rw_mm_bram_addr serves as the read address here (rw_mm_bram_wr is not touched in this state)
+          if (rw_mm_bram_addr > bram_core_addr_zero) rw_mm_bram_addr <= bram_core_addr_next_or_zero(rw_mm_bram_addr);
+          else rw_mm_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_mm_bram_addr);
+          //
+       end
+
+       FSM_STATE_SQUARE_WAIT: begin
+          // raise mul_ena, then drop it in the cycle mul_rdy is observed
+          if (mul_ena)  mul_ena <= mul_rdy ? 1'b0 : 1'b1;
+          else          mul_ena <= 1'b1;
+          //
+       end
+
+       FSM_STATE_SQUARE_UNLOAD: begin
+          // copy R into MM (write data chosen by the MM input selector)
+          rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+          //
+          rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+          // R read address
+          if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr);
+          else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
+          //
+       end
+
+       FSM_STATE_ROUND_END: begin
+          // round_count_next wraps to zero after the last round
+          round_count <= round_count_next;
+          //
+          if (round_count < round_count_last) begin
+             // more rounds to go: move to the next exponent bit (5-bit counter wraps 31 -> 0)
+             ei_bit_count <= ei_bit_count + 1'b1;
+             // after bit 31 of a word, step to the next exponent word
+             if (ei_bit_count == 5'd31)
+               //
+               ro_exponent_bram_addr <= bram_user_addr_next_or_zero(ro_exponent_bram_addr);
+             //
+          end else begin
+             // last round done: rewind bit counter and exponent word address
+             ei_bit_count <= 5'd0;
+             //
+             ro_exponent_bram_addr <= bram_user_addr_zero;
+             //
+          end
+          //
+       end
+
+     endcase
+
+
+   //
+   // FSM Transition Logic: next-state selection (registered)
+   //
+   always @(posedge clk)
+     //
+     case (fsm_state)
+       // start on a rising edge of next, but only once valid_reg has been cleared
+       FSM_STATE_IDLE:          fsm_state <= (!valid_reg && next_trig) ? FSM_STATE_INIT_LOAD : FSM_STATE_IDLE;
+       // LOAD/UNLOAD states repeat until their write address reaches bram_core_addr_last; WAIT states wait for mul_rdy
+       FSM_STATE_INIT_LOAD:     fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_LOAD : FSM_STATE_INIT_WAIT;
+       FSM_STATE_INIT_WAIT:     fsm_state <= mul_rdy ? FSM_STATE_INIT_UNLOAD : FSM_STATE_INIT_WAIT;
+       FSM_STATE_INIT_UNLOAD:   fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_UNLOAD : FSM_STATE_READ_EI;
+
+       FSM_STATE_READ_EI:       fsm_state <= FSM_STATE_ROUND_BEGIN;
+       // in fast_public_mode the multiply is skipped for zero exponent bits, except in the last round
+       FSM_STATE_ROUND_BEGIN:   fsm_state <= (!ei_bit && fast_public_mode && (round_count < round_count_last)) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_MULTIPLY_LOAD;
+
+       FSM_STATE_MULTIPLY_LOAD: fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_LOAD : FSM_STATE_MULTIPLY_WAIT;
+       FSM_STATE_MULTIPLY_WAIT: fsm_state <= mul_rdy ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_MULTIPLY_WAIT;
+       FSM_STATE_MULTIPLY_UNLOAD: fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_SQUARE_LOAD;
+       // a square step always follows, whether or not the multiply step was taken
+       FSM_STATE_SQUARE_LOAD:   fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_SQUARE_WAIT;
+       FSM_STATE_SQUARE_WAIT:   fsm_state <= mul_rdy ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_SQUARE_WAIT;
+       FSM_STATE_SQUARE_UNLOAD: fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_ROUND_END;
+       // loop back for the next exponent bit until the last round has been processed
+       FSM_STATE_ROUND_END:     fsm_state <= (round_count < round_count_last) ? FSM_STATE_READ_EI : FSM_STATE_FINAL;
+
+       FSM_STATE_FINAL:         fsm_state <= FSM_STATE_IDLE;
+       // any unused encoding falls back to IDLE
+       default:                 fsm_state <= FSM_STATE_IDLE;
+
+     endcase
+
+
+endmodule
diff --git a/src/rtl/modexps6_wrapper.v b/src/rtl/modexps6_wrapper.v
index aa49261..89646d5 100644
--- a/src/rtl/modexps6_wrapper.v
+++ b/src/rtl/modexps6_wrapper.v
@@ -1,187 +1,211 @@
-module modexps6_wrapper
-	(
-		clk, reset_n,
-		cs, we,
-		address, write_data, read_data
-	);
-	
-	
-		//
-		// Ports
-		//
-	input		wire           clk;
-	input		wire           reset_n;
-
-	input		wire           cs;
-	input		wire           we;
-
-	input		wire  [ 9: 0]	address;
-	input		wire  [31: 0]	write_data;
-	output	wire	[31: 0]	read_data;
-	
-	
-		//
-		// Address Decoder
-		//
-	localparam	ADDR_MSB_REGS	= 1'b0;
-	localparam	ADDR_MSB_CORE	= 1'b1;
-	wire				address_msb = address[9];
-	wire	[ 8: 0]	address_lsb	= address[8:0];
-	
-	
-		//
-		// Output Mux
-		//
-	wire	[31: 0]	read_data_regs;
-	wire	[31: 0]	read_data_core;
-	
-	
-	  //
-	  // Registers
-	  //
-	localparam ADDR_NAME0			= 9'h000;
-	localparam ADDR_NAME1			= 9'h001;
-	localparam ADDR_VERSION			= 9'h002;
-
-	localparam ADDR_CONTROL			= 9'h008;		// {next, init}
-	localparam ADDR_STATUS			= 9'h009;		// {valid, ready}
-	localparam ADDR_MODE				= 9'h010;		// 0 = slow secure, 1 = fast unsafe (public)
-	localparam ADDR_MODULUS_BITS	= 9'h011;		// 
-	localparam ADDR_EXPONENT_BITS	= 9'h012;		// 
-	localparam ADDR_GPIO_REG		= 9'h020;		// 
-
-	localparam CONTROL_INIT_BIT	= 0;
-	localparam CONTROL_NEXT_BIT	= 1;
-
-	localparam STATUS_READY_BIT	= 0;
-	localparam STATUS_VALID_BIT	= 1;
-
-	localparam CORE_NAME0			= 32'h6D6F6465;	// "mode"
-	localparam CORE_NAME1			= 32'h78707336;	// "xps6"
-	localparam CORE_VERSION			= 32'h302E3130;	// "0.10"
-
-
-		//
-		// Registers
-		//
-	reg	[ 1: 0]	reg_control;
-	reg				reg_mode;
-	reg	[12: 0]	reg_modulus_width;
-	reg	[12: 0]	reg_exponent_width;
-	reg	[31: 0]	reg_gpio;
-	
-	
-		//
-		// Wires
-		//
-	wire	[ 1: 0]	reg_status;
-		
-		
-		//
-		// ModExpS6
-		//
-	modexps6_top #
-	(
-		.MAX_MODULUS_WIDTH	(4096)
-	)
-	modexps6_core
-	(
-		.clk						(clk),
-		
-		.init						(reg_control[CONTROL_INIT_BIT]),
-		.ready					(reg_status[STATUS_READY_BIT]),
-		.next						(reg_control[CONTROL_NEXT_BIT]),
-		.valid					(reg_status[STATUS_VALID_BIT]),
-		
-		.modulus_width			(reg_modulus_width),
-		.exponent_width		(reg_exponent_width),
-		
-		.fast_public_mode		(reg_mode),
-		
-		.bus_cs					(cs && (address_msb == ADDR_MSB_CORE)),
-		.bus_we					(we),
-		.bus_addr				(address_lsb),
-		.bus_data_wr			(write_data),
-		.bus_data_rd			(read_data_core)
-	);
-	
-	
-		//
-		// Read Latch
-		//
-	reg [31: 0] tmp_read_data;
-
-
-	//
-	// Read/Write Interface
-	//
-	always @(posedge clk)
-		//
-		if (!reset_n) begin
-			//
-			reg_control				<= 2'b00;
-			reg_mode					<= 1'b0;
-			reg_modulus_width		<= 13'd1024;
-			reg_exponent_width	<= 13'd1024;
-			//
-		end else if (cs && (address_msb == ADDR_MSB_REGS)) begin
-			//
-			if (we) begin
-				//
-				// Write Handler
-				//
-				case (address_lsb)
-					//
-					ADDR_CONTROL:			reg_control				<= write_data[ 1: 0];
-					ADDR_MODE:				reg_mode					<= write_data[0];
-					ADDR_MODULUS_BITS:	reg_modulus_width		<= write_data[12: 0];
-					ADDR_EXPONENT_BITS:	reg_exponent_width	<= write_data[12: 0];
-					ADDR_GPIO_REG:			reg_gpio					<= write_data;
-					//
-				endcase
-				//	
-			end else begin
-				//
-				// Read Handler
-				//
-				case (address)
-					//
-					ADDR_NAME0:				tmp_read_data <= CORE_NAME0;
-					ADDR_NAME1:				tmp_read_data <= CORE_NAME1;
-					ADDR_VERSION:			tmp_read_data <= CORE_VERSION;
-					ADDR_CONTROL:			tmp_read_data <= {{30{1'b0}}, reg_control};
-					ADDR_STATUS:			tmp_read_data <= {{30{1'b0}}, reg_status};
-					ADDR_MODE:				tmp_read_data <= {{31{1'b0}}, reg_mode};
-					ADDR_MODULUS_BITS:	tmp_read_data <= {{19{1'b0}}, reg_modulus_width};
-					ADDR_EXPONENT_BITS:	tmp_read_data <= {{19{1'b0}}, reg_exponent_width};
-					ADDR_GPIO_REG:			tmp_read_data <= reg_gpio;
-					//
-					default:					tmp_read_data <= 32'h00000000;
-					//
-				endcase
-				//
-			end
-			//
-		end
-		
-		
-		//
-		// Register / Core Memory Selector
-		//
-	reg address_msb_last;
-	always @(posedge clk) address_msb_last = address_msb;
-	
-	reg	[31: 0]	read_data_mux;
-	assign read_data = read_data_mux;	
-	
-	always @(*)
-		//
-		case (address_msb_last)
-			//
-			ADDR_MSB_REGS:		read_data_mux = tmp_read_data;
-			ADDR_MSB_CORE:		read_data_mux = read_data_core;
-			//
-		endcase
-
-		
+//======================================================================
+//
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module modexps6_wrapper
+  (
+   input wire          clk,             // every register below updates on posedge clk
+   input wire          reset_n,         // synchronous reset, active low
+   // cs qualifies each bus access; we = 1 selects a write, we = 0 a read.
+   input wire          cs,
+   input wire          we,
+   // address[9] selects wrapper registers (0) or the core (1); address[8:0] is the offset.
+   input wire [9: 0]   address,
+   input wire [31: 0]  write_data,
+   output wire [31: 0] read_data
+   );
+   // Bus wrapper around modexps6_top: the lower half of the address space holds the
+   // name/version/control/status/config registers, the upper half is passed to the core.
+   //
+   // Address Decoder
+   //
+   localparam ADDR_MSB_REGS     = 1'b0;
+   localparam ADDR_MSB_CORE     = 1'b1;
+   wire                address_msb = address[9];
+   wire [8: 0]         address_lsb = address[8:0];
+
+
+   //
+   // Output Mux
+   //
+   // Read data coming back from modexps6_core (bus_data_rd).
+   wire [31: 0]        read_data_core;
+
+
+   //
+   // Register Address Map (offsets within the register window)
+   //
+   localparam ADDR_NAME0        = 9'h000;
+   localparam ADDR_NAME1        = 9'h001;
+   localparam ADDR_VERSION      = 9'h002;
+
+   localparam ADDR_CONTROL      = 9'h008;               // {next, init}
+   localparam ADDR_STATUS       = 9'h009;               // {valid, ready}
+   localparam ADDR_MODE         = 9'h010;               // 0 = slow secure, 1 = fast unsafe (public)
+   localparam ADDR_MODULUS_BITS = 9'h011;               // reg_modulus_width (13 bits)
+   localparam ADDR_EXPONENT_BITS = 9'h012;              // reg_exponent_width (13 bits)
+   localparam ADDR_GPIO_REG     = 9'h020;               // reg_gpio (32 bits, read/write)
+
+   localparam CONTROL_INIT_BIT  = 0;
+   localparam CONTROL_NEXT_BIT  = 1;
+
+   localparam STATUS_READY_BIT  = 0;
+   localparam STATUS_VALID_BIT  = 1;
+
+   localparam CORE_NAME0        = 32'h6D6F6465; // "mode"
+   localparam CORE_NAME1        = 32'h78707336; // "xps6"
+   localparam CORE_VERSION      = 32'h302E3130; // "0.10"
+
+
+   //
+   // Registers
+   //
+   reg [1: 0]          reg_control;
+   reg                 reg_mode;
+   reg [12: 0]         reg_modulus_width;
+   reg [12: 0]         reg_exponent_width;
+   reg [31: 0]         reg_gpio;
+
+
+   //
+   // Wires
+   //
+   wire [1: 0]         reg_status;      // {valid, ready}, wired to the core's valid/ready ports
+
+
+   //
+   // ModExpS6
+   //
+   modexps6_top #
+     (
+      .MAX_MODULUS_WIDTH        (4096)
+      )
+   modexps6_core
+     (
+      .clk                      (clk),
+
+      .init                     (reg_control[CONTROL_INIT_BIT]),
+      .ready                    (reg_status[STATUS_READY_BIT]),
+      .next                     (reg_control[CONTROL_NEXT_BIT]),
+      .valid                    (reg_status[STATUS_VALID_BIT]),
+
+      .modulus_width            (reg_modulus_width),
+      .exponent_width           (reg_exponent_width),
+
+      .fast_public_mode         (reg_mode),
+
+      .bus_cs                   (cs && (address_msb == ADDR_MSB_CORE)),
+      .bus_we                   (we),
+      .bus_addr                 (address_lsb),
+      .bus_data_wr              (write_data),
+      .bus_data_rd              (read_data_core)
+      );
+
+
+   //
+   // Read Latch (result of the most recent register-window read)
+   //
+   reg [31: 0]         tmp_read_data;
+
+
+   //
+   // Read/Write Interface
+   //
+   always @(posedge clk)
+     // Reset is sampled on the clock edge (synchronous) and takes priority over bus accesses.
+     if (!reset_n) begin
+        //
+        reg_control             <= 2'b00;
+        reg_mode                <= 1'b0;
+        reg_modulus_width       <= 13'd1024;
+        reg_exponent_width      <= 13'd1024;
+        // NOTE(review): reg_gpio and tmp_read_data are not reset here -- confirm that is intended.
+     end else if (cs && (address_msb == ADDR_MSB_REGS)) begin
+        // Register-window access; accesses with address_msb == 1 go to the core via bus_cs.
+        if (we) begin
+           //
+           // Write Handler
+           //
+           case (address_lsb)
+             //
+             ADDR_CONTROL:      reg_control             <= write_data[1: 0];
+             ADDR_MODE:         reg_mode                <= write_data[0];
+             ADDR_MODULUS_BITS: reg_modulus_width       <= write_data[12: 0];
+             ADDR_EXPONENT_BITS: reg_exponent_width     <= write_data[12: 0];
+             ADDR_GPIO_REG:     reg_gpio                <= write_data;
+             // Writes to any other offset are ignored (no default branch).
+           endcase
+           //
+        end else begin
+           //
+           // Read Handler
+           //
+           case (address_lsb)
+             // address_msb is 0 in this branch, so the 9-bit offset alone selects the register.
+             ADDR_NAME0:        tmp_read_data <= CORE_NAME0;
+             ADDR_NAME1:        tmp_read_data <= CORE_NAME1;
+             ADDR_VERSION:      tmp_read_data <= CORE_VERSION;
+             ADDR_CONTROL:      tmp_read_data <= {{30{1'b0}}, reg_control};
+             ADDR_STATUS:       tmp_read_data <= {{30{1'b0}}, reg_status};
+             ADDR_MODE:         tmp_read_data <= {{31{1'b0}}, reg_mode};
+             ADDR_MODULUS_BITS: tmp_read_data <= {{19{1'b0}}, reg_modulus_width};
+             ADDR_EXPONENT_BITS: tmp_read_data <= {{19{1'b0}}, reg_exponent_width};
+             ADDR_GPIO_REG:     tmp_read_data <= reg_gpio;
+             // Unmapped offsets read back as zero.
+             default:           tmp_read_data <= 32'h00000000;
+             //
+           endcase
+           //
+        end
+        //
+     end
+
+
+   //
+   // Register / Core Memory Selector
+   //
+   reg address_msb_last;        // address_msb delayed by one clock
+   always @(posedge clk) address_msb_last <= address_msb;
+   // Non-blocking above: clocked registers must not race with other posedge clk processes.
+   reg [31: 0] read_data_mux;
+   assign read_data = read_data_mux;
+
+   always @(*)
+     // Pick the read source that matches the previous cycle's address_msb.
+     case (address_msb_last)
+       //
+       ADDR_MSB_REGS:           read_data_mux = tmp_read_data;
+       ADDR_MSB_CORE:           read_data_mux = read_data_core;
+       //
+     endcase
+
+
 endmodule
diff --git a/src/rtl/ram_1rw_1ro_readfirst.v b/src/rtl/ram_1rw_1ro_readfirst.v
index 7ba11ea..25b708f 100644
--- a/src/rtl/ram_1rw_1ro_readfirst.v
+++ b/src/rtl/ram_1rw_1ro_readfirst.v
@@ -1,69 +1,88 @@
-`timescale 1ns / 1ps
-
-module ram_1rw_1ro_readfirst
-	(
-		clk,
-		a_addr, a_wr, a_in, a_out,
-		b_addr, b_out
-	);
-
-		
-		//
-		// Parameters
-		//
-	parameter	MEM_WIDTH		= 32;
-   parameter	MEM_ADDR_BITS	= 8;
-		
-		
-		//
-		// Ports
-		//
-	input		wire								clk;
-	
-	input		wire	[MEM_ADDR_BITS-1:0]	a_addr;
-	input		wire								a_wr;
-	input		wire	[MEM_WIDTH-1:0]		a_in;
-	output	wire	[MEM_WIDTH-1:0]		a_out;
-	
-	input		wire	[MEM_ADDR_BITS-1:0]	b_addr;
-	output	wire	[MEM_WIDTH-1:0]		b_out;
-		
-		
-		//
-		// BRAM
-		//
+//======================================================================
+//
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module ram_1rw_1ro_readfirst
+  #(parameter MEM_WIDTH            = 32,
+    parameter MEM_ADDR_BITS        = 8)
+   (
+    input wire                     clk,
+
+    input wire [MEM_ADDR_BITS-1:0] a_addr,
+    input wire                     a_wr,
+    input wire [MEM_WIDTH-1:0]     a_in,
+    output wire [MEM_WIDTH-1:0]    a_out,
+
+    input wire [MEM_ADDR_BITS-1:0] b_addr,
+    output wire [MEM_WIDTH-1:0]    b_out
+    );
+
+
+   //
+   // BRAM (2**MEM_ADDR_BITS words, each MEM_WIDTH bits wide)
+   //
    (* RAM_STYLE="BLOCK" *)
-   reg	[MEM_WIDTH-1:0]	bram[0:(2**MEM_ADDR_BITS)-1];
-	
-	
-		//
-		// Output Registers
-		//
-   reg	[MEM_WIDTH-1:0]	bram_reg_a;
-	reg	[MEM_WIDTH-1:0]	bram_reg_b;
-	
-	assign a_out = bram_reg_a;
-	assign b_out = bram_reg_b;
+   reg [MEM_WIDTH-1:0]             bram[0:(2**MEM_ADDR_BITS)-1];
+
 
+   //
+   // Output Registers
+   //
+   reg [MEM_WIDTH-1:0]             bram_reg_a;
+   reg [MEM_WIDTH-1:0]             bram_reg_b;
 
-		//
-		// Read-Write Port A
-		//
+   assign a_out = bram_reg_a;
+   assign b_out = bram_reg_b;
+
+
+   //
+   // Read-Write Port A (read-first: a_out registers the word stored before a same-cycle write)
+   //
    always @(posedge clk) begin
-		//
-		bram_reg_a <= bram[a_addr];
-		//
-		if (a_wr) bram[a_addr] <= a_in;
-		//
-	end
-		
-		
-		//
-		// Read-Only Port B
-		//
-	always @(posedge clk)
-		//
-		bram_reg_b <= bram[b_addr];
-		
-	
-endmodule
+      //
+      bram_reg_a <= bram[a_addr];
+      //
+      if (a_wr) bram[a_addr] <= a_in;
+      //
+   end
+
+
+   //
+   // Read-Only Port B (registered read: b_out updates on the clock edge after b_addr is presented)
+   //
+   always @(posedge clk)
+     //
+     bram_reg_b <= bram[b_addr];
+
+
+endmodule



More information about the Commits mailing list