[Cryptech-Commits] [core/math/ecdsalib] branch master updated: Adapted for new ECDSA architecture.

git at cryptech.is git at cryptech.is
Thu Dec 20 10:46:07 UTC 2018


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository core/math/ecdsalib.

The following commit(s) were added to refs/heads/master by this push:
     new 21cc313  Adapted for new ECDSA architecture.
21cc313 is described below

commit 21cc313097a670403d5740177e20209b1f0388ae
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Wed Dec 19 15:30:10 2018 +0300

    Adapted for new ECDSA architecture.
---
 bench/tb_modular_multiplier_256.v                  | 366 ++++++++
 bench/tb_modular_multiplier_384.v                  | 366 ++++++++
 rtl/curve/uop/uop_add_rom.v                        |  66 --
 rtl/curve/uop/uop_conv_rom.v                       |  38 -
 rtl/curve/uop/uop_dbl_rom.v                        |  58 --
 rtl/curve/uop/uop_init_rom.v                       |  33 -
 rtl/curve/uop/uop_init_rom_ecdh.v                  |  33 -
 rtl/curve/uop_ecdsa.v                              |  50 --
 rtl/lowlevel/adder32_wrapper.v                     |  73 --
 rtl/lowlevel/adder47_wrapper.v                     |  69 --
 rtl/lowlevel/artix7/adder32_artix7.v               |  96 --
 rtl/lowlevel/artix7/adder47_artix7.v               |  91 --
 rtl/lowlevel/artix7/dsp48e1_wrapper.v              | 159 ----
 rtl/lowlevel/artix7/mac16_artix7.v                 |  90 --
 rtl/lowlevel/artix7/subtractor32_artix7.v          |  94 --
 rtl/lowlevel/ecdsa_lowlevel_settings.v             |  17 -
 rtl/lowlevel/generic/adder32_generic.v             |  67 --
 rtl/lowlevel/generic/adder47_generic.v             |  64 --
 rtl/lowlevel/generic/mac16_generic.v               |  74 --
 rtl/lowlevel/generic/subtractor32_generic.v        |  67 --
 rtl/lowlevel/mac16_wrapper.v                       |  75 --
 rtl/lowlevel/subtractor32_wrapper.v                |  72 --
 rtl/microcode/ecdsa_uop.vh                         | 121 +++
 rtl/modular/ecdsa256_modular_multiplier.v          | 404 +++++++++
 rtl/modular/ecdsa256_modular_reductor.v            | 702 +++++++++++++++
 .../ecdsa256_modulus_distmem.v}                    |  72 +-
 rtl/modular/ecdsa384_modular_multiplier.v          | 404 +++++++++
 rtl/modular/ecdsa384_modular_reductor.v            | 749 ++++++++++++++++
 .../ecdsa384_modulus_distmem.v}                    |  77 +-
 rtl/modular/modular_adder.v                        | 298 -------
 .../modular_invertor/helper/modinv_helper_copy.v   | 148 ----
 .../modular_invertor/helper/modinv_helper_init.v   | 172 ----
 .../helper/modinv_helper_invert_compare.v          | 286 ------
 .../helper/modinv_helper_invert_precalc.v          | 408 ---------
 .../helper/modinv_helper_invert_update.v           | 257 ------
 .../helper/modinv_helper_reduce_precalc.v          | 328 -------
 .../helper/modinv_helper_reduce_update.v           | 153 ----
 rtl/modular/modular_invertor/modinv_clog2.v        |  10 -
 rtl/modular/modular_invertor/modular_invertor.v    | 981 ---------------------
 rtl/modular/modular_subtractor.v                   | 292 ------
 rtl/multiword/mw_comparator.v                      | 220 -----
 rtl/multiword/mw_mover.v                           | 175 ----
 42 files changed, 3154 insertions(+), 5221 deletions(-)

diff --git a/bench/tb_modular_multiplier_256.v b/bench/tb_modular_multiplier_256.v
new file mode 100644
index 0000000..676a183
--- /dev/null
+++ b/bench/tb_modular_multiplier_256.v
@@ -0,0 +1,366 @@
+//------------------------------------------------------------------------------
+//
+// tb_modular_multiplier_256.v
+// -----------------------------------------------------------------------------
+// Testbench for the 256-bit modular multi-word multiplier.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_modular_multiplier_256;
+
+
+   // Test Vectors: N is the modulus (the NIST P-256 field prime); every
+   // (X_i, Y_i, P_i) triple holds two operands and the expected product
+   // P_i = (X_i * Y_i) mod N, which the test task re-derives before use.
+   localparam	[255:0]	N	= 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+   // X_1, Y_1 below are the affine coordinates of the P-256 base point
+   localparam	[255:0]	X_1	= 256'h6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296;
+   localparam	[255:0]	Y_1	= 256'h4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5;
+   localparam	[255:0]	P_1	= 256'h823cd15f6dd3c71933565064513a6b2bd183e554c6a08622f713ebbbface98be;
+
+   localparam	[255:0]	X_2	= 256'h29d05c193da77b710e86323538b77e1b11f904fea42998be16bd8d744ece7ad0;
+   localparam	[255:0]	Y_2	= 256'hb01cbd1c01e58065711814b583f061e9d431cca994cea1313449bf97c840ae07;
+   localparam	[255:0]	P_2	= 256'h76b2571d1d009ab0e7d1cc086c7d3648f08755b2e2585e780d11f053b06fb6ec;
+
+   localparam	[255:0]	X_3	= 256'h8101ece47464a6ead70cf69a6e2bd3d88691a3262d22cba4f7635eaff26680a8;
+   localparam	[255:0]	Y_3	= 256'hd8a12ba61d599235f67d9cb4d58f1783d3ca43e78f0a5abaa624079936c0c3a9;
+   localparam	[255:0]	P_3	= 256'h944fea6a4fac7ae475a6bb211db4bbd394bd9b3ee9a038f6c17125a00b3a5375;
+
+   localparam	[255:0]	X_4	= 256'h7214bc9647160bbd39ff2f80533f5dc6ddd70ddf86bb815661e805d5d4e6f27c;
+   localparam	[255:0]	Y_4	= 256'h8b81e3e977597110c7cf2633435b2294b72642987defd3d4007e1cfc5df84541;
+   localparam	[255:0]	P_4	= 256'h78d3e33c81ab9c652679363c76df004ea6f9a9e3a242a0fb71a4e8fdf41ab519;
+
+
+   // Core Parameters: operands are handled as OPERAND_NUM_WORDS words of
+   // 32 bits each (8 * 32 = 256); WORD_COUNTER_WIDTH is the width of the
+   // word address (3 bits address 8 words).
+   localparam	WORD_COUNTER_WIDTH	=  3;
+   localparam	OPERAND_NUM_WORDS	=  8;
+
+
+   //
+   // Clock (100 MHz)
+   // toggles every 5 time units of the 1 ns timescale, i.e. a 10 ns period
+   reg clk = 1'b0;
+   always #5 clk = ~clk;
+
+
+   //
+   // Inputs, Outputs (rst_n and ena are driven by the testbench, rdy by the uut)
+   //
+   reg rst_n;
+   reg ena;
+   wire rdy;
+
+
+   // Buffers (X, Y, N, P): core_* signals are the uut side of the memories,
+   // tb_* signals are the testbench side; X, Y and N share one testbench
+   // write address and write enable (tb_xyn_addr, tb_xyn_wren).
+   wire [WORD_COUNTER_WIDTH-1:0] core_x_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] core_y_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] core_p_addr;
+
+   wire 			 core_p_wren;
+
+   wire [                  31:0] core_x_data;
+   wire [                  31:0] core_y_data;
+   wire [                  31:0] core_n_data;
+   wire [                  31:0] core_p_data;
+
+   reg [WORD_COUNTER_WIDTH-1:0]  tb_xyn_addr;
+   reg [WORD_COUNTER_WIDTH-1:0]  tb_p_addr;
+
+   reg 				 tb_xyn_wren;
+
+   reg [                  31:0]  tb_x_data;
+   reg [                  31:0]  tb_y_data;
+   reg [                  31:0]  tb_n_data;
+   wire [                  31:0] tb_p_data;
+   // operand X: port A is wired to the testbench, port B to the uut
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_x
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xyn_addr),
+      .a_wr		(tb_xyn_wren),
+      .a_in		(tb_x_data),
+      .a_out	(),
+
+      .b_addr	(core_x_addr),
+      .b_out	(core_x_data)
+      );
+   // operand Y: port A is wired to the testbench, port B to the uut
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_y
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xyn_addr),
+      .a_wr		(tb_xyn_wren),
+      .a_in		(tb_y_data),
+      .a_out	(),
+
+      .b_addr	(core_y_addr),
+      .b_out	(core_y_data)
+      );
+   // modulus N: port A is wired to the testbench, port B to the uut
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_n
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xyn_addr),
+      .a_wr		(tb_xyn_wren),
+      .a_in		(tb_n_data),
+      .a_out	(),
+
+      .b_addr	(core_n_addr),
+      .b_out	(core_n_data)
+      );
+   // product P (instance is named bram_s): port A is written by the uut, port B is read by the testbench
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_s
+     (
+      .clk		(clk),
+
+      .a_addr	(core_p_addr),
+      .a_wr		(core_p_wren),
+      .a_in		(core_p_data),
+      .a_out	(),
+
+      .b_addr	(tb_p_addr),
+      .b_out	(tb_p_data)
+      );
+
+
+   //
+   // UUT (its a/b operand ports are fed from the X/Y memories, n from the N memory)
+   // NOTE(review): this commit's file list adds rtl/modular/ecdsa256_modular_multiplier.v -- confirm the module name below still resolves
+   modular_multiplier_256 uut
+     (
+      .clk		(clk),
+      .rst_n	(rst_n),
+
+      .ena		(ena),
+      .rdy		(rdy),
+
+      .a_addr	(core_x_addr),
+      .b_addr	(core_y_addr),
+      .n_addr	(core_n_addr),
+      .p_addr	(core_p_addr),
+      .p_wren	(core_p_wren),
+
+      .a_din	(core_x_data),
+      .b_din	(core_y_data),
+      .n_din	(core_n_data),
+      .p_dout	(core_p_data)
+      );
+
+
+   // Testbench Routine: holds rst_n low for 200 ns, waits another 100 ns,
+   // runs the four test vectors one after another and prints an overall
+   // SUCCESS/FAILURE line based on the 'ok' flag.
+   reg 				 ok = 1;	// stays 1 only while every vector passes
+   initial begin
+
+      /* initialize control inputs (reset asserted, core idle) */
+      rst_n				= 0;
+      ena				= 0;
+
+      tb_xyn_wren		= 0;
+
+      /* keep reset asserted for 200 ns */
+      #200;
+
+      /* de-assert reset */
+      rst_n				= 1;
+
+      /* wait another 100 ns before the first test */
+      #100;
+
+      /* run tests (each call blocks until its result has been checked) */
+      test_modular_multiplier(X_1, Y_1, N, P_1);
+      test_modular_multiplier(X_2, Y_2, N, P_2);
+      test_modular_multiplier(X_3, Y_3, N, P_3);
+      test_modular_multiplier(X_4, Y_4, N, P_4);
+
+      /* print result */
+      if (ok)	$display("tb_modular_multiplier_256: SUCCESS");
+      else	$display("tb_modular_multiplier_256: FAILURE");
+      // $finish is left disabled: the simulation is not stopped from here (clk keeps toggling)
+      //$finish;
+      //
+   end
+
+
+   // Test Task: writes x, y and n into the operand memories, starts the core,
+   // waits for rdy, reads the result memory back and compares it with pp.
+   // Clears 'ok' on a mismatch; calls $finish if pp itself != (x * y) mod n.
+   reg	[255:0]	p;	// not referenced anywhere in this testbench
+   reg 		p_ok;	// outcome of the most recent comparison
+
+   integer 	w;	// word index shared by the write and read loops
+
+   reg [511:0] 	pp_full;	// double-width product x * y
+   reg [255:0] 	pp_ref;	// pp_full reduced modulo n
+
+   task test_modular_multiplier;
+      // x, y: operands; n: modulus; pp: expected value of (x * y) mod n
+      input	[255:0] x;
+      input [255:0] 	y;
+      input [255:0] 	n;
+      input [255:0] 	pp;
+
+      reg [255:0] 	x_shreg;
+      reg [255:0] 	y_shreg;
+      reg [255:0] 	n_shreg;
+      reg [255:0] 	p_shreg;
+
+      begin
+
+	 /* start filling memories */
+	 tb_xyn_wren	= 1;
+
+	 /* initialize shift registers */
+	 x_shreg = x;
+	 y_shreg = y;
+	 n_shreg = n;
+
+	 /* write all the words, least significant word goes to address 0 */
+	 for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+	    /* set addresses */
+	    tb_xyn_addr	= w[WORD_COUNTER_WIDTH-1:0];
+
+	    /* set data words */
+	    tb_x_data	= x_shreg[31:0];
+	    tb_y_data	= y_shreg[31:0];
+	    tb_n_data	= n_shreg[31:0];
+
+	    /* shift inputs right by one word, filling the top word with X */
+	    x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+	    y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+	    n_shreg = {{32{1'bX}}, n_shreg[255:32]};
+
+	    /* wait for 1 clock tick */
+	    #10;
+
+	 end
+
+	 /* wipe addresses */
+	 tb_xyn_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+
+	 /* wipe data words */
+	 tb_x_data	= {32{1'bX}};
+	 tb_y_data	= {32{1'bX}};
+	 tb_n_data	= {32{1'bX}};
+
+	 /* stop filling memories */
+	 tb_xyn_wren	= 0;
+
+	 /* calculate reference value (operands zero-extended to 512 bits) */
+	 pp_full = {{256{1'b0}}, x} * {{256{1'b0}}, y};
+	 pp_ref = pp_full % {{256{1'b0}}, n};
+
+	 /* compare reference value against hard-coded one, abort the run if the vector is wrong */
+	 if (pp_ref != pp) begin
+	    $display("ERROR: pp_ref != pp");
+	    $finish;
+	 end
+
+	 /* start operation */
+	 ena = 1;
+
+	 /* clear flag after one clock period */
+	 #10 ena = 0;
+	 /* NOTE(review): the loop below exits at once if rdy is still high here; presumably the uut drops rdy on the first clock after ena -- confirm */
+	 /* wait for operation to complete (rdy is polled every 10 ns) */
+	 while (!rdy) #10;
+
+	 /* read result */
+	 for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+	    /* set address */
+	    tb_p_addr	= w[WORD_COUNTER_WIDTH-1:0];
+
+	    /* wait for 1 clock tick (presumably the read latency of the memory -- confirm) */
+	    #10;
+
+	    /* store data word: shifted in from the top, so word 0 ends up in bits 31:0 */
+	    p_shreg = {tb_p_data, p_shreg[255:32]};
+
+	 end
+
+	 /* compare */
+	 p_ok = (p_shreg == pp);
+
+	 /* display results */
+	 $display("test_modular_multiplier_256(): %s", p_ok ? "OK" : "ERROR");
+
+	 /* update flag */
+	 ok = ok && p_ok;
+
+      end
+
+   endtask
+
+
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_modular_multiplier_384.v b/bench/tb_modular_multiplier_384.v
new file mode 100644
index 0000000..a5825d8
--- /dev/null
+++ b/bench/tb_modular_multiplier_384.v
@@ -0,0 +1,366 @@
+//------------------------------------------------------------------------------
+//
+// tb_modular_multiplier_384.v
+// -----------------------------------------------------------------------------
+// Testbench for the 384-bit modular multi-word multiplier.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_modular_multiplier_384;
+
+
+   // Test Vectors: N is the modulus (the NIST P-384 field prime); every
+   // (X_i, Y_i, P_i) triple holds two operands and the expected product
+   // P_i = (X_i * Y_i) mod N, which the test task re-derives before use.
+   localparam	[383:0]	N	= 384'hfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff;
+   // X_1, Y_1 below are the affine coordinates of the P-384 base point
+   localparam	[383:0]	X_1	= 384'haa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7;
+   localparam	[383:0]	Y_1	= 384'h3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f;
+   localparam	[383:0]	P_1	= 384'h332e559389c970313cb29c4b55af5783821971a99c250daf84dc5d3cc441cb0a482e90de9d3ccd96b3c8c48b2ad3f025;
+
+   localparam	[383:0]	X_2	= 384'haaf06bba82e9f590e29c71c219bea51723c5893ae8b0c8cf4c117c3efb57ab8d55fa1b428155ad278b5743911b13ea8a;
+   localparam	[383:0]	Y_2	= 384'hc9e821b569d9d390a26167406d6d23d6070be242d765eb831625ceec4a0f473ef59f4e30e2817e6285bce2846f15f19d;
+   localparam	[383:0]	P_2	= 384'haa1a9db70fba0a4c034777cdcd93e8bd6e9afa1171d43bdea0a16c32da20e7ebccb2fac9676f9d67a31e6f4f69e876e5;
+
+   localparam	[383:0]	X_3	= 384'h1fbac8eebd0cbf35640b39efe0808dd774debff20a2a329e91713baf7d7f3c3e81546d883730bee7e48678f857b02ca0;
+   localparam	[383:0]	Y_3	= 384'heb213103bd68ce343365a8a4c3d4555fa385f5330203bdd76ffad1f3affb95751c132007e1b240353cb0a4cf1693bdf9;
+   localparam	[383:0]	P_3	= 384'h80f70000040a44b05f3752b7d5338f87e409b868f032911bda888451c13097039d66d9e7b0e3e799b9dd613d2524b7af;
+
+   localparam	[383:0]	X_4	= 384'ha0c27ec893092dea1e1bd2ccfed3cf945c8134ed0c9f81311a0f4a05942db8dbed8dd59f267471d5462aa14fe72de856;
+   localparam	[383:0]	Y_4	= 384'h855649409815bb91424eaca5fd76c97375d575d1422ec53d343bd33b847fdf0c11569685b528ab25493015428d7cf72b;
+   localparam	[383:0]	P_4	= 384'h548e8456d5b3c36557a59914af514739a92908e59ddde731b8746891ad26199de955789e7cc34bfe966e3471c2684969;
+
+
+   // Core Parameters: operands are handled as OPERAND_NUM_WORDS words of
+   // 32 bits each (12 * 32 = 384); WORD_COUNTER_WIDTH is the width of the
+   // word address (4 bits are needed to address 12 words).
+   localparam	WORD_COUNTER_WIDTH	=  4;
+   localparam	OPERAND_NUM_WORDS	= 12;
+
+
+   //
+   // Clock (100 MHz)
+   // toggles every 5 time units of the 1 ns timescale, i.e. a 10 ns period
+   reg clk = 1'b0;
+   always #5 clk = ~clk;
+
+
+   //
+   // Inputs, Outputs (rst_n and ena are driven by the testbench, rdy by the uut)
+   //
+   reg rst_n;
+   reg ena;
+   wire rdy;
+
+
+   // Buffers (X, Y, N, P): core_* signals are the uut side of the memories,
+   // tb_* signals are the testbench side; X, Y and N share one testbench
+   // write address and write enable (tb_xyn_addr, tb_xyn_wren).
+   wire [WORD_COUNTER_WIDTH-1:0] core_x_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] core_y_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] core_p_addr;
+
+   wire 			 core_p_wren;
+
+   wire [                  31:0] core_x_data;
+   wire [                  31:0] core_y_data;
+   wire [                  31:0] core_n_data;
+   wire [                  31:0] core_p_data;
+
+   reg [WORD_COUNTER_WIDTH-1:0]  tb_xyn_addr;
+   reg [WORD_COUNTER_WIDTH-1:0]  tb_p_addr;
+
+   reg 				 tb_xyn_wren;
+
+   reg [                  31:0]  tb_x_data;
+   reg [                  31:0]  tb_y_data;
+   reg [                  31:0]  tb_n_data;
+   wire [                  31:0] tb_p_data;
+   // operand X: port A is wired to the testbench, port B to the uut
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_x
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xyn_addr),
+      .a_wr		(tb_xyn_wren),
+      .a_in		(tb_x_data),
+      .a_out	(),
+
+      .b_addr	(core_x_addr),
+      .b_out	(core_x_data)
+      );
+   // operand Y: port A is wired to the testbench, port B to the uut
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_y
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xyn_addr),
+      .a_wr		(tb_xyn_wren),
+      .a_in		(tb_y_data),
+      .a_out	(),
+
+      .b_addr	(core_y_addr),
+      .b_out	(core_y_data)
+      );
+   // modulus N: port A is wired to the testbench, port B to the uut
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_n
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xyn_addr),
+      .a_wr		(tb_xyn_wren),
+      .a_in		(tb_n_data),
+      .a_out	(),
+
+      .b_addr	(core_n_addr),
+      .b_out	(core_n_data)
+      );
+   // product P (instance is named bram_s): port A is written by the uut, port B is read by the testbench
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_s
+     (
+      .clk	(clk),
+
+      .a_addr	(core_p_addr),
+      .a_wr	(core_p_wren),
+      .a_in	(core_p_data),
+      .a_out	(),
+
+      .b_addr	(tb_p_addr),
+      .b_out	(tb_p_data)
+      );
+
+
+   //
+   // UUT (its a/b operand ports are fed from the X/Y memories, n from the N memory)
+   // NOTE(review): this commit's file list adds rtl/modular/ecdsa384_modular_multiplier.v -- confirm the module name below still resolves
+   modular_multiplier_384 uut
+     (
+      .clk	(clk),
+      .rst_n	(rst_n),
+
+      .ena	(ena),
+      .rdy	(rdy),
+
+      .a_addr	(core_x_addr),
+      .b_addr	(core_y_addr),
+      .n_addr	(core_n_addr),
+      .p_addr	(core_p_addr),
+      .p_wren	(core_p_wren),
+
+      .a_din	(core_x_data),
+      .b_din	(core_y_data),
+      .n_din	(core_n_data),
+      .p_dout	(core_p_data)
+      );
+
+
+   // Testbench Routine: holds rst_n low for 200 ns, waits another 100 ns,
+   // runs the four test vectors one after another and prints an overall
+   // SUCCESS/FAILURE line based on the 'ok' flag.
+   reg			ok = 1;	// stays 1 only while every vector passes
+   initial begin
+
+      /* initialize control inputs (reset asserted, core idle) */
+      rst_n		= 0;
+      ena		= 0;
+
+      tb_xyn_wren	= 0;
+
+      /* keep reset asserted for 200 ns */
+      #200;
+
+      /* de-assert reset */
+      rst_n		= 1;
+
+      /* wait another 100 ns before the first test */
+      #100;
+
+      /* run tests (each call blocks until its result has been checked) */
+      test_modular_multiplier_384(X_1, Y_1, N, P_1);
+      test_modular_multiplier_384(X_2, Y_2, N, P_2);
+      test_modular_multiplier_384(X_3, Y_3, N, P_3);
+      test_modular_multiplier_384(X_4, Y_4, N, P_4);
+
+      /* print result */
+      if (ok)	$display("tb_modular_multiplier_384: SUCCESS");
+      else	$display("tb_modular_multiplier_384: FAILURE");
+      // $finish is left disabled: the simulation is not stopped from here (clk keeps toggling)
+      //$finish;
+      //
+   end
+
+
+   // Test Task: writes x, y and n into the operand memories, starts the core,
+   // waits for rdy, reads the result memory back and compares it with pp.
+   // Clears 'ok' on a mismatch; calls $finish if pp itself != (x * y) mod n.
+   reg	[383:0]	p;	// not referenced anywhere in this testbench
+   reg 		p_ok;	// outcome of the most recent comparison
+
+   integer 	w;	// word index shared by the write and read loops
+
+   reg [767:0] 	pp_full;	// double-width product x * y
+   reg [383:0] 	pp_ref;	// pp_full reduced modulo n
+
+   task test_modular_multiplier_384;
+      // x, y: operands; n: modulus; pp: expected value of (x * y) mod n
+      input	[383:0] x;
+      input [383:0] 	y;
+      input [383:0] 	n;
+      input [383:0] 	pp;
+
+      reg [383:0] 	x_shreg;
+      reg [383:0] 	y_shreg;
+      reg [383:0] 	n_shreg;
+      reg [383:0] 	p_shreg;
+
+      begin
+
+	 /* start filling memories */
+	 tb_xyn_wren	= 1;
+
+	 /* initialize shift registers */
+	 x_shreg = x;
+	 y_shreg = y;
+	 n_shreg = n;
+
+	 /* write all the words, least significant word goes to address 0 */
+	 for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+	    /* set addresses */
+	    tb_xyn_addr	= w[WORD_COUNTER_WIDTH-1:0];
+
+	    /* set data words */
+	    tb_x_data	= x_shreg[31:0];
+	    tb_y_data	= y_shreg[31:0];
+	    tb_n_data	= n_shreg[31:0];
+
+	    /* shift inputs right by one word, filling the top word with X */
+	    x_shreg = {{32{1'bX}}, x_shreg[383:32]};
+	    y_shreg = {{32{1'bX}}, y_shreg[383:32]};
+	    n_shreg = {{32{1'bX}}, n_shreg[383:32]};
+
+	    /* wait for 1 clock tick */
+	    #10;
+
+	 end
+
+	 /* wipe addresses */
+	 tb_xyn_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+
+	 /* wipe data words */
+	 tb_x_data	= {32{1'bX}};
+	 tb_y_data	= {32{1'bX}};
+	 tb_n_data	= {32{1'bX}};
+
+	 /* stop filling memories */
+	 tb_xyn_wren	= 0;
+
+	 /* calculate reference value (operands zero-extended to 768 bits) */
+	 pp_full = {{384{1'b0}}, x} * {{384{1'b0}}, y};
+	 pp_ref = pp_full % {{384{1'b0}}, n};
+
+	 /* compare reference value against hard-coded one, abort the run if the vector is wrong */
+	 if (pp_ref != pp) begin
+	    $display("ERROR: pp_ref != pp");
+	    $finish;
+	 end
+
+	 /* start operation */
+	 ena = 1;
+
+	 /* clear flag after one clock period */
+	 #10 ena = 0;
+	 /* NOTE(review): the loop below exits at once if rdy is still high here; presumably the uut drops rdy on the first clock after ena -- confirm */
+	 /* wait for operation to complete (rdy is polled every 10 ns) */
+	 while (!rdy) #10;
+
+	 /* read result */
+	 for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+	    /* set address */
+	    tb_p_addr	= w[WORD_COUNTER_WIDTH-1:0];
+
+	    /* wait for 1 clock tick (presumably the read latency of the memory -- confirm) */
+	    #10;
+
+	    /* store data word: shifted in from the top, so word 0 ends up in bits 31:0 */
+	    p_shreg = {tb_p_data, p_shreg[383:32]};
+
+	 end
+
+	 /* compare */
+	 p_ok = (p_shreg == pp);
+
+	 /* display results */
+	 $display("test_modular_multiplier_384(): %s", p_ok ? "OK" : "ERROR");
+
+	 /* update flag */
+	 ok = ok && p_ok;
+
+      end
+
+   endtask
+
+
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/curve/uop/uop_add_rom.v b/rtl/curve/uop/uop_add_rom.v
deleted file mode 100644
index 2cf511b..0000000
--- a/rtl/curve/uop/uop_add_rom.v
+++ /dev/null
@@ -1,66 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_add_rom
-  (
-   input wire 	      clk,
-   input wire [ 5: 0] addr,
-   output reg [19: 0] data
-   );
-
-
-   //
-   // Microcode
-   //
-`include "../uop_ecdsa.v"
-
-
-   //
-   // Addition Microprogram
-   //
-   always @(posedge clk)
-
-     case (addr)
-
-/*  2. */6'd00:	data <= {OPCODE_CMP, UOP_SRC_PZ,    UOP_SRC_ZERO,  UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-/*  3. */6'd01:	data <= {OPCODE_MOV, UOP_SRC_PZ,    UOP_SRC_DUMMY, UOP_DST_T1,    UOP_EXEC_ALWAYS};
-         6'd02:	data <= {OPCODE_MUL, UOP_SRC_PZ,    UOP_SRC_T1,    UOP_DST_T1,    UOP_EXEC_ALWAYS};
-/*  4. */6'd03:	data <= {OPCODE_MUL, UOP_SRC_PZ,    UOP_SRC_T1,    UOP_DST_T2,    UOP_EXEC_ALWAYS};
-/*  5. */6'd04:	data <= {OPCODE_MUL, UOP_SRC_T1,    UOP_SRC_G_X,   UOP_DST_T1,    UOP_EXEC_ALWAYS};
-/*  6. */6'd05:	data <= {OPCODE_MUL, UOP_SRC_T2,    UOP_SRC_G_Y,   UOP_DST_T2,    UOP_EXEC_ALWAYS};
-/*  7. */6'd06:	data <= {OPCODE_SUB, UOP_SRC_T1,    UOP_SRC_PX,    UOP_DST_T1,    UOP_EXEC_ALWAYS};
-/*  8. */6'd07:	data <= {OPCODE_SUB, UOP_SRC_T2,    UOP_SRC_PY,    UOP_DST_T2,    UOP_EXEC_ALWAYS};
-/*  9. */6'd08:	data <= {OPCODE_CMP, UOP_SRC_T1,    UOP_SRC_ZERO,  UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-         6'd09:	data <= {OPCODE_CMP, UOP_SRC_T2,    UOP_SRC_ZERO,  UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-/* 10. */6'd10:	data <= {OPCODE_MUL, UOP_SRC_PZ,    UOP_SRC_T1,    UOP_DST_RZ,    UOP_EXEC_ALWAYS};
-/* 11. */6'd11:	data <= {OPCODE_MOV, UOP_SRC_T1,    UOP_SRC_DUMMY, UOP_DST_T3,    UOP_EXEC_ALWAYS};
-         6'd12:	data <= {OPCODE_MUL, UOP_SRC_T1,    UOP_SRC_T3,    UOP_DST_T3,    UOP_EXEC_ALWAYS};
-/* 12. */6'd13:	data <= {OPCODE_MUL, UOP_SRC_T1,    UOP_SRC_T3,    UOP_DST_T4,    UOP_EXEC_ALWAYS};
-/* 13. */6'd14:	data <= {OPCODE_MUL, UOP_SRC_PX,    UOP_SRC_T3,    UOP_DST_T3,    UOP_EXEC_ALWAYS};
-/* 14. */6'd15:	data <= {OPCODE_ADD, UOP_SRC_T3,    UOP_SRC_T3,    UOP_DST_T1,    UOP_EXEC_ALWAYS};
-/* 15. */6'd16:	data <= {OPCODE_MOV, UOP_SRC_T2,    UOP_SRC_DUMMY, UOP_DST_RX,    UOP_EXEC_ALWAYS};
-         6'd17:	data <= {OPCODE_MUL, UOP_SRC_RX,    UOP_SRC_T2,    UOP_DST_RX,    UOP_EXEC_ALWAYS};
-/* 16. */6'd18:	data <= {OPCODE_SUB, UOP_SRC_RX,    UOP_SRC_T1,    UOP_DST_RX,    UOP_EXEC_ALWAYS};
-/* 17. */6'd19:	data <= {OPCODE_SUB, UOP_SRC_RX,    UOP_SRC_T4,    UOP_DST_RX,    UOP_EXEC_ALWAYS};
-/* 18. */6'd20:	data <= {OPCODE_SUB, UOP_SRC_T3,    UOP_SRC_RX,    UOP_DST_T3,    UOP_EXEC_ALWAYS};
-/* 19. */6'd21:	data <= {OPCODE_MUL, UOP_SRC_T2,    UOP_SRC_T3,    UOP_DST_T3,    UOP_EXEC_ALWAYS};
-/* 20. */6'd22:	data <= {OPCODE_MUL, UOP_SRC_PY,    UOP_SRC_T4,    UOP_DST_T4,    UOP_EXEC_ALWAYS};
-/* 21. */6'd23:	data <= {OPCODE_SUB, UOP_SRC_T3,    UOP_SRC_T4,    UOP_DST_RY,    UOP_EXEC_ALWAYS};
-
-         6'd24:	data <= {OPCODE_MOV, UOP_SRC_G_X,   UOP_SRC_DUMMY, UOP_DST_RX,    UOP_EXEC_PZT1T2_0XX};
-         6'd25:	data <= {OPCODE_MOV, UOP_SRC_G_Y,   UOP_SRC_DUMMY, UOP_DST_RY,    UOP_EXEC_PZT1T2_0XX};
-         6'd26:	data <= {OPCODE_MOV, UOP_SRC_ONE,   UOP_SRC_DUMMY, UOP_DST_RZ,    UOP_EXEC_PZT1T2_0XX};
-
-         6'd27:	data <= {OPCODE_MOV, UOP_SRC_H_X,   UOP_SRC_DUMMY, UOP_DST_RX,    UOP_EXEC_PZT1T2_100};
-         6'd28:	data <= {OPCODE_MOV, UOP_SRC_H_Y,   UOP_SRC_DUMMY, UOP_DST_RY,    UOP_EXEC_PZT1T2_100};
-         6'd29:	data <= {OPCODE_MOV, UOP_SRC_ONE,   UOP_SRC_DUMMY, UOP_DST_RZ,    UOP_EXEC_PZT1T2_100};
-
-         6'd30:	data <= {OPCODE_MOV, UOP_SRC_ONE,   UOP_SRC_DUMMY, UOP_DST_RX,    UOP_EXEC_PZT1T2_101};
-         6'd31:	data <= {OPCODE_MOV, UOP_SRC_ONE,   UOP_SRC_DUMMY, UOP_DST_RY,    UOP_EXEC_PZT1T2_101};
-         6'd32:	data <= {OPCODE_MOV, UOP_SRC_ZERO,  UOP_SRC_DUMMY, UOP_DST_RZ,    UOP_EXEC_PZT1T2_101};
-
-         default:	data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-
-     endcase
-
-
-endmodule
diff --git a/rtl/curve/uop/uop_conv_rom.v b/rtl/curve/uop/uop_conv_rom.v
deleted file mode 100644
index aa09bbb..0000000
--- a/rtl/curve/uop/uop_conv_rom.v
+++ /dev/null
@@ -1,38 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_conv_rom
-  (
-   input wire 	      clk,
-   input wire [ 5: 0] addr,
-   output reg [19: 0] data
-   );
-
-
-   //
-   // Microcode
-   //
-`include "../uop_ecdsa.v"
-
-
-   //
-   // Doubling Microprogram
-   //
-   always @(posedge clk)
-
-     case (addr)
-
-       6'd00:	data <= {OPCODE_CMP, UOP_SRC_PZ,   UOP_SRC_ZERO,  UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-       6'd01:	data <= {OPCODE_MOV, UOP_SRC_V,    UOP_SRC_DUMMY, UOP_DST_T1,    UOP_EXEC_ALWAYS};
-       6'd02:	data <= {OPCODE_MUL, UOP_SRC_V,    UOP_SRC_T1,    UOP_DST_T2,    UOP_EXEC_ALWAYS};
-       6'd03:	data <= {OPCODE_MUL, UOP_SRC_V,    UOP_SRC_T2,    UOP_DST_T3,    UOP_EXEC_ALWAYS};
-       6'd04:	data <= {OPCODE_MUL, UOP_SRC_PX,   UOP_SRC_T2,    UOP_DST_RX,    UOP_EXEC_ALWAYS};
-       6'd05:	data <= {OPCODE_MUL, UOP_SRC_PY,   UOP_SRC_T3,    UOP_DST_RY,    UOP_EXEC_ALWAYS};
-       6'd06:	data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RX,    UOP_EXEC_PZT1T2_0XX};
-       6'd07:	data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RY,    UOP_EXEC_PZT1T2_0XX};
-
-       default:	data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-
-     endcase
-
-
-endmodule
diff --git a/rtl/curve/uop/uop_dbl_rom.v b/rtl/curve/uop/uop_dbl_rom.v
deleted file mode 100644
index 0561e08..0000000
--- a/rtl/curve/uop/uop_dbl_rom.v
+++ /dev/null
@@ -1,58 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_dbl_rom
-  (
-   input wire 	      clk,
-   input wire [ 5: 0] addr,
-   output reg [19: 0] data
-   );
-
-
-   //
-   // Microcode
-   //
-`include "../uop_ecdsa.v"
-
-
-   //
-   // Doubling Microprogram
-   //
-   always @(posedge clk)
-
-     case (addr)
-
-/*  1. */6'd00:	data <= {OPCODE_CMP, UOP_SRC_PZ,    UOP_SRC_ZERO,  UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-/*  2. */6'd01:	data <= {OPCODE_MOV, UOP_SRC_PZ,    UOP_SRC_DUMMY, UOP_DST_T1,    UOP_EXEC_ALWAYS};
-			5'd02:	data <= {OPCODE_MUL, UOP_SRC_PZ,    UOP_SRC_T1,    UOP_DST_T1,    UOP_EXEC_ALWAYS};
-/*  3. */6'd03:	data <= {OPCODE_SUB, UOP_SRC_PX,    UOP_SRC_T1,    UOP_DST_T2,    UOP_EXEC_ALWAYS};
-/*  4. */6'd04:	data <= {OPCODE_ADD, UOP_SRC_PX,    UOP_SRC_T1,    UOP_DST_T1,    UOP_EXEC_ALWAYS};
-/*  5. */6'd05:	data <= {OPCODE_MUL, UOP_SRC_T1,    UOP_SRC_T2,    UOP_DST_T2,    UOP_EXEC_ALWAYS};
-/*  6. */6'd06:	data <= {OPCODE_ADD, UOP_SRC_T2,    UOP_SRC_T2,    UOP_DST_T1,    UOP_EXEC_ALWAYS};
-         6'd07:	data <= {OPCODE_ADD, UOP_SRC_T1,    UOP_SRC_T2,    UOP_DST_T2,    UOP_EXEC_ALWAYS};
-/*  7. */6'd08:	data <= {OPCODE_ADD, UOP_SRC_PY,    UOP_SRC_PY,    UOP_DST_RY,    UOP_EXEC_ALWAYS};
-/*  8. */6'd09:	data <= {OPCODE_MUL, UOP_SRC_PZ,    UOP_SRC_RY,    UOP_DST_RZ,    UOP_EXEC_ALWAYS};
-/*  9. */6'd10:	data <= {OPCODE_MOV, UOP_SRC_RY,    UOP_SRC_DUMMY, UOP_DST_T1,    UOP_EXEC_ALWAYS};
-         6'd11:	data <= {OPCODE_MOV, UOP_SRC_RY,    UOP_SRC_DUMMY, UOP_DST_T3,    UOP_EXEC_ALWAYS};
-         6'd12:	data <= {OPCODE_MUL, UOP_SRC_T1,    UOP_SRC_T3,    UOP_DST_RY,    UOP_EXEC_ALWAYS};
-/* 10. */6'd13:	data <= {OPCODE_MUL, UOP_SRC_PX,    UOP_SRC_RY,    UOP_DST_T3,    UOP_EXEC_ALWAYS};
-/* 11. */6'd14:	data <= {OPCODE_MOV, UOP_SRC_RY,    UOP_SRC_DUMMY, UOP_DST_T1,    UOP_EXEC_ALWAYS};
-         6'd15:	data <= {OPCODE_MUL, UOP_SRC_RY,    UOP_SRC_T1,    UOP_DST_T1,    UOP_EXEC_ALWAYS};
-/* 12. */6'd16:	data <= {OPCODE_MUL, UOP_SRC_T1,    UOP_SRC_DELTA, UOP_DST_RY,    UOP_EXEC_ALWAYS};
-/* 13. */6'd17:	data <= {OPCODE_MOV, UOP_SRC_T2,    UOP_SRC_DUMMY, UOP_DST_T1,    UOP_EXEC_ALWAYS};
-         6'd18:	data <= {OPCODE_MUL, UOP_SRC_T1,    UOP_SRC_T2,    UOP_DST_RX,    UOP_EXEC_ALWAYS};
-/* 14. */6'd19:	data <= {OPCODE_ADD, UOP_SRC_T3,    UOP_SRC_T3,    UOP_DST_T1,    UOP_EXEC_ALWAYS};
-/* 15. */6'd20:	data <= {OPCODE_SUB, UOP_SRC_RX,    UOP_SRC_T1,    UOP_DST_RX,    UOP_EXEC_ALWAYS};
-/* 16. */6'd21:	data <= {OPCODE_SUB, UOP_SRC_T3,    UOP_SRC_RX,    UOP_DST_T1,    UOP_EXEC_ALWAYS};
-/* 17. */6'd22:	data <= {OPCODE_MUL, UOP_SRC_T1,    UOP_SRC_T2,    UOP_DST_T1,    UOP_EXEC_ALWAYS};
-/* 18. */6'd23:	data <= {OPCODE_SUB, UOP_SRC_T1,    UOP_SRC_RY,    UOP_DST_RY,    UOP_EXEC_ALWAYS};
-
-         6'd24:	data <= {OPCODE_MOV, UOP_SRC_ONE,   UOP_SRC_DUMMY, UOP_DST_RX,    UOP_EXEC_PZT1T2_0XX};
-         6'd25:	data <= {OPCODE_MOV, UOP_SRC_ONE,   UOP_SRC_DUMMY, UOP_DST_RY,    UOP_EXEC_PZT1T2_0XX};
-         6'd26:	data <= {OPCODE_MOV, UOP_SRC_ZERO,  UOP_SRC_DUMMY, UOP_DST_RZ,    UOP_EXEC_PZT1T2_0XX};
-
-       default:	data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-
-     endcase
-
-
-endmodule
diff --git a/rtl/curve/uop/uop_init_rom.v b/rtl/curve/uop/uop_init_rom.v
deleted file mode 100644
index 0142c4c..0000000
--- a/rtl/curve/uop/uop_init_rom.v
+++ /dev/null
@@ -1,33 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_init_rom
-  (
-   input wire 	      clk,
-   input wire [ 5: 0] addr,
-   output reg [19: 0] data
-   );
-
-
-   //
-   // Microcode
-   //
-`include "../uop_ecdsa.v"
-
-
-   //
-   // Initialization Microprogram
-   //
-   always @(posedge clk)
-
-     case (addr)
-
-       6'd00:	data <= {OPCODE_MOV, UOP_SRC_ONE,  UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_ALWAYS};
-       6'd01:	data <= {OPCODE_MOV, UOP_SRC_ONE,  UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_ALWAYS};
-       6'd02:	data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_ALWAYS};
-
-       default:	data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-
-     endcase
-
-
-endmodule
diff --git a/rtl/curve/uop/uop_init_rom_ecdh.v b/rtl/curve/uop/uop_init_rom_ecdh.v
deleted file mode 100644
index dc92f2a..0000000
--- a/rtl/curve/uop/uop_init_rom_ecdh.v
+++ /dev/null
@@ -1,33 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_init_rom_ecdh
-  (
-   input wire 	      clk,
-   input wire [ 5: 0] addr,
-   output reg [19: 0] data
-   );
-
-
-   //
-   // Microcode
-   //
-`include "../uop_ecdsa.v"
-
-
-   //
-   // Initialization Microprogram for ECDH Mode
-   //
-   always @(posedge clk)
-
-     case (addr)
-
-       6'd00:	data <= {OPCODE_MOV, UOP_SRC_G_X,  UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_ALWAYS};
-       6'd01:	data <= {OPCODE_MOV, UOP_SRC_G_Y,  UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_ALWAYS};
-       6'd02:	data <= {OPCODE_MOV, UOP_SRC_ONE,  UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_ALWAYS};
-
-       default:	data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-
-     endcase
-
-
-endmodule
diff --git a/rtl/curve/uop_ecdsa.v b/rtl/curve/uop_ecdsa.v
deleted file mode 100644
index ce757be..0000000
--- a/rtl/curve/uop_ecdsa.v
+++ /dev/null
@@ -1,50 +0,0 @@
-localparam	[ 4: 0]	OPCODE_CMP			= 5'b00001;
-localparam	[ 4: 0]	OPCODE_MOV			= 5'b00010;
-localparam	[ 4: 0]	OPCODE_ADD			= 5'b00100;
-localparam	[ 4: 0]	OPCODE_SUB			= 5'b01000;
-localparam	[ 4: 0]	OPCODE_MUL			= 5'b10000;
-localparam	[ 4: 0]	OPCODE_RDY			= 5'b00000;
-
-localparam	[ 4: 0]	UOP_SRC_PX			= 5'h0_0;
-localparam	[ 4: 0]	UOP_SRC_PY			= 5'h0_1;
-localparam	[ 4: 0]	UOP_SRC_PZ			= 5'h0_2;
-
-localparam	[ 4: 0]	UOP_SRC_RX			= 5'h0_3;
-localparam	[ 4: 0]	UOP_SRC_RY			= 5'h0_4;
-localparam	[ 4: 0]	UOP_SRC_RZ			= 5'h0_5;
-
-localparam	[ 4: 0]	UOP_SRC_T1			= 5'h0_6;
-localparam	[ 4: 0]	UOP_SRC_T2			= 5'h0_7;
-localparam	[ 4: 0]	UOP_SRC_T3			= 5'h0_8;
-localparam	[ 4: 0]	UOP_SRC_T4			= 5'h0_9;
-
-localparam	[ 4: 0]	UOP_SRC_ONE			= 5'h0_A;
-localparam	[ 4: 0]	UOP_SRC_ZERO		= 5'h0_B;
-localparam	[ 4: 0]	UOP_SRC_DELTA		= 5'h0_C;
-
-localparam	[ 4: 0]	UOP_SRC_V			= 5'h0_F;
-
-localparam	[ 4: 0]	UOP_SRC_G_X			= 5'h1_0;
-localparam	[ 4: 0]	UOP_SRC_G_Y			= 5'h1_1;
-
-localparam	[ 4: 0]	UOP_SRC_H_X			= 5'h1_2;
-localparam	[ 4: 0]	UOP_SRC_H_Y			= 5'h1_3;
-
-localparam	[ 4: 0]	UOP_SRC_DUMMY		= 5'hX_X;
-
-localparam	[ 2: 0]	UOP_DST_RX			= 3'd0;
-localparam	[ 2: 0]	UOP_DST_RY			= 3'd1;
-localparam	[ 2: 0]	UOP_DST_RZ			= 3'd2;
-
-localparam	[ 2: 0]	UOP_DST_T1			= 3'd3;
-localparam	[ 2: 0]	UOP_DST_T2			= 3'd4;
-localparam	[ 2: 0]	UOP_DST_T3			= 3'd5;
-localparam	[ 2: 0]	UOP_DST_T4			= 3'd6;
-
-localparam	[ 2: 0]	UOP_DST_DUMMY		= 3'dX;
-
-localparam				UOP_EXEC_ALWAYS		= 2'b11;	// R
-localparam				UOP_EXEC_PZT1T2_0XX	= 2'b10;	// G
-localparam				UOP_EXEC_PZT1T2_100	= 2'b00;	// H
-localparam				UOP_EXEC_PZT1T2_101	= 2'b01;	// O
-
diff --git a/rtl/lowlevel/adder32_wrapper.v b/rtl/lowlevel/adder32_wrapper.v
deleted file mode 100644
index 57778df..0000000
--- a/rtl/lowlevel/adder32_wrapper.v
+++ /dev/null
@@ -1,73 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder32_wrapper.v
-// -----------------------------------------------------------------------------
-// Wrapper for 32-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder32_wrapper
-  (
-   input 	  clk, // clock
-   input [31: 0]  a, // operand input
-   input [31: 0]  b, // operand input
-   output [31: 0] s, // sum output
-   input 	  c_in, // carry input
-   output 	  c_out		// carry output
-   );
-
-   //
-   // Include Primitive Selector
-   //
-`include "ecdsa_lowlevel_settings.v"
-
-
-   //
-   // Instantiate Vendor/Generic Primitive
-   //
-   `ADDER32_PRIMITIVE adder32_inst
-     (
-      .clk(clk),
-      .a(a),
-      .b(b),
-      .s(s),
-      .c_in(c_in),
-      .c_out(c_out)
-      );
-
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/adder47_wrapper.v b/rtl/lowlevel/adder47_wrapper.v
deleted file mode 100644
index be5b1ee..0000000
--- a/rtl/lowlevel/adder47_wrapper.v
+++ /dev/null
@@ -1,69 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder47_wrapper.v
-// -----------------------------------------------------------------------------
-// Wrapper for 47-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder47_wrapper
-  (
-   input 	  clk, // clock
-   input [46: 0]  a, // operand input
-   input [46: 0]  b, // operand input
-   output [46: 0] s			// sum output
-   );
-
-   //
-   // Include Primitive Selector
-   //
-`include "ecdsa_lowlevel_settings.v"
-
-
-   //
-   // Instantiate Vendor/Generic Primitive
-   //
-   `ADDER47_PRIMITIVE adder47_inst
-     (
-      .clk(clk),
-      .a(a),
-      .b(b),
-      .s(s)
-      );
-
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/adder32_artix7.v b/rtl/lowlevel/artix7/adder32_artix7.v
deleted file mode 100644
index dad2340..0000000
--- a/rtl/lowlevel/artix7/adder32_artix7.v
+++ /dev/null
@@ -1,96 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder32_artix7.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) 32-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder32_artix7
-  (
-   input 	  clk, // clock
-   input [31: 0]  a, // operand input
-   input [31: 0]  b, // operand input
-   output [31: 0] s, // sum output
-   input 	  c_in, // carry input
-   output 	  c_out		// carry output
-   );
-
-   //
-   // Lower and higher parts of operand
-   //
-   wire [17: 0]   bl = b[17: 0];
-   wire [13: 0]   bh = b[31:18];
-
-
-   //
-   // DSP48E1 Slice
-   //
-
-   /* Operation Mode */
-   wire [ 3: 0]   dsp48e1_alumode	= 4'b0000;
-   wire [ 6: 0]   dsp48e1_opmode		= 7'b0110011;
-
-   /* Internal Product */
-   wire [47: 0]   p_int;
-
-   dsp48e1_wrapper dsp_adder
-     (
-      .clk			(clk),
-
-      .ce			(1'b1),
-
-      .carry		(c_in),
-
-      .alumode		(dsp48e1_alumode),
-      .opmode		(dsp48e1_opmode),
-
-      .a				({{16{1'b0}}, bh}),
-      .b				(bl),
-      .c				({{16{1'b0}}, a}),
-
-      .p				(p_int)
-      );
-
-   //
-   // Output Mapping
-   //
-   assign s 		= p_int[31: 0];
-   assign c_out	= p_int[32];
-
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/adder47_artix7.v b/rtl/lowlevel/artix7/adder47_artix7.v
deleted file mode 100644
index caafc85..0000000
--- a/rtl/lowlevel/artix7/adder47_artix7.v
+++ /dev/null
@@ -1,91 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder47_artix7.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) 47-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder47_artix7
-  (
-   input 	  clk,	// clock
-   input [46: 0]  a,	// operand input
-   input [46: 0]  b,	// operand input
-   output [46: 0] s	// sum output
-   );
-
-   //
-   // Lower and higher parts of operand
-   //
-   wire [17: 0]   bl = b[17: 0];
-   wire [28: 0]   bh = b[46:18];
-
-   //
-   // DSP48E1 Slice
-   //
-
-   /* Operation Mode */
-   wire [ 3: 0]   dsp48e1_alumode	= 4'b0000;
-   wire [ 6: 0]   dsp48e1_opmode	= 7'b0110011;
-
-   /* Internal Product */
-   wire [47: 0]   p_int;
-
-   dsp48e1_wrapper dsp_adder
-     (
-      .clk			(clk),
-
-      .ce			(1'b1),
-
-      .carry		(1'b0),
-
-      .alumode		(dsp48e1_alumode),
-      .opmode		(dsp48e1_opmode),
-
-      .a				({1'b0, bh}),
-      .b				(bl),
-      .c				({1'b0, a}),
-
-      .p				(p_int)
-      );
-
-   //
-   // Output Mapping
-   //
-   assign s 		= p_int[46: 0];
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/dsp48e1_wrapper.v b/rtl/lowlevel/artix7/dsp48e1_wrapper.v
deleted file mode 100644
index 4c272f0..0000000
--- a/rtl/lowlevel/artix7/dsp48e1_wrapper.v
+++ /dev/null
@@ -1,159 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// dsp48e1_wrapper.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) tile wrapper.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module dsp48e1_wrapper
-  (
-   input 	  clk,
-
-   input 	  ce,
-
-   input [ 6: 0]  opmode,
-   input [ 3: 0]  alumode,
-
-   input 	  carry,
-
-   input [29: 0]  a,
-   input [17: 0]  b,
-   input [47: 0]  c,
-
-   output [47: 0] p
-   );
-
-
-   //
-   // Tile instantiation
-   //
-   DSP48E1 #
-     (
-      .AREG						(0),
-      .BREG						(0),
-      .CREG						(0),
-      .DREG						(0),
-      .MREG						(0),
-      .PREG						(1),
-      .ADREG					(0),
-
-      .ACASCREG				(0),
-      .BCASCREG				(0),
-      .ALUMODEREG				(0),
-      .INMODEREG				(0),
-      .OPMODEREG				(0),
-      .CARRYINREG				(0),
-      .CARRYINSELREG			(0),
-
-      .A_INPUT					("DIRECT"),
-      .B_INPUT					("DIRECT"),
-
-      .USE_DPORT				("FALSE"),
-      .USE_MULT				("DYNAMIC"),
-      .USE_SIMD				("ONE48"),
-
-      .USE_PATTERN_DETECT	("NO_PATDET"),
-      .SEL_PATTERN			("PATTERN"),
-      .SEL_MASK				("MASK"),
-      .PATTERN					(48'h000000000000),
-      .MASK						(48'h3fffffffffff),
-      .AUTORESET_PATDET		("NO_RESET")
-      )
-   DSP48E1_inst
-     (
-      .CLK					(clk),
-
-      .RSTA					(1'b0),
-      .RSTB					(1'b0),
-      .RSTC					(1'b0),
-      .RSTD					(1'b0),
-      .RSTM					(1'b0),
-      .RSTP					(1'b0),
-
-      .RSTCTRL				(1'b0),
-      .RSTINMODE			(1'b0),
-      .RSTALUMODE			(1'b0),
-      .RSTALLCARRYIN		(1'b0),
-
-      .CEA1					(1'b0),
-      .CEA2					(1'b0),
-      .CEB1					(1'b0),
-      .CEB2					(1'b0),
-      .CEC					(1'b0),
-      .CED					(1'b0),
-      .CEM					(1'b0),
-      .CEP					(ce),
-      .CEAD					(1'b0),
-      .CEALUMODE			(1'b0),
-      .CEINMODE			(1'b0),
-
-      .CECTRL				(1'b0),
-      .CECARRYIN			(1'b0),
-
-      .A						(a),
-      .B						(b),
-      .C						(c),
-      .D						({25{1'b1}}),
-      .P						(p),
-
-      .CARRYIN				(carry),
-      .CARRYOUT			(),
-      .CARRYINSEL			(3'b000),
-
-      .CARRYCASCIN		(1'b0),
-      .CARRYCASCOUT		(),
-
-      .PATTERNDETECT		(),
-      .PATTERNBDETECT	(),
-
-      .OPMODE				(opmode),
-      .ALUMODE				(alumode),
-      .INMODE				(5'b00000),
-
-      .MULTSIGNIN			(1'b0),
-      .MULTSIGNOUT		(),
-
-      .UNDERFLOW			(),
-      .OVERFLOW			(),
-
-      .ACIN					(30'd0),
-      .BCIN					(18'd0),
-      .PCIN					(48'd0),
-
-      .ACOUT				(),
-      .BCOUT				(),
-      .PCOUT				()
-      );
-
-endmodule
diff --git a/rtl/lowlevel/artix7/mac16_artix7.v b/rtl/lowlevel/artix7/mac16_artix7.v
deleted file mode 100644
index 421e7ba..0000000
--- a/rtl/lowlevel/artix7/mac16_artix7.v
+++ /dev/null
@@ -1,90 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mac16_artix7.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) 16-bit multiplier and 47-bit accumulator.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mac16_artix7
-  (
-   input 	  clk, // clock
-   input 	  clr, // clear accumulator (active-high)
-   input 	  ce, // enable clock (active-high)
-   input [15: 0]  a, // operand input
-   input [15: 0]  b, // operand input
-   output [46: 0] s			// sum output
-   );
-
-
-   //
-   // DSP48E1 Slice
-   //
-
-   /* Operation Mode */
-   wire [ 3: 0]   dsp48e1_alumode	= 4'b0000;
-   wire [ 6: 0]   dsp48e1_opmode		= {2'b01, clr, 4'b0101};
-
-   /* Internal Product */
-   wire [47: 0]   p_int;
-
-   dsp48e1_wrapper dsp_adder
-     (
-      .clk			(clk),
-
-      .ce			(ce),
-
-      .carry		(1'b0),
-
-      .alumode		(dsp48e1_alumode),
-      .opmode		(dsp48e1_opmode),
-
-      .a				({{14{1'b0}}, a}),
-      .b				({{ 2{1'b0}}, b}),
-      .c				({48{1'b0}}),
-
-      .p				(p_int)
-      );
-
-   //
-   // Output Mapping
-   //
-   assign s = p_int[46:0];
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/subtractor32_artix7.v b/rtl/lowlevel/artix7/subtractor32_artix7.v
deleted file mode 100644
index 7377781..0000000
--- a/rtl/lowlevel/artix7/subtractor32_artix7.v
+++ /dev/null
@@ -1,94 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// subtractor32_artix7.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) 32-bit subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module subtractor32_artix7
-  (
-   input 	  clk,
-   input [31: 0]  a,
-   input [31: 0]  b,
-   output [31: 0] d,
-   input 	  b_in,
-   output 	  b_out
-   );
-
-   //
-   // Lower and higher parts of operand
-   //
-   wire [17: 0]   bl = b[17: 0];
-   wire [13: 0]   bh = b[31:18];
-
-   //
-   // DSP48E1 Slice
-   //
-
-   /* Operation Mode */
-   wire [ 3: 0]   dsp48e1_alumode	= 4'b0011;
-   wire [ 6: 0]   dsp48e1_opmode		= 7'b0110011;
-
-   /* Internal Product */
-   wire [47: 0]   p_int;
-
-   dsp48e1_wrapper dsp_subtractor
-     (
-      .clk			(clk),
-
-      .ce			(1'b1),
-
-      .carry		(b_in),
-
-      .alumode		(dsp48e1_alumode),
-      .opmode		(dsp48e1_opmode),
-
-      .a				({{16{1'b0}}, bh}),
-      .b				(bl),
-      .c				({{16{1'b0}}, a}),
-
-      .p				(p_int)
-      );
-
-   //
-   // Output Mapping
-   //
-   assign d 		= p_int[31: 0];
-   assign b_out	= p_int[32];
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/ecdsa_lowlevel_settings.v b/rtl/lowlevel/ecdsa_lowlevel_settings.v
deleted file mode 100644
index c04a14f..0000000
--- a/rtl/lowlevel/ecdsa_lowlevel_settings.v
+++ /dev/null
@@ -1,17 +0,0 @@
-`define USE_VENDOR_PRIMITIVES
-
-`ifdef USE_VENDOR_PRIMITIVES
-
-`define MAC16_PRIMITIVE				mac16_artix7
-`define ADDER32_PRIMITIVE			adder32_artix7
-`define ADDER47_PRIMITIVE			adder47_artix7
-`define SUBTRACTOR32_PRIMITIVE	subtractor32_artix7
-
-`else
-
-`define MAC16_PRIMITIVE				mac16_generic
-`define ADDER32_PRIMITIVE			adder32_generic
-`define ADDER47_PRIMITIVE			adder47_generic
-`define SUBTRACTOR32_PRIMITIVE	subtractor32_generic
-
-`endif
diff --git a/rtl/lowlevel/generic/adder32_generic.v b/rtl/lowlevel/generic/adder32_generic.v
deleted file mode 100644
index eadfb6f..0000000
--- a/rtl/lowlevel/generic/adder32_generic.v
+++ /dev/null
@@ -1,67 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder32_generic.v
-// -----------------------------------------------------------------------------
-// Generic 32-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder32_generic
-  (
-   input 	  clk, // clock
-   input [31: 0]  a, // operand input
-   input [31: 0]  b, // operand input
-   output [31: 0] s, // sum output
-   input 	  c_in, // carry input
-   output 	  c_out		// carry output
-   );
-
-   //
-   // Sum
-   //
-   reg [32: 0] 	  s_int;
-
-   always @(posedge clk)
-     s_int <= {1'b0, a} + {1'b0, b} + {{32{1'b0}}, c_in};
-
-   //
-   // Output
-   //
-   assign s = s_int[31:0];
-   assign c_out = s_int[32];
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/generic/adder47_generic.v b/rtl/lowlevel/generic/adder47_generic.v
deleted file mode 100644
index 406c175..0000000
--- a/rtl/lowlevel/generic/adder47_generic.v
+++ /dev/null
@@ -1,64 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder47_generic.v
-// -----------------------------------------------------------------------------
-// Generic 47-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder47_generic
-  (
-   input 	  clk, // clock; the result register updates on its rising edge
-   input [46: 0]  a, // operand input
-   input [46: 0]  b, // operand input
-   output [46: 0] s			// sum output: a + b, one clock later (no carry out)
-   );
-
-   //
-   // Sum: the register is 47 bits wide, so any carry out of bit 46 is
-   // discarded (the result wraps modulo 2^47).
-   reg [46: 0] 	  s_int;
-
-   always @(posedge clk)
-     s_int <= a + b;
-
-   //
-   // Output: drive the port from the result register
-   //
-   assign s = s_int;
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/generic/mac16_generic.v b/rtl/lowlevel/generic/mac16_generic.v
deleted file mode 100644
index 6d120a3..0000000
--- a/rtl/lowlevel/generic/mac16_generic.v
+++ /dev/null
@@ -1,74 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mac16_generic.v
-// -----------------------------------------------------------------------------
-// Generic 16-bit multiplier and 47-bit accumulator.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mac16_generic
-  (
-   input 	  clk, // clock
-   input 	  clr, // clear accumulator (active-high): load the product instead of adding it
-   input 	  ce, // enable clock (active-high); the accumulator holds its value while low
-   input [15: 0]  a, // operand input
-   input [15: 0]  b, // operand input
-   output [46: 0] s			// sum output: current accumulator value
-   );
-
-   //
-   // Multiplier: combinational 16x16 product of the zero-extended operands,
-   // then zero-extended again to the 47-bit accumulator width.
-   wire [31: 0]   p = {{16{1'b0}}, a} * {{16{1'b0}}, b};
-   wire [46: 0]   p_ext = {{15{1'b0}}, p};
-
-   //
-   // Accumulator: 47-bit running sum of products
-   //
-   reg [46: 0] 	  s_int;
-
-   always @(posedge clk)
-     // when enabled: clr restarts the sum from the current product, otherwise accumulate
-     if (ce) s_int <= clr ? p_ext : p_ext + s_int;
-
-   //
-   // Output: drive the port from the accumulator register
-   //
-   assign s = s_int;
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/generic/subtractor32_generic.v b/rtl/lowlevel/generic/subtractor32_generic.v
deleted file mode 100644
index 5137ace..0000000
--- a/rtl/lowlevel/generic/subtractor32_generic.v
+++ /dev/null
@@ -1,67 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// subtractor32_generic.v
-// -----------------------------------------------------------------------------
-// Generic 32-bit subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module subtractor32_generic
-  (
-   input 	  clk, // clock; the result register updates on its rising edge
-   input [31: 0]  a, // minuend
-   input [31: 0]  b, // subtrahend
-   output [31: 0] d, // difference output: low 32 bits of a - b - b_in, one clock later
-   input 	  b_in, // borrow input, subtracted at bit 0
-   output 	  b_out // borrow output: bit 32 of the result (set when a < b + b_in)
-   );
-
-   //
-   // Difference: operands are zero-extended to 33 bits so that a borrow
-   // out of bit 31 shows up in bit 32 of the registered result.
-   reg [32: 0] 	  d_int;
-
-   always @(posedge clk)
-     d_int <= {1'b0, a} - {1'b0, b} - {{32{1'b0}}, b_in};
-
-   //
-   // Output: split the registered 33-bit result into difference and borrow
-   //
-   assign d = d_int[31:0];
-   assign b_out = d_int[32];
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/mac16_wrapper.v b/rtl/lowlevel/mac16_wrapper.v
deleted file mode 100644
index 89dbba1..0000000
--- a/rtl/lowlevel/mac16_wrapper.v
+++ /dev/null
@@ -1,75 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mac16_wrapper.v
-// -----------------------------------------------------------------------------
-// Wrapper for 16-bit multiplier and 47-bit accumulator.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mac16_wrapper
-  (
-   input 	  clk, // clock
-   input 	  clr, // clear accumulator (active-high)
-   input 	  ce, // enable clock (active-high)
-   input [15: 0]  a, // operand input
-   input [15: 0]  b, // operand input
-   output [46: 0] s			// sum output (47 bits wide)
-   );
-
-
-   //
-   // Include Primitive Selector (expected to define the MAC16_PRIMITIVE macro
-   // used below -- confirm in ecdsa_lowlevel_settings.v)
-`include "ecdsa_lowlevel_settings.v"
-
-
-   //
-   // Instantiate Vendor/Generic Primitive: the macro expands to the module
-   // name to instantiate; every port is passed straight through by name.
-   `MAC16_PRIMITIVE mac16_inst
-     (
-      .clk(clk),
-      .clr(clr),
-      .ce(ce),
-      .a(a),
-      .b(b),
-      .s(s)
-      );
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/subtractor32_wrapper.v b/rtl/lowlevel/subtractor32_wrapper.v
deleted file mode 100644
index 063e753..0000000
--- a/rtl/lowlevel/subtractor32_wrapper.v
+++ /dev/null
@@ -1,72 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// subtractor32_wrapper.v
-// -----------------------------------------------------------------------------
-// Wrapper for 32-bit subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module subtractor32_wrapper
-  (
-   input 	  clk, // clock
-   input [31: 0]  a, // operand input, forwarded to the primitive's a port
-   input [31: 0]  b, // operand input, forwarded to the primitive's b port
-   output [31: 0] d, // difference output
-   input 	  b_in, // borrow input
-   output 	  b_out // borrow output
-   );
-
-   //
-   // Include Primitive Selector (expected to define the SUBTRACTOR32_PRIMITIVE
-   // macro used below -- confirm in ecdsa_lowlevel_settings.v)
-`include "ecdsa_lowlevel_settings.v"
-
-
-   //
-   // Instantiate Vendor/Generic Primitive: the macro expands to the module
-   // name to instantiate; every port is passed straight through by name.
-   `SUBTRACTOR32_PRIMITIVE subtractor32_inst
-     (
-      .clk(clk),
-      .a(a),
-      .b(b),
-      .d(d),
-      .b_in(b_in),
-      .b_out(b_out)
-      );
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/microcode/ecdsa_uop.vh b/rtl/microcode/ecdsa_uop.vh
new file mode 100644
index 0000000..cebec26
--- /dev/null
+++ b/rtl/microcode/ecdsa_uop.vh
@@ -0,0 +1,121 @@
+//======================================================================
+//
+// Copyright (c) 2018, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+localparam integer UOP_ADDR_WIDTH    = 9;    // 2 ^ 9 = max 512 instructions
+
+localparam integer UOP_DATA_WIDTH = 6 + 1 + 3 * 6;  // opcode + banks + 3 * operand (2 * src + dst) = 25 bits
+// Microcode offsets, UOP_ADDR_WIDTH bits each (presumably the entry points of the named routines -- confirm against the microcode ROM)
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_PREPARE                 = 9'd000;
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_CYCLE_DOUBLE            = 9'd004;
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_CYCLE_ADD               = 9'd025;
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_CYCLE_ADD_AT_INFINITY   = 9'd053;
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_CYCLE_ADD_SAME_X_SAME_Y = 9'd057;
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_CYCLE_ADD_SAME_X        = 9'd061;
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_CYCLE_ADD_REGULAR       = 9'd065;
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_CYCLE_K0                = 9'd069;
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_CYCLE_K1                = 9'd073;
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_CONVERT                 = 9'd077;
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_CONVERT_AT_INFINITY     = 9'd081;
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_CONVERT_REGULAR         = 9'd084;
+localparam [UOP_ADDR_WIDTH-1:0] UOP_OFFSET_INVERT                  = 9'd087;
+// Opcodes: 6 bits wide, exactly one bit set per operation
+localparam [5:0] UOP_OPCODE_CMPZ    = 6'b000001;
+localparam [5:0] UOP_OPCODE_COPY    = 6'b000010;
+localparam [5:0] UOP_OPCODE_ADD     = 6'b000100;
+localparam [5:0] UOP_OPCODE_SUB     = 6'b001000;
+localparam [5:0] UOP_OPCODE_MUL     = 6'b010000;
+localparam [5:0] UOP_OPCODE_STOP    = 6'b100000;
+// Bank field: 1 bit (names suggest a low-to-high / high-to-low bank direction -- TODO confirm); DUMMY is X
+localparam UOP_BANKS_LO2HI  = 1'b0;
+localparam UOP_BANKS_HI2LO  = 1'b1;
+localparam UOP_BANKS_DUMMY  = 1'bX;
+// Operand indices, 6 bits each; 0..2 are named as constants
+localparam [5:0] UOP_OPERAND_CONST_ZERO     = 6'd00;
+localparam [5:0] UOP_OPERAND_CONST_ONE      = 6'd01;
+localparam [5:0] UOP_OPERAND_CONST_DELTA    = 6'd02;
+
+localparam [5:0] UOP_OPERAND_CONST_GX       = 6'd03;
+localparam [5:0] UOP_OPERAND_CONST_GY       = 6'd04;
+
+localparam [5:0] UOP_OPERAND_CONST_HX       = 6'd05;
+localparam [5:0] UOP_OPERAND_CONST_HY       = 6'd06;
+// Operands named CYCLE_* (indices 7..29)
+localparam [5:0] UOP_OPERAND_CYCLE_RX       = 6'd07;
+localparam [5:0] UOP_OPERAND_CYCLE_RY       = 6'd08;
+localparam [5:0] UOP_OPERAND_CYCLE_RZ       = 6'd09;
+
+localparam [5:0] UOP_OPERAND_CYCLE_SX       = 6'd10;
+localparam [5:0] UOP_OPERAND_CYCLE_SY       = 6'd11;
+localparam [5:0] UOP_OPERAND_CYCLE_SZ       = 6'd12;
+
+localparam [5:0] UOP_OPERAND_CYCLE_A        = 6'd13;
+localparam [5:0] UOP_OPERAND_CYCLE_A2       = 6'd14;
+localparam [5:0] UOP_OPERAND_CYCLE_B        = 6'd15;
+localparam [5:0] UOP_OPERAND_CYCLE_C        = 6'd16;
+localparam [5:0] UOP_OPERAND_CYCLE_C2       = 6'd17;
+localparam [5:0] UOP_OPERAND_CYCLE_C2_2     = 6'd18;
+localparam [5:0] UOP_OPERAND_CYCLE_D        = 6'd19;
+localparam [5:0] UOP_OPERAND_CYCLE_E        = 6'd20;
+localparam [5:0] UOP_OPERAND_CYCLE_F        = 6'd21;
+localparam [5:0] UOP_OPERAND_CYCLE_G        = 6'd22;
+localparam [5:0] UOP_OPERAND_CYCLE_H        = 6'd23;
+localparam [5:0] UOP_OPERAND_CYCLE_J        = 6'd24;
+
+localparam [5:0] UOP_OPERAND_CYCLE_Z2       = 6'd25;
+
+localparam [5:0] UOP_OPERAND_CYCLE_T1       = 6'd26;
+localparam [5:0] UOP_OPERAND_CYCLE_T2       = 6'd27;
+localparam [5:0] UOP_OPERAND_CYCLE_T3       = 6'd28;
+localparam [5:0] UOP_OPERAND_CYCLE_T4       = 6'd29;
+// Operands named INVERT_* (indices 30..42)
+localparam [5:0] UOP_OPERAND_INVERT_R1      = 6'd30;
+localparam [5:0] UOP_OPERAND_INVERT_R2      = 6'd31;
+
+localparam [5:0] UOP_OPERAND_INVERT_X2      = 6'd32;
+localparam [5:0] UOP_OPERAND_INVERT_X3      = 6'd33;
+localparam [5:0] UOP_OPERAND_INVERT_X6      = 6'd34;
+localparam [5:0] UOP_OPERAND_INVERT_X12     = 6'd35;
+localparam [5:0] UOP_OPERAND_INVERT_X15     = 6'd36;
+localparam [5:0] UOP_OPERAND_INVERT_X30     = 6'd37;
+localparam [5:0] UOP_OPERAND_INVERT_X32     = 6'd38;
+localparam [5:0] UOP_OPERAND_INVERT_X60     = 6'd39;
+localparam [5:0] UOP_OPERAND_INVERT_X120    = 6'd40;
+
+localparam [5:0] UOP_OPERAND_INVERT_A2      = 6'd41;
+localparam [5:0] UOP_OPERAND_INVERT_A3      = 6'd42;
+// All-X operand value for operand fields that are not used
+localparam [5:0] UOP_OPERAND_DONTCARE       = 6'dX;
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/ecdsa256_modular_multiplier.v b/rtl/modular/ecdsa256_modular_multiplier.v
new file mode 100644
index 0000000..7762b78
--- /dev/null
+++ b/rtl/modular/ecdsa256_modular_multiplier.v
@@ -0,0 +1,404 @@
+//------------------------------------------------------------------------------
+//
+// ecdsa256_modular_multiplier.v
+// -----------------------------------------------------------------------------
+// Modular multiplier for P-256 prime.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module ecdsa256_modular_multiplier
+  (
+   clk, rst_n,
+   ena, rdy,
+   a_addr, b_addr, p_addr, p_wren,
+   a_din, b_din, p_dout
+   );
+
+
+   //
+   // Settings (expected to define the CRYPTECH_PRIMITIVE_* macros used below)
+   //
+   `include "cryptech_primitive_switch.vh"
+
+
+   //
+   // Constants
+   //
+   localparam OPERAND_NUM_WORDS  = 8;   // operands are 8 words of 32 bits
+   localparam WORD_COUNTER_WIDTH = 3;   // width of a word index (0..7)
+
+
+   //
+   // Handy Numbers
+   //
+   localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
+   localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+   //
+   // Handy Functions (word index increment / decrement with wrap-around)
+   //
+   function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
+      input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+      begin   // current + 1, wrapping from WORD_INDEX_LAST back to zero
+         WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
+                                   WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
+      end
+   endfunction
+
+   function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST;
+      input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+      begin   // current - 1, wrapping from zero back to WORD_INDEX_LAST
+         WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+                                       WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+      end
+   endfunction
+
+
+   //
+   // Ports
+   //
+   input  wire                          clk;     // system clock
+   input  wire                          rst_n;   // active-low async reset
+
+   input  wire                          ena;     // enable input (sampled while rdy is high)
+   output wire                          rdy;     // ready output (high while idle)
+
+   output wire [WORD_COUNTER_WIDTH-1:0] a_addr;  // index of current A word
+   output wire [WORD_COUNTER_WIDTH-1:0] b_addr;  // index of current B word
+   output wire [WORD_COUNTER_WIDTH-1:0] p_addr;  // index of current P word
+   output wire                          p_wren;  // store current P word now
+
+   input  wire [                  31:0] a_din;   // A
+   input  wire [                  31:0] b_din;   // B
+   output wire [                  31:0] p_dout;  // P = A * B mod N
+
+
+   //
+   // Word Indices
+   //
+   reg [WORD_COUNTER_WIDTH-1:0] index_a;
+   reg [WORD_COUNTER_WIDTH-1:0] index_b;
+
+   /* map registers to output ports */
+   assign a_addr = index_a;
+   assign b_addr = index_b;
+
+   //
+   // FSM: one-hot shift register. The hot bit is injected at the MSB and
+   // moves towards bit 0; its position decides which strobes are active.
+   localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1;
+
+   reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+   assign rdy = fsm_shreg[0];   // the hot bit rests in bit 0 while idle
+   // Phase windows. Every slice is exactly as wide as its wire: inc_index_b and store_si_msb used to be one bit wider and relied on implicit MSB truncation.
+   wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a  = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
+   wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+   wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b  = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+   wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+   wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)];
+   wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si     = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)];
+   wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)];
+   wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)];
+   wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)];
+   wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop  = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)];
+   // Control strobes: each is high while the hot bit is inside the matching window
+   wire inc_index_a   = |fsm_shreg_inc_index_a;
+   wire store_word_a  = |fsm_shreg_store_word_a;
+   wire inc_index_b   = |fsm_shreg_inc_index_b;
+   wire clear_mac_ab  = |fsm_shreg_inc_index_b;   // same window as inc_index_b
+   wire shift_wide_a  = |fsm_shreg_inc_index_b;   // same window as inc_index_b
+   wire enable_mac_ab = |fsm_shreg_inc_index_b;   // same window as inc_index_b
+   wire store_si_msb  = |fsm_shreg_store_si_msb;
+   wire store_si_lsb  =  fsm_shreg_store_si_lsb;
+   wire shift_si      = |fsm_shreg_shift_si;
+   wire mask_cw1_sum  =  fsm_shreg_mask_cw1_sum;
+   wire store_c_word  = |fsm_shreg_store_c_word;
+   wire reduce_start  =  fsm_shreg_reduce_start;
+   wire reduce_stop   =  fsm_shreg_reduce_stop;
+
+
+   //
+   // FSM Logic
+   //
+   wire reduce_done;   // rdy output of the reduction stage
+
+   always @(posedge clk or negedge rst_n)
+     //
+     if (rst_n == 1'b0)
+       // reset into the idle state: only bit 0 (rdy) is set
+       fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+     //
+     else begin
+        // idle: keep bit 0 set until ena is seen, then inject the hot bit at the MSB
+        if (rdy)
+          fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+        // running: advance one position per clock, but hold in reduce_stop until reduce_done
+        else if (!reduce_stop || reduce_done)
+          fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+        //
+     end
+
+
+   //
+   // Word Index Increment Logic
+   //
+   reg index_b_ff;   // toggles every clock inside the inc_index_b window, zero outside it
+
+   always @(posedge clk)
+     //
+     if (inc_index_b) index_b_ff <= ~index_b_ff;
+     else             index_b_ff <= 1'b0;
+
+   always @(posedge clk)
+     //
+     if (rdy) begin
+        // idle: A is walked upwards from word 0, B downwards from the last word
+        index_a <= WORD_INDEX_ZERO;
+        index_b <= WORD_INDEX_LAST;
+        //
+     end else begin
+        // index_b only steps when index_b_ff is low, i.e. every other clock of its window
+        if (inc_index_a)                index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
+        if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
+        //
+     end
+
+
+   //
+   // Wide Operand Buffer (all of A, presented to the multipliers as sixteen 16-bit slices)
+   //
+   reg [255:0] buf_a_wide;
+
+   always @(posedge clk)
+     // store: bits [223:16] move up by 32, a_din goes in with its halves swapped, old [255:240] lands in [15:0]
+     if (store_word_a)
+       buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]};
+     else if (shift_wide_a)   // rotate the whole buffer left by 16 bits
+       buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
+
+
+   //
+   // Multiplier Array
+   //
+   wire         mac_inhibit;   // control signal to pause all accumulators
+
+   wire [46: 0] mac[0:15];     // outputs of all accumulators
+   reg  [15: 0] mac_clear;     // individual per-accumulator clear flag
+
+   assign mac_inhibit = ~enable_mac_ab;
+
+   always @(posedge clk)
+     // outside the multiply window every clear flag is set
+     if (!clear_mac_ab)
+       mac_clear <= {16{1'b1}};
+     else begin
+        // first step: only bit 1 set; then the set bit moves up one place per clock
+        if (mac_clear == {16{1'b1}})
+          mac_clear <= {{14{1'b0}}, 1'b1, {1'b0}};
+        else
+          mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}};
+        // once bit 15 has been set, the flags return to all ones
+
+     end
+
+   //
+   // Array of parallel multipliers
+   //
+   genvar i;
+   generate for (i=0; i<16; i=i+1)
+     begin : gen_mac_array
+        //
+        `CRYPTECH_PRIMITIVE_MAC16 mac16_inst
+          (
+           .clk (clk),
+           .ce  (~mac_inhibit),
+
+           .clr (mac_clear[i]),
+
+           .a   (buf_a_wide[16*i+:16]),
+           .b   (index_b_ff ? b_din[15:0] : b_din[31:16]),
+           .s   (mac[i])
+           );
+        //
+     end
+   endgenerate
+
+   //
+   // Intermediate Words (47-bit slots: 15 in si_msb, 16 in si_lsb)
+   //
+   reg  [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb;
+   reg  [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb;
+
+
+   wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new;
+   wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new;
+
+   generate for (i=0; i<16; i=i+1)
+     begin : gen_si_lsb_new   // slot i takes accumulator 15-i (reversed order)
+        assign si_lsb_new[47*i+:47] = mac[15-i];
+     end
+   endgenerate
+
+   generate for (i=1; i<16; i=i+1)
+     begin : gen_si_msb_new   // slot 15-i takes accumulator i while its clear flag is set, else holds
+        assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47];
+     end
+   endgenerate
+
+   always @(posedge clk) begin
+      // shifting moves {si_msb, si_lsb} down by two 47-bit slots, filling with zeros
+      if (shift_si) begin
+         si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]};
+         si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]};
+      end else begin
+
+         if (store_si_msb)
+           si_msb <= si_msb_new;
+
+         if (store_si_lsb)
+           si_lsb <= si_lsb_new;
+      end
+
+   end
+
+
+   //
+   // Accumulators
+   //
+   wire [46: 0] add47_cw0_s;
+   wire [46: 0] add47_cw1_s;
+
+
+   //
+   // cw0, b, cw1, b
+   //
+   reg [30: 0] si_prev_dly;   // registered si_lsb[93:63] while shifting, zero otherwise
+   reg [15: 0] si_next_dly;   // registered si_lsb[62:47]
+
+   always @(posedge clk)
+     //
+     if (shift_si)
+       si_prev_dly <= si_lsb[93:63];
+     else
+       si_prev_dly <= {31{1'b0}};
+
+   always @(posedge clk)
+     //
+     si_next_dly <= si_lsb[62:47];
+
+   wire [46: 0] add47_cw0_a = si_lsb[46:0];
+   wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly};
+   // second adder: adds si_next_dly at bit 16, plus bits [46:32] of its own previous sum unless masked
+   wire [46: 0] add47_cw1_a = add47_cw0_s;
+   wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}};
+
+   `CRYPTECH_PRIMITIVE_ADD47 add47_cw0_inst
+     (
+      .clk (clk),
+      .a   (add47_cw0_a),
+      .b   (add47_cw0_b),
+      .s   (add47_cw0_s)
+      );
+
+   `CRYPTECH_PRIMITIVE_ADD47 add47_cw1_inst
+     (
+      .clk (clk),
+      .a   (add47_cw1_a),
+      .b   (add47_cw1_b),
+      .s   (add47_cw1_s)
+      );
+
+
+
+   //
+   // Full-Size Product (2 * OPERAND_NUM_WORDS words of 32 bits, stored in a BRAM)
+   //
+   reg  [WORD_COUNTER_WIDTH:0] bram_c_addr;
+
+   wire [WORD_COUNTER_WIDTH:0] reduce_c_addr;
+   wire [                31:0] reduce_c_word;
+
+   always @(posedge clk)
+     // write address counts up while words are stored and is zero otherwise
+     if (store_c_word)
+       bram_c_addr <= bram_c_addr + 1'b1;
+     else
+       bram_c_addr <= {(WORD_COUNTER_WIDTH+1){1'b0}};   // zero literal sized to match the register
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH     (32),
+      .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1)
+      )
+   bram_c_inst
+     (
+      .clk    (clk),
+
+      .a_addr (bram_c_addr),
+      .a_wr   (store_c_word),
+      .a_in   (add47_cw1_s[31:0]),
+      .a_out  (),
+
+      .b_addr (reduce_c_addr),
+      .b_out  (reduce_c_word)
+      );
+
+
+   //
+   // Reduction Stage (reads the product through the second BRAM port and drives the P outputs)
+   //
+   ecdsa256_modular_reductor mod_redc_inst
+     (
+      .clk    (clk),
+      .rst_n  (rst_n),
+
+      .ena    (reduce_start),
+      .rdy    (reduce_done),
+
+      .x_addr (reduce_c_addr),
+      .p_addr (p_addr),
+      .p_wren (p_wren),
+
+      .x_din  (reduce_c_word),
+      .p_dout (p_dout)
+      );
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/ecdsa256_modular_reductor.v b/rtl/modular/ecdsa256_modular_reductor.v
new file mode 100644
index 0000000..8fa37f5
--- /dev/null
+++ b/rtl/modular/ecdsa256_modular_reductor.v
@@ -0,0 +1,702 @@
+//------------------------------------------------------------------------------
+//
+// ecdsa256_modular_reductor.v
+// -----------------------------------------------------------------------------
+// Modular reductor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module ecdsa256_modular_reductor
+  (
+   clk, rst_n,
+   ena, rdy,
+   x_addr, p_addr, p_wren,
+   x_din, p_dout
+   );
+   // Reads the 16-word X through x_addr/x_din and emits the 8-word P through p_addr/p_wren/p_dout.
+   //
+   // Constants
+   // (8 words per operand, 3 address bits per operand; the data ports are 32 bits wide)
+   localparam	OPERAND_NUM_WORDS	= 8;
+   localparam	WORD_COUNTER_WIDTH	= 3;
+
+
+   //
+   // Handy Numbers
+   // (one bit wider than an operand index because X spans 2 * OPERAND_NUM_WORDS words)
+   localparam	[WORD_COUNTER_WIDTH:0]	WORD_INDEX_ZERO	= 0;
+   localparam	[WORD_COUNTER_WIDTH:0]	WORD_INDEX_LAST	= 2 * OPERAND_NUM_WORDS - 1;
+
+
+   //
+   // Handy Functions
+   // (step the index down by one, wrapping from zero back to WORD_INDEX_LAST)
+   function	[WORD_COUNTER_WIDTH:0]	WORD_INDEX_PREVIOUS_OR_LAST;
+      input	[WORD_COUNTER_WIDTH:0]	WORD_INDEX_CURRENT;
+      begin
+	 WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+				       WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+      end
+   endfunction
+
+
+   //
+   // Ports
+   //
+   input		wire				clk;	// system clock
+   input		wire				rst_n;	// active-low async reset
+
+   input		wire				ena;	// enable input
+   output	wire 					rdy;	// ready output
+
+   output	wire [WORD_COUNTER_WIDTH-0:0] 		x_addr;	// index of current X word
+   output	wire [WORD_COUNTER_WIDTH-1:0] 		p_addr;	// index of current P word
+   output	wire 					p_wren;	// store current P word now
+
+   input		wire [                  31:0] 	x_din;	// X
+   output	wire [                  31:0] 		p_dout;	// P = X mod N
+
+
+   //
+   // Word Indices
+   //
+   reg [WORD_COUNTER_WIDTH:0] 				index_x;
+
+
+   /* map registers to output ports */
+   assign x_addr	= index_x;
+
+
+   //
+   // FSM
+   // (shift register holding a single 1: idle in bit 0, injected at the MSB by ena)
+   localparam	FSM_SHREG_WIDTH	= (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1;
+   // width: 17 load/store slots, then 5 x (start, stop) reduction slots, then the idle bit
+   reg [FSM_SHREG_WIDTH-1:0] 				fsm_shreg;
+
+   assign rdy = fsm_shreg[0];
+   // slices: [MSB -: 16] advance index_x, one bit lower stores words, bits [10:1] run the stages
+   wire [2 * OPERAND_NUM_WORDS - 1:0] 			fsm_shreg_inc_index_x	= fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS];
+   wire [2 * OPERAND_NUM_WORDS - 1:0] 			fsm_shreg_store_word_z	= fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS];
+   wire [2 *                 5 - 1:0] 			fsm_shreg_reduce_stages	= fsm_shreg[                                        1 +: 2 *                 5];
+   // each stage owns two adjacent bits: the upper one is "start", the lower one is "stop"
+   wire [5-1:0] 					fsm_shreg_reduce_stage_start;
+   wire [5-1:0] 					fsm_shreg_reduce_stage_stop;
+
+   genvar 						s;
+   generate for (s=0; s<5; s=s+1)
+     begin : gen_fsm_shreg_reduce_stages
+	assign fsm_shreg_reduce_stage_start[5 - (s + 1)]	= fsm_shreg_reduce_stages[2 * (5 - s) - 1];
+	assign fsm_shreg_reduce_stage_stop[5 - (s + 1)]		= fsm_shreg_reduce_stages[2 * (5 - s) - 2];
+     end
+   endgenerate
+
+   wire inc_index_x	= |fsm_shreg_inc_index_x;
+   wire store_word_z	= |fsm_shreg_store_word_z;
+   wire reduce_start	= |fsm_shreg_reduce_stage_start;
+   wire reduce_stop	= |fsm_shreg_reduce_stage_stop;
+   wire store_p		=  fsm_shreg_reduce_stage_stop[0];
+
+   // ready flags of the two adders and the subtractor
+   wire	reduce_adder0_done;
+   wire	reduce_adder1_done;
+   wire	reduce_subtractor_done;
+
+   wire	reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done;
+
+   always @(posedge clk or negedge rst_n)
+     // async reset: only bit 0 set, i.e. idle with rdy high
+     if (rst_n == 1'b0)
+       //
+       fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+   //
+     else begin
+	// idle: ena moves the 1 to the MSB, otherwise it stays in bit 0
+	if (rdy)
+	  //
+	  fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+	// busy: shift right; hold in a "stop" slot until all three units report ready
+	else if (!reduce_stop || reduce_done_all)
+	  //
+	  fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+	//
+     end
+
+
+   //
+   // Word Index Increment Logic
+   // (index_x is parked at the last word while idle and counts down while words are loaded)
+   always @(posedge clk)
+     //
+     if (rdy)
+       //
+       index_x <= WORD_INDEX_LAST;
+   //
+     else if (inc_index_x)
+       //
+       index_x	<= WORD_INDEX_PREVIOUS_OR_LAST(index_x);
+
+
+   //
+   // Look-up Table
+   //
+
+   //
+   // Take a look at the corresponding C model for more information
+   // on how exactly the math behind reduction works. The first step
+   // is to assemble nine 256-bit values ("z-words") from 32-bit parts
+   // of the full 512-bit product ("c-word"). The problem with z5 is
+   // that it contains c13 two times. This implementation scans from
+   // c15 to c0 and writes current part of c-word into corresponding
+   // parts of z-words. Since those 32-bit parts are stored in block
+   // memories, one source word can only be written to one location in
+   // every z-word at a time. The trick is to delay c13 and then write
+   // the delayed value at the corresponding location in z5 instead of
+   // the next c12. "z_save" flag is used to indicate that the current
+   // word should be delayed and written once again during the next cycle.
+   //
+
+   reg	[9*WORD_COUNTER_WIDTH-1:0]	z_addr;	// write address per z-word, 3 bits each, s1 in the LSBs
+   reg [9                   -1:0] 	z_wren;	// write enable per z-word, s1 in bit 0
+   reg [9                   -1:0] 	z_mask;	// mask input to store zero word
+   reg [9                   -1:0] 	z_save;	// save previous word once again
+
+   always @(posedge clk)
+     // destination word inside every z-word for the c-word selected by index_x
+     if (inc_index_x)
+       //
+       case (index_x)
+	 //
+	 //                     s9     s8     s7     s6     s5     s4     s3     s2     s1
+	 //                     ||     ||     ||     ||     ||     ||     ||     ||     ||
+	 4'd00:	z_addr <= {3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'd00};
+	 4'd01:	z_addr <= {3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'd01};
+	 4'd02:	z_addr <= {3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'd02};
+	 4'd03:	z_addr <= {3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'd03};
+	 4'd04:	z_addr <= {3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'd04};
+	 4'd05:	z_addr <= {3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'd05};
+	 4'd06:	z_addr <= {3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'd06};
+	 4'd07:	z_addr <= {3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'dx,  3'd07};
+	 4'd08:	z_addr <= {3'd02, 3'd03, 3'd04, 3'd06, 3'd07, 3'd00, 3'd00, 3'd00, 3'dx};
+	 4'd09:	z_addr <= {3'd03, 3'd04, 3'd06, 3'd03, 3'd00, 3'd01, 3'd01, 3'd01, 3'dx};
+	 4'd10:	z_addr <= {3'd04, 3'd05, 3'd05, 3'd07, 3'd01, 3'd02, 3'd02, 3'd02, 3'dx};
+	 4'd11:	z_addr <= {3'd05, 3'd06, 3'd07, 3'd00, 3'd02, 3'd03, 3'd07, 3'd03, 3'dx};
+	 4'd12:	z_addr <= {3'd06, 3'd07, 3'd00, 3'd01, 3'd06, 3'd04, 3'd03, 3'd04, 3'dx};
+	 4'd13:	z_addr <= {3'd07, 3'd00, 3'd01, 3'd02, 3'd03, 3'd05, 3'd04, 3'd05, 3'dx};
+	 4'd14:	z_addr <= {3'd00, 3'd01, 3'd02, 3'd04, 3'd04, 3'd06, 3'd05, 3'd06, 3'dx};
+	 4'd15:	z_addr <= {3'd01, 3'd02, 3'd03, 3'd05, 3'd05, 3'd07, 3'd06, 3'd07, 3'dx};
+	 //
+         default:	z_addr <= {9*WORD_COUNTER_WIDTH{1'bX}};
+	 //
+       endcase
+
+   always @(posedge clk)
+     // c0..c7 are written to s1 only, c8..c15 to s2..s9 (not gated by inc_index_x, unlike the other tables)
+     case (index_x)
+       //
+       //                     9     8     7     6     5     4     3     2     1
+       //                     |     |     |     |     |     |     |     |     |
+       4'd00:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       4'd01:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       4'd02:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       4'd03:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       4'd04:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       4'd05:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       4'd06:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       4'd07:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       4'd08:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       4'd09:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       4'd10:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       4'd11:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       4'd12:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       4'd13:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       4'd14:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       4'd15:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       //
+       default:	z_wren <= {9{1'b0}};
+       //
+     endcase
+
+   always @(posedge clk)
+     // z-words that must store a zero word instead of the current c-word
+     if (inc_index_x)
+       //
+       case (index_x)
+	 //
+	 //                     9     8     7     6     5     4     3     2     1
+	 //                     |     |     |     |     |     |     |     |     |
+	 4'd00:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd01:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd02:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd03:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd04:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd05:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd06:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd07:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd08:	z_mask <= {1'b1, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+	 4'd09:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+	 4'd10:	z_mask <= {1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+	 4'd11:	z_mask <= {1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0};
+	 4'd12:	z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+	 4'd13:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+	 4'd14:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd15:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 //
+         default:	z_mask <= {9{1'bX}};
+	 //
+       endcase
+
+   always @(posedge clk)
+     // z-words that store the one-clock-delayed word; only set for s5 at index 12
+     if (inc_index_x)
+       //
+       case (index_x)
+	 //
+	 //                     9     8     7     6     5     4     3     2     1
+	 //                     |     |     |     |     |     |     |     |     |
+	 4'd00:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd01:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd02:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd03:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd04:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd05:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd06:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd07:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd08:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd09:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd10:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd11:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd12:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd13:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd14:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 4'd15:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 //
+         default:	z_save <= {9{1'bX}};
+	 //
+       endcase
+
+
+    //
+    // Modulus
+    // (ROM holding the modulus words; its address comes from the subtractor's n_addr output)
+    wire [WORD_COUNTER_WIDTH-1:0] n_addr;
+    wire [                32-1:0] n_dout; 
+    ecdsa256_modulus_distmem p256_q_rom
+    (
+    	.clk    (clk),
+        .b_addr (n_addr),
+        .b_out  (n_dout)
+    );    
+
+   //
+   // Intermediate Numbers
+   // (nine 8-word memories for the z-words: port A is written while loading, port B is read while reducing)
+   reg [WORD_COUNTER_WIDTH-1:0] 	reduce_z_addr[1:9];
+   wire [                32-1:0] 	reduce_z_dout[1:9];
+   // x_din delayed by one clock; written instead of x_din when z_save is set
+   reg [31: 0] 				x_din_dly;
+   always @(posedge clk)
+     //
+     x_din_dly <= x_din;
+
+
+   genvar 				z;
+   generate for (z=1; z<=9; z=z+1)
+     //
+     begin : gen_z_bram
+	// NOTE(review): the instance is called bram_c_inst although it stores z-word number z
+	bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+	bram_c_inst
+		   (
+		    .clk		(clk),
+
+		    .a_addr	(z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]),
+		    .a_wr		(z_wren[z-1] & store_word_z),
+		    .a_in		(z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)),
+		    .a_out	(),
+
+		    .b_addr	(reduce_z_addr[z]),
+		    .b_out	(reduce_z_dout[z])
+		    );
+	//
+     end
+      //
+   endgenerate
+
+
+
+   // Three accumulator memories (sum0, sum1, diff): written by the arithmetic units, read back as operands.
+   wire	[                32-1:0]	bram_sum0_wr_din;
+   wire [WORD_COUNTER_WIDTH-1:0] 	bram_sum0_wr_addr;
+   wire 				bram_sum0_wr_wren;
+
+   wire [                32-1:0] 	bram_sum1_wr_din;
+   wire [WORD_COUNTER_WIDTH-1:0] 	bram_sum1_wr_addr;
+   wire 				bram_sum1_wr_wren;
+
+   wire [                32-1:0] 	bram_diff_wr_din;
+   wire [WORD_COUNTER_WIDTH-1:0] 	bram_diff_wr_addr;
+   wire 				bram_diff_wr_wren;
+
+   wire [                32-1:0] 	bram_sum0_rd_dout;
+   reg [WORD_COUNTER_WIDTH-1:0] 	bram_sum0_rd_addr;
+
+   wire [                32-1:0] 	bram_sum1_rd_dout;
+   reg [WORD_COUNTER_WIDTH-1:0] 	bram_sum1_rd_addr;
+
+   wire [                32-1:0] 	bram_diff_rd_dout;
+   reg [WORD_COUNTER_WIDTH-1:0] 	bram_diff_rd_addr;
+
+
+   bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+   bram_sum0_inst
+     (
+      .clk		(clk),
+
+      .a_addr	(bram_sum0_wr_addr),
+      .a_wr		(bram_sum0_wr_wren),
+      .a_in		(bram_sum0_wr_din),
+      .a_out	(),
+
+      .b_addr	(bram_sum0_rd_addr),
+      .b_out	(bram_sum0_rd_dout)
+      );
+
+   bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+   bram_sum1_inst
+     (
+      .clk		(clk),
+
+      .a_addr	(bram_sum1_wr_addr),
+      .a_wr		(bram_sum1_wr_wren),
+      .a_in		(bram_sum1_wr_din),
+      .a_out	(),
+
+      .b_addr	(bram_sum1_rd_addr),
+      .b_out	(bram_sum1_rd_dout)
+      );
+
+   bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+   bram_diff_inst
+     (
+      .clk		(clk),
+
+      .a_addr	(bram_diff_wr_addr),
+      .a_wr		(bram_diff_wr_wren),
+      .a_in		(bram_diff_wr_din),
+      .a_out	(),
+
+      .b_addr	(bram_diff_rd_addr),
+      .b_out	(bram_diff_rd_dout)
+      );
+
+
+   wire [WORD_COUNTER_WIDTH-1:0] 	adder0_ab_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] 	adder1_ab_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] 	subtractor_ab_addr;
+
+   reg [                32-1:0] 	adder0_a_din;
+   reg [                32-1:0] 	adder0_b_din;
+
+   reg [                32-1:0] 	adder1_a_din;
+   reg [                32-1:0] 	adder1_b_din;
+
+   reg [                32-1:0] 	subtractor_a_din;
+   reg [                32-1:0] 	subtractor_b_din;
+
+   // n_addr is taken from the subtractor only (the adders' n_addr outputs are left open), because all modules are in sync
+
+   modular_adder #
+     (
+      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS),
+      .WORD_COUNTER_WIDTH	(WORD_COUNTER_WIDTH)
+      )
+   adder_inst0
+     (
+      .clk			(clk),
+      .rst_n		(rst_n),
+
+      .ena			(reduce_start),
+      .rdy			(reduce_adder0_done),
+
+      .ab_addr		(adder0_ab_addr),
+      .n_addr		(),
+      .s_addr		(bram_sum0_wr_addr),
+      .s_wren		(bram_sum0_wr_wren),
+
+      .a_din		(adder0_a_din),
+      .b_din		(adder0_b_din),
+      .n_din		(n_dout),
+      .s_dout		(bram_sum0_wr_din)
+      );
+
+   modular_adder #
+     (
+      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS),
+      .WORD_COUNTER_WIDTH	(WORD_COUNTER_WIDTH)
+      )
+   adder_inst1
+     (
+      .clk			(clk),
+      .rst_n		(rst_n),
+
+      .ena			(reduce_start),
+      .rdy			(reduce_adder1_done),
+
+      .ab_addr		(adder1_ab_addr),
+      .n_addr		(),
+      .s_addr		(bram_sum1_wr_addr),
+      .s_wren		(bram_sum1_wr_wren),
+
+      .a_din		(adder1_a_din),
+      .b_din		(adder1_b_din),
+      .n_din		(n_dout),
+      .s_dout		(bram_sum1_wr_din)
+      );
+
+   modular_subtractor #
+     (
+      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS),
+      .WORD_COUNTER_WIDTH	(WORD_COUNTER_WIDTH)
+      )
+   subtractor_inst
+     (
+      .clk			(clk),
+      .rst_n		(rst_n),
+
+      .ena			(reduce_start),
+      .rdy			(reduce_subtractor_done),
+
+      .ab_addr		(subtractor_ab_addr),
+      .n_addr		(n_addr),
+      .d_addr		(bram_diff_wr_addr),
+      .d_wren		(bram_diff_wr_wren),
+
+      .a_din		(subtractor_a_din),
+      .b_din		(subtractor_b_din),
+      .n_din		(n_dout),
+      .d_dout		(bram_diff_wr_din)
+      );
+
+
+   //
+   // Address (Operand) Selector
+   // (routes each unit's ab_addr to the memories it reads in the active stage; unused addresses are X)
+   always @(*)
+     //
+     case (fsm_shreg_reduce_stage_stop)
+       // stage 1: z-words 2, 3 and 6 are read; the accumulators are not read yet
+       5'b10000: begin
+	  reduce_z_addr[1]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[2]	= adder0_ab_addr;
+	  reduce_z_addr[3]	= adder1_ab_addr;
+	  reduce_z_addr[4]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[5]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[6]	= subtractor_ab_addr;
+	  reduce_z_addr[7]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[8]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[9]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum0_rd_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum1_rd_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+       end
+       // stage 2: z-words 1, 4 and 7 are read together with all three accumulators
+       5'b01000: begin
+	  reduce_z_addr[1]	= adder0_ab_addr;
+	  reduce_z_addr[2]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[3]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[4]	= adder1_ab_addr;
+	  reduce_z_addr[5]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[6]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[7]	= subtractor_ab_addr;
+	  reduce_z_addr[8]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[9]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum0_rd_addr	= adder0_ab_addr;
+	  bram_sum1_rd_addr	= adder1_ab_addr;
+	  bram_diff_rd_addr = subtractor_ab_addr;
+       end
+       // stage 3: z-words 5 and 8 are read together with all three accumulators
+       5'b00100: begin
+	  reduce_z_addr[1]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[2]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[3]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[4]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[5]	= adder0_ab_addr;
+	  reduce_z_addr[6]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[7]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[8]	= subtractor_ab_addr;
+	  reduce_z_addr[9]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum0_rd_addr	= adder0_ab_addr;
+	  bram_sum1_rd_addr	= adder1_ab_addr;
+	  bram_diff_rd_addr = subtractor_ab_addr;
+       end
+       // stage 4: z-word 9 is read; sum1 is addressed by adder 0 because adder 0 consumes it
+       5'b00010: begin
+	  reduce_z_addr[1]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[2]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[3]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[4]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[5]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[6]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[7]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[8]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[9]	= subtractor_ab_addr;
+	  bram_sum0_rd_addr	= adder0_ab_addr;
+	  bram_sum1_rd_addr	= adder0_ab_addr;
+	  bram_diff_rd_addr = subtractor_ab_addr;
+       end
+       // stage 5: no z-word is read; sum0 and diff are both addressed by adder 0
+       5'b00001: begin
+	  reduce_z_addr[1]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[2]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[3]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[4]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[5]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[6]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[7]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[8]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[9]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum0_rd_addr	= adder0_ab_addr;
+	  bram_sum1_rd_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_diff_rd_addr = adder0_ab_addr;
+       end
+       // no "stop" slot active: every read address is a don't-care
+       default: begin
+	  reduce_z_addr[1]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[2]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[3]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[4]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[5]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[6]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[7]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[8]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[9]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum0_rd_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum1_rd_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+       end
+       //
+     endcase
+
+
+   //
+   // adder 0
+   // (per stage: z2 + z2, sum0 + z1, sum0 + z5, sum0 + sum1, sum0 + diff; the result is written to sum0)
+   always @(*) begin
+      //
+      case (fsm_shreg_reduce_stage_stop)
+	5'b10000:	adder0_a_din = reduce_z_dout[2];
+	5'b01000:	adder0_a_din = bram_sum0_rd_dout;
+	5'b00100:	adder0_a_din = bram_sum0_rd_dout;
+	5'b00010:	adder0_a_din = bram_sum0_rd_dout;
+	5'b00001:	adder0_a_din = bram_sum0_rd_dout;
+	default:		adder0_a_din = {32{1'bX}};
+      endcase
+      //
+      case (fsm_shreg_reduce_stage_stop)
+	5'b10000:	adder0_b_din = reduce_z_dout[2];
+	5'b01000:	adder0_b_din = reduce_z_dout[1];
+	5'b00100:	adder0_b_din = reduce_z_dout[5];
+	5'b00010:	adder0_b_din = bram_sum1_rd_dout;
+	5'b00001:	adder0_b_din = bram_diff_rd_dout;
+	default:		adder0_b_din = {32{1'bX}};
+      endcase
+      //
+   end
+
+   //
+   // adder 1
+   // (per stage: z3 + z3, sum1 + z4, sum1 + 0; inputs are don't-care in stages 4 and 5; the result is written to sum1)
+   always @(*) begin
+      //
+      case (fsm_shreg_reduce_stage_stop)
+	5'b10000:	adder1_a_din = reduce_z_dout[3];
+	5'b01000:	adder1_a_din = bram_sum1_rd_dout;
+	5'b00100:	adder1_a_din = bram_sum1_rd_dout;
+	5'b00010:	adder1_a_din = {32{1'bX}};
+	5'b00001:	adder1_a_din = {32{1'bX}};
+	default:		adder1_a_din = {32{1'bX}};
+      endcase
+      //
+      case (fsm_shreg_reduce_stage_stop)
+	5'b10000:	adder1_b_din = reduce_z_dout[3];
+	5'b01000:	adder1_b_din = reduce_z_dout[4];
+	5'b00100:	adder1_b_din = {32{1'b0}};
+	5'b00010:	adder1_b_din = {32{1'bX}};
+	5'b00001:	adder1_b_din = {32{1'bX}};
+	default:		adder1_b_din = {32{1'bX}};
+      endcase
+      //
+   end
+
+
+   //
+   // subtractor
+   // (per stage: 0 - z6, diff - z7, diff - z8, diff - z9; inputs are don't-care in stage 5; the result is written to diff)
+   always @(*) begin
+      //
+      case (fsm_shreg_reduce_stage_stop)
+	5'b10000:	subtractor_a_din = {32{1'b0}};
+	5'b01000:	subtractor_a_din = bram_diff_rd_dout;
+	5'b00100:	subtractor_a_din = bram_diff_rd_dout;
+	5'b00010:	subtractor_a_din = bram_diff_rd_dout;
+	5'b00001:	subtractor_a_din = {32{1'bX}};
+	default:		subtractor_a_din = {32{1'bX}};
+      endcase
+      //
+      case (fsm_shreg_reduce_stage_stop)
+	5'b10000:	subtractor_b_din = reduce_z_dout[6];
+	5'b01000:	subtractor_b_din = reduce_z_dout[7];
+	5'b00100:	subtractor_b_din = reduce_z_dout[8];
+	5'b00010:	subtractor_b_din = reduce_z_dout[9];
+	5'b00001:	subtractor_b_din = {32{1'bX}};
+	default:		subtractor_b_din = {32{1'bX}};
+      endcase
+      //
+   end
+
+
+   //
+   // Address Mapping
+   // (P mirrors adder 0's write port; p_wren is only asserted in the last stage, when store_p is set)
+   assign p_addr	= bram_sum0_wr_addr;
+   assign p_wren	= bram_sum0_wr_wren & store_p;
+   assign p_dout	= bram_sum0_wr_din;
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/util/bram_1rw_1ro_readfirst.v b/rtl/modular/ecdsa256_modulus_distmem.v
similarity index 62%
copy from rtl/util/bram_1rw_1ro_readfirst.v
copy to rtl/modular/ecdsa256_modulus_distmem.v
index db62726..6c576d0 100644
--- a/rtl/util/bram_1rw_1ro_readfirst.v
+++ b/rtl/modular/ecdsa256_modulus_distmem.v
@@ -1,6 +1,6 @@
 //======================================================================
 //
-// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+// Copyright (c) 2016, 2018 NORDUnet A/S All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -30,72 +30,38 @@
 //
 //======================================================================
 
-`timescale 1ns / 1ps
-
-module bram_1rw_1ro_readfirst
-  #(parameter MEM_WIDTH            = 32,
-    parameter MEM_ADDR_BITS        = 8)
-   (
-    input wire 			   clk,
-
-    input wire [MEM_ADDR_BITS-1:0] a_addr,
-    input wire 			   a_wr,
-    input wire [MEM_WIDTH-1:0] 	   a_in,
-    output wire [MEM_WIDTH-1:0]    a_out,
-
-    input wire [MEM_ADDR_BITS-1:0] b_addr,
-    output wire [MEM_WIDTH-1:0]    b_out
-    );
-
-
-   //
-   // BRAM
-   //
-   (* RAM_STYLE="BLOCK" *)
-   reg [MEM_WIDTH-1:0] 		   bram[0:(2**MEM_ADDR_BITS)-1];
-
-
-   //
-   // Initialization
-   //
-   /**
-    integer c;
-    initial begin
-    for (c=0; c<(2**MEM_ADDR_BITS); c=c+1)
-    bram[c] = {MEM_WIDTH{1'b0}};
-	end
-    **/
-
+module ecdsa256_modulus_distmem	// fixed 8 x 32-bit lookup table with a registered output
+  (
+   input wire 		clk,	// clock; b_out is updated on its rising edge
+   input wire [ 3-1:0] 	b_addr,	// word index, 0 selects the least significant word
+   output wire [32-1:0] b_out	// selected word, available one clock after b_addr
+   );
 
 
    //
    // Output Registers
    //
-   reg [MEM_WIDTH-1:0] 		   bram_reg_a;
-   reg [MEM_WIDTH-1:0] 		   bram_reg_b;
+   (* ram_style="distributed" *)
+   reg [31:0] 		bram_reg_b;
 
-   assign a_out = bram_reg_a;
    assign b_out = bram_reg_b;
 
 
-   //
-   // Read-Write Port A
-   //
-   always @(posedge clk) begin
-      //
-      bram_reg_a <= bram[a_addr];
-      //
-      if (a_wr) bram[a_addr] <= a_in;
-      //
-   end
-
-
    //
    // Read-Only Port B
    //
    always @(posedge clk)
      //
-     bram_reg_b <= bram[b_addr];
+     case (b_addr)	// words of 2^256 - 2^224 + 2^192 + 2^96 - 1, least significant first
+       3'b000:	bram_reg_b <= 32'hffffffff;	// bits  31:  0
+       3'b001:	bram_reg_b <= 32'hffffffff;	// bits  63: 32
+       3'b010:	bram_reg_b <= 32'hffffffff;	// bits  95: 64
+       3'b011:	bram_reg_b <= 32'h00000000;	// bits 127: 96
+       3'b100:	bram_reg_b <= 32'h00000000;	// bits 159:128
+       3'b101:	bram_reg_b <= 32'h00000000;	// bits 191:160
+       3'b110:	bram_reg_b <= 32'h00000001;	// bits 223:192
+       3'b111:	bram_reg_b <= 32'hffffffff;	// bits 255:224
+     endcase	// all eight addresses are listed, so no default branch is needed
 
 
 endmodule
diff --git a/rtl/modular/ecdsa384_modular_multiplier.v b/rtl/modular/ecdsa384_modular_multiplier.v
new file mode 100644
index 0000000..a52fc86
--- /dev/null
+++ b/rtl/modular/ecdsa384_modular_multiplier.v
@@ -0,0 +1,404 @@
+//------------------------------------------------------------------------------
+//
+// ecdsa384_modular_multiplier.v
+// -----------------------------------------------------------------------------
+// Modular multiplier for P-384 prime.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module ecdsa384_modular_multiplier
+  (
+   clk, rst_n,
+   ena, rdy,
+   a_addr, b_addr, p_addr, p_wren,
+   a_din, b_din, p_dout
+   );
+
+
+    //
+    // Settings
+    //
+    `include "cryptech_primitive_switch.vh"
+    
+
+   //
+   // Constants
+   //
+   localparam	OPERAND_NUM_WORDS				= 12;
+   localparam	WORD_COUNTER_WIDTH				=  4;
+
+
+   //
+   // Handy Numbers
+   //
+   localparam	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_ZERO	= 0;
+   localparam	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_LAST	= OPERAND_NUM_WORDS - 1;
+
+
+   //
+   // Handy Functions
+   //
+   function	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_NEXT_OR_ZERO;
+      input	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_CURRENT;
+      begin
+	 WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
+				   WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
+      end
+   endfunction
+
+   function	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_PREVIOUS_OR_LAST;
+      input	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_CURRENT;
+      begin
+	 WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+				       WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+      end
+   endfunction
+
+
+   //
+   // Ports
+   //
+   input		wire										clk;		// system clock
+   input		wire										rst_n;	// active-low async reset
+
+   input		wire										ena;		// enable input
+   output	wire 											rdy;		// ready output
+
+   output	wire [WORD_COUNTER_WIDTH-1:0] 								a_addr;	// index of current A word
+   output	wire [WORD_COUNTER_WIDTH-1:0] 								b_addr;	// index of current B word
+   output	wire [WORD_COUNTER_WIDTH-1:0] 								p_addr;	// index of current P word
+   output	wire 											p_wren;	// store current P word now
+
+   input		wire [                  31:0] 							a_din;	// A
+   input		wire [                  31:0] 							b_din;	// B
+   output	wire [                  31:0] 								p_dout;	// P = A * B mod N
+
+
+   //
+   // Word Indices
+   //
+   reg [WORD_COUNTER_WIDTH-1:0] 									index_a;
+   reg [WORD_COUNTER_WIDTH-1:0] 									index_b;
+
+   /* map registers to output ports */
+   assign a_addr	= index_a;
+   assign b_addr	= index_b;
+
+   //
+   // FSM
+   //
+   localparam	FSM_SHREG_WIDTH	= (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1;
+
+   reg [FSM_SHREG_WIDTH-1:0] 										fsm_shreg;
+
+   assign rdy = fsm_shreg[0];
+
+   wire [1 * OPERAND_NUM_WORDS-1:0] 									fsm_shreg_inc_index_a	= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
+   wire [1 * OPERAND_NUM_WORDS-1:0] 									fsm_shreg_store_word_a	= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+   wire [2 * OPERAND_NUM_WORDS-1:0] 									fsm_shreg_inc_index_b	= fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];	// slice sized to the declared 2*N bits; the former extra MSB was dropped by implicit truncation
+   wire [2 * OPERAND_NUM_WORDS-2:0] 									fsm_shreg_store_si_msb	= fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];	// slice sized to the declared 2*N-1 bits; the former extra MSB was dropped by implicit truncation
+   wire [0 * OPERAND_NUM_WORDS-0:0] 									fsm_shreg_store_si_lsb	= fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)];
+   wire [2 * OPERAND_NUM_WORDS-2:0] 									fsm_shreg_shift_si		= fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)];
+   wire [0 * OPERAND_NUM_WORDS-0:0] 									fsm_shreg_mask_cw1_sum	= fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)];
+   wire [2 * OPERAND_NUM_WORDS-1:0] 									fsm_shreg_store_c_word	= fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)];
+   wire [0 * OPERAND_NUM_WORDS-0:0] 									fsm_shreg_reduce_start	= fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)];
+   wire [0 * OPERAND_NUM_WORDS-0:0] 									fsm_shreg_reduce_stop	= fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)];
+
+   wire 												inc_index_a		= |fsm_shreg_inc_index_a;
+   wire 												store_word_a		= |fsm_shreg_store_word_a;
+   wire 												inc_index_b		= |fsm_shreg_inc_index_b;
+   wire 												clear_mac_ab		= |fsm_shreg_inc_index_b;
+   wire 												shift_wide_a		= |fsm_shreg_inc_index_b;
+   wire 												enable_mac_ab	= |fsm_shreg_inc_index_b;
+   wire 												store_si_msb		= |fsm_shreg_store_si_msb;
+   wire 												store_si_lsb		=  fsm_shreg_store_si_lsb;
+   wire 												shift_si			= |fsm_shreg_shift_si;
+   wire 												mask_cw1_sum		=  fsm_shreg_mask_cw1_sum;
+   wire 												store_c_word		= |fsm_shreg_store_c_word;
+   wire 												reduce_start		=  fsm_shreg_reduce_start;
+   wire 												reduce_stop		=  fsm_shreg_reduce_stop;
+
+
+   //
+   // FSM Logic
+   //
+   wire 												reduce_done;
+
+   always @(posedge clk or negedge rst_n)
+     //
+     if (rst_n == 1'b0)
+       //
+       fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+   //
+     else begin
+	//
+	if (rdy)
+	  fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+	//
+	else if (!reduce_stop || reduce_done)
+	  fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+	//
+     end
+
+
+   //
+   // Word Index Increment Logic
+   //
+   reg	index_b_ff;	// phase flag: selects b_din[15:0] (when set) or b_din[31:16] (when clear) for the MAC array
+
+   always @(posedge clk)
+     // toggle on every cycle inc_index_b is high, otherwise hold the flag at zero
+     if (inc_index_b) index_b_ff <= ~index_b_ff;
+     else index_b_ff <= 1'b0;
+
+   always @(posedge clk)
+     //
+     if (rdy) begin
+	//
+	index_a		<= WORD_INDEX_ZERO;
+	index_b		<= WORD_INDEX_LAST;
+	//
+     end else begin
+	//
+	if (inc_index_a)						index_a	<= WORD_INDEX_NEXT_OR_ZERO(index_a);
+	if (inc_index_b && !index_b_ff)	index_b	<= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
+	//
+     end
+
+
+   //
+   // Wide Operand Buffer
+   //
+   reg	[383:0]	buf_a_wide;
+
+   always @(posedge clk)
+     //
+     if (store_word_a)
+       buf_a_wide <= {buf_a_wide[16 +: 384 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[384 - 2 * 16 +: 16]};
+     else if (shift_wide_a)
+       buf_a_wide <= {buf_a_wide[384-(16+1):0], buf_a_wide[384-16+:16]};
+
+
+   //
+   // Multiplier Array
+   //
+   wire 	mac_inhibit;			// control signal to pause all accumulators
+
+   wire [46: 0] mac[0:23];	// outputs of all accumulators
+   reg [23: 0] 	mac_clear;	// individual per-accumulator clear flag
+
+   assign mac_inhibit = ~enable_mac_ab;
+
+   always @(posedge clk)
+     //
+     if (!clear_mac_ab)
+       mac_clear <= {24{1'b1}};
+     else begin
+
+	if (mac_clear == {24{1'b1}})
+	  mac_clear <= {{22{1'b0}}, 1'b1, 1'b0};
+	else
+	  mac_clear <= (mac_clear[23] == 1'b0) ? {mac_clear[22:0], 1'b0} : {24{1'b1}};
+
+
+     end
+
+     //
+     // Array of parallel multipliers
+     //
+     genvar i;
+     generate for (i=0; i<24; i=i+1)
+       begin : gen_mac_array
+	  //
+	  `CRYPTECH_PRIMITIVE_MAC16 mac16_inst
+		     (
+		      .clk		(clk),
+		      .ce		(~mac_inhibit),
+
+		      .clr		(mac_clear[i]),
+
+		      .a			(buf_a_wide[16*i+:16]),
+		      .b			(index_b_ff ? b_din[15:0] : b_din[31:16]),
+		      .s			(mac[i])
+		      );
+	  //
+       end
+     endgenerate
+
+     //
+     // Intermediate Words
+     //
+     reg	[47*(2*OPERAND_NUM_WORDS-1)-1:0]	si_msb;
+     reg	[47*(2*OPERAND_NUM_WORDS-0)-1:0]	si_lsb;
+
+
+     wire	[47*(2*OPERAND_NUM_WORDS-1)-1:0]	si_msb_new;
+     wire	[47*(2*OPERAND_NUM_WORDS-0)-1:0]	si_lsb_new;
+
+     generate for (i=0; i<24; i=i+1)
+       begin : gen_si_lsb_new
+	  assign si_lsb_new[47*i+:47] = mac[23-i];
+       end
+     endgenerate
+
+     generate for (i=1; i<24; i=i+1)
+       begin : gen_si_msb_new
+	  assign si_msb_new[47*(23-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(23-i)+:47];
+       end
+     endgenerate
+
+     always @(posedge clk) begin
+	//
+	if (shift_si) begin
+	   si_msb <= {{2*47{1'b0}}, si_msb[23*47-1:2*47]};
+	   si_lsb <= {si_msb[2*47-1:0], si_lsb[24*47-1:2*47]};
+	end else begin
+
+	   if (store_si_msb)
+	     si_msb <= si_msb_new;
+
+	   if (store_si_lsb)
+	     si_lsb <= si_lsb_new;
+	end
+
+     end
+
+
+     //
+     // Accumulators
+     //
+     wire	[46: 0]	add47_cw0_s;
+     wire	[46: 0]	add47_cw1_s;
+
+
+     //
+     // cw0, b, cw1, b
+     //
+     reg	[30: 0]	si_prev_dly;
+     reg	[15: 0]	si_next_dly;
+
+     always @(posedge clk)
+       //
+       if (shift_si)
+	 si_prev_dly <= si_lsb[93:63];
+       else
+	 si_prev_dly <= {31{1'b0}};
+
+       always @(posedge clk)
+	 //
+	 si_next_dly <= si_lsb[62:47];
+
+       wire	[46: 0]	add47_cw0_a = si_lsb[46:0];
+       wire	[46: 0]	add47_cw0_b = {{16{1'b0}}, si_prev_dly};
+
+       wire	[46: 0]	add47_cw1_a = add47_cw0_s;
+       wire	[46: 0]	add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}};
+
+       `CRYPTECH_PRIMITIVE_ADD47 add47_cw0_inst
+	 (
+	  .clk	(clk),
+	  .a		(add47_cw0_a),
+	  .b		(add47_cw0_b),
+	  .s		(add47_cw0_s)
+	  );
+
+       `CRYPTECH_PRIMITIVE_ADD47 add47_cw1_inst
+	 (
+	  .clk	(clk),
+	  .a		(add47_cw1_a),
+	  .b		(add47_cw1_b),
+	  .s		(add47_cw1_s)
+	  );
+
+
+
+       //
+       // Full-Size Product
+       //
+       reg	[WORD_COUNTER_WIDTH:0]	bram_c_addr;
+
+       wire	[WORD_COUNTER_WIDTH:0]	reduce_c_addr;
+       wire	[                31:0]	reduce_c_word;
+
+       always @(posedge clk)
+	 // port-A address of bram_c_inst: counts up while store_c_word is high, otherwise returns to zero
+	 if (store_c_word)
+	   bram_c_addr <= bram_c_addr + 1'b1;
+	 else
+	   bram_c_addr <= {(WORD_COUNTER_WIDTH+1){1'b0}};	// zero literal sized to bram_c_addr (was 2*WORD_COUNTER_WIDTH bits, truncated)
+
+	 bram_1rw_1ro_readfirst #
+	   (
+	    .MEM_WIDTH			(32),
+	    .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH + 1)
+	    )
+	 bram_c_inst
+	   (
+	    .clk		(clk),
+
+	    .a_addr	(bram_c_addr),
+	    .a_wr		(store_c_word),
+	    .a_in		(add47_cw1_s[31:0]),
+	    .a_out	(),
+
+	    .b_addr	(reduce_c_addr),
+	    .b_out	(reduce_c_word)
+	    );
+
+
+	 //
+	 // Reduction Stage
+	 //
+	 modular_reductor_384 reduce_384_inst
+	   (
+	    .clk		(clk),
+	    .rst_n	(rst_n),
+
+	    .ena		(reduce_start),
+	    .rdy		(reduce_done),
+
+	    .x_addr	(reduce_c_addr),
+	    .p_addr	(p_addr),
+	    .p_wren	(p_wren),
+
+	    .x_din	(reduce_c_word),
+	    .p_dout	(p_dout)
+	    );
+
+
+	 endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/ecdsa384_modular_reductor.v b/rtl/modular/ecdsa384_modular_reductor.v
new file mode 100644
index 0000000..5681559
--- /dev/null
+++ b/rtl/modular/ecdsa384_modular_reductor.v
@@ -0,0 +1,749 @@
+//------------------------------------------------------------------------------
+//
+// ecdsa384_modular_reductor.v
+// -----------------------------------------------------------------------------
+// Modular reductor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module modular_reductor_384
+  (
+   clk, rst_n,
+   ena, rdy,
+   x_addr, p_addr, p_wren,
+   x_din, p_dout
+   );
+
+   //
+   // Constants
+   //
+   localparam	OPERAND_NUM_WORDS	= 12;
+   localparam	WORD_COUNTER_WIDTH	=  4;
+
+
+   //
+   // Handy Numbers
+   //
+   localparam	[WORD_COUNTER_WIDTH:0]	WORD_INDEX_ZERO	= 0;
+   localparam	[WORD_COUNTER_WIDTH:0]	WORD_INDEX_LAST	= 2 * OPERAND_NUM_WORDS - 1;
+
+
+   //
+   // Handy Functions
+   //
+   function	[WORD_COUNTER_WIDTH:0]	WORD_INDEX_PREVIOUS_OR_LAST;
+      input	[WORD_COUNTER_WIDTH:0]	WORD_INDEX_CURRENT;
+      begin
+	 WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+				       WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+      end
+   endfunction
+
+
+   //
+   // Ports
+   //
+   input		wire										clk;		// system clock
+   input		wire										rst_n;	// active-low async reset
+
+   input		wire										ena;		// enable input
+   output	wire 											rdy;		// ready output
+
+   output	wire [WORD_COUNTER_WIDTH-0:0] 								x_addr;	// index of current X word
+   output	wire [WORD_COUNTER_WIDTH-1:0] 								p_addr;	// index of current P word
+   output	wire 											p_wren;	// store current P word now
+
+   input		wire [                  31:0] 							x_din;	// X
+   output	wire [                  31:0] 								p_dout;	// P = X mod N
+
+
+   //
+   // Word Indices
+   //
+   reg [WORD_COUNTER_WIDTH:0] 										index_x;
+
+
+   /* map registers to output ports */
+   assign x_addr	= index_x;
+
+
+   //
+   // FSM
+   //
+   localparam	FSM_SHREG_WIDTH	= (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1;
+
+   reg [FSM_SHREG_WIDTH-1:0] 										fsm_shreg;
+
+   assign rdy = fsm_shreg[0];
+
+   wire [2 * OPERAND_NUM_WORDS - 1:0] 									fsm_shreg_inc_index_x	= fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS];
+   wire [2 * OPERAND_NUM_WORDS - 1:0] 									fsm_shreg_store_word_z	= fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS];
+   wire [2 *                 5 - 1:0] 									fsm_shreg_reduce_stages	= fsm_shreg[                                        1 +: 2 *                 5];
+
+   wire [5-1:0] 											fsm_shreg_reduce_stage_start;
+   wire [5-1:0] 											fsm_shreg_reduce_stage_stop;
+
+   genvar 												s;
+   generate for (s=0; s<5; s=s+1)
+     begin : gen_fsm_shreg_reduce_stages
+	assign fsm_shreg_reduce_stage_start[5 - (s + 1)]	= fsm_shreg_reduce_stages[2 * (5 - s) - 1];
+	assign fsm_shreg_reduce_stage_stop[5 - (s + 1)]	= fsm_shreg_reduce_stages[2 * (5 - s) - 2];
+     end
+   endgenerate
+
+   wire inc_index_x	= |fsm_shreg_inc_index_x;
+   wire store_word_z	= |fsm_shreg_store_word_z;
+   wire reduce_start	= |fsm_shreg_reduce_stage_start;
+   wire reduce_stop	= |fsm_shreg_reduce_stage_stop;
+   wire store_p		=  fsm_shreg_reduce_stage_stop[0];
+
+
+   wire	reduce_adder0_done;
+   wire	reduce_adder1_done;
+   wire	reduce_subtractor_done;
+
+   wire	reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done;
+
+   always @(posedge clk or negedge rst_n)
+     //
+     if (rst_n == 1'b0)
+       //
+       fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+   //
+     else begin
+	//
+	if (rdy)
+	  //
+	  fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+	//
+	else if (!reduce_stop || reduce_done_all)
+	  //
+	  fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+	//
+     end
+
+
+   //
+   // Word Index Increment Logic
+   //
+   always @(posedge clk)
+     //
+     if (rdy)
+       //
+       index_x <= WORD_INDEX_LAST;
+   //
+     else if (inc_index_x)
+       //
+       index_x	<= WORD_INDEX_PREVIOUS_OR_LAST(index_x);
+
+
+   //
+   // Look-up Table
+   //
+
+   //
+   // Take a look at the corresponding C model for more information
+   // on how exactly the math behind reduction works. The first step
+   // is to assemble ten 384-bit values ("z-words") from 32-bit parts
+   // of the full 768-bit product ("c-word"). The problem with z10 is
+   // that it contains c23 two times. This implementation scans from
+   // c23 to c0 and writes current part of c-word into corresponding
+   // parts of z-words. Since those 32-bit parts are stored in block
+   // memories, one source word can only be written to one location in
+   // every z-word at a time. The trick is to delay c23 and then write
+   // the delayed value at the corresponding location in z10 instead of
+   // the next c22. "z_save" flag is used to indicate that the current
+   // word should be delayed and written once again during the next cycle.
+   //
+
+
+   reg	[10*WORD_COUNTER_WIDTH-1:0]	z_addr;	//
+   reg [10                   -1:0] 	z_wren;	//
+   reg [10                   -1:0] 	z_mask;	// mask input to store zero word
+   reg [10                   -1:0] 	z_save;	// save previous word once again
+
+   always @(posedge clk)
+     //
+     if (inc_index_x)
+       //
+       case (index_x)
+	 //
+	 //                    s10     s9     s8     s7     s6     s5     s4     s3     s2     s1
+	 //                     ||     ||     ||     ||     ||     ||     ||     ||     ||     ||
+	 5'd00:	z_addr <= {4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'd00};
+	 5'd01:	z_addr <= {4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'd01};
+	 5'd02:	z_addr <= {4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'd02};
+	 5'd03:	z_addr <= {4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'd03};
+	 5'd04:	z_addr <= {4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'd04};
+	 5'd05:	z_addr <= {4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'd05};
+	 5'd06:	z_addr <= {4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'd06};
+	 5'd07:	z_addr <= {4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'd07};
+	 5'd08:	z_addr <= {4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'd08};
+	 5'd09:	z_addr <= {4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'd09};
+	 5'd10:	z_addr <= {4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'd10};
+	 5'd11:	z_addr <= {4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'dx,  4'd11};
+
+	 5'd12:	z_addr <= {4'd00, 4'd00, 4'd01, 4'd01, 4'd00, 4'd04, 4'd03, 4'd00, 4'd00, 4'dx};
+	 5'd13:	z_addr <= {4'd01, 4'd05, 4'd02, 4'd02, 4'd01, 4'd05, 4'd04, 4'd01, 4'd01, 4'dx};
+	 5'd14:	z_addr <= {4'd02, 4'd06, 4'd03, 4'd06, 4'd02, 4'd06, 4'd05, 4'd02, 4'd02, 4'dx};
+	 5'd15:	z_addr <= {4'd05, 4'd07, 4'd04, 4'd07, 4'd03, 4'd07, 4'd06, 4'd03, 4'd03, 4'dx};
+	 5'd16:	z_addr <= {4'd06, 4'd08, 4'd05, 4'd08, 4'd08, 4'd08, 4'd07, 4'd04, 4'd07, 4'dx};
+	 5'd17:	z_addr <= {4'd07, 4'd09, 4'd06, 4'd09, 4'd09, 4'd09, 4'd08, 4'd05, 4'd08, 4'dx};
+	 5'd18:	z_addr <= {4'd08, 4'd10, 4'd07, 4'd10, 4'd10, 4'd10, 4'd09, 4'd06, 4'd09, 4'dx};
+	 5'd19:	z_addr <= {4'd09, 4'd11, 4'd08, 4'd11, 4'd11, 4'd11, 4'd10, 4'd07, 4'd10, 4'dx};
+	 5'd20:	z_addr <= {4'd10, 4'd01, 4'd09, 4'd00, 4'd04, 4'd03, 4'd11, 4'd08, 4'd11, 4'dx};
+	 5'd21:	z_addr <= {4'd11, 4'd02, 4'd10, 4'd03, 4'd05, 4'd00, 4'd00, 4'd09, 4'd04, 4'dx};
+	 5'd22:	z_addr <= {4'd04, 4'd03, 4'd11, 4'd04, 4'd06, 4'd02, 4'd01, 4'd10, 4'd05, 4'dx};
+	 5'd23:	z_addr <= {4'd03, 4'd04, 4'd00, 4'd05, 4'd07, 4'd01, 4'd02, 4'd11, 4'd06, 4'dx};
+	 //
+         default:	z_addr <= {10*WORD_COUNTER_WIDTH{1'bX}};
+	 //
+       endcase
+
+   always @(posedge clk)
+     //
+     case (index_x)
+       //
+       //                    10     9     8     7     6     5     4     3     2     1
+       //                     |     |     |     |     |     |     |     |     |     |
+       5'd00:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       5'd01:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       5'd02:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       5'd03:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       5'd04:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       5'd05:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       5'd06:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       5'd07:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       5'd08:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       5'd09:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       5'd10:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+       5'd11:	z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+
+       5'd12:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       5'd13:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       5'd14:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       5'd15:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       5'd16:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       5'd17:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       5'd18:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       5'd19:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       5'd20:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       5'd21:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       5'd22:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       5'd23:	z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+       //
+       default:	z_wren <= {10{1'b0}};
+       //
+     endcase
+
+   always @(posedge clk)
+     //
+     if (inc_index_x)
+       //
+       case (index_x)
+	 //
+	 //                    10     9     8     7     6     5     4     3     2     1
+	 //                     |     |     |     |     |     |     |     |     |     |
+	 5'd00:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd01:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd02:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd03:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd04:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd05:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd06:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd07:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd08:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd09:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd10:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd11:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+
+	 5'd12:	z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0};
+	 5'd13:	z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0};
+	 5'd14:	z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0};
+	 5'd15:	z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0};
+	 5'd16:	z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0};
+	 5'd17:	z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0};
+	 5'd18:	z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0};
+	 5'd19:	z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0};
+	 5'd20:	z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0};
+	 5'd21:	z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd22:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd23:	z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 //
+         default:	z_mask <= {10{1'bX}};
+	 //
+       endcase
+
+   always @(posedge clk)
+     //
+     if (inc_index_x)
+       //
+       case (index_x)
+	 //
+	 //                    10     9     8     7     6     5     4     3     2     1
+	 //                     |     |     |     |     |     |     |     |     |     |
+	 5'd00:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd01:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd02:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd03:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd04:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd05:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd06:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd07:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd08:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd09:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd10:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd11:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+
+	 5'd12:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd13:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd14:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd15:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd16:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd17:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd18:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd19:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd20:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd21:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd22:	z_save <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 5'd23:	z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+	 //
+         default:	z_save <= {10{1'bX}};
+	 //
+       endcase
+
+
+    //
+    // Modulus
+    //
+    wire [WORD_COUNTER_WIDTH-1:0] n_addr;
+    wire [                32-1:0] n_dout; 
+    ecdsa384_modulus_distmem p384_q_rom
+    (
+    	.clk    (clk),
+        .b_addr (n_addr),
+        .b_out  (n_dout)
+    );    
+
+   //
+   // Intermediate Numbers
+   //
+   reg [WORD_COUNTER_WIDTH-1:0] 	reduce_z_addr[1:10];
+   wire [                32-1:0] 	reduce_z_dout[1:10];
+
+   reg [31: 0] 				x_din_dly;
+   always @(posedge clk)
+     //
+     x_din_dly <= x_din;
+
+
+   genvar 				z;
+   generate for (z=1; z<=10; z=z+1)
+     //
+     begin : gen_z_bram
+	//
+	bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+	bram_c_inst
+		   (
+		    .clk		(clk),
+
+		    .a_addr	(z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]),
+		    .a_wr		(z_wren[z-1] & store_word_z),
+		    .a_in		(z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)),
+		    .a_out	(),
+
+		    .b_addr	(reduce_z_addr[z]),
+		    .b_out	(reduce_z_dout[z])
+		    );
+	//
+     end
+      //
+   endgenerate
+
+
+
+
+   wire	[                32-1:0]	bram_sum0_wr_din;
+   wire [WORD_COUNTER_WIDTH-1:0] 	bram_sum0_wr_addr;
+   wire 				bram_sum0_wr_wren;
+
+   wire [                32-1:0] 	bram_sum1_wr_din;
+   wire [WORD_COUNTER_WIDTH-1:0] 	bram_sum1_wr_addr;
+   wire 				bram_sum1_wr_wren;
+
+   wire [                32-1:0] 	bram_diff_wr_din;
+   wire [WORD_COUNTER_WIDTH-1:0] 	bram_diff_wr_addr;
+   wire 				bram_diff_wr_wren;
+
+   wire [                32-1:0] 	bram_sum0_rd_dout;
+   reg [WORD_COUNTER_WIDTH-1:0] 	bram_sum0_rd_addr;
+
+   wire [                32-1:0] 	bram_sum1_rd_dout;
+   reg [WORD_COUNTER_WIDTH-1:0] 	bram_sum1_rd_addr;
+
+   wire [                32-1:0] 	bram_diff_rd_dout;
+   reg [WORD_COUNTER_WIDTH-1:0] 	bram_diff_rd_addr;
+
+
+   bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+   bram_sum0_inst
+     (
+      .clk		(clk),
+
+      .a_addr	(bram_sum0_wr_addr),
+      .a_wr		(bram_sum0_wr_wren),
+      .a_in		(bram_sum0_wr_din),
+      .a_out	(),
+
+      .b_addr	(bram_sum0_rd_addr),
+      .b_out	(bram_sum0_rd_dout)
+      );
+
+   bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+   bram_sum1_inst
+     (
+      .clk		(clk),
+
+      .a_addr	(bram_sum1_wr_addr),
+      .a_wr		(bram_sum1_wr_wren),
+      .a_in		(bram_sum1_wr_din),
+      .a_out	(),
+
+      .b_addr	(bram_sum1_rd_addr),
+      .b_out	(bram_sum1_rd_dout)
+      );
+
+   bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+   bram_diff_inst
+     (
+      .clk		(clk),
+
+      .a_addr	(bram_diff_wr_addr),
+      .a_wr		(bram_diff_wr_wren),
+      .a_in		(bram_diff_wr_din),
+      .a_out	(),
+
+      .b_addr	(bram_diff_rd_addr),
+      .b_out	(bram_diff_rd_dout)
+      );
+
+
+   wire [WORD_COUNTER_WIDTH-1:0] 	adder0_ab_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] 	adder1_ab_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] 	subtractor_ab_addr;
+
+   reg [                32-1:0] 	adder0_a_din;
+   reg [                32-1:0] 	adder0_b_din;
+
+   reg [                32-1:0] 	adder1_a_din;
+   reg [                32-1:0] 	adder1_b_din;
+
+   reg [                32-1:0] 	subtractor_a_din;
+   reg [                32-1:0] 	subtractor_b_din;
+
+   // n_addr: only subtractor_inst drives it, both adders leave n_addr open (author's note: all modules are in sync)
+
+   modular_adder #
+     (
+      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS),
+      .WORD_COUNTER_WIDTH	(WORD_COUNTER_WIDTH)
+      )
+   adder_inst0
+     (
+      .clk			(clk),
+      .rst_n		(rst_n),
+
+      .ena			(reduce_start),  // same enable as adder_inst1 and subtractor_inst
+      .rdy			(reduce_adder0_done),
+
+      .ab_addr		(adder0_ab_addr),  // routed to the operand sources by the address selector
+      .n_addr		(),  // unconnected
+      .s_addr		(bram_sum0_wr_addr),  // result goes to port A of bram_sum0_inst
+      .s_wren		(bram_sum0_wr_wren),
+
+      .a_din		(adder0_a_din),  // per-stage operand muxes, see the "adder 0" always block
+      .b_din		(adder0_b_din),
+      .n_din		(n_dout),  // the same n_dout feeds all three units
+      .s_dout		(bram_sum0_wr_din)  // also assigned to p_dout
+      );
+
+   modular_adder #
+     (
+      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS),
+      .WORD_COUNTER_WIDTH	(WORD_COUNTER_WIDTH)
+      )
+   adder_inst1
+     (
+      .clk			(clk),
+      .rst_n		(rst_n),
+
+      .ena			(reduce_start),  // same enable as adder_inst0 and subtractor_inst
+      .rdy			(reduce_adder1_done),
+
+      .ab_addr		(adder1_ab_addr),  // routed to the operand sources by the address selector
+      .n_addr		(),  // unconnected
+      .s_addr		(bram_sum1_wr_addr),  // result goes to port A of bram_sum1_inst
+      .s_wren		(bram_sum1_wr_wren),
+
+      .a_din		(adder1_a_din),  // per-stage operand muxes, see the "adder 1" always block
+      .b_din		(adder1_b_din),
+      .n_din		(n_dout),  // the same n_dout feeds all three units
+      .s_dout		(bram_sum1_wr_din)
+      );
+
+   modular_subtractor #
+     (
+      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS),
+      .WORD_COUNTER_WIDTH	(WORD_COUNTER_WIDTH)
+      )
+   subtractor_inst
+     (
+      .clk			(clk),
+      .rst_n		(rst_n),
+
+      .ena			(reduce_start),  // same enable as adder_inst0 and adder_inst1
+      .rdy			(reduce_subtractor_done),
+
+      .ab_addr		(subtractor_ab_addr),  // routed to the operand sources by the address selector
+      .n_addr		(n_addr),  // the only one of the three units whose n_addr is connected
+      .d_addr		(bram_diff_wr_addr),  // result goes to port A of bram_diff_inst
+      .d_wren		(bram_diff_wr_wren),
+
+      .a_din		(subtractor_a_din),  // per-stage operand muxes, see the "subtractor" always block
+      .b_din		(subtractor_b_din),
+      .n_din		(n_dout),  // the same n_dout feeds all three units
+      .d_dout		(bram_diff_wr_din)
+      );
+
+
+   // Address (Operand) Selector: for each value of fsm_shreg_reduce_stage_stop,
+   // drives the reduce_z_addr[] entries and the sum0/sum1/diff read addresses that
+   // are used in that stage from the adder/subtractor word counters; the rest get X.
+   always @(*)
+     // case items are the five one-hot patterns; any other value takes the default
+     case (fsm_shreg_reduce_stage_stop)
+       // 10000: adder0 addresses z[1] and z[3], adder1 z[2], subtractor z[8]; buffer read addresses are X
+       5'b10000: begin
+	  reduce_z_addr[ 1]	= adder0_ab_addr;
+	  reduce_z_addr[ 2]	= adder1_ab_addr;
+	  reduce_z_addr[ 3]	= adder0_ab_addr;
+	  reduce_z_addr[ 4]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 5]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 6]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 7]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 8]	= subtractor_ab_addr;
+	  reduce_z_addr[ 9]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[10]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum0_rd_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum1_rd_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+       end
+       // 01000: adder0 addresses z[4] and sum0, adder1 z[5] and sum1, subtractor z[9] and diff
+       5'b01000: begin
+	  //
+	  reduce_z_addr[ 1]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 2]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 3]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 4]	= adder0_ab_addr;
+	  reduce_z_addr[ 5]	= adder1_ab_addr;
+	  reduce_z_addr[ 6]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 7]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 8]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 9]	= subtractor_ab_addr;
+	  reduce_z_addr[10]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum0_rd_addr	= adder0_ab_addr;
+	  bram_sum1_rd_addr	= adder1_ab_addr;
+	  bram_diff_rd_addr = subtractor_ab_addr;
+       end
+       // 00100: adder0 addresses z[6] and sum0, adder1 z[7] and sum1, subtractor z[10] and diff
+       5'b00100: begin
+	  //
+	  reduce_z_addr[1]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[2]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[3]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[4]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[5]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[6]	= adder0_ab_addr;
+	  reduce_z_addr[7]	= adder1_ab_addr;
+	  reduce_z_addr[8]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[9]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[10]	= subtractor_ab_addr;
+	  bram_sum0_rd_addr	= adder0_ab_addr;
+	  bram_sum1_rd_addr	= adder1_ab_addr;
+	  bram_diff_rd_addr = subtractor_ab_addr;
+       end
+       // 00010: no z operand is addressed; adder0 addresses both sum0 and sum1, subtractor addresses diff
+       5'b00010: begin
+	  //
+	  reduce_z_addr[ 1]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 2]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 3]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 4]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 5]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 6]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 7]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 8]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 9]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[10]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum0_rd_addr	= adder0_ab_addr;
+	  bram_sum1_rd_addr	= adder0_ab_addr;
+	  bram_diff_rd_addr = subtractor_ab_addr;
+       end
+       // 00001: no z operand is addressed; adder0 addresses both sum0 and diff, sum1 read address is X
+       5'b00001: begin
+	  //
+	  reduce_z_addr[ 1]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 2]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 3]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 4]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 5]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 6]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 7]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 8]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 9]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[10]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum0_rd_addr	= adder0_ab_addr;
+	  bram_sum1_rd_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_diff_rd_addr = adder0_ab_addr;
+       end
+       // any other value: every address is X
+       default: begin
+	  reduce_z_addr[ 1]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 2]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 3]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 4]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 5]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 6]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 7]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 8]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[ 9]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  reduce_z_addr[10]	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum0_rd_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_sum1_rd_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+	  bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+       end
+       //
+     endcase
+
+
+   // adder 0 operand muxes, selected by fsm_shreg_reduce_stage_stop:
+   //   a = z[1] in stage 10000, afterwards the sum0 buffer read data
+   //   b = z[3], z[4], z[6], then the sum1 buffer, then the diff buffer read data
+   always @(*) begin
+      // operand A
+      case (fsm_shreg_reduce_stage_stop)
+	5'b10000:	adder0_a_din = reduce_z_dout[1];
+	5'b01000:	adder0_a_din = bram_sum0_rd_dout;
+	5'b00100:	adder0_a_din = bram_sum0_rd_dout;
+	5'b00010:	adder0_a_din = bram_sum0_rd_dout;
+	5'b00001:	adder0_a_din = bram_sum0_rd_dout;
+	default:		adder0_a_din = {32{1'bX}};
+      endcase
+      // operand B
+      case (fsm_shreg_reduce_stage_stop)
+	5'b10000:	adder0_b_din = reduce_z_dout[3];
+	5'b01000:	adder0_b_din = reduce_z_dout[4];
+	5'b00100:	adder0_b_din = reduce_z_dout[6];
+	5'b00010:	adder0_b_din = bram_sum1_rd_dout;
+	5'b00001:	adder0_b_din = bram_diff_rd_dout;
+	default:		adder0_b_din = {32{1'bX}};
+      endcase
+      //
+   end
+
+   // adder 1 operand muxes (driven only in stages 10000/01000/00100, X otherwise):
+   //   a = z[2] in stage 10000, afterwards the sum1 buffer read data
+   //   b = z[2] (so the adder sees z[2] on both inputs), then z[5], then z[7]
+   always @(*) begin
+      // operand A
+      case (fsm_shreg_reduce_stage_stop)
+	5'b10000:	adder1_a_din = reduce_z_dout[2];
+	5'b01000:	adder1_a_din = bram_sum1_rd_dout;
+	5'b00100:	adder1_a_din = bram_sum1_rd_dout;
+	5'b00010:	adder1_a_din = {32{1'bX}};
+	5'b00001:	adder1_a_din = {32{1'bX}};
+	default:		adder1_a_din = {32{1'bX}};
+      endcase
+      // operand B
+      case (fsm_shreg_reduce_stage_stop)
+	5'b10000:	adder1_b_din = reduce_z_dout[2];
+	5'b01000:	adder1_b_din = reduce_z_dout[5];
+	5'b00100:	adder1_b_din = reduce_z_dout[7];
+	5'b00010:	adder1_b_din = {32{1'bX}};
+	5'b00001:	adder1_b_din = {32{1'bX}};
+	default:		adder1_b_din = {32{1'bX}};
+      endcase
+      //
+   end
+
+
+   // subtractor operand muxes, selected by fsm_shreg_reduce_stage_stop:
+   //   a = constant zero in stage 10000, afterwards the diff buffer read data
+   //   b = z[8], z[9], z[10], then constant zero; both operands are X in stage 00001
+   always @(*) begin
+      // operand A
+      case (fsm_shreg_reduce_stage_stop)
+	5'b10000:	subtractor_a_din = {32{1'b0}};
+	5'b01000:	subtractor_a_din = bram_diff_rd_dout;
+	5'b00100:	subtractor_a_din = bram_diff_rd_dout;
+	5'b00010:	subtractor_a_din = bram_diff_rd_dout;
+	5'b00001:	subtractor_a_din = {32{1'bX}};
+	default:		subtractor_a_din = {32{1'bX}};
+      endcase
+      // operand B
+      case (fsm_shreg_reduce_stage_stop)
+	5'b10000:	subtractor_b_din = reduce_z_dout[8];
+	5'b01000:	subtractor_b_din = reduce_z_dout[9];
+	5'b00100:	subtractor_b_din = reduce_z_dout[10];
+	5'b00010:	subtractor_b_din = {32{1'b0}};
+	5'b00001:	subtractor_b_din = {32{1'bX}};
+	default:		subtractor_b_din = {32{1'bX}};
+      endcase
+      //
+   end
+
+
+   // Address Mapping: the p_* signals mirror the sum0 write port, i.e. what
+   // adder_inst0 writes into bram_sum0_inst; p_wren is additionally gated by
+   // store_p, so it can only be high while store_p is high.
+   assign p_addr	= bram_sum0_wr_addr;
+   assign p_wren	= bram_sum0_wr_wren & store_p;
+   assign p_dout	= bram_sum0_wr_din;
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/util/bram_1rw_1ro_readfirst.v b/rtl/modular/ecdsa384_modulus_distmem.v
similarity index 62%
rename from rtl/util/bram_1rw_1ro_readfirst.v
rename to rtl/modular/ecdsa384_modulus_distmem.v
index db62726..fad28c8 100644
--- a/rtl/util/bram_1rw_1ro_readfirst.v
+++ b/rtl/modular/ecdsa384_modulus_distmem.v
@@ -1,6 +1,6 @@
 //======================================================================
 //
-// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+// Copyright (c) 2016, 2018 NORDUnet A/S All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -30,72 +30,41 @@
 //
 //======================================================================
 
-`timescale 1ns / 1ps
-
-module bram_1rw_1ro_readfirst
-  #(parameter MEM_WIDTH            = 32,
-    parameter MEM_ADDR_BITS        = 8)
-   (
-    input wire 			   clk,
-
-    input wire [MEM_ADDR_BITS-1:0] a_addr,
-    input wire 			   a_wr,
-    input wire [MEM_WIDTH-1:0] 	   a_in,
-    output wire [MEM_WIDTH-1:0]    a_out,
-
-    input wire [MEM_ADDR_BITS-1:0] b_addr,
-    output wire [MEM_WIDTH-1:0]    b_out
-    );
-
-
-   //
-   // BRAM
-   //
-   (* RAM_STYLE="BLOCK" *)
-   reg [MEM_WIDTH-1:0] 		   bram[0:(2**MEM_ADDR_BITS)-1];
-
-
-   //
-   // Initialization
-   //
-   /**
-    integer c;
-    initial begin
-    for (c=0; c<(2**MEM_ADDR_BITS); c=c+1)
-    bram[c] = {MEM_WIDTH{1'b0}};
-	end
-    **/
-
+module ecdsa384_modulus_distmem  // constant 12 x 32-bit lookup with a registered output
+  (
+   input wire 		clk,
+   input wire [ 4-1:0] 	b_addr,  // word index; only 0..11 are decoded
+   output wire [32-1:0] b_out  // registered: reflects b_addr as sampled at the previous rising clk edge
+   );
 
 
    //
    // Output Registers
    //
-   reg [MEM_WIDTH-1:0] 		   bram_reg_a;
-   reg [MEM_WIDTH-1:0] 		   bram_reg_b;
+   (* ram_style="distributed" *)  // synthesis attribute attached to the output register declared next
+   reg [31:0] 		bram_reg_b;
 
-   assign a_out = bram_reg_a;
    assign b_out = bram_reg_b;
 
 
-   //
-   // Read-Write Port A
-   //
-   always @(posedge clk) begin
-      //
-      bram_reg_a <= bram[a_addr];
-      //
-      if (a_wr) bram[a_addr] <= a_in;
-      //
-   end
-
-
    //
    // Read-Only Port B
    //
    always @(posedge clk)
      //
-     bram_reg_b <= bram[b_addr];
-
+     case (b_addr)  // words equal 2^384 - 2^128 - 2^96 + 2^32 - 1 (the NIST P-384 prime), address 0 = least-significant word
+       4'b0000:	bram_reg_b <= 32'hffffffff;
+       4'b0001:	bram_reg_b <= 32'h00000000;
+       4'b0010:	bram_reg_b <= 32'h00000000;
+       4'b0011:	bram_reg_b <= 32'hffffffff;
+       4'b0100:	bram_reg_b <= 32'hfffffffe;
+       4'b0101:	bram_reg_b <= 32'hffffffff;
+       4'b0110:	bram_reg_b <= 32'hffffffff;
+       4'b0111:	bram_reg_b <= 32'hffffffff;
+       4'b1000:	bram_reg_b <= 32'hffffffff;
+       4'b1001:	bram_reg_b <= 32'hffffffff;
+       4'b1010:	bram_reg_b <= 32'hffffffff;
+       4'b1011:	bram_reg_b <= 32'hffffffff;
+     endcase  // no default: for b_addr 12..15 bram_reg_b keeps its previous value
 
 endmodule
diff --git a/rtl/modular/modular_adder.v b/rtl/modular/modular_adder.v
deleted file mode 100644
index 189059d..0000000
--- a/rtl/modular/modular_adder.v
+++ /dev/null
@@ -1,298 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// modular_adder.v
-// -----------------------------------------------------------------------------
-// Modular adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_adder
-  (
-   clk, rst_n,
-   ena, rdy,
-   ab_addr, n_addr, s_addr, s_wren,
-   a_din, b_din, n_din, s_dout
-   );
-
-
-   //
-   // Parameters
-   //
-   parameter	OPERAND_NUM_WORDS	= 8;
-   parameter	WORD_COUNTER_WIDTH	= 3;
-
-
-   //
-   // Handy Numbers
-   //
-   localparam	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_ZERO	= 0;
-   localparam	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_LAST	= OPERAND_NUM_WORDS - 1;
-
-
-   //
-   // Handy Functions
-   //
-   function	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_NEXT_OR_ZERO;
-      input	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_CURRENT;
-      begin
-	 WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
-				   WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
-      end
-   endfunction
-
-
-   //
-   // Ports
-   //
-   input		wire										clk;			// system clock
-   input		wire										rst_n;		// active-low async reset
-
-   input		wire										ena;			// enable input
-   output	wire 											rdy;			// ready output
-
-   output	wire [WORD_COUNTER_WIDTH-1:0] 								ab_addr;		// index of current A and B words
-   output	wire [WORD_COUNTER_WIDTH-1:0] 								n_addr;		// index of current N word
-   output	wire [WORD_COUNTER_WIDTH-1:0] 								s_addr;		// index of current S word
-   output	wire 											s_wren;		// store current S word now
-
-   input		wire [                  31:0] 							a_din;		// A
-   input		wire [                  31:0] 							b_din;		// B
-   input		wire [                  31:0] 							n_din;		// N
-   output	wire [                  31:0] 								s_dout;		// S = (A + B) mod N
-
-
-   //
-   // Word Indices
-   //
-   reg [WORD_COUNTER_WIDTH-1:0] 									index_ab;
-   reg [WORD_COUNTER_WIDTH-1:0] 									index_n;
-   reg [WORD_COUNTER_WIDTH-1:0] 									index_s;
-
-   /* map registers to output ports */
-   assign ab_addr	= index_ab;
-   assign n_addr	= index_n;
-   assign s_addr	= index_s;
-
-
-   //
-   // Adder
-   //
-   wire [31: 0] 											add32_s;
-   wire 												add32_c_in;
-   wire 												add32_c_out;
-
-   adder32_wrapper adder32
-     (
-      .clk		(clk),
-      .a			(a_din),
-      .b			(b_din),
-      .s			(add32_s),
-      .c_in		(add32_c_in),
-      .c_out	(add32_c_out)
-      );
-
-
-   //
-   // Subtractor
-   //
-   wire [31: 0] 											sub32_d;
-   wire 												sub32_b_in;
-   wire 												sub32_b_out;
-
-   subtractor32_wrapper subtractor32
-     (
-      .clk		(clk),
-      .a			(add32_s),
-      .b			(n_din),
-      .d			(sub32_d),
-      .b_in		(sub32_b_in),
-      .b_out	(sub32_b_out)
-      );
-
-
-   //
-   // FSM
-   //
-
-   localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5;
-
-   reg [FSM_SHREG_WIDTH-1:0] 										fsm_shreg;
-
-   assign rdy = fsm_shreg[0];
-
-   wire [OPERAND_NUM_WORDS-1:0] 									fsm_shreg_inc_index_ab	= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
-   wire [OPERAND_NUM_WORDS-1:0] 									fsm_shreg_inc_index_n		= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
-   wire [OPERAND_NUM_WORDS-1:0] 									fsm_shreg_store_sum_ab	= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
-   wire [OPERAND_NUM_WORDS-1:0] 									fsm_shreg_store_sum_ab_n	= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
-   wire [OPERAND_NUM_WORDS-1:0] 									fsm_shreg_store_data_s	= fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)];
-   wire [OPERAND_NUM_WORDS-1:0] 									fsm_shreg_inc_index_s		= fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)];
-
-   wire 												fsm_latch_msb_carry	= fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
-   wire 												fsm_latch_msb_borrow	= fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
-
-   wire 												inc_index_ab		= |fsm_shreg_inc_index_ab;
-   wire 												inc_index_n		= |fsm_shreg_inc_index_n;
-   wire 												store_sum_ab		= |fsm_shreg_store_sum_ab;
-   wire 												store_sum_ab_n	= |fsm_shreg_store_sum_ab_n;
-   wire 												store_data_s		= |fsm_shreg_store_data_s;
-   wire 												inc_index_s		= |fsm_shreg_inc_index_s;
-
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0)
-       //
-       fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
-   //
-     else begin
-	//
-	if (rdy)	fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
-	//
-	else		fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
-	//
-     end
-
-
-
-
-
-
-
-   //
-   // Carry & Borrow Masking Logic
-   //
-   reg	add32_c_mask;
-   reg	sub32_b_mask;
-
-   always @(posedge clk) begin
-      //
-      add32_c_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
-      sub32_b_mask <= (index_n  == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
-      //
-   end
-
-   assign add32_c_in = add32_c_out & ~add32_c_mask;
-   assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
-
-
-   //
-   // Carry & Borrow Latch Logic
-   //
-   reg add32_carry_latch;
-   reg sub32_borrow_latch;
-
-   always @(posedge clk) begin
-      //
-      if (fsm_latch_msb_carry) add32_carry_latch <= add32_c_out;
-      if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out;
-      //
-   end
-
-
-   //
-   // Intermediate Results
-   //
-   reg	[32*OPERAND_NUM_WORDS-1:0]		s_ab;
-   reg [32*OPERAND_NUM_WORDS-1:0] 		s_ab_n;
-
-   always @(posedge clk)
-     //
-     if (store_data_s) begin
-	//
-	s_ab		<= {{32{1'bX}}, s_ab[32*OPERAND_NUM_WORDS-1:32]};
-	s_ab_n	<= {{32{1'bX}}, s_ab_n[32*OPERAND_NUM_WORDS-1:32]};
-	//
-     end else begin
-	//
-	if (store_sum_ab) s_ab <= {add32_s, s_ab[32*OPERAND_NUM_WORDS-1:32]};
-	if (store_sum_ab_n) s_ab_n <= {sub32_d, s_ab_n[32*OPERAND_NUM_WORDS-1:32]};
-	//
-     end
-
-
-   //
-   // Word Index Increment Logic
-   //
-   always @(posedge clk)
-     //
-     if (rdy) begin
-	//
-	index_ab		<= WORD_INDEX_ZERO;
-	index_n		<= WORD_INDEX_ZERO;
-	index_s		<= WORD_INDEX_ZERO;
-	//
-     end else begin
-	//
-	if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab);
-	if (inc_index_n)	index_n	<= WORD_INDEX_NEXT_OR_ZERO(index_n);
-	if (inc_index_s)	index_s	<= WORD_INDEX_NEXT_OR_ZERO(index_s);
-	//
-     end
-
-
-   //
-   // Output Sum Selector
-   //
-   wire	mux_select_ab = sub32_borrow_latch && !add32_carry_latch;
-
-
-   //
-   // Output Data and Write Enable Logic
-   //
-   reg 	s_wren_reg;
-   reg [31: 0] s_dout_reg;
-   wire [31: 0] s_dout_mux = mux_select_ab ? s_ab[31:0] : s_ab_n[31:0];
-
-   assign s_wren = s_wren_reg;
-   assign s_dout = s_dout_reg;
-
-   always @(posedge clk)
-     //
-     if (rdy) begin
-	//
-	s_wren_reg	<= 1'b0;
-	s_dout_reg	<= {32{1'bX}};
-	//
-     end else begin
-	//
-	s_wren_reg <= store_data_s;
-	s_dout_reg <= store_data_s ? s_dout_mux : {32{1'bX}};
-	//
-     end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_copy.v b/rtl/modular/modular_invertor/helper/modinv_helper_copy.v
deleted file mode 100644
index f097362..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_copy.v
+++ /dev/null
@@ -1,148 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_copy
-  (
-   clk, rst_n,
-   ena, rdy,
-   s_addr,  s_din,
-   a1_addr,        a1_wren, a1_dout
-   );
-
-
-   //
-   // Parameters
-   //
-   parameter OPERAND_NUM_WORDS	= 8;
-   parameter OPERAND_ADDR_BITS	= 3;
-
-   parameter BUFFER_NUM_WORDS		= 9;
-   parameter BUFFER_ADDR_BITS		= 4;
-
-
-   //
-   // clog2
-   //
-`include "../modinv_clog2.v"
-
-
-   //
-   // Constants
-   //
-   localparam PROC_NUM_CYCLES	= OPERAND_NUM_WORDS + 2;
-   localparam PROC_CNT_BITS	= clog2(PROC_NUM_CYCLES);
-
-
-   //
-   // Ports
-   //
-   input		wire									clk;
-   input		wire									rst_n;
-
-   input		wire									ena;
-   output	wire 										rdy;
-
-   output	wire [ BUFFER_ADDR_BITS-1:0] 							s_addr;
-   output	wire [OPERAND_ADDR_BITS-1:0] 							a1_addr;
-
-   output	wire 										a1_wren;
-
-   input		wire [                 31:0] 						s_din;
-
-   output	wire [                 31:0] 							a1_dout;
-
-
-   //
-   // Counter
-   //
-   reg [PROC_CNT_BITS-1:0] 									proc_cnt;
-
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_max	= PROC_NUM_CYCLES - 1;
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_zero	= {PROC_CNT_BITS{1'b0}};
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_next	= (proc_cnt < proc_cnt_max) ?
-												proc_cnt + 1'b1 : proc_cnt_zero;
-
-   //
-   // Addresses
-   //
-   reg [OPERAND_ADDR_BITS-1:0] 									addr_s;
-
-   wire [OPERAND_ADDR_BITS-1:0] 								addr_s_max		= OPERAND_NUM_WORDS - 1;
-   wire [OPERAND_ADDR_BITS-1:0] 								addr_s_zero		= {OPERAND_ADDR_BITS{1'b0}};
-   wire [OPERAND_ADDR_BITS-1:0] 								addr_s_next		= (addr_s < addr_s_max) ?
-												addr_s + 1'b1 : addr_s_zero;
-
-   reg [OPERAND_ADDR_BITS-1:0] 									addr_a1;
-
-   wire [OPERAND_ADDR_BITS-1:0] 								addr_a1_max		= OPERAND_NUM_WORDS - 1;
-   wire [OPERAND_ADDR_BITS-1:0] 								addr_a1_zero	= {OPERAND_ADDR_BITS{1'b0}};
-   wire [OPERAND_ADDR_BITS-1:0] 								addr_a1_next	= (addr_a1 < addr_a1_max) ?
-												addr_a1 + 1'b1 : addr_a1_zero;
-
-   assign s_addr  = {{(BUFFER_ADDR_BITS - OPERAND_ADDR_BITS){1'b0}}, addr_s};
-   assign a1_addr = addr_a1;
-
-
-   //
-   // Ready Flag
-   //
-   assign rdy = (proc_cnt == proc_cnt_zero);
-
-
-   //
-   // Address Increment Logic
-   //
-   wire 											inc_addr_s;
-   wire 											inc_addr_a1;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_s_start		= 1;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_s_stop		= OPERAND_NUM_WORDS + 0;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_a1_start	= 2;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_a1_stop		= OPERAND_NUM_WORDS + 1;
-
-   assign inc_addr_s		= (proc_cnt >= cnt_inc_addr_s_start)  && (proc_cnt <= cnt_inc_addr_s_stop);
-   assign inc_addr_a1	= (proc_cnt >= cnt_inc_addr_a1_start) && (proc_cnt <= cnt_inc_addr_a1_stop);
-
-   always @(posedge clk) begin
-      //
-      if (inc_addr_s)	addr_s <= addr_s_next;
-      else					addr_s <= addr_s_zero;
-      //
-      if (inc_addr_a1)	addr_a1 <= addr_a1_next;
-      else					addr_a1 <= addr_a1_zero;
-      //
-   end
-
-
-   //
-   // Write Enable Logic
-   //
-   wire	wren_a1;
-
-   wire [PROC_CNT_BITS-1:0] cnt_wren_a1_start	= 2;
-   wire [PROC_CNT_BITS-1:0] cnt_wren_a1_stop	= OPERAND_NUM_WORDS + 1;
-
-   assign wren_a1 = (proc_cnt >= cnt_wren_a1_start) && (proc_cnt <= cnt_wren_a1_stop);
-
-   assign a1_wren = wren_a1;
-
-
-   //
-   // Data Logic
-   //
-   assign a1_dout = s_din;
-
-
-   //
-   // Primary Counter Logic
-   //
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
-     else begin
-	if (!rdy)		proc_cnt <= proc_cnt_next;
-	else if (ena)	proc_cnt <= proc_cnt_next;
-     end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_init.v b/rtl/modular/modular_invertor/helper/modinv_helper_init.v
deleted file mode 100644
index 5a909c0..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_init.v
+++ /dev/null
@@ -1,172 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_init
-  (
-   clk, rst_n,
-   ena, rdy,
-   a_addr, a_din,
-   q_addr, q_din,
-   r_addr, r_wren, r_dout,
-   s_addr, s_wren, s_dout,
-   u_addr, u_wren, u_dout,
-   v_addr, v_wren, v_dout
-   );
-
-
-   //
-   // Parameters
-   //
-   parameter OPERAND_NUM_WORDS	= 8;
-   parameter OPERAND_ADDR_BITS	= 3;
-
-   parameter BUFFER_NUM_WORDS		= 9;
-   parameter BUFFER_ADDR_BITS		= 4;
-
-
-   //
-   // clog2
-   //
-`include "../modinv_clog2.v"
-
-
-   //
-   // Constants
-   //
-   localparam PROC_NUM_CYCLES	= OPERAND_NUM_WORDS + 3;
-   localparam PROC_CNT_BITS	= clog2(PROC_NUM_CYCLES);
-
-
-   //
-   // Ports
-   //
-   input		wire									clk;
-   input		wire									rst_n;
-   input		wire									ena;
-   output	wire 										rdy;
-
-   output	wire [OPERAND_ADDR_BITS-1:0] 							a_addr;
-   output	wire [OPERAND_ADDR_BITS-1:0] 							q_addr;
-   output	wire [ BUFFER_ADDR_BITS-1:0] 							r_addr;
-   output	wire [ BUFFER_ADDR_BITS-1:0] 							s_addr;
-   output	wire [ BUFFER_ADDR_BITS-1:0] 							u_addr;
-   output	wire [ BUFFER_ADDR_BITS-1:0] 							v_addr;
-
-   output	wire 										r_wren;
-   output	wire 										s_wren;
-   output	wire 										u_wren;
-   output	wire 										v_wren;
-
-   input		wire [                 31:0] 						a_din;
-   input		wire [                 31:0] 						q_din;
-   output	wire [                 31:0] 							r_dout;
-   output	wire [                 31:0] 							s_dout;
-   output	wire [                 31:0] 							u_dout;
-   output	wire [                 31:0] 							v_dout;
-
-
-   //
-   // Counter
-   //
-   reg [PROC_CNT_BITS-1:0] 									proc_cnt;
-
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_max	= PROC_NUM_CYCLES - 1;
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_zero	= {PROC_CNT_BITS{1'b0}};
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_next	= (proc_cnt < proc_cnt_max) ?
-												proc_cnt + 1'b1 : proc_cnt_zero;
-
-   //
-   // Addresses
-   //
-   reg [OPERAND_ADDR_BITS-1:0] 									addr_aq;
-
-   wire [OPERAND_ADDR_BITS-1:0] 								addr_aq_max		= OPERAND_NUM_WORDS - 1;
-   wire [OPERAND_ADDR_BITS-1:0] 								addr_aq_zero	= {OPERAND_ADDR_BITS{1'b0}};
-   wire [OPERAND_ADDR_BITS-1:0] 								addr_aq_next	= (addr_aq < addr_aq_max) ?
-												addr_aq + 1'b1 : addr_aq_zero;
-
-   reg [BUFFER_ADDR_BITS-1:0] 									addr_rsuv;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_rsuv_max	= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_rsuv_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_rsuv_next	= (addr_rsuv < addr_rsuv_max) ?
-												addr_rsuv + 1'b1 : addr_rsuv_zero;
-
-   assign a_addr = addr_aq;
-   assign q_addr = addr_aq;
-
-   assign r_addr = addr_rsuv;
-   assign s_addr = addr_rsuv;
-   assign u_addr = addr_rsuv;
-   assign v_addr = addr_rsuv;
-
-
-   //
-   // Ready Flag
-   //
-   assign rdy = (proc_cnt == proc_cnt_zero);
-
-
-   //
-   // Address Increment Logic
-   //
-   wire 											inc_addr_aq;
-   wire 											inc_addr_rsuv;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_aq_start	= 1;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_aq_stop		= OPERAND_NUM_WORDS;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_rsuv_start	= 2;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_rsuv_stop	= BUFFER_NUM_WORDS + 1;
-
-   assign inc_addr_aq   = (proc_cnt >= cnt_inc_addr_aq_start)   && (proc_cnt <= cnt_inc_addr_aq_stop);
-   assign inc_addr_rsuv = (proc_cnt >= cnt_inc_addr_rsuv_start) && (proc_cnt <= cnt_inc_addr_rsuv_stop);
-
-   always @(posedge clk) begin
-      //
-      if (inc_addr_aq)	addr_aq <= addr_aq_next;
-      else					addr_aq <= addr_aq_zero;
-      //
-      if (inc_addr_rsuv)	addr_rsuv <= addr_rsuv_next;
-      else						addr_rsuv <= addr_rsuv_zero;
-      //
-   end
-
-
-   //
-   // Write Enable Logic
-   //
-   wire	wren_rsuv;
-
-   wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_start	= 2;
-   wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_stop	= BUFFER_NUM_WORDS + 1;
-
-   assign wren_rsuv = (proc_cnt >= cnt_wren_rsuv_start) && (proc_cnt <= cnt_wren_rsuv_stop);
-
-   assign r_wren = wren_rsuv;
-   assign s_wren = wren_rsuv;
-   assign u_wren = wren_rsuv;
-   assign v_wren = wren_rsuv;
-
-
-   //
-   // Data Logic
-   //
-   assign r_dout = 32'd0;
-   assign s_dout = (proc_cnt == cnt_wren_rsuv_start) ? 32'd1 : 32'd0;
-   assign u_dout = (proc_cnt != cnt_wren_rsuv_stop)  ? q_din : 32'd0;
-   assign v_dout = (proc_cnt != cnt_wren_rsuv_stop)  ? a_din : 32'd0;
-
-
-   //
-   // Primary Counter Logic
-   //
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
-     else begin
-	if (!rdy)		proc_cnt <= proc_cnt_next;
-	else if (ena)	proc_cnt <= proc_cnt_next;
-     end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v
deleted file mode 100644
index 724b9f8..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v
+++ /dev/null
@@ -1,286 +0,0 @@
-`timescale 1ns / 1ps
-// Multi-word comparator helper: derives the u_gt_v, v_eq_1, u_is_even and v_is_even flags from the u and v buffers.
-module modinv_helper_invert_compare
-  (
-   clk, rst_n,
-   ena, rdy,
-
-   u_addr, u_din,
-   v_addr, v_din,
-
-   u_gt_v, v_eq_1,
-   u_is_even, v_is_even
-   );
-
-
-   //
-   // Parameters
-   // BUFFER_NUM_WORDS: number of 32-bit words scanned per operand; BUFFER_ADDR_BITS: width of the word address.
-   parameter BUFFER_NUM_WORDS		= 9;
-   parameter BUFFER_ADDR_BITS		= 4;
-
-
-   //
-   // clog2
-   // (clog2() comes from the included file; its body is not visible here.)
-`include "../modinv_clog2.v"
-
-
-   //
-   // Constants
-   // One run takes BUFFER_NUM_WORDS + 10 counter states; the counter width is obtained from clog2().
-   localparam PROC_NUM_CYCLES	= 1 * BUFFER_NUM_WORDS + 10;
-   localparam PROC_CNT_BITS	= clog2(PROC_NUM_CYCLES);
-
-
-   //
-   // Ports
-   // ena requests a run while rdy is high; u_addr/v_addr are word addresses, u_din/v_din are the 32-bit data inputs.
-   input		wire									clk;
-   input		wire									rst_n;
-   input		wire									ena;
-   output	wire 										rdy;
-
-   output	wire [BUFFER_ADDR_BITS-1:0] 							u_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							v_addr;
-
-   input		wire [              32-1:0] 						u_din;
-   input		wire [              32-1:0] 						v_din;
-
-   output	wire 										u_gt_v;
-   output	wire 										v_eq_1;
-   output	wire 										u_is_even;
-   output	wire 										v_is_even;
-
-
-   //
-   // Counter
-   // proc_cnt_next counts up to proc_cnt_max and then wraps back to zero.
-   reg [PROC_CNT_BITS-1:0] 									proc_cnt;
-
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_max	= PROC_NUM_CYCLES - 1;
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_zero	= {PROC_CNT_BITS{1'b0}};
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_next	= (proc_cnt < proc_cnt_max) ?
-												proc_cnt + 1'b1 : proc_cnt_zero;
-
-   //
-   // Addresses
-   // u and v share one read address; addr_in_prev steps downwards and wraps from zero back to the last word.
-   reg [BUFFER_ADDR_BITS-1:0] 									addr_in;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_last	= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_prev	= (addr_in > addr_in_zero) ?
-												addr_in - 1'b1 : addr_in_last;
-
-   assign u_addr					= addr_in;
-   assign v_addr					= addr_in;
-
-
-   //
-   // Ready Flag
-   // The helper reports ready whenever the counter sits at zero.
-   assign rdy = (proc_cnt == proc_cnt_zero);
-
-
-   //
-   // Address Decrement Logic
-   // dec_addr_in is high for proc_cnt in [1, BUFFER_NUM_WORDS].
-   wire 											dec_addr_in;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_in_start	= 0 * BUFFER_NUM_WORDS + 1;
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_in_stop		= 1 * BUFFER_NUM_WORDS + 0;
-
-   assign dec_addr_in   = (proc_cnt >= cnt_dec_addr_in_start)   && (proc_cnt <= cnt_dec_addr_in_stop);
-
-   always @(posedge clk)
-     // Ready: park on the last (highest-index) word. Running: step down while dec_addr_in is high, otherwise hold.
-     if (rdy)						addr_in <= addr_in_last;
-     else if (dec_addr_in)	addr_in <= addr_in_prev;
-
-
-   //
-   // Comparison Stage Flags
-   // calc_leg spans proc_cnt in [3, BUFFER_NUM_WORDS + 2]; calc_leg_final marks its last count; calc_parity is the count before that.
-   wire 											calc_leg;
-   wire 											calc_leg_final;
-   wire 											calc_parity;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_calc_leg_start	= 0 * BUFFER_NUM_WORDS + 3;
-   wire [PROC_CNT_BITS-1:0] 									cnt_calc_leg_stop		= 1 * BUFFER_NUM_WORDS + 2;
-   wire [PROC_CNT_BITS-1:0] 									cnt_calc_parity		= 1 * BUFFER_NUM_WORDS + 1;
-
-   assign calc_leg = (proc_cnt >= cnt_calc_leg_start) && (proc_cnt <= cnt_calc_leg_stop);
-   assign calc_leg_final = (proc_cnt == cnt_calc_leg_stop);
-   assign calc_parity = (proc_cnt == cnt_calc_parity);
-
-
-   //
-   // Dummy Input
-   // Second operand of the (v - 1) subtractor: 32'd1 one clock after addr_in was zero, 32'd0 otherwise.
-   reg 												sub32_din_1_lsb;
-   wire [31: 0] 										sub32_din_1 = {{31{1'b0}}, sub32_din_1_lsb};
-
-   always @(posedge clk)
-     //
-     sub32_din_1_lsb <= (addr_in == addr_in_zero) ? 1'b1 : 1'b0;
-
-
-   //
-   // Subtractor (u - v)
-   // subtractor32_wrapper is defined elsewhere; NOTE(review): its clk port suggests a registered result - confirm its latency.
-   wire [31: 0] 										sub32_u_minus_v_difference_out;
-   wire 											sub32_u_minus_v_borrow_in;
-   wire 											sub32_u_minus_v_borrow_out;
-
-   subtractor32_wrapper sub32_u_minus_v
-     (
-      .clk		(clk),
-      .a			(u_din),
-      .b			(v_din),
-      .d			(sub32_u_minus_v_difference_out),
-      .b_in		(sub32_u_minus_v_borrow_in),
-      .b_out	(sub32_u_minus_v_borrow_out)
-      );
-
-
-   //
-   // Subtractor (v - 1)
-   // Same wrapper as for (u - v); the subtrahend is the sub32_din_1 word built in the "Dummy Input" section.
-   wire [31: 0] 										sub32_v_minus_1_difference_out;
-   wire 											sub32_v_minus_1_borrow_in;
-   wire 											sub32_v_minus_1_borrow_out;
-
-   subtractor32_wrapper sub32_v_minus_1
-     (
-      .clk		(clk),
-      .a			(v_din),
-      .b			(sub32_din_1),
-      .d			(sub32_v_minus_1_difference_out),
-      .b_in		(sub32_v_minus_1_borrow_in),
-      .b_out	(sub32_v_minus_1_borrow_out)
-      );
-
-
-
-   //
-   // Borrow Masking Logic
-   // mask_borrow is registered: low one clock after proc_cnt was in [2, BUFFER_NUM_WORDS]; while high it forces both borrow inputs to zero.
-   reg 												mask_borrow;
-
-   always @(posedge clk)
-     //
-     mask_borrow <= ((proc_cnt > cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop)) ?
-		    1'b0 : 1'b1;
-
-   assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_borrow;
-   assign sub32_v_minus_1_borrow_in = sub32_v_minus_1_borrow_out & ~mask_borrow;
-
-
-   //
-   // Comparison Logic
-   // *_l / *_e / *_g are sticky "less / equal / greater" flags; a comparison counts as unresolved until its l or g flag is set.
-   reg 												cmp_u_v_l;
-   reg 												cmp_u_v_e;
-   reg 												cmp_u_v_g;
-
-   reg 												cmp_v_1_l;
-   reg 												cmp_v_1_e;
-   reg 												cmp_v_1_g;
-
-   wire 											cmp_unresolved_u_v = !(cmp_u_v_l || cmp_u_v_g);
-   wire 											cmp_unresolved_v_1 = !(cmp_v_1_l || cmp_v_1_g);
-
-   wire 											cmp_u_v_borrow_is_set			= (sub32_u_minus_v_borrow_out     ==  1'b1) ? 1'b1 : 1'b0;
-   wire 											cmp_u_v_difference_is_nonzero	= (sub32_u_minus_v_difference_out != 32'd0) ? 1'b1 : 1'b0;
-
-   wire 											cmp_v_1_borrow_is_set			= (sub32_v_minus_1_borrow_out     ==  1'b1) ? 1'b1 : 1'b0;
-   wire 											cmp_v_1_difference_is_nonzero	= (sub32_v_minus_1_difference_out != 32'd0) ? 1'b1 : 1'b0;
-
-   reg 												u_is_even_reg;
-   reg 												v_is_even_reg;
-
-   always @(posedge clk)
-     //
-     if (rdy) begin
-	// While ready, a start request (ena) clears every comparison flag and marks both parity flags as unknown (X).
-	if (ena) begin
-	   //
-	   cmp_u_v_l		<= 1'b0;
-	   cmp_u_v_e		<= 1'b0;
-	   cmp_u_v_g		<= 1'b0;
-	   //
-	   cmp_v_1_l		<= 1'b0;
-	   cmp_v_1_e		<= 1'b0;
-	   cmp_v_1_g		<= 1'b0;
-	   //
-	   u_is_even_reg	<= 1'bX;
-	   v_is_even_reg	<= 1'bX;
-	   //
-	end
-	//
-     end else begin
-	//
-	// parity
-	// Even means bit 0 of the word sampled at calc_parity is clear. NOTE(review): presumably the least significant word - confirm.
-	if (calc_parity) begin
-	   u_is_even_reg <= ~u_din[0];
-	   v_is_even_reg <= ~v_din[0];
-	end
-	//
-	// u <> v
-	// While unresolved: a borrow sets "less", a non-zero difference without borrow sets "greater"; "equal" is only set at calc_leg_final.
-	if (cmp_unresolved_u_v && calc_leg) begin
-	   //
-	   if (cmp_u_v_borrow_is_set)
-	     cmp_u_v_l <= 1'b1;
-	   //
-	   if (!cmp_u_v_borrow_is_set && cmp_u_v_difference_is_nonzero)
-	     cmp_u_v_g <= 1'b1;
-	   //
-	   if (!cmp_u_v_borrow_is_set && !cmp_u_v_difference_is_nonzero && calc_leg_final)
-	     cmp_u_v_e <= 1'b1;
-	   //
-	end
-	//
-	// v <> 1
-	// Same decision scheme as for u and v, driven by the (v - 1) subtractor outputs.
-	if (cmp_unresolved_v_1 && calc_leg) begin
-	   //
-	   if (cmp_v_1_borrow_is_set)
-	     cmp_v_1_l <= 1'b1;
-	   //
-	   if (!cmp_v_1_borrow_is_set && cmp_v_1_difference_is_nonzero)
-	     cmp_v_1_g <= 1'b1;
-	   //
-	   if (!cmp_v_1_borrow_is_set && !cmp_v_1_difference_is_nonzero && calc_leg_final)
-	     cmp_v_1_e <= 1'b1;
-	   //
-	end
-	//
-     end
-
-
-   //
-   // Output Flags
-   // Each output requires exactly one sticky flag to be set: "greater" alone for u_gt_v, "equal" alone for v_eq_1.
-   assign u_gt_v = !cmp_u_v_l && !cmp_u_v_e &&  cmp_u_v_g;
-   assign v_eq_1 = !cmp_v_1_l &&  cmp_v_1_e && !cmp_v_1_g;
-
-   assign u_is_even = u_is_even_reg;
-   assign v_is_even = v_is_even_reg;
-
-
-   //
-   // Primary Counter Logic
-   // Asynchronous active-low reset; once started by ena the counter advances every clock until it wraps back to zero.
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
-     else begin
-	if (!rdy)		proc_cnt <= proc_cnt_next;
-	else if (ena)	proc_cnt <= proc_cnt_next;
-     end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v
deleted file mode 100644
index 3ebea00..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v
+++ /dev/null
@@ -1,408 +0,0 @@
-`timescale 1ns / 1ps
-// Precalculation helper: reads r, s, u, v and writes the doubled, summed, halved and subtracted candidates to separate buffers.
-module modinv_helper_invert_precalc
-  (
-   clk, rst_n,
-   ena, rdy,
-
-   r_addr, r_din,
-   s_addr, s_din,
-   u_addr, u_din,
-   v_addr, v_din,
-
-   r_dbl_addr,          r_dbl_wren,          r_dbl_dout,
-   s_dbl_addr,          s_dbl_wren,          s_dbl_dout,
-   r_plus_s_addr,       r_plus_s_wren,       r_plus_s_dout,
-   u_half_addr,         u_half_wren,         u_half_dout,
-   v_half_addr,         v_half_wren,         v_half_dout,
-   u_minus_v_addr,      u_minus_v_wren,      u_minus_v_dout,      u_minus_v_din,
-   v_minus_u_addr,      v_minus_u_wren,      v_minus_u_dout,      v_minus_u_din,
-   u_minus_v_half_addr, u_minus_v_half_wren, u_minus_v_half_dout,
-   v_minus_u_half_addr, v_minus_u_half_wren, v_minus_u_half_dout
-   );
-
-
-   //
-   // Parameters
-   // BUFFER_NUM_WORDS: number of 32-bit words per buffer; BUFFER_ADDR_BITS: width of every word address.
-   parameter BUFFER_NUM_WORDS		= 9;
-   parameter BUFFER_ADDR_BITS		= 4;
-
-
-   //
-   // clog2
-   // (clog2() comes from the included file; its body is not visible here.)
-`include "../modinv_clog2.v"
-
-
-   //
-   // Constants
-   // One run takes 2 * BUFFER_NUM_WORDS + 4 counter states.
-   localparam PROC_NUM_CYCLES	= 2 * BUFFER_NUM_WORDS + 4;
-   localparam PROC_CNT_BITS	= clog2(PROC_NUM_CYCLES);
-
-
-   //
-   // Ports
-   // Read side: r/s/u/v (address out, data in). Write side: nine result buffers (address, write enable, data out).
-   input		wire									clk;
-   input		wire									rst_n;
-   input		wire									ena;
-   output	wire 										rdy;
-
-   output	wire [BUFFER_ADDR_BITS-1:0] 							r_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							s_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							u_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							v_addr;
-
-   input		wire [              32-1:0] 						r_din;
-   input		wire [              32-1:0] 						s_din;
-   input		wire [              32-1:0] 						u_din;
-   input		wire [              32-1:0] 						v_din;
-
-   output	wire [BUFFER_ADDR_BITS-1:0] 							r_dbl_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							s_dbl_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							r_plus_s_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							u_half_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							v_half_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							u_minus_v_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							v_minus_u_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							u_minus_v_half_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							v_minus_u_half_addr;
-
-   output	wire [              32-1:0] 							r_dbl_dout;
-   output	wire [              32-1:0] 							s_dbl_dout;
-   output	wire [              32-1:0] 							r_plus_s_dout;
-   output	wire [              32-1:0] 							u_half_dout;
-   output	wire [              32-1:0] 							v_half_dout;
-   output	wire [              32-1:0] 							u_minus_v_dout;
-   output	wire [              32-1:0] 							v_minus_u_dout;
-   output	wire [              32-1:0] 							u_minus_v_half_dout;
-   output	wire [              32-1:0] 							v_minus_u_half_dout;
-
-   output	wire 										r_dbl_wren;
-   output	wire 										s_dbl_wren;
-   output	wire 										r_plus_s_wren;
-   output	wire 										u_half_wren;
-   output	wire 										v_half_wren;
-   output	wire 										u_minus_v_wren;
-   output	wire 										v_minus_u_wren;
-   output	wire 										u_minus_v_half_wren;
-   output	wire 										v_minus_u_half_wren;
-
-   input		wire [              32-1:0] 						u_minus_v_din;
-   input		wire [              32-1:0] 						v_minus_u_din;
-
-
-
-   //
-   // Counter
-   // proc_cnt_next counts up to proc_cnt_max and then wraps back to zero.
-   reg [PROC_CNT_BITS-1:0] 									proc_cnt;
-
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_max	= PROC_NUM_CYCLES - 1;
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_zero	= {PROC_CNT_BITS{1'b0}};
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_next	= (proc_cnt < proc_cnt_max) ?
-												proc_cnt + 1'b1 : proc_cnt_zero;
-
-   //
-   // Addresses
-   // addr_in is the shared read address, addr_out1..4 are write addresses; addr_in_prev and addr_out2_prev saturate at zero, the other next/prev values wrap.
-   reg [BUFFER_ADDR_BITS-1:0] 									addr_in;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_last	= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_next	= (addr_in < addr_in_last) ?
-												addr_in + 1'b1 : addr_in_zero;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_prev	= (addr_in > addr_in_zero) ?
-												addr_in - 1'b1 : addr_in_zero;
-
-   reg [BUFFER_ADDR_BITS-1:0] 									addr_out1;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out1_last	= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out1_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out1_next	= (addr_out1 < addr_out1_last) ?
-												addr_out1 + 1'b1 : addr_out1_zero;
-
-   reg [BUFFER_ADDR_BITS-1:0] 									addr_out2;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out2_last	= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out2_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out2_next	= (addr_out2 < addr_out2_last) ?
-												addr_out2 + 1'b1 : addr_out2_zero;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out2_prev	= (addr_out2 > addr_out2_zero) ?
-												addr_out2 - 1'b1 : addr_out2_zero;
-
-   reg [BUFFER_ADDR_BITS-1:0] 									addr_out3;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out3_last	= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out3_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out3_prev	= (addr_out3 > addr_out3_zero) ?
-												addr_out3 - 1'b1 : addr_out3_last;
-
-   reg [BUFFER_ADDR_BITS-1:0] 									addr_out4;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out4_last	= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out4_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out4_prev	= (addr_out4 > addr_out4_zero) ?
-												addr_out4 - 1'b1 : addr_out4_last;
-
-
-   assign r_addr					= addr_in;
-   assign s_addr					= addr_in;
-   assign u_addr					= addr_in;
-   assign v_addr					= addr_in;
-
-   assign r_dbl_addr				= addr_out1;
-   assign s_dbl_addr				= addr_out1;
-   assign r_plus_s_addr			= addr_out2;
-   assign u_half_addr			= addr_out3;
-   assign v_half_addr			= addr_out3;
-   assign u_minus_v_addr		= addr_out2;
-   assign v_minus_u_addr		= addr_out2;
-   assign u_minus_v_half_addr	= addr_out4;
-   assign v_minus_u_half_addr	= addr_out4;
-
-
-   //
-   // Ready Flag
-   // The helper reports ready whenever the counter sits at zero.
-   assign rdy = (proc_cnt == proc_cnt_zero);
-
-
-   //
-   // Address Increment/Decrement Logic
-   // Every strobe below is a window on proc_cnt, given as inclusive [start, stop] bounds.
-   wire 											inc_addr_in;
-   wire 											dec_addr_in;
-   wire 											inc_addr_out1;
-   wire 											inc_addr_out2;
-   wire 											dec_addr_out2;
-   wire 											dec_addr_out3;
-   wire 											dec_addr_out4;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_in_start	= 0 * BUFFER_NUM_WORDS + 1;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_in_stop		= 1 * BUFFER_NUM_WORDS - 1;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_out1_start	= 0 * BUFFER_NUM_WORDS + 2;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_out1_stop	= 1 * BUFFER_NUM_WORDS + 1;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_out2_start	= 0 * BUFFER_NUM_WORDS + 3;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_out2_stop	= 1 * BUFFER_NUM_WORDS + 1;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_out2_start	= 1 * BUFFER_NUM_WORDS + 3;
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_out2_stop	= 2 * BUFFER_NUM_WORDS + 1;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_in_start	= 1 * BUFFER_NUM_WORDS + 0;
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_in_stop		= 2 * BUFFER_NUM_WORDS - 2;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_out3_start	= 1 * BUFFER_NUM_WORDS + 1;
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_out3_stop	= 2 * BUFFER_NUM_WORDS + 0;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_out4_start	= 1 * BUFFER_NUM_WORDS + 4;
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_out4_stop	= 2 * BUFFER_NUM_WORDS + 3;
-
-   assign inc_addr_in   = (proc_cnt >= cnt_inc_addr_in_start)   && (proc_cnt <= cnt_inc_addr_in_stop);
-   assign dec_addr_in   = (proc_cnt >= cnt_dec_addr_in_start)   && (proc_cnt <= cnt_dec_addr_in_stop);
-   assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop);
-   assign inc_addr_out2 = (proc_cnt >= cnt_inc_addr_out2_start) && (proc_cnt <= cnt_inc_addr_out2_stop);
-   assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop);
-   assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop);
-   assign dec_addr_out4 = (proc_cnt >= cnt_dec_addr_out4_start) && (proc_cnt <= cnt_dec_addr_out4_stop);
-
-
-   always @(posedge clk) begin
-      // Ready: reset all addresses. Running: addr_in and addr_out2 hold when no strobe is active; addr_out1/3/4 fall back to their reset value.
-      if (rdy) begin
-	 //
-	 addr_in 		<= addr_in_zero;
-	 addr_out1	<= addr_out1_zero;
-	 addr_out2	<= addr_out2_zero;
-	 addr_out3	<= addr_out3_last;
-	 addr_out4	<= addr_out4_last;
-	 //
-      end else begin
-	 //
-	 if (inc_addr_in)				addr_in <= addr_in_next;
-	 else if (dec_addr_in)		addr_in <= addr_in_prev;
-	 //
-	 if (inc_addr_out1)			addr_out1 <= addr_out1_next;
-	 else								addr_out1 <= addr_out1_zero;
-	 //
-	 if (inc_addr_out2)			addr_out2 <= addr_out2_next;
-	 else if (dec_addr_out2)		addr_out2 <= addr_out2_prev;
-	 //
-	 if (dec_addr_out3)			addr_out3 <= addr_out3_prev;
-	 else								addr_out3 <= addr_out3_last;
-	 //
-	 if (dec_addr_out4)			addr_out4 <= addr_out4_prev;
-	 else								addr_out4 <= addr_out4_last;
-	 //
-      end
-      //
-   end
-
-
-   //
-   // Write Enable Logic
-   // Write windows on proc_cnt, with N = BUFFER_NUM_WORDS: out1 [2, N+1], out2 [3, N+2], out3 [N+1, 2N], out4 [N+4, 2N+3].
-   wire	wren_out1;
-   wire	wren_out2;
-   wire	wren_out3;
-   wire	wren_out4;
-
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start	= 0 * BUFFER_NUM_WORDS + 2;
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop	= 1 * BUFFER_NUM_WORDS + 1;
-
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start	= 0 * BUFFER_NUM_WORDS + 3;
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop	= 1 * BUFFER_NUM_WORDS + 2;
-
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start	= 1 * BUFFER_NUM_WORDS + 1;
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop	= 2 * BUFFER_NUM_WORDS + 0;
-
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out4_start	= 1 * BUFFER_NUM_WORDS + 4;
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out4_stop	= 2 * BUFFER_NUM_WORDS + 3;
-
-   assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop);
-   assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
-   assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop);
-   assign wren_out4 = (proc_cnt >= cnt_wren_out4_start) && (proc_cnt <= cnt_wren_out4_stop);
-
-   assign r_dbl_wren				= wren_out1;
-   assign s_dbl_wren				= wren_out1;
-   assign r_plus_s_wren			= wren_out2;
-   assign u_half_wren			= wren_out3;
-   assign v_half_wren			= wren_out3;
-   assign u_minus_v_wren		= wren_out2;
-   assign v_minus_u_wren		= wren_out2;
-   assign u_minus_v_half_wren	= wren_out4;
-   assign v_minus_u_half_wren	= wren_out4;
-
-
-   //
-   // Adder (r + s)
-   // adder32_wrapper is defined elsewhere; NOTE(review): its clk port suggests a registered result - confirm its latency.
-   wire [31: 0] 	    add32_r_plus_s_sum_out;
-   wire 		    add32_r_plus_s_carry_in;
-   wire 		    add32_r_plus_s_carry_out;
-
-   adder32_wrapper add32_r_plus_s
-     (
-      .clk		(clk),
-      .a			(r_din),
-      .b			(s_din),
-      .s			(add32_r_plus_s_sum_out),
-      .c_in		(add32_r_plus_s_carry_in),
-      .c_out	(add32_r_plus_s_carry_out)
-      );
-
-   //
-   // Subtractor (u - v)
-   // subtractor32_wrapper is defined elsewhere as well; the same latency caveat applies.
-   wire [31: 0] 	    sub32_u_minus_v_difference_out;
-   wire 		    sub32_u_minus_v_borrow_in;
-   wire 		    sub32_u_minus_v_borrow_out;
-
-   subtractor32_wrapper sub32_u_minus_v
-     (
-      .clk		(clk),
-      .a			(u_din),
-      .b			(v_din),
-      .d			(sub32_u_minus_v_difference_out),
-      .b_in		(sub32_u_minus_v_borrow_in),
-      .b_out	(sub32_u_minus_v_borrow_out)
-      );
-
-   //
-   // Subtractor (v - u)
-   // Same inputs as (u - v) with the operands swapped.
-   wire [31: 0] 	    sub32_v_minus_u_difference_out;
-   wire 		    sub32_v_minus_u_borrow_in;
-   wire 		    sub32_v_minus_u_borrow_out;
-
-   subtractor32_wrapper sub32_v_minus_u
-     (
-      .clk		(clk),
-      .a			(v_din),
-      .b			(u_din),
-      .d			(sub32_v_minus_u_difference_out),
-      .b_in		(sub32_v_minus_u_borrow_in),
-      .b_out	(sub32_v_minus_u_borrow_out)
-      );
-
-
-   //
-   // Carry & Borrow Masking Logic
-   // Registered mask: low one clock after proc_cnt was in [2, BUFFER_NUM_WORDS]; while high it zeroes the carry/borrow fed back into the adder and both subtractors.
-   reg 			    mask_carry_borrow;
-
-   always @(posedge clk)
-     //
-     mask_carry_borrow <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
-			  1'b0 : 1'b1;
-
-   assign add32_r_plus_s_carry_in   = add32_r_plus_s_carry_out   & ~mask_carry_borrow;
-   assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_carry_borrow;
-   assign sub32_v_minus_u_borrow_in = sub32_v_minus_u_borrow_out & ~mask_carry_borrow;
-
-
-   //
-   // Carry Bits
-   // Each register keeps the bit shifted out of the previous clock's input word (bit 31 for doubling, bit 0 for halving) and is zero outside its window.
-   reg 			    r_dbl_carry;
-   reg 			    s_dbl_carry;
-   reg 			    u_half_carry;
-   reg 			    v_half_carry;
-   reg 			    u_minus_v_half_carry;
-   reg 			    v_minus_u_half_carry;
-
-   always @(posedge clk) begin
-
-      r_dbl_carry					<= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
-							   r_din[31] : 1'b0;
-
-      s_dbl_carry					<= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
-							   s_din[31] : 1'b0;
-
-      u_half_carry				<= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
-						   u_din[0] : 1'b0;
-
-      v_half_carry				<= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
-						   v_din[0] : 1'b0;
-
-      u_minus_v_half_carry		<= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ?
-					   u_minus_v_din[0] : 1'b0;
-
-      v_minus_u_half_carry		<= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ?
-					   v_minus_u_din[0] : 1'b0;
-
-   end
-
-
-   //
-   // Data Mapper
-   // Doubling shifts each word left by one and inserts the saved carry at bit 0; halving shifts right by one and inserts it at bit 31.
-   assign r_dbl_dout				= {r_din[30:0], r_dbl_carry};
-   assign s_dbl_dout				= {s_din[30:0], s_dbl_carry};
-   assign r_plus_s_dout			= add32_r_plus_s_sum_out;
-   assign u_half_dout			= {u_half_carry, u_din[31:1]};
-   assign v_half_dout			= {v_half_carry, v_din[31:1]};
-   assign u_minus_v_dout		= sub32_u_minus_v_difference_out;
-   assign v_minus_u_dout		= sub32_v_minus_u_difference_out;
-   assign u_minus_v_half_dout	= {u_minus_v_half_carry, u_minus_v_din[31:1]};
-   assign v_minus_u_half_dout	= {v_minus_u_half_carry, v_minus_u_din[31:1]};
-
-
-   //
-   // Primary Counter Logic
-   // Asynchronous active-low reset; once started by ena the counter advances every clock until it wraps back to zero.
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
-     else begin
-	if (!rdy)		proc_cnt <= proc_cnt_next;
-	else if (ena)	proc_cnt <= proc_cnt_next;
-     end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v
deleted file mode 100644
index ede2fc1..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v
+++ /dev/null
@@ -1,257 +0,0 @@
-`timescale 1ns / 1ps
-// Update helper: copies the selected precalculated candidates into r, s, u, v, steered by the u_gt_v, v_eq_1 and parity flags.
-module modinv_helper_invert_update
-  (
-   clk, rst_n,
-   ena, rdy,
-
-   u_gt_v, v_eq_1,
-   u_is_even, v_is_even,
-
-   r_addr, r_wren, r_dout,
-   s_addr, s_wren, s_dout,
-   u_addr, u_wren, u_dout,
-   v_addr, v_wren, v_dout,
-
-   r_dbl_addr,          r_dbl_din,
-   s_dbl_addr,          s_dbl_din,
-   r_plus_s_addr,       r_plus_s_din,
-   u_half_addr,         u_half_din,
-   v_half_addr,         v_half_din,
-   u_minus_v_half_addr, u_minus_v_half_din,
-   v_minus_u_half_addr, v_minus_u_half_din
-   );
-
-
-   //
-   // Parameters
-   // BUFFER_NUM_WORDS: number of 32-bit words per buffer; BUFFER_ADDR_BITS: width of every word address.
-   parameter BUFFER_NUM_WORDS		= 9;
-   parameter BUFFER_ADDR_BITS		= 4;
-
-
-   //
-   // clog2
-   // (clog2() comes from the included file; its body is not visible here.)
-`include "../modinv_clog2.v"
-
-
-   //
-   // Constants
-   // One run takes BUFFER_NUM_WORDS + 3 counter states.
-   localparam PROC_NUM_CYCLES	= BUFFER_NUM_WORDS + 3;
-   localparam PROC_CNT_BITS	= clog2(PROC_NUM_CYCLES);
-
-
-   //
-   // Ports
-   // Flag inputs select the update; r/s/u/v are write ports; the seven *_din buses are read from the precalculated buffers.
-   input		wire									clk;
-   input		wire									rst_n;
-   input		wire									ena;
-   output	wire 										rdy;
-
-   input		wire									u_gt_v;
-   input		wire									v_eq_1;
-   input		wire									u_is_even;
-   input		wire									v_is_even;
-
-   output	wire [BUFFER_ADDR_BITS-1:0] 							r_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							s_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							u_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							v_addr;
-
-   output	wire 										r_wren;
-   output	wire 										s_wren;
-   output	wire 										u_wren;
-   output	wire 										v_wren;
-
-   output	wire [              32-1:0] 							r_dout;
-   output	wire [              32-1:0] 							s_dout;
-   output	wire [              32-1:0] 							u_dout;
-   output	wire [              32-1:0] 							v_dout;
-
-   output	wire [BUFFER_ADDR_BITS-1:0] 							r_dbl_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							s_dbl_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							r_plus_s_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							u_half_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							v_half_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							u_minus_v_half_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							v_minus_u_half_addr;
-
-   input		wire [              32-1:0] 						r_dbl_din;
-   input		wire [              32-1:0] 						s_dbl_din;
-   input		wire [              32-1:0] 						r_plus_s_din;
-   input		wire [              32-1:0] 						u_half_din;
-   input		wire [              32-1:0] 						v_half_din;
-   input		wire [              32-1:0] 						u_minus_v_half_din;
-   input		wire [              32-1:0] 						v_minus_u_half_din;
-
-
-   //
-   // Counter
-   // proc_cnt_next counts up to proc_cnt_max and then wraps back to zero.
-   reg [PROC_CNT_BITS-1:0] 									proc_cnt;
-
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_max	= PROC_NUM_CYCLES - 1;
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_zero	= {PROC_CNT_BITS{1'b0}};
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_next	= (proc_cnt < proc_cnt_max) ?
-												proc_cnt + 1'b1 : proc_cnt_zero;
-
-   //
-   // Addresses
-   // addr_in drives the read address of all seven precalculated buffers; addr_out drives the r, s, u, v write addresses.
-   reg [BUFFER_ADDR_BITS-1:0] 									addr_in;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_max		= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_next	= (addr_in < addr_in_max) ?
-												addr_in + 1'b1 : addr_in_zero;
-
-   reg [BUFFER_ADDR_BITS-1:0] 									addr_out;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out_max	= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out_next	= (addr_out < addr_out_max) ?
-												addr_out + 1'b1 : addr_out_zero;
-
-   assign r_addr					= addr_out;
-   assign s_addr					= addr_out;
-   assign u_addr					= addr_out;
-   assign v_addr					= addr_out;
-
-   assign r_dbl_addr				= addr_in;
-   assign s_dbl_addr				= addr_in;
-   assign r_plus_s_addr			= addr_in;
-   assign u_half_addr			= addr_in;
-   assign v_half_addr			= addr_in;
-   assign u_minus_v_half_addr	= addr_in;
-   assign v_minus_u_half_addr	= addr_in;
-
-
-   //
-   // Ready Flag
-   // The helper reports ready whenever the counter sits at zero.
-   assign rdy = (proc_cnt == proc_cnt_zero);
-
-
-   //
-   // Address Increment Logic
-   // addr_in advances for proc_cnt in [1, BUFFER_NUM_WORDS], addr_out one count later; outside its window each address is forced to zero.
-   wire 											inc_addr_in;
-   wire 											inc_addr_out;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_in_start	= 1;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_in_stop		= BUFFER_NUM_WORDS;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_out_start	= 2;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_out_stop	= BUFFER_NUM_WORDS + 1;
-
-   assign inc_addr_in  = (proc_cnt >= cnt_inc_addr_in_start)  && (proc_cnt <= cnt_inc_addr_in_stop);
-   assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop);
-
-   always @(posedge clk) begin
-      //
-      if (inc_addr_in)	addr_in <= addr_in_next;
-      else					addr_in <= addr_in_zero;
-      //
-      if (inc_addr_out)	addr_out <= addr_out_next;
-      else					addr_out <= addr_out_zero;
-      //
-   end
-
-   //
-   // Write Enable Logic
-   // Writes need proc_cnt in [2, BUFFER_NUM_WORDS + 1], are gated per buffer by *_wren_allow, and are suppressed entirely while v_eq_1 is set.
-   wire	wren_out;
-
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out_start	= 2;
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop		= BUFFER_NUM_WORDS + 1;
-
-   assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop);
-
-   reg 			    r_wren_allow;
-   reg 			    s_wren_allow;
-   reg 			    u_wren_allow;
-   reg 			    v_wren_allow;
-
-   assign r_wren = wren_out && r_wren_allow && !v_eq_1 && !rdy;
-   assign s_wren = wren_out && s_wren_allow && !v_eq_1 && !rdy;
-   assign u_wren = wren_out && u_wren_allow && !v_eq_1 && !rdy;
-   assign v_wren = wren_out && v_wren_allow && !v_eq_1 && !rdy;
-
-
-   //
-   // Data Logic
-   // Combinational selection; a buffer that is not written in the chosen case gets X on its data output.
-   reg [31: 0] 		    r_dout_mux;
-   reg [31: 0] 		    s_dout_mux;
-   reg [31: 0] 		    u_dout_mux;
-   reg [31: 0] 		    v_dout_mux;
-
-   assign r_dout = r_dout_mux;
-   assign s_dout = s_dout_mux;
-   assign u_dout = u_dout_mux;
-   assign v_dout = v_dout_mux;
-
-   always @(*) begin
-      //
-      // r, s, u, v
-      // Priority: u even (u=u_half, s=s_dbl); else v even (v=v_half, r=r_dbl); else u_gt_v (u=u_minus_v_half, r=r_plus_s, s=s_dbl); otherwise (v=v_minus_u_half, r=r_dbl, s=r_plus_s).
-      if (u_is_even) begin
-	 //
-	 u_dout_mux		= u_half_din;
-	 v_dout_mux		= {32{1'bX}};
-	 r_dout_mux		= {32{1'bX}};
-	 s_dout_mux		= s_dbl_din;
-	 //
-	 u_wren_allow	= 1'b1;
-	 v_wren_allow	= 1'b0;
-	 r_wren_allow	= 1'b0;
-	 s_wren_allow	= 1'b1;
-	 //
-      end else begin
-	 //
-	 if (v_is_even) begin
-	    //
-	    u_dout_mux		= {32{1'bX}};
-	 v_dout_mux		= v_half_din;
-	 r_dout_mux		= r_dbl_din;
-	 s_dout_mux		= {32{1'bX}};
-	 //
-	 u_wren_allow	= 1'b0;
-	 v_wren_allow	= 1'b1;
-	 r_wren_allow	= 1'b1;
-	 s_wren_allow	= 1'b0;
-	 //
-      end else begin
-	 // Both u and v odd: exactly one of u / v is rewritten (chosen by u_gt_v), r and s are always rewritten.
-	 u_dout_mux		=  u_gt_v ? u_minus_v_half_din : {32{1'bX}};
-	  v_dout_mux		=  u_gt_v ? {32{1'bX}}         : v_minus_u_half_din;
-	  r_dout_mux		=  u_gt_v ? r_plus_s_din       : r_dbl_din;
-	  s_dout_mux		=  u_gt_v ? s_dbl_din          : r_plus_s_din;
-	  //
-	  u_wren_allow	=  u_gt_v;
-	  v_wren_allow	= !u_gt_v;
-	  r_wren_allow	=  1'b1;
-	  s_wren_allow	=  1'b1;
-	  //
-       end
-	 //
-      end
-      //
-   end
-
-
-   //
-   // Primary Counter Logic
-   // Asynchronous active-low reset; once started by ena the counter advances every clock until it wraps back to zero.
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
-     else begin
-	if (!rdy)		proc_cnt <= proc_cnt_next;
-	else if (ena)	proc_cnt <= proc_cnt_next;
-     end
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v
deleted file mode 100644
index b64b8e7..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v
+++ /dev/null
@@ -1,328 +0,0 @@
-`timescale 1ns / 1ps
-// Reduction pre-calculation helper: writes s + q to r, s >> 1 to u and r_din >> 1 to v, and latches s_is_odd / k_is_nul.
-module modinv_helper_reduce_precalc
-  (
-   clk, rst_n,
-   ena, rdy,
-
-   k,
-
-   s_is_odd, k_is_nul,
-
-   r_addr, r_din, r_wren, r_dout,
-   s_addr, s_din,
-   u_addr,        u_wren, u_dout,
-   v_addr,        v_wren, v_dout,
-   q_addr, q_din
-   );
-   // All activity is sequenced by proc_cnt: a run starts when ena is sampled while rdy is high,
-   // after which proc_cnt counts up to PROC_NUM_CYCLES - 1 and wraps back to zero on its own.
-   //
-   // Parameters: word counts and address widths of the q operand (OPERAND_*) and of the
-   // r/s/u/v buffers (BUFFER_*), plus the width of the k input (K_NUM_BITS).
-   parameter OPERAND_NUM_WORDS	= 8;
-   parameter OPERAND_ADDR_BITS	= 3;
-   parameter BUFFER_NUM_WORDS		= 9;
-   parameter BUFFER_ADDR_BITS		= 4;
-   parameter K_NUM_BITS				= 10;
-
-
-   //
-   // clog2 (function is textually included so it can size proc_cnt below)
-   //
-`include "../modinv_clog2.v"
-
-
-   //
-   // Constants: number of proc_cnt states per run and the counter width needed for them
-   //
-   localparam PROC_NUM_CYCLES	= 2 * BUFFER_NUM_WORDS + 4;
-   localparam PROC_CNT_BITS	= clog2(PROC_NUM_CYCLES);
-
-
-   //
-   // Ports: *_addr, *_wren and *_dout are driven by this module; *_din and k are sampled.
-   // NOTE(review): r_din is presumably the r memory read back at r_addr - confirm in the parent.
-   input		wire									clk;
-   input		wire									rst_n;
-   input		wire									ena;
-   output	wire 										rdy;
-
-   input		wire [       K_NUM_BITS-1:0] 						k;
-
-   output	wire 										s_is_odd;
-   output	wire 										k_is_nul;
-
-   output	wire [ BUFFER_ADDR_BITS-1:0] 							r_addr;
-   output	wire [ BUFFER_ADDR_BITS-1:0] 							s_addr;
-   output	wire [ BUFFER_ADDR_BITS-1:0] 							u_addr;
-   output	wire [ BUFFER_ADDR_BITS-1:0] 							v_addr;
-   output	wire [OPERAND_ADDR_BITS-1:0] 							q_addr;
-
-   input		wire [              32-1:0] 						r_din;
-   input		wire [              32-1:0] 						s_din;
-   input		wire [              32-1:0] 						q_din;
-
-   output	wire 										r_wren;
-   output	wire 										u_wren;
-   output	wire 										v_wren;
-
-   output	wire [              32-1:0] 							r_dout;
-   output	wire [              32-1:0] 							u_dout;
-   output	wire [              32-1:0] 							v_dout;
-
-
-   //
-   // Counter: proc_cnt_next increments until proc_cnt_max, then wraps to zero
-   //
-   reg [PROC_CNT_BITS-1:0] 									proc_cnt;
-
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_max	= PROC_NUM_CYCLES - 1;
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_zero	= {PROC_CNT_BITS{1'b0}};
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_next	= (proc_cnt < proc_cnt_max) ?
-												proc_cnt + 1'b1 : proc_cnt_zero;
-
-   //
-   // Addresses: *_next wraps to zero after *_last; *_prev of addr_in_buf/addr_out1 stays at
-   // zero once there, while *_prev of addr_out2/addr_out3 wraps from zero back to *_last.
-   reg [ BUFFER_ADDR_BITS-1:0] 									addr_in_buf;
-   reg [OPERAND_ADDR_BITS-1:0] 									addr_in_op;
-   reg [ BUFFER_ADDR_BITS-1:0] 									addr_out1;
-   reg [ BUFFER_ADDR_BITS-1:0] 									addr_out2;
-   reg [ BUFFER_ADDR_BITS-1:0] 									addr_out3;
-
-   wire [ BUFFER_ADDR_BITS-1:0] 								addr_in_buf_last	= BUFFER_NUM_WORDS - 1;
-   wire [ BUFFER_ADDR_BITS-1:0] 								addr_in_buf_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [ BUFFER_ADDR_BITS-1:0] 								addr_in_buf_next	= (addr_in_buf < addr_in_buf_last) ?
-												addr_in_buf + 1'b1 : addr_in_buf_zero;
-   wire [ BUFFER_ADDR_BITS-1:0] 								addr_in_buf_prev	= (addr_in_buf > addr_in_buf_zero) ?
-												addr_in_buf - 1'b1 : addr_in_buf_zero;
-
-   wire [OPERAND_ADDR_BITS-1:0] 								addr_in_op_last	= OPERAND_NUM_WORDS - 1;
-   wire [OPERAND_ADDR_BITS-1:0] 								addr_in_op_zero	= {OPERAND_ADDR_BITS{1'b0}};
-   wire [OPERAND_ADDR_BITS-1:0] 								addr_in_op_next	= (addr_in_op < addr_in_op_last) ?
-												addr_in_op + 1'b1 : addr_in_op_zero;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out1_last	= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out1_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out1_next	= (addr_out1 < addr_out1_last) ?
-												addr_out1 + 1'b1 : addr_out1_zero;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out1_prev	= (addr_out1 > addr_out1_zero) ?
-												addr_out1 - 1'b1 : addr_out1_zero;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out2_last	= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out2_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out2_prev	= (addr_out2 > addr_out2_zero) ?
-												addr_out2 - 1'b1 : addr_out2_last;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out3_last	= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out3_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out3_prev	= (addr_out3 > addr_out3_zero) ?
-												addr_out3 - 1'b1 : addr_out3_last;
-
-
-   assign s_addr = addr_in_buf;
-   assign q_addr = addr_in_op;
-   assign r_addr = addr_out1;
-   assign u_addr = addr_out2;
-   assign v_addr = addr_out3;
-
-
-   //
-   // Ready Flag: high only while proc_cnt is zero, i.e. no run is in progress
-   //
-   assign rdy = (proc_cnt == proc_cnt_zero);
-
-
-   //
-   // Address Increment/Decrement Logic: each pointer moves only while proc_cnt lies inside
-   // its inclusive [start, stop] window; cnt_calc_flags is the count at which flags are latched.
-   wire 											inc_addr_buf_in;
-   wire 											dec_addr_buf_in;
-   wire 											inc_addr_op_in;
-   wire 											inc_addr_out1;
-   wire 											dec_addr_out1;
-   wire 											dec_addr_out2;
-   wire 											dec_addr_out3;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_calc_flags					= 0 * BUFFER_NUM_WORDS + 2;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_buf_in_start	= 0 * BUFFER_NUM_WORDS + 1;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_buf_in_stop	= 1 * BUFFER_NUM_WORDS - 1;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_buf_in_start	= 1 * BUFFER_NUM_WORDS + 0;
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_buf_in_stop	= 2 * BUFFER_NUM_WORDS - 2;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_op_in_start	= 0 * OPERAND_NUM_WORDS + 1;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_op_in_stop		= 1 * OPERAND_NUM_WORDS + 0;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_out1_start		= 0 * BUFFER_NUM_WORDS + 3;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_out1_stop		= 1 * BUFFER_NUM_WORDS + 1;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_out1_start		= 1 * BUFFER_NUM_WORDS + 3;
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_out1_stop		= 2 * BUFFER_NUM_WORDS + 1;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_out2_start		= 1 * BUFFER_NUM_WORDS + 1;
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_out2_stop		= 2 * BUFFER_NUM_WORDS + 0;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_out3_start		= 1 * BUFFER_NUM_WORDS + 4;
-   wire [PROC_CNT_BITS-1:0] 									cnt_dec_addr_out3_stop		= 2 * BUFFER_NUM_WORDS + 3;
-
-   assign inc_addr_buf_in = (proc_cnt >= cnt_inc_addr_buf_in_start) && (proc_cnt <= cnt_inc_addr_buf_in_stop);
-   assign dec_addr_buf_in = (proc_cnt >= cnt_dec_addr_buf_in_start) && (proc_cnt <= cnt_dec_addr_buf_in_stop);
-   assign inc_addr_op_in  = (proc_cnt >= cnt_inc_addr_op_in_start)  && (proc_cnt <= cnt_inc_addr_op_in_stop);
-   assign inc_addr_out1   = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop);
-   assign dec_addr_out1   = (proc_cnt >= cnt_dec_addr_out1_start) && (proc_cnt <= cnt_dec_addr_out1_stop);
-   assign dec_addr_out2   = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop);
-   assign dec_addr_out3   = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop);
-
-   always @(posedge clk) begin
-      // Address registers have no reset term; they are reloaded on every clock where rdy is high.
-      if (rdy) begin
-	 // idle: s/q/r pointers at zero, u/v pointers at the last buffer word
-	 addr_in_buf		<= addr_in_buf_zero;
-	 addr_in_op		<= addr_in_op_zero;
-	 addr_out1		<= addr_out1_zero;
-	 addr_out2		<= addr_out2_last;
-	 addr_out3		<= addr_out3_last;
-	 //
-      end else begin
-	 // s pointer: counts up, then back down; holds its value outside both windows
-	 if (inc_addr_buf_in)			addr_in_buf	<= addr_in_buf_next;
-	 else if (dec_addr_buf_in)	addr_in_buf	<= addr_in_buf_prev;
-	 // q pointer: counts up inside its window, is forced to zero outside it
-	 if (inc_addr_op_in)			addr_in_op	<= addr_in_op_next;
-	 else								addr_in_op	<= addr_in_op_zero;
-	 // r pointer: counts up, then back down; holds its value outside both windows
-	 if (inc_addr_out1)			addr_out1	<= addr_out1_next;
-	 else if (dec_addr_out1)		addr_out1	<= addr_out1_prev;
-	 // u pointer: counts down inside its window, is forced to the last word outside it
-	 if (dec_addr_out2)			addr_out2	<= addr_out2_prev;
-	 else								addr_out2	<= addr_out2_last;
-	 // v pointer: counts down inside its window, is forced to the last word outside it
-	 if (dec_addr_out3)			addr_out3	<= addr_out3_prev;
-	 else								addr_out3	<= addr_out3_last;
-	 //
-      end
-      //
-   end
-
-
-   //
-   // Write Enable Logic: out1 -> r, out2 -> u, out3 -> v; each strobe is a pure function of
-   // proc_cnt being inside the inclusive [start, stop] window declared below.
-   wire	wren_out1;
-   wire	wren_out2;
-   wire	wren_out3;
-
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start	= 0 * BUFFER_NUM_WORDS + 3;
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop	= 1 * BUFFER_NUM_WORDS + 2;
-
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start	= 1 * BUFFER_NUM_WORDS + 1;
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop	= 2 * BUFFER_NUM_WORDS + 0;
-
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start	= 1 * BUFFER_NUM_WORDS + 4;
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop	= 2 * BUFFER_NUM_WORDS + 3;
-
-   assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop);
-   assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
-   assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop);
-
-   assign r_wren = wren_out1;
-   assign u_wren = wren_out2;
-   assign v_wren = wren_out3;
-
-   //
-   // Adder (s + q): the 32-bit sum of s_din and the masked q_din drives r_dout.
-   // NOTE(review): the instance is named add32_r_plus_s although it is wired to s and q.
-   wire [31: 0] 	    q_din_masked;
-   wire [31: 0] 	    add32_s_plus_q_sum_out;
-   wire 		    add32_s_plus_q_carry_in;
-   wire 		    add32_s_plus_q_carry_out;
-
-   adder32_wrapper add32_r_plus_s
-     (
-      .clk		(clk),
-      .a			(s_din),
-      .b			(q_din_masked),
-      .s			(add32_s_plus_q_sum_out),
-      .c_in		(add32_s_plus_q_carry_in),
-      .c_out	(add32_s_plus_q_carry_out)
-      );
-
-
-   //
-   // Carry Masking Logic: mask_carry is low only for cnt_wren_out1_start <= proc_cnt <
-   // cnt_wren_out1_stop; outside that range the adder carry input is forced to zero.
-   wire 		    mask_carry;
-
-   assign mask_carry = ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? 1'b0 : 1'b1;
-
-
-   //
-   // Addend Masking Logic: one clock after addr_in_buf equals addr_in_buf_last, zero is added
-   // instead of q_din. NOTE(review): presumably because q is one word shorter than s - confirm.
-   reg 			    q_din_mask;
-
-   always @(posedge clk)
-     q_din_mask <= (addr_in_buf == addr_in_buf_last) ? 1'b1 : 1'b0;
-
-   assign q_din_masked = q_din_mask ? {32{1'b0}} : q_din;
-
-   assign add32_s_plus_q_carry_in = add32_s_plus_q_carry_out & ~mask_carry;
-
-
-   //
-   // Carry Bits: bit 0 of the word seen on the previous clock, kept so it can become
-   // bit 31 of the next word written while the u/v pointers count down.
-   reg 			    s_half_carry;
-   reg 			    s_plus_q_half_carry;
-
-   always @(posedge clk) begin
-      // captured for start <= proc_cnt < stop of the u write window, cleared otherwise
-      s_half_carry				<= ((proc_cnt >= cnt_wren_out2_start) && (proc_cnt < cnt_wren_out2_stop)) ?
-						   s_din[0] : 1'b0;
-      // captured for start <= proc_cnt < stop of the v write window, cleared otherwise
-      s_plus_q_half_carry		<= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
-					   r_din[0] : 1'b0;
-      //
-   end
-
-   //
-   // Data Mapper: r receives the adder sum; u and v receive s_din and r_din shifted right
-   // by one bit, with the saved carry bit inserted as the new bit 31.
-   assign r_dout = add32_s_plus_q_sum_out;
-   assign u_dout = {s_half_carry,        s_din[31:1]};
-   assign v_dout = {s_plus_q_half_carry, r_din[31:1]};
-
-
-   //
-   // Primary Counter Logic: asynchronous active-low reset to zero; once non-zero the
-   // counter advances every clock, and from zero it advances only when ena is high.
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
-     else begin
-	if (!rdy)		proc_cnt <= proc_cnt_next;
-	else if (ena)	proc_cnt <= proc_cnt_next;
-     end
-
-
-   //
-   // Output Flags: both are registered and keep their value until the next run
-   //
-   reg	s_is_odd_reg;
-   reg	k_is_nul_reg;
-
-   assign s_is_odd = s_is_odd_reg;
-   assign k_is_nul = k_is_nul_reg;
-
-   always @(posedge clk)
-     // sampled once per run, when proc_cnt equals cnt_calc_flags
-     if (proc_cnt == cnt_calc_flags) begin
-	s_is_odd_reg <= s_din[0];
-	k_is_nul_reg <= (k == {K_NUM_BITS{1'b0}}) ? 1'b1 : 1'b0;
-     end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v
deleted file mode 100644
index b6c63b2..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v
+++ /dev/null
@@ -1,153 +0,0 @@
-`timescale 1ns / 1ps
-// Reduction update helper: copies v (when s_is_odd) or u (otherwise) into s word by word; no write happens when k_is_nul is set.
-module modinv_helper_reduce_update
-  (
-   clk, rst_n,
-   ena, rdy,
-
-   s_is_odd, k_is_nul,
-
-   s_addr, s_wren, s_dout,
-   u_addr,                 u_din,
-   v_addr,                 v_din
-   );
-
-
-   //
-   // Parameters: number of 32-bit words in the s/u/v buffers and their address width
-   //
-   parameter BUFFER_NUM_WORDS		= 9;
-   parameter BUFFER_ADDR_BITS		= 4;
-
-
-   //
-   // clog2 (function is textually included so it can size proc_cnt below)
-   //
-`include "../modinv_clog2.v"
-
-
-   //
-   // Constants: number of proc_cnt states per run and the counter width needed for them
-   //
-   localparam PROC_NUM_CYCLES	= BUFFER_NUM_WORDS + 3;
-   localparam PROC_CNT_BITS	= clog2(PROC_NUM_CYCLES);
-
-
-   //
-   // Ports: s_* are driven by this module; u_din, v_din and the two flags are sampled.
-   // u_addr and v_addr always carry the same value (both follow addr_in).
-   input		wire									clk;
-   input		wire									rst_n;
-   input		wire									ena;
-   output	wire 										rdy;
-
-   input		wire									s_is_odd;
-   input		wire									k_is_nul;
-
-   output	wire [BUFFER_ADDR_BITS-1:0] 							s_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							u_addr;
-   output	wire [BUFFER_ADDR_BITS-1:0] 							v_addr;
-
-   output	wire 										s_wren;
-
-   output	wire [              32-1:0] 							s_dout;
-
-   input		wire [              32-1:0] 						u_din;
-   input		wire [              32-1:0] 						v_din;
-
-
-   //
-   // Counter: proc_cnt_next increments until proc_cnt_max, then wraps to zero
-   //
-   reg [PROC_CNT_BITS-1:0] 									proc_cnt;
-
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_max	= PROC_NUM_CYCLES - 1;
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_zero	= {PROC_CNT_BITS{1'b0}};
-   wire [PROC_CNT_BITS-1:0] 									proc_cnt_next	= (proc_cnt < proc_cnt_max) ?
-												proc_cnt + 1'b1 : proc_cnt_zero;
-
-   //
-   // Addresses: addr_in is the shared u/v pointer, addr_out is the s pointer;
-   // both *_next values wrap to zero after reaching *_max.
-   reg [BUFFER_ADDR_BITS-1:0] 									addr_in;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_max		= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_in_next	= (addr_in < addr_in_max) ?
-												addr_in + 1'b1 : addr_in_zero;
-
-   reg [BUFFER_ADDR_BITS-1:0] 									addr_out;
-
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out_max	= BUFFER_NUM_WORDS - 1;
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out_zero	= {BUFFER_ADDR_BITS{1'b0}};
-   wire [BUFFER_ADDR_BITS-1:0] 									addr_out_next	= (addr_out < addr_out_max) ?
-												addr_out + 1'b1 : addr_out_zero;
-
-   assign s_addr					= addr_out;
-   assign u_addr					= addr_in;
-   assign v_addr					= addr_in;
-
-
-   //
-   // Ready Flag: high only while proc_cnt is zero, i.e. no run is in progress
-   //
-   assign rdy = (proc_cnt == proc_cnt_zero);
-
-
-   //
-   // Address Increment Logic: the addr_out window is the addr_in window delayed by one count.
-   // NOTE(review): presumably this matches one clock of read latency on u/v - confirm.
-   wire 											inc_addr_in;
-   wire 											inc_addr_out;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_in_start	= 1;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_in_stop		= BUFFER_NUM_WORDS;
-
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_out_start	= 2;
-   wire [PROC_CNT_BITS-1:0] 									cnt_inc_addr_out_stop	= BUFFER_NUM_WORDS + 1;
-
-   assign inc_addr_in  = (proc_cnt >= cnt_inc_addr_in_start)  && (proc_cnt <= cnt_inc_addr_in_stop);
-   assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop);
-
-   always @(posedge clk) begin
-      // u/v pointer: counts up inside its window, is forced to zero outside it
-      if (inc_addr_in)	addr_in <= addr_in_next;
-      else					addr_in <= addr_in_zero;
-      // s pointer: counts up inside its window, is forced to zero outside it
-      if (inc_addr_out)	addr_out <= addr_out_next;
-      else					addr_out <= addr_out_zero;
-      //
-   end
-
-   //
-   // Write Enable Logic: the strobe window equals the addr_out increment window
-   //
-   wire	wren_out;
-
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out_start	= 2;
-   wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop		= BUFFER_NUM_WORDS + 1;
-
-   assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop);
-
-   assign s_wren = wren_out && !k_is_nul; // s is left untouched for the whole run when k_is_nul is set
-
-
-   //
-   // Data Logic: s_is_odd selects v_din as the word written to s, otherwise u_din is written
-   //
-   assign s_dout = s_is_odd ? v_din : u_din;
-
-
-   //
-   // Primary Counter Logic: asynchronous active-low reset to zero; once non-zero the
-   // counter advances every clock, and from zero it advances only when ena is high.
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
-     else begin
-	if (!rdy)		proc_cnt <= proc_cnt_next;
-	else if (ena)	proc_cnt <= proc_cnt_next;
-     end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/modinv_clog2.v b/rtl/modular/modular_invertor/modinv_clog2.v
deleted file mode 100644
index 04a7739..0000000
--- a/rtl/modular/modular_invertor/modinv_clog2.v
+++ /dev/null
@@ -1,10 +0,0 @@
-function	integer clog2;	// ceil(log2(value)); yields 0 for value <= 1
-   input	integer value;
-   integer 		bits, rest;	// bits: result so far; rest: (value - 1) still to be shifted out
-   begin
-      bits = 0;
-      for (rest = value - 1; rest > 0; rest = rest >> 1)
-	bits = bits + 1;
-      clog2 = bits;
-   end
-endfunction
diff --git a/rtl/modular/modular_invertor/modular_invertor.v b/rtl/modular/modular_invertor/modular_invertor.v
deleted file mode 100644
index 9fafa2d..0000000
--- a/rtl/modular/modular_invertor/modular_invertor.v
+++ /dev/null
@@ -1,981 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// modular_invertor.v
-// -----------------------------------------------------------------------------
-// Modular invertor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_invertor
-  (
-   clk, rst_n,
-   ena, rdy,
-   a_addr, q_addr, a1_addr, a1_wren,
-   a_din, q_din, a1_dout
-   );
-
-
-   //
-   // Parameters
-   //
-   parameter MAX_OPERAND_WIDTH = 256;
-
-
-   //
-   // clog2
-   //
-`include "modinv_clog2.v"
-
-
-   //
-   // More Parameters
-   //
-   localparam OPERAND_NUM_WORDS	= MAX_OPERAND_WIDTH / 32;
-   localparam OPERAND_ADDR_BITS	= clog2(OPERAND_NUM_WORDS);
-
-   localparam BUFFER_NUM_WORDS	= OPERAND_NUM_WORDS + 1;
-   localparam BUFFER_ADDR_BITS	= clog2(BUFFER_NUM_WORDS);
-
-   localparam LOOP_NUM_ROUNDS		= 2 * MAX_OPERAND_WIDTH;
-   localparam ROUND_COUNTER_BITS	= clog2(LOOP_NUM_ROUNDS);
-
-   localparam K_NUM_BITS			= clog2(LOOP_NUM_ROUNDS + 1);
-
-
-   //
-   // Ports
-   //
-   input		wire									clk;
-   input		wire									rst_n;
-
-   input		wire									ena;
-   output	wire 										rdy;
-
-   output	wire [OPERAND_ADDR_BITS-1:0] 							a_addr;
-   output	reg [OPERAND_ADDR_BITS-1:0] 							q_addr;
-   output	wire [OPERAND_ADDR_BITS-1:0] 							a1_addr;
-   output	wire 										a1_wren;
-
-   input		wire [32-1:0] 								a_din;
-   input		wire [32-1:0] 								q_din;
-   output	wire [32-1:0] 									a1_dout;
-
-
-   //
-   // "Redundant" Power of 2 (K)
-   //
-   reg [K_NUM_BITS-1:0] 									k;
-
-
-   //
-   // Buffers
-   //
-   reg [BUFFER_ADDR_BITS-1:0] 									buf_r_wr_addr;
-   reg [BUFFER_ADDR_BITS-1:0] 									buf_r_rd_addr;
-   reg 												buf_r_wr_en;
-   reg [              32-1:0] 									buf_r_wr_din;
-   wire [              32-1:0] 									buf_r_wr_dout;
-   wire [              32-1:0] 									buf_r_rd_dout;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_r
-     (	.clk(clk),
-	.a_addr(buf_r_wr_addr), .a_out(buf_r_wr_dout), .a_wr(buf_r_wr_en), .a_in(buf_r_wr_din),
-	.b_addr(buf_r_rd_addr), .b_out(buf_r_rd_dout)
-	);
-
-   reg [BUFFER_ADDR_BITS-1:0] 									buf_s_wr_addr;
-   reg [BUFFER_ADDR_BITS-1:0] 									buf_s_rd_addr;
-   reg 												buf_s_wr_en;
-   reg [              32-1:0] 									buf_s_wr_din;
-   wire [              32-1:0] 									buf_s_rd_dout;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_s
-     (	.clk(clk),
-	.a_addr(buf_s_wr_addr), .a_out(),              .a_wr(buf_s_wr_en), .a_in(buf_s_wr_din),
-	.b_addr(buf_s_rd_addr), .b_out(buf_s_rd_dout)
-	);
-
-   reg [BUFFER_ADDR_BITS-1:0] 									buf_u_wr_addr;
-   reg [BUFFER_ADDR_BITS-1:0] 									buf_u_rd_addr;
-   reg 												buf_u_wr_en;
-   reg [              32-1:0] 									buf_u_wr_din;
-   wire [              32-1:0] 									buf_u_rd_dout;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_u
-     (	.clk(clk),
-	.a_addr(buf_u_wr_addr), .a_out(),              .a_wr(buf_u_wr_en), .a_in(buf_u_wr_din),
-	.b_addr(buf_u_rd_addr), .b_out(buf_u_rd_dout)
-	);
-
-   reg [BUFFER_ADDR_BITS-1:0] 									buf_v_wr_addr;
-   reg [BUFFER_ADDR_BITS-1:0] 									buf_v_rd_addr;
-   reg 												buf_v_wr_en;
-   reg [              32-1:0] 									buf_v_wr_din;
-   wire [              32-1:0] 									buf_v_rd_dout;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_v
-     (	.clk(clk),
-	.a_addr(buf_v_wr_addr), .a_out(),              .a_wr(buf_v_wr_en), .a_in(buf_v_wr_din),
-	.b_addr(buf_v_rd_addr), .b_out(buf_v_rd_dout)
-	);
-
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_r_dbl_wr_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_r_dbl_rd_addr;
-   wire 											buf_r_dbl_wr_en;
-   wire [              32-1:0] 									buf_r_dbl_wr_din;
-   wire [              32-1:0] 									buf_r_dbl_rd_dout;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_r_dbl
-     (	.clk(clk),
-	.a_addr(buf_r_dbl_wr_addr), .a_out(),                  .a_wr(buf_r_dbl_wr_en), .a_in(buf_r_dbl_wr_din),
-	.b_addr(buf_r_dbl_rd_addr), .b_out(buf_r_dbl_rd_dout)
-	);
-
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_s_dbl_wr_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_s_dbl_rd_addr;
-   wire 											buf_s_dbl_wr_en;
-   wire [              32-1:0] 									buf_s_dbl_wr_din;
-   wire [              32-1:0] 									buf_s_dbl_rd_dout;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_s_dbl
-     (	.clk(clk),
-	.a_addr(buf_s_dbl_wr_addr), .a_out(),                  .a_wr(buf_s_dbl_wr_en), .a_in(buf_s_dbl_wr_din),
-	.b_addr(buf_s_dbl_rd_addr), .b_out(buf_s_dbl_rd_dout)
-	);
-
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_r_plus_s_wr_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_r_plus_s_rd_addr;
-   wire 											buf_r_plus_s_wr_en;
-   wire [              32-1:0] 									buf_r_plus_s_wr_din;
-   wire [              32-1:0] 									buf_r_plus_s_rd_dout;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_r_plus_s
-     (	.clk(clk),
-	.a_addr(buf_r_plus_s_wr_addr), .a_out(),                     .a_wr(buf_r_plus_s_wr_en), .a_in(buf_r_plus_s_wr_din),
-	.b_addr(buf_r_plus_s_rd_addr), .b_out(buf_r_plus_s_rd_dout)
-	);
-
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_u_minus_v_wr_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_u_minus_v_rd_addr;
-   wire 											buf_u_minus_v_wr_en;
-   wire [              32-1:0] 									buf_u_minus_v_wr_din;
-   wire [              32-1:0] 									buf_u_minus_v_wr_dout;
-
-   assign buf_u_minus_v_rd_addr = ~buf_u_minus_v_wr_addr;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_u_minus_v
-     (	.clk(clk),
-	.a_addr(buf_u_minus_v_wr_addr), .a_out(buf_u_minus_v_wr_dout), .a_wr(buf_u_minus_v_wr_en), .a_in(buf_u_minus_v_wr_din),
-	.b_addr(buf_u_minus_v_rd_addr), .b_out()
-	);
-
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_v_minus_u_wr_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_v_minus_u_rd_addr;
-   wire 											buf_v_minus_u_wr_en;
-   wire [              32-1:0] 									buf_v_minus_u_wr_din;
-   wire [              32-1:0] 									buf_v_minus_u_wr_dout;
-
-   assign buf_v_minus_u_rd_addr = ~buf_v_minus_u_wr_addr;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_v_minus_u
-     (	.clk(clk),
-	.a_addr(buf_v_minus_u_wr_addr), .a_out(buf_v_minus_u_wr_dout), .a_wr(buf_v_minus_u_wr_en), .a_in(buf_v_minus_u_wr_din),
-	.b_addr(buf_v_minus_u_rd_addr), .b_out()
-	);
-
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_u_half_wr_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_u_half_rd_addr;
-   wire 											buf_u_half_wr_en;
-   wire [              32-1:0] 									buf_u_half_wr_din;
-   wire [              32-1:0] 									buf_u_half_rd_dout;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_u_half
-     (	.clk(clk),
-	.a_addr(buf_u_half_wr_addr), .a_out(),                   .a_wr(buf_u_half_wr_en), .a_in(buf_u_half_wr_din),
-	.b_addr(buf_u_half_rd_addr), .b_out(buf_u_half_rd_dout)
-	);
-
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_v_half_wr_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_v_half_rd_addr;
-   wire 											buf_v_half_wr_en;
-   wire [              32-1:0] 									buf_v_half_wr_din;
-   wire [              32-1:0] 									buf_v_half_rd_dout;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_v_half
-     (	.clk(clk),
-	.a_addr(buf_v_half_wr_addr), .a_out(),                   .a_wr(buf_v_half_wr_en), .a_in(buf_v_half_wr_din),
-	.b_addr(buf_v_half_rd_addr), .b_out(buf_v_half_rd_dout)
-	);
-
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_u_minus_v_half_wr_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_u_minus_v_half_rd_addr;
-   wire 											buf_u_minus_v_half_wr_en;
-   wire [              32-1:0] 									buf_u_minus_v_half_wr_din;
-   wire [              32-1:0] 									buf_u_minus_v_half_rd_dout;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_u_minus_v_half
-     (	.clk(clk),
-	.a_addr(buf_u_minus_v_half_wr_addr), .a_out(),                           .a_wr(buf_u_minus_v_half_wr_en), .a_in(buf_u_minus_v_half_wr_din),
-	.b_addr(buf_u_minus_v_half_rd_addr), .b_out(buf_u_minus_v_half_rd_dout)
-	);
-
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_v_minus_u_half_wr_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									buf_v_minus_u_half_rd_addr;
-   wire 											buf_v_minus_u_half_wr_en;
-   wire [              32-1:0] 									buf_v_minus_u_half_wr_din;
-   wire [              32-1:0] 									buf_v_minus_u_half_rd_dout;
-
-   bram_1rw_1ro_readfirst #
-     (	.MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
-	)
-   buf_v_minus_u_half
-     (	.clk(clk),
-	.a_addr(buf_v_minus_u_half_wr_addr), .a_out(),                           .a_wr(buf_v_minus_u_half_wr_en), .a_in(buf_v_minus_u_half_wr_din),
-	.b_addr(buf_v_minus_u_half_rd_addr), .b_out(buf_v_minus_u_half_rd_dout)
-	);
-
-
-   //
-   // Helper Modules
-   //
-   wire 											helper_init_ena;
-   wire 											helper_invert_precalc_ena;
-   wire 											helper_invert_compare_ena;
-   wire 											helper_invert_update_ena;
-   wire 											helper_reduce_precalc_ena;
-   wire 											helper_reduce_update_ena;
-   wire 											helper_copy_ena;
-
-   wire 											helper_init_rdy;
-   wire 											helper_invert_precalc_rdy;
-   wire 											helper_invert_compare_rdy;
-   wire 											helper_invert_update_rdy;
-   wire 											helper_reduce_precalc_rdy;
-   wire 											helper_reduce_update_rdy;
-   wire 											helper_copy_rdy;
-
-   wire 											helper_init_done				= helper_init_rdy           && !helper_init_ena;
-   wire 											helper_invert_precalc_done	= helper_invert_precalc_rdy && !helper_invert_precalc_ena;
-   wire 											helper_invert_compare_done	= helper_invert_compare_rdy && !helper_invert_compare_ena;
-   wire 											helper_invert_update_done	= helper_invert_update_rdy  && !helper_invert_update_ena;
-   wire 											helper_reduce_precalc_done	= helper_reduce_precalc_rdy && !helper_reduce_precalc_ena;
-   wire 											helper_reduce_update_done	= helper_reduce_update_rdy  && !helper_reduce_update_ena;
-   wire 											helper_copy_done				= helper_copy_rdy           && !helper_copy_ena;
-
-
-   //
-   // Helper Module - Initialization
-   //
-   wire [ BUFFER_ADDR_BITS-1:0] 								helper_init_r_addr;
-   wire [ BUFFER_ADDR_BITS-1:0] 								helper_init_s_addr;
-   wire [ BUFFER_ADDR_BITS-1:0] 								helper_init_u_addr;
-   wire [ BUFFER_ADDR_BITS-1:0] 								helper_init_v_addr;
-   wire [OPERAND_ADDR_BITS-1:0] 								helper_init_q_addr;
-
-   wire 											helper_init_r_wren;
-   wire 											helper_init_s_wren;
-   wire 											helper_init_u_wren;
-   wire 											helper_init_v_wren;
-
-   wire [              32-1:0] 									helper_init_r_data;
-   wire [              32-1:0] 									helper_init_s_data;
-   wire [              32-1:0] 									helper_init_u_data;
-   wire [              32-1:0] 									helper_init_v_data;
-
-   modinv_helper_init #
-     (
-      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS),
-      .OPERAND_ADDR_BITS	(OPERAND_ADDR_BITS),
-
-      .BUFFER_NUM_WORDS		(BUFFER_NUM_WORDS),
-      .BUFFER_ADDR_BITS		(BUFFER_ADDR_BITS)
-      )
-   helper_init
-     (
-      .clk 		(clk),
-      .rst_n	(rst_n),
-
-      .ena 		(helper_init_ena),
-      .rdy 		(helper_init_rdy),
-
-      .a_addr	(a_addr),
-      .q_addr	(helper_init_q_addr),
-
-      .r_addr	(helper_init_r_addr),
-      .s_addr	(helper_init_s_addr),
-      .u_addr	(helper_init_u_addr),
-      .v_addr	(helper_init_v_addr),
-
-      .q_din	(q_din),
-      .a_din	(a_din),
-
-      .r_dout	(helper_init_r_data),
-      .s_dout	(helper_init_s_data),
-      .u_dout	(helper_init_u_data),
-      .v_dout	(helper_init_v_data),
-
-      .r_wren	(helper_init_r_wren),
-      .s_wren	(helper_init_s_wren),
-      .u_wren	(helper_init_u_wren),
-      .v_wren	(helper_init_v_wren)
-      );
-
-
-   //
-   // Helper Module - Inversion Pre-Calculation
-   //
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_invert_precalc_r_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_invert_precalc_s_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_invert_precalc_u_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_invert_precalc_v_addr;
-
-   modinv_helper_invert_precalc #
-     (
-      .BUFFER_NUM_WORDS		(BUFFER_NUM_WORDS),
-      .BUFFER_ADDR_BITS		(BUFFER_ADDR_BITS)
-      )
-   helper_invert_precalc
-     (
-      .clk 							(clk),
-      .rst_n						(rst_n),
-
-      .ena 							(helper_invert_precalc_ena),
-      .rdy 							(helper_invert_precalc_rdy),
-
-      .r_addr						(helper_invert_precalc_r_addr),
-      .s_addr						(helper_invert_precalc_s_addr),
-      .u_addr						(helper_invert_precalc_u_addr),
-      .v_addr						(helper_invert_precalc_v_addr),
-
-      .r_din						(buf_r_rd_dout),
-      .s_din						(buf_s_rd_dout),
-      .u_din						(buf_u_rd_dout),
-      .v_din						(buf_v_rd_dout),
-
-      .r_dbl_addr					(buf_r_dbl_wr_addr),
-      .s_dbl_addr					(buf_s_dbl_wr_addr),
-      .r_plus_s_addr				(buf_r_plus_s_wr_addr),
-
-      .u_half_addr				(buf_u_half_wr_addr),
-      .v_half_addr				(buf_v_half_wr_addr),
-      .u_minus_v_addr			(buf_u_minus_v_wr_addr),
-      .v_minus_u_addr			(buf_v_minus_u_wr_addr),
-      .u_minus_v_half_addr		(buf_u_minus_v_half_wr_addr),
-      .v_minus_u_half_addr		(buf_v_minus_u_half_wr_addr),
-
-      .r_dbl_dout					(buf_r_dbl_wr_din),
-      .s_dbl_dout					(buf_s_dbl_wr_din),
-      .r_plus_s_dout				(buf_r_plus_s_wr_din),
-
-      .u_half_dout				(buf_u_half_wr_din),
-      .v_half_dout				(buf_v_half_wr_din),
-      .u_minus_v_dout			(buf_u_minus_v_wr_din),
-      .v_minus_u_dout			(buf_v_minus_u_wr_din),
-      .u_minus_v_half_dout		(buf_u_minus_v_half_wr_din),
-      .v_minus_u_half_dout		(buf_v_minus_u_half_wr_din),
-
-      .r_dbl_wren					(buf_r_dbl_wr_en),
-      .s_dbl_wren					(buf_s_dbl_wr_en),
-      .r_plus_s_wren				(buf_r_plus_s_wr_en),
-
-      .u_half_wren				(buf_u_half_wr_en),
-      .v_half_wren				(buf_v_half_wr_en),
-      .u_minus_v_wren			(buf_u_minus_v_wr_en),
-      .v_minus_u_wren			(buf_v_minus_u_wr_en),
-      .u_minus_v_half_wren		(buf_u_minus_v_half_wr_en),
-      .v_minus_u_half_wren		(buf_v_minus_u_half_wr_en),
-
-      .u_minus_v_din				(buf_u_minus_v_wr_dout),
-      .v_minus_u_din				(buf_v_minus_u_wr_dout)
-      );
-
-
-   //
-   // Helper Module - Inversion Comparison
-   //
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_invert_compare_u_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_invert_compare_v_addr;
-
-   wire 											flag_invert_u_gt_v;
-   wire 											flag_invert_v_eq_1;
-   wire 											flag_invert_u_is_even;
-   wire 											flag_invert_v_is_even;
-
-   modinv_helper_invert_compare #
-     (
-      .BUFFER_NUM_WORDS		(BUFFER_NUM_WORDS),
-      .BUFFER_ADDR_BITS		(BUFFER_ADDR_BITS)
-      )
-   helper_invert_compare
-     (
-      .clk 			(clk),
-      .rst_n		(rst_n),
-
-      .ena 			(helper_invert_compare_ena),
-      .rdy 			(helper_invert_compare_rdy),
-
-      .u_addr		(helper_invert_compare_u_addr),
-      .v_addr		(helper_invert_compare_v_addr),
-
-      .u_din		(buf_u_rd_dout),
-      .v_din		(buf_v_rd_dout),
-
-      .u_gt_v		(flag_invert_u_gt_v),
-      .v_eq_1		(flag_invert_v_eq_1),
-      .u_is_even	(flag_invert_u_is_even),
-      .v_is_even	(flag_invert_v_is_even)
-      );
-
-
-   //
-   // Helper Module - Inversion Update
-   //
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_invert_update_r_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_invert_update_s_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_invert_update_u_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_invert_update_v_addr;
-
-   wire 											helper_invert_update_r_wren;
-   wire 											helper_invert_update_s_wren;
-   wire 											helper_invert_update_u_wren;
-   wire 											helper_invert_update_v_wren;
-
-   wire [              32-1:0] 									helper_invert_update_r_data;
-   wire [              32-1:0] 									helper_invert_update_s_data;
-   wire [              32-1:0] 									helper_invert_update_u_data;
-   wire [              32-1:0] 									helper_invert_update_v_data;
-
-   modinv_helper_invert_update #
-     (
-      .BUFFER_NUM_WORDS		(BUFFER_NUM_WORDS),
-      .BUFFER_ADDR_BITS		(BUFFER_ADDR_BITS)
-      )
-   helper_invert_update
-     (
-      .clk 							(clk),
-      .rst_n						(rst_n),
-
-      .ena 							(helper_invert_update_ena),
-      .rdy 							(helper_invert_update_rdy),
-
-      .u_gt_v						(flag_invert_u_gt_v),
-      .v_eq_1						(flag_invert_v_eq_1),
-      .u_is_even					(flag_invert_u_is_even),
-      .v_is_even					(flag_invert_v_is_even),
-
-      .r_addr						(helper_invert_update_r_addr),
-      .s_addr						(helper_invert_update_s_addr),
-      .u_addr						(helper_invert_update_u_addr),
-      .v_addr						(helper_invert_update_v_addr),
-
-      .r_wren						(helper_invert_update_r_wren),
-      .s_wren						(helper_invert_update_s_wren),
-      .u_wren						(helper_invert_update_u_wren),
-      .v_wren						(helper_invert_update_v_wren),
-
-      .r_dout						(helper_invert_update_r_data),
-      .s_dout						(helper_invert_update_s_data),
-      .u_dout						(helper_invert_update_u_data),
-      .v_dout						(helper_invert_update_v_data),
-
-      .r_dbl_addr					(buf_r_dbl_rd_addr),
-      .s_dbl_addr					(buf_s_dbl_rd_addr),
-      .r_plus_s_addr				(buf_r_plus_s_rd_addr),
-      .u_half_addr				(buf_u_half_rd_addr),
-      .v_half_addr				(buf_v_half_rd_addr),
-      .u_minus_v_half_addr		(buf_u_minus_v_half_rd_addr),
-      .v_minus_u_half_addr		(buf_v_minus_u_half_rd_addr),
-
-      .r_dbl_din					(buf_r_dbl_rd_dout),
-      .s_dbl_din					(buf_s_dbl_rd_dout),
-      .r_plus_s_din				(buf_r_plus_s_rd_dout),
-      .u_half_din					(buf_u_half_rd_dout),
-      .v_half_din					(buf_v_half_rd_dout),
-      .u_minus_v_half_din		(buf_u_minus_v_half_rd_dout),
-      .v_minus_u_half_din		(buf_v_minus_u_half_rd_dout)
-      );
-
-
-   //
-   // Helper Module - Reduction Pre-Calculation
-   //
-   wire [ BUFFER_ADDR_BITS-1:0] 								helper_reduce_precalc_r_addr;
-   wire [ BUFFER_ADDR_BITS-1:0] 								helper_reduce_precalc_s_addr;
-   wire [ BUFFER_ADDR_BITS-1:0] 								helper_reduce_precalc_u_addr;
-   wire [ BUFFER_ADDR_BITS-1:0] 								helper_reduce_precalc_v_addr;
-   wire [OPERAND_ADDR_BITS-1:0] 								helper_reduce_precalc_q_addr;
-
-   wire 											helper_reduce_precalc_r_wren;
-   wire 											helper_reduce_precalc_u_wren;
-   wire 											helper_reduce_precalc_v_wren;
-
-   wire [              32-1:0] 									helper_reduce_precalc_r_data;
-   wire [              32-1:0] 									helper_reduce_precalc_u_data;
-   wire [              32-1:0] 									helper_reduce_precalc_v_data;
-
-   wire 											flag_reduce_s_is_odd;	// s_is_odd flag shared by helper_reduce_precalc and helper_reduce_update
-   wire 											flag_reduce_k_is_nul;	// k_is_nul flag shared by the same two helpers (declaration previously named flag_invert_k_is_nul, which left this net implicit)
-
-   modinv_helper_reduce_precalc #
-     (
-      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS),
-      .OPERAND_ADDR_BITS	(OPERAND_ADDR_BITS),
-      .BUFFER_NUM_WORDS		(BUFFER_NUM_WORDS),
-      .BUFFER_ADDR_BITS		(BUFFER_ADDR_BITS),
-      .K_NUM_BITS				(K_NUM_BITS)
-      )
-   helper_reduce_precalc
-     (
-      .clk 			(clk),
-      .rst_n		(rst_n),
-
-      .ena 			(helper_reduce_precalc_ena),
-      .rdy 			(helper_reduce_precalc_rdy),
-
-      .r_addr		(helper_reduce_precalc_r_addr),
-      .s_addr		(helper_reduce_precalc_s_addr),
-      .u_addr		(helper_reduce_precalc_u_addr),
-      .v_addr		(helper_reduce_precalc_v_addr),
-      .q_addr		(helper_reduce_precalc_q_addr),
-
-      .k				(k),
-
-      .s_is_odd	(flag_reduce_s_is_odd),
-      .k_is_nul	(flag_reduce_k_is_nul),
-
-      .r_din		(buf_r_wr_dout),
-      .s_din		(buf_s_rd_dout),
-      .q_din		(q_din),
-
-      .r_wren		(helper_reduce_precalc_r_wren),
-      .u_wren		(helper_reduce_precalc_u_wren),
-      .v_wren		(helper_reduce_precalc_v_wren),
-
-      .r_dout		(helper_reduce_precalc_r_data),
-      .u_dout		(helper_reduce_precalc_u_data),
-      .v_dout		(helper_reduce_precalc_v_data)
-      );
-
-   //
-   // Helper Module - Reduction Update
-   //
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_reduce_update_s_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_reduce_update_u_addr;
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_reduce_update_v_addr;
-
-   wire 											helper_reduce_update_s_wren;
-
-   wire [              32-1:0] 									helper_reduce_update_s_data;
-
-   modinv_helper_reduce_update #
-     (
-      .BUFFER_NUM_WORDS		(BUFFER_NUM_WORDS),
-      .BUFFER_ADDR_BITS		(BUFFER_ADDR_BITS)
-      )
-   helper_reduce_update
-     (
-      .clk 							(clk),
-      .rst_n						(rst_n),
-
-      .ena 							(helper_reduce_update_ena),
-      .rdy 							(helper_reduce_update_rdy),
-
-      .s_is_odd					(flag_reduce_s_is_odd),
-      .k_is_nul					(flag_reduce_k_is_nul),
-
-      .s_addr						(helper_reduce_update_s_addr),
-      .u_addr						(helper_reduce_update_u_addr),
-      .v_addr						(helper_reduce_update_v_addr),
-
-      .s_wren						(helper_reduce_update_s_wren),
-
-      .s_dout						(helper_reduce_update_s_data),
-
-      .u_din						(buf_u_rd_dout),
-      .v_din						(buf_v_rd_dout)
-      );
-
-
-   //
-   // Helper Module - Copying
-   //
-   wire [BUFFER_ADDR_BITS-1:0] 									helper_copy_s_addr;
-
-   modinv_helper_copy #
-     (
-      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS),
-      .OPERAND_ADDR_BITS	(OPERAND_ADDR_BITS),
-
-      .BUFFER_NUM_WORDS		(BUFFER_NUM_WORDS),
-      .BUFFER_ADDR_BITS		(BUFFER_ADDR_BITS)
-      )
-   helper_copy
-     (
-      .clk 		(clk),
-      .rst_n	(rst_n),
-
-      .ena 		(helper_copy_ena),
-      .rdy 		(helper_copy_rdy),
-
-      .s_addr	(helper_copy_s_addr),
-      .a1_addr	(a1_addr),
-
-      .s_din	(buf_s_rd_dout),
-
-      .a1_dout	(a1_dout),
-
-      .a1_wren	(a1_wren)
-      );
-
-
-   //
-   // Round Counter
-   //
-   reg [ROUND_COUNTER_BITS-1:0] 								round_counter;
-   wire [ROUND_COUNTER_BITS-1:0] 								round_counter_max = LOOP_NUM_ROUNDS - 1;
-   wire [ROUND_COUNTER_BITS-1:0] 								round_counter_zero = {ROUND_COUNTER_BITS{1'b0}};
-   wire [ROUND_COUNTER_BITS-1:0] 								round_counter_next =
-												(round_counter < round_counter_max) ? round_counter + 1'b1 : round_counter_zero;
-
-
-   //
-   // FSM
-   //
-   localparam FSM_STATE_IDLE				= 4'd0;
-
-   localparam FSM_STATE_INIT				= 4'd1;
-
-   localparam FSM_STATE_INVERT_PRECALC	= 4'd11;
-   localparam FSM_STATE_INVERT_COMPARE	= 4'd12;
-   localparam FSM_STATE_INVERT_UPDATE	= 4'd13;
-
-   localparam FSM_STATE_REDUCE_PRECALC	= 4'd14;
-   localparam FSM_STATE_REDUCE_UPDATE	= 4'd15;
-
-   localparam FSM_STATE_COPY				= 4'd2;
-
-   localparam FSM_STATE_DONE				= 4'd3;
-
-   reg [3:0] 											fsm_state = FSM_STATE_IDLE;
-   reg [3:0] 											fsm_state_dly = FSM_STATE_IDLE;
-
-   wire 											fsm_state_new = (fsm_state != fsm_state_dly);
-
-   wire [3:0] 											fsm_state_invert_next = (round_counter < round_counter_max) ?
-												FSM_STATE_INVERT_PRECALC : FSM_STATE_REDUCE_PRECALC;
-
-   wire [3:0] 											fsm_state_reduce_next = (round_counter < round_counter_max) ?
-												FSM_STATE_REDUCE_PRECALC : FSM_STATE_COPY;
-
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
-     else case (fsm_state)
-	    FSM_STATE_IDLE:				fsm_state <= ena                        ? FSM_STATE_INIT           : FSM_STATE_IDLE;
-	    FSM_STATE_INIT:				fsm_state <= helper_init_done           ? FSM_STATE_INVERT_PRECALC : FSM_STATE_INIT;
-	    FSM_STATE_INVERT_PRECALC:	fsm_state <= helper_invert_precalc_done ? FSM_STATE_INVERT_COMPARE : FSM_STATE_INVERT_PRECALC;
-	    FSM_STATE_INVERT_COMPARE:	fsm_state <= helper_invert_compare_done ? FSM_STATE_INVERT_UPDATE  : FSM_STATE_INVERT_COMPARE;
-	    FSM_STATE_INVERT_UPDATE:	fsm_state <= helper_invert_update_done  ? fsm_state_invert_next    : FSM_STATE_INVERT_UPDATE;
-	    FSM_STATE_REDUCE_PRECALC:	fsm_state <= helper_reduce_precalc_done ? FSM_STATE_REDUCE_UPDATE  : FSM_STATE_REDUCE_PRECALC;
-	    FSM_STATE_REDUCE_UPDATE:	fsm_state <= helper_reduce_update_done  ? fsm_state_reduce_next    : FSM_STATE_REDUCE_UPDATE;
-	    FSM_STATE_COPY:				fsm_state <= helper_copy_done           ? FSM_STATE_DONE           : FSM_STATE_COPY;
-	    FSM_STATE_DONE:				fsm_state <= FSM_STATE_IDLE;
-	    default:							fsm_state <= FSM_STATE_IDLE;
-	  endcase
-
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0)	fsm_state_dly <= FSM_STATE_IDLE;
-     else						fsm_state_dly <= fsm_state;
-
-
-   assign helper_init_ena				= (fsm_state == FSM_STATE_INIT)           && fsm_state_new;
-   assign helper_invert_precalc_ena	= (fsm_state == FSM_STATE_INVERT_PRECALC) && fsm_state_new;
-   assign helper_invert_compare_ena	= (fsm_state == FSM_STATE_INVERT_COMPARE) && fsm_state_new;
-   assign helper_invert_update_ena	= (fsm_state == FSM_STATE_INVERT_UPDATE)  && fsm_state_new;
-   assign helper_reduce_precalc_ena	= (fsm_state == FSM_STATE_REDUCE_PRECALC) && fsm_state_new;
-   assign helper_reduce_update_ena	= (fsm_state == FSM_STATE_REDUCE_UPDATE)  && fsm_state_new;
-   assign helper_copy_ena				= (fsm_state == FSM_STATE_COPY)           && fsm_state_new;
-
-
-   //
-   // Counter Increment
-   //
-   always @(posedge clk) begin
-      //
-      if ((fsm_state == FSM_STATE_INIT) && helper_init_done)
-	round_counter <= round_counter_zero;
-      //
-      if ((fsm_state == FSM_STATE_INVERT_UPDATE) && helper_invert_update_done)
-	round_counter <= round_counter_next;
-      //
-      if ((fsm_state == FSM_STATE_REDUCE_UPDATE) && helper_reduce_update_done)
-	round_counter <= round_counter_next;
-      //
-   end
-
-
-   //
-   // Q Address Selector
-   //
-   always @(*) begin
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				q_addr = helper_init_q_addr;
-	FSM_STATE_REDUCE_PRECALC:	q_addr = helper_reduce_precalc_q_addr;
-	default:							q_addr = {OPERAND_ADDR_BITS{1'bX}};
-      endcase
-      //
-   end
-
-
-   //
-   // Buffer Address Selector
-   //
-   always @(*) begin
-      //
-      // Write Ports
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				buf_r_wr_addr = helper_init_r_addr;
-	FSM_STATE_INVERT_UPDATE:	buf_r_wr_addr = helper_invert_update_r_addr;
-	FSM_STATE_REDUCE_PRECALC:	buf_r_wr_addr = helper_reduce_precalc_r_addr;
-	default:							buf_r_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
-      endcase
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				buf_s_wr_addr = helper_init_s_addr;
-	FSM_STATE_INVERT_UPDATE:	buf_s_wr_addr = helper_invert_update_s_addr;
-	FSM_STATE_REDUCE_UPDATE:	buf_s_wr_addr = helper_reduce_update_s_addr;
-	default:							buf_s_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
-      endcase
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				buf_u_wr_addr = helper_init_u_addr;
-	FSM_STATE_INVERT_UPDATE:	buf_u_wr_addr = helper_invert_update_u_addr;
-	FSM_STATE_REDUCE_PRECALC:	buf_u_wr_addr = helper_reduce_precalc_u_addr;
-	default:							buf_u_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
-      endcase
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				buf_v_wr_addr = helper_init_v_addr;
-	FSM_STATE_INVERT_UPDATE:	buf_v_wr_addr = helper_invert_update_v_addr;
-	FSM_STATE_REDUCE_PRECALC:	buf_v_wr_addr = helper_reduce_precalc_v_addr;
-	default:							buf_v_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
-      endcase
-      //
-      // Read Ports
-      //
-      case (fsm_state)
-	FSM_STATE_INVERT_PRECALC:	buf_r_rd_addr = helper_invert_precalc_r_addr;
-	default:							buf_r_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
-      endcase
-      //
-      case (fsm_state)
-	FSM_STATE_INVERT_PRECALC:	buf_s_rd_addr = helper_invert_precalc_s_addr;
-	FSM_STATE_REDUCE_PRECALC:	buf_s_rd_addr = helper_reduce_precalc_s_addr;
-	FSM_STATE_COPY:				buf_s_rd_addr = helper_copy_s_addr;
-	default:							buf_s_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
-      endcase
-      //
-      case (fsm_state)
-	FSM_STATE_INVERT_PRECALC:	buf_u_rd_addr = helper_invert_precalc_u_addr;
-	FSM_STATE_INVERT_COMPARE:	buf_u_rd_addr = helper_invert_compare_u_addr;
-	FSM_STATE_REDUCE_UPDATE:	buf_u_rd_addr = helper_reduce_update_u_addr;
-	default:							buf_u_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
-      endcase
-      //
-      case (fsm_state)
-	FSM_STATE_INVERT_PRECALC:	buf_v_rd_addr = helper_invert_precalc_v_addr;
-	FSM_STATE_INVERT_COMPARE:	buf_v_rd_addr = helper_invert_compare_v_addr;
-	FSM_STATE_REDUCE_UPDATE:	buf_v_rd_addr = helper_reduce_update_v_addr;
-	default:							buf_v_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
-      endcase
-      //
-   end
-
-
-   //
-   // Buffer Write Enable Logic
-   //
-   always @(*) begin
-      //
-      // Write Ports
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				buf_r_wr_en  = helper_init_r_wren;
-	FSM_STATE_INVERT_UPDATE:	buf_r_wr_en = helper_invert_update_r_wren;
-	FSM_STATE_REDUCE_PRECALC:	buf_r_wr_en = helper_reduce_precalc_r_wren;
-	default:							buf_r_wr_en = 1'b0;
-      endcase
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				buf_s_wr_en = helper_init_s_wren;
-	FSM_STATE_INVERT_UPDATE:	buf_s_wr_en = helper_invert_update_s_wren;
-	FSM_STATE_REDUCE_UPDATE:	buf_s_wr_en = helper_reduce_update_s_wren;
-	default:							buf_s_wr_en = 1'b0;
-      endcase
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				buf_u_wr_en = helper_init_u_wren;
-	FSM_STATE_INVERT_UPDATE:	buf_u_wr_en = helper_invert_update_u_wren;
-	FSM_STATE_REDUCE_PRECALC:	buf_u_wr_en = helper_reduce_precalc_u_wren;
-	default:							buf_u_wr_en = 1'b0;
-      endcase
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				buf_v_wr_en = helper_init_v_wren;
-	FSM_STATE_INVERT_UPDATE:	buf_v_wr_en = helper_invert_update_v_wren;
-	FSM_STATE_REDUCE_PRECALC:	buf_v_wr_en = helper_reduce_precalc_v_wren;
-	default:							buf_v_wr_en = 1'b0;
-      endcase
-      //
-   end
-
-
-   //
-   // Buffer Write Data Selector
-   //
-   always @(*) begin
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				buf_r_wr_din = helper_init_r_data;
-	FSM_STATE_INVERT_UPDATE:	buf_r_wr_din = helper_invert_update_r_data;
-	FSM_STATE_REDUCE_PRECALC:	buf_r_wr_din = helper_reduce_precalc_r_data;
-	default:							buf_r_wr_din = {32{1'bX}};
-      endcase
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				buf_s_wr_din = helper_init_s_data;
-	FSM_STATE_INVERT_UPDATE:	buf_s_wr_din = helper_invert_update_s_data;
-	FSM_STATE_REDUCE_UPDATE:	buf_s_wr_din = helper_reduce_update_s_data;
-	default:							buf_s_wr_din = {32{1'bX}};
-      endcase
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				buf_u_wr_din = helper_init_u_data;
-	FSM_STATE_INVERT_UPDATE:	buf_u_wr_din = helper_invert_update_u_data;
-	FSM_STATE_REDUCE_PRECALC:	buf_u_wr_din = helper_reduce_precalc_u_data;
-	default:							buf_u_wr_din = {32{1'bX}};
-      endcase
-      //
-      case (fsm_state)
-	FSM_STATE_INIT:				buf_v_wr_din = helper_init_v_data;
-	FSM_STATE_INVERT_UPDATE:	buf_v_wr_din = helper_invert_update_v_data;
-	FSM_STATE_REDUCE_PRECALC:	buf_v_wr_din = helper_reduce_precalc_v_data;
-	default:							buf_v_wr_din = {32{1'bX}};
-      endcase
-      //
-   end
-
-
-   //
-   // Ready Logic
-   //
-   reg rdy_reg = 1'b1;
-
-   assign rdy = rdy_reg;
-
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0) rdy_reg <= 1'b1;
-     else begin
-
-	/* clear */
-	if (rdy && ena) rdy_reg <= 1'b0;
-
-	/* set */
-	if (!rdy && (fsm_state == FSM_STATE_DONE)) rdy_reg <= 1'b1;
-
-     end
-
-
-   //
-   // Store Redundant Power of 2 (K)
-   //
-   always @(posedge clk)
-     //
-     if (helper_init_ena)
-       k <= {K_NUM_BITS{1'b0}};
-     else begin
-
-	if (helper_invert_update_ena && !flag_invert_v_eq_1)
-	  k <= k + 1'b1;
-
-	if (helper_reduce_update_ena && (k != {K_NUM_BITS{1'b0}}))
-	  k <= k - 1'b1;
-
-     end
-
-     endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_subtractor.v b/rtl/modular/modular_subtractor.v
deleted file mode 100644
index 1637f45..0000000
--- a/rtl/modular/modular_subtractor.v
+++ /dev/null
@@ -1,292 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// modular_subtractor.v
-// -----------------------------------------------------------------------------
-// Modular subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_subtractor
-  (
-   clk, rst_n,
-   ena, rdy,
-   ab_addr, n_addr, d_addr, d_wren,
-   a_din, b_din, n_din, d_dout
-   );
-   // Word-serial modular subtractor: reads A, B and N one 32-bit word at a time and writes D = (A - B) mod N.
-   // Both (A - B) and (A - B) + N are accumulated; the borrow latched from the subtractor selects which one is output.
-   //
-   // Parameters
-   //
-   parameter	OPERAND_NUM_WORDS	= 8;	// operand length in 32-bit words
-   parameter	WORD_COUNTER_WIDTH	= 3;	// bit width of the word indices / address ports
-
-
-   //
-   // Handy Numbers
-   //
-   localparam	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_ZERO	= 0;
-   localparam	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_LAST	= OPERAND_NUM_WORDS - 1;
-
-
-   //
-   // Handy Functions
-   // WORD_INDEX_NEXT_OR_ZERO: returns index + 1, wrapping to WORD_INDEX_ZERO once WORD_INDEX_LAST is reached.
-   function	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_NEXT_OR_ZERO;
-      input	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_CURRENT;
-      begin
-	 WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
-				   WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
-      end
-   endfunction
-
-
-   //
-   // Ports
-   //
-   input		wire										clk;			// system clock
-   input		wire										rst_n;		// active-low async reset
-
-   input		wire										ena;			// enable input
-   output	wire 											rdy;			// ready output
-
-   output	wire [WORD_COUNTER_WIDTH-1:0] 								ab_addr;		// index of current A and B words
-   output	wire [WORD_COUNTER_WIDTH-1:0] 								n_addr;		// index of current N word
-   output	wire [WORD_COUNTER_WIDTH-1:0] 								d_addr;		// index of current D word
-   output	wire 											d_wren;		// store current D word now
-
-   input		wire [                  31:0] 							a_din;		// A
-   input		wire [                  31:0] 							b_din;		// B
-   input		wire [                  31:0] 							n_din;		// N
-   output	wire [                  31:0] 								d_dout;		// D = (A - B) mod N
-
-
-   //
-   // Word Indices
-   // Three independent counters: A and B share one read index, N and D each have their own.
-   reg [WORD_COUNTER_WIDTH-1:0] 									index_ab;
-   reg [WORD_COUNTER_WIDTH-1:0] 									index_n;
-   reg [WORD_COUNTER_WIDTH-1:0] 									index_d;
-
-   /* map registers to output ports */
-   assign ab_addr	= index_ab;
-   assign n_addr	= index_n;
-   assign d_addr	= index_d;
-
-
-   //
-   // Subtractor
-   // Takes the current A and B words. NOTE(review): subtractor32_wrapper is not visible here - presumably d = a - b - b_in with b_out as borrow out; confirm its latency.
-   wire [31: 0] 											sub32_d;
-   wire 												sub32_b_in;
-   wire 												sub32_b_out;
-
-   subtractor32_wrapper subtractor32
-     (
-      .clk		(clk),
-      .a			(a_din),
-      .b			(b_din),
-      .d			(sub32_d),
-      .b_in		(sub32_b_in),
-      .b_out	(sub32_b_out)
-      );
-
-
-   //
-   // Adder
-   // Adds the current N word to the subtractor output, i.e. forms the words of (A - B) + N. NOTE(review): adder32_wrapper is not visible here.
-   wire [31: 0] 											add32_s;
-   wire 												add32_c_in;
-   wire 												add32_c_out;
-
-   adder32_wrapper adder32
-     (
-      .clk		(clk),
-      .a			(sub32_d),
-      .b			(n_din),
-      .s			(add32_s),
-      .c_in		(add32_c_in),
-      .c_out	(add32_c_out)
-      );
-
-
-   //
-   // FSM
-   // Shift-register sequencer: a single 1 is loaded into the MSB when ena is seen while rdy, then moves one bit right per clock.
-   // Bit 0 is rdy. Each control strobe below is the OR of an OPERAND_NUM_WORDS-wide window, so it is high while the 1 is inside that window.
-   localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5;
-
-   reg [FSM_SHREG_WIDTH-1:0] 										fsm_shreg;
-
-   assign rdy = fsm_shreg[0];
-   // Windows are offset from the MSB by 0, 1, 2 and 3 bits, then by OPERAND_NUM_WORDS + 3 and OPERAND_NUM_WORDS + 4 bits.
-   wire [OPERAND_NUM_WORDS-1:0] 									fsm_shreg_inc_index_ab	= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
-   wire [OPERAND_NUM_WORDS-1:0] 									fsm_shreg_inc_index_n		= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
-   wire [OPERAND_NUM_WORDS-1:0] 									fsm_shreg_store_dif_ab	= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
-   wire [OPERAND_NUM_WORDS-1:0] 									fsm_shreg_store_dif_ab_n	= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
-   wire [OPERAND_NUM_WORDS-1:0] 									fsm_shreg_store_data_d	= fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)];
-   wire [OPERAND_NUM_WORDS-1:0] 									fsm_shreg_inc_index_d		= fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)];
-   // Single tap: the lowest bit of the store_dif_ab window, i.e. the cycle in which the last A - B word is stored.
-   wire 												fsm_latch_msb_borrow	= fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
-
-   wire 												inc_index_ab		= |fsm_shreg_inc_index_ab;
-   wire 												inc_index_n		= |fsm_shreg_inc_index_n;
-   wire 												store_dif_ab		= |fsm_shreg_store_dif_ab;
-   wire 												store_dif_ab_n	= |fsm_shreg_store_dif_ab_n;
-   wire 												store_data_d		= |fsm_shreg_store_data_d;
-   wire 												inc_index_d		= |fsm_shreg_inc_index_d;
-   // Reset leaves only bit 0 set (idle, rdy high). While idle, ena loads the MSB and clears bit 0; otherwise the register shifts right.
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0)
-       //
-       fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
-   //
-     else begin
-	//
-	if (rdy)	fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
-	//
-	else		fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
-	//
-     end
-
-
-   //
-   // Borrow & Carry Masking Logic
-   // Each mask is a registered "index == 0" flag; while it is set the fed-back borrow / carry is forced to zero.
-   reg	sub32_b_mask;
-   reg	add32_c_mask;
-
-
-   always @(posedge clk) begin
-      //
-      sub32_b_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
-      add32_c_mask <= (index_n  == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
-      //
-   end
-   // The borrow / carry outputs are fed straight back as inputs, gated by the masks above.
-   assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
-   assign add32_c_in = add32_c_out & ~add32_c_mask;
-
-
-
-   //
-   // Borrow Latch Logic
-   // Only the subtractor borrow is captured (no carry is latched); it later selects the output source.
-   reg sub32_borrow_latch;
-
-   always @(posedge clk) begin
-      //
-      if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out;
-      //
-   end
-
-
-   //
-   // Intermediate Results
-   // Two word-wide shift registers: d_ab collects the A - B words, d_ab_n collects the (A - B) + N words.
-   reg	[32*OPERAND_NUM_WORDS-1:0]		d_ab;
-   reg [32*OPERAND_NUM_WORDS-1:0] 		d_ab_n;
-   // New words enter at the top and move down 32 bits per store; during store_data_d both registers are drained with don't-care fill.
-   always @(posedge clk)
-     //
-     if (store_data_d) begin
-	//
-	d_ab		<= {{32{1'bX}}, d_ab[32*OPERAND_NUM_WORDS-1:32]};
-	d_ab_n	<= {{32{1'bX}}, d_ab_n[32*OPERAND_NUM_WORDS-1:32]};
-	//
-     end else begin
-	//
-	if (store_dif_ab) d_ab <= {sub32_d, d_ab[32*OPERAND_NUM_WORDS-1:32]};
-	if (store_dif_ab_n) d_ab_n <= {add32_s, d_ab_n[32*OPERAND_NUM_WORDS-1:32]};
-	//
-     end
-
-
-   //
-   // Word Index Increment Logic
-   // All three indices are held at zero while idle (rdy) and advance, with wrap-around, under their FSM strobes.
-   always @(posedge clk)
-     //
-     if (rdy) begin
-	//
-	index_ab		<= WORD_INDEX_ZERO;
-	index_n		<= WORD_INDEX_ZERO;
-	index_d		<= WORD_INDEX_ZERO;
-	//
-     end else begin
-	//
-	if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab);
-	if (inc_index_n)	index_n	<= WORD_INDEX_NEXT_OR_ZERO(index_n);
-	if (inc_index_d)	index_d	<= WORD_INDEX_NEXT_OR_ZERO(index_d);
-	//
-     end
-
-
-   //
-   // Output Sum Selector
-   // Latched borrow set -> output (A - B) + N, otherwise output A - B.
-   wire	mux_select_ab_n = sub32_borrow_latch;
-
-
-   //
-   // Output Data and Write Enable Logic
-   // d_wren and d_dout are registered; the word driven out is always the lowest 32 bits of the selected shift register.
-   reg 	d_wren_reg;
-   reg [31: 0] d_dout_reg;
-   wire [31: 0] d_dout_mux = mux_select_ab_n ? d_ab_n[31:0] : d_ab[31:0];
-
-   assign d_wren = d_wren_reg;
-   assign d_dout = d_dout_reg;
-   // While idle the write enable is held low; otherwise d_wren follows store_data_d by one clock and d_dout is don't-care when not storing.
-   always @(posedge clk)
-     //
-     if (rdy) begin
-	//
-	d_wren_reg	<= 1'b0;
-	d_dout_reg	<= {32{1'bX}};
-	//
-     end else begin
-	//
-	d_wren_reg <= store_data_d;
-	d_dout_reg <= store_data_d ? d_dout_mux : {32{1'bX}};
-	//
-     end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/multiword/mw_comparator.v b/rtl/multiword/mw_comparator.v
deleted file mode 100644
index 756ee83..0000000
--- a/rtl/multiword/mw_comparator.v
+++ /dev/null
@@ -1,220 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mw_comparator.v
-// -----------------------------------------------------------------------------
-// Multi-word comparator.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mw_comparator
-  (
-   clk, rst_n,
-   ena, rdy,
-   xy_addr, x_din, y_din,
-   cmp_l, cmp_e, cmp_g
-   );
-   // Word-serial comparator: reads X and Y one 32-bit word at a time, from index WORD_INDEX_LAST down to 0,
-   // and sets exactly one of cmp_l / cmp_e / cmp_g. NOTE(review): presumably the highest index is the most significant word - confirm.
-   //
-   // Parameters
-   //
-   parameter	WORD_COUNTER_WIDTH	= 3;	// bit width of the word index / address port
-   parameter	OPERAND_NUM_WORDS		= 8;	// operand length in 32-bit words
-
-
-   //
-   // Handy Numbers
-   //
-   localparam	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_ZERO	= 0;
-   localparam	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_LAST	= OPERAND_NUM_WORDS - 1;
-
-
-   //
-   // Handy Functions
-   // WORD_INDEX_PREV_OR_LAST: returns index - 1, wrapping to WORD_INDEX_LAST once WORD_INDEX_ZERO is reached.
-   function	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_PREV_OR_LAST;
-      input	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_CURRENT;
-      begin
-	 WORD_INDEX_PREV_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
-				   WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
-      end
-   endfunction
-
-
-   //
-   // Ports
-   //
-   input		wire											clk;			// system clock
-   input		wire											rst_n;		// active-low async reset
-
-   input		wire											ena;			// enable input
-   output	wire 												rdy;			// ready output
-
-   output	wire [WORD_COUNTER_WIDTH-1:0] 									xy_addr;		// address of current X and Y words
-   input		wire [                32-1:0] 								x_din;		// current X word
-   input		wire [                32-1:0] 								y_din;		// current Y word
-
-   output	wire 												cmp_l;		// X < Y ?
-   output	wire 												cmp_e;		// X = Y ?
-   output	wire 												cmp_g;		// X > Y ?
-
-
-   //
-   // Word Indices
-   // One shared read index for X and Y, plus the three registered result flags.
-   reg [WORD_COUNTER_WIDTH-1:0] 										index_xy;
-
-   reg 														reg_cmp_l;
-   reg 														reg_cmp_e;
-   reg 														reg_cmp_g;
-
-
-   //
-   // Output Mapping
-   //
-   assign xy_addr	= index_xy;
-
-   assign cmp_l = reg_cmp_l;
-   assign cmp_e = reg_cmp_e;
-   assign cmp_g = reg_cmp_g;
-
-
-   //
-   // FSM
-   // Shift-register sequencer: a single 1 is loaded into the MSB when ena is seen while rdy, then moves one bit right per clock; bit 0 is rdy.
-   localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 3;
-
-   reg [FSM_SHREG_WIDTH-1:0] 											fsm_shreg;
-
-   assign rdy = fsm_shreg[0];
-   // dec_index_xy window starts at the MSB; calc_leg window is offset by 2 bits; calc_leg_last is the lowest bit of the calc_leg window.
-   wire [OPERAND_NUM_WORDS-1:0] 										fsm_shreg_dec_index_xy	= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
-   wire [OPERAND_NUM_WORDS-1:0] 										fsm_shreg_calc_leg			= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
-   wire 													fsm_shreg_calc_leg_last	= fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
-
-   wire 													dec_index_xy		= |fsm_shreg_dec_index_xy;
-   wire 													calc_leg			= |fsm_shreg_calc_leg;
-   wire 													calc_leg_last	=  fsm_shreg_calc_leg_last;
-
-   // Reset leaves only bit 0 set (idle, rdy high). While idle, ena loads the MSB and clears bit 0; otherwise the register shifts right.
-   always @(posedge clk or negedge rst_n)
-     //
-     if (rst_n == 1'b0)
-       //
-       fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
-   //
-     else begin
-	//
-	if (rdy)	fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
-	//
-	else		fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
-	//
-     end
-
-
-   //
-   // Word Index Decrement Logic
-   // The index is parked at WORD_INDEX_LAST while idle and counts down (with wrap-around) while dec_index_xy is high.
-   always @(posedge clk)
-     //
-     if (rdy)						index_xy <= WORD_INDEX_LAST;
-     else if (dec_index_xy)	index_xy <= WORD_INDEX_PREV_OR_LAST(index_xy);
-
-
-   //
-   // 32-bit Subtractor
-   // Takes the current X and Y words. NOTE(review): subtractor32_wrapper is not visible here - presumably d = a - b - b_in with b_out as borrow out; confirm its latency.
-   wire	[31: 0]	sub32_d_out;
-   wire 	sub32_b_in;
-   wire 	sub32_b_out;
-
-   subtractor32_wrapper subtractor32_inst
-     (
-      .clk		(clk),
-
-      .a			(x_din),
-      .b			(y_din),
-
-      .d			(sub32_d_out),
-
-      .b_in		(sub32_b_in),
-      .b_out	(sub32_b_out)
-      );
-
-
-   //
-   // Borrow Masking Logic
-   // The mask is a registered "index == WORD_INDEX_LAST" flag; while it is set the fed-back borrow is forced to zero.
-   reg 		sub32_b_mask;
-
-   always @(posedge clk)
-     //
-     sub32_b_mask <= (index_xy  == WORD_INDEX_LAST) ? 1'b1 : 1'b0;
-
-   assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
-
-   //
-   // Output Logic
-   // Flags are sticky: once cmp_l or cmp_g is set, later words are ignored; flags keep their value until the next start.
-   wire 	cmp_unresolved = !(cmp_l || cmp_g);
-
-   wire 	cmp_borrow_is_set				= (sub32_b_out ==  1'b1) ? 1'b1 : 1'b0;
-   wire 	cmp_difference_is_nonzero	= (sub32_d_out != 32'd0) ? 1'b1 : 1'b0;
-   // Start (rdy && ena) clears all flags. While calc_leg: borrow -> cmp_l; no borrow and non-zero difference -> cmp_g; no borrow, zero difference, final word -> cmp_e.
-   always @(posedge clk)
-     //
-     if (rdy) begin
-	//
-	if (ena) begin
-	   //
-	   reg_cmp_l	<= 1'b0;
-	   reg_cmp_e	<= 1'b0;
-	   reg_cmp_g	<= 1'b0;
-	   //
-	end
-	//
-     end else if (cmp_unresolved && calc_leg) begin
-	//
-	if ( cmp_borrow_is_set)																	reg_cmp_l <= 1'b1;
-	if (!cmp_borrow_is_set &&  cmp_difference_is_nonzero)							reg_cmp_g <= 1'b1;
-	if (!cmp_borrow_is_set && !cmp_difference_is_nonzero && calc_leg_last)	reg_cmp_e <= 1'b1;
-	//
-     end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/multiword/mw_mover.v b/rtl/multiword/mw_mover.v
deleted file mode 100644
index d6e025f..0000000
--- a/rtl/multiword/mw_mover.v
+++ /dev/null
@@ -1,175 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mw_mover.v
-// -----------------------------------------------------------------------------
-// Multi-word data mover.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mw_mover
-  (
-   clk, rst_n,
-   ena, rdy,
-   x_addr, y_addr, y_wren,
-   x_din, y_dout
-   );
-
-
-   //
-   // Parameters: WORD_COUNTER_WIDTH is the bit width of the word indices and address ports,
-   // OPERAND_NUM_WORDS is the number of 32-bit words handled per operation.
-   parameter	WORD_COUNTER_WIDTH	= 3;
-   parameter	OPERAND_NUM_WORDS		= 8;
-
-
-   //
-   // Handy Numbers: first and last word index of an operand
-   //
-   localparam	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_ZERO	= 0;
-   localparam	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_LAST	= OPERAND_NUM_WORDS - 1;
-
-
-   //
-   // Handy Functions
-   // WORD_INDEX_NEXT_OR_ZERO: returns the given index plus one, or WORD_INDEX_ZERO once WORD_INDEX_LAST is reached
-   function	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_NEXT_OR_ZERO;
-      input	[WORD_COUNTER_WIDTH-1:0]	WORD_INDEX_CURRENT;
-      begin
-	 WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
-				   WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
-      end
-   endfunction
-
-
-   //
-   // Ports
-   //
-   input		wire											clk;			// system clock
-   input		wire											rst_n;		// active-low async reset
-
-   input		wire											ena;			// enable input, sampled while rdy is high
-   output	wire 												rdy;			// ready output, high while the FSM is idle
-
-   output	wire [WORD_COUNTER_WIDTH-1:0] 									x_addr;		// address of current X word
-   output	wire [WORD_COUNTER_WIDTH-1:0] 									y_addr;		// address of current Y word
-   output	wire 												y_wren;		// store current Y word
-
-   input		wire [                32-1:0] 								x_din;		// current X word
-   output	wire [                32-1:0] 									y_dout;		// current Y word
-
-
-   //
-   // Word Indices: current word positions for the X side and the Y side
-   //
-   reg [WORD_COUNTER_WIDTH-1:0] 										index_x;
-   reg [WORD_COUNTER_WIDTH-1:0] 										index_y;
-
-
-   //
-   // Output Mapping: the word indices drive the address outputs directly
-   //
-   assign x_addr	= index_x;
-   assign y_addr	= index_y;
-
-
-   //
-   // FSM: shift register that holds a single set bit. Bit 0 set means idle (rdy high); a start
-   // places the bit at the MSB, after which it moves one position towards bit 0 on every clock.
-   localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 2;
-
-   reg [FSM_SHREG_WIDTH-1:0] 											fsm_shreg;
-
-   assign rdy = fsm_shreg[0];
-
-   wire [OPERAND_NUM_WORDS-1:0] 										fsm_shreg_inc_index_x	= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];	// upper OPERAND_NUM_WORDS bits
-   wire [OPERAND_NUM_WORDS-1:0] 										fsm_shreg_inc_index_y	= fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];	// same-sized window, one bit position lower
-
-   wire 													inc_index_x	= |fsm_shreg_inc_index_x;
-   wire 													inc_index_y	= |fsm_shreg_inc_index_y;
-   wire 													store_word_y	= |fsm_shreg_inc_index_x;	// same window as inc_index_x; registered below before driving y_wren
-
-
-   always @(posedge clk or negedge rst_n)
-     // asynchronous reset puts the FSM into the idle state (only bit 0 set)
-     if (rst_n == 1'b0)
-       //
-       fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
-   //
-     else begin
-	// idle: if ena is high set the MSB and clear bit 0 (start), otherwise remain idle
-	if (rdy)	fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
-	// busy: shift right by one position, filling the MSB with zero
-	else		fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
-	//
-     end
-
-
-   //
-   // Word Index Increment Logic: both indices are loaded with zero while idle and
-   // advance (wrapping after WORD_INDEX_LAST) on their respective increment strobes.
-   always @(posedge clk)
-     // no reset branch here: the indices are re-initialized whenever rdy is high
-     if (rdy) begin
-	index_x <= WORD_INDEX_ZERO;
-	index_y <= WORD_INDEX_ZERO;
-     end else begin
-	if (inc_index_x)	index_x <= WORD_INDEX_NEXT_OR_ZERO(index_x);
-	if (inc_index_y)	index_y <= WORD_INDEX_NEXT_OR_ZERO(index_y);
-     end
-
-
-   //
-   // Write Enable Logic: y_wren is store_word_y delayed by one clock and forced low while idle,
-   // so it is high during the same cycles in which inc_index_y is high.
-   reg	y_wren_reg;
-
-   assign y_wren = y_wren_reg;
-
-   always @(posedge clk)
-     // NOTE(review): the one-clock delay presumably matches the read latency of the X memory - confirm
-     if (rdy)		y_wren_reg	<= 1'b0;
-     else			y_wren_reg	<= store_word_y;
-
-
-   //
-   // Output Logic: the Y data output is the X data input passed straight through
-   //
-   assign y_dout = x_din;
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the Commits mailing list