[Cryptech-Commits] [core/lib] branch master updated: Added primitives with clock enable ports. Added primitives from ModExp.

git at cryptech.is git at cryptech.is
Thu Dec 20 10:45:33 UTC 2018


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository core/lib.

The following commit(s) were added to refs/heads/master by this push:
     new fbcbd42  Added primitives with clock enable ports. Added primitives from ModExp.
fbcbd42 is described below

commit fbcbd4218e2711da279d8097620a5b26637bf45b
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Wed Dec 19 15:27:04 2018 +0300

    Added primitives with clock enable ports.
    Added primitives from ModExp.
---
 bench/tb_lowlevel_adder32.v                        | 175 ++++++++++
 bench/tb_lowlevel_adder47.v                        | 151 +++++++++
 bench/tb_lowlevel_subtractor32.v                   | 174 ++++++++++
 bench/tb_modular_adder.v                           | 357 +++++++++++++++++++++
 bench/tb_modular_subtractor.v                      | 356 ++++++++++++++++++++
 bench/tb_mw_comparator.v                           | 322 +++++++++++++++++++
 bench/tb_mw_mover.v                                | 282 ++++++++++++++++
 ...xp_systolic_pe_artix7.v => adder32_ce_artix7.v} | 141 ++++----
 lowlevel/artix7/dsp48e1_wrapper_modexp.v           |   2 +-
 ...ic_pe_artix7.v => modexp_multiplier32_artix7.v} | 175 ++++++----
 lowlevel/artix7/modexp_systolic_pe_artix7.v        |  11 +-
 ...stolic_pe_artix7.v => subtractor32_ce_artix7.v} | 135 +++-----
 lowlevel/cryptech_primitive_switch.vh              |  36 ++-
 .../adder32_ce_generic.v}                          | 108 ++-----
 .../subtractor32_ce_generic.v}                     | 108 ++-----
 15 files changed, 2118 insertions(+), 415 deletions(-)

diff --git a/bench/tb_lowlevel_adder32.v b/bench/tb_lowlevel_adder32.v
new file mode 100644
index 0000000..70fbb15
--- /dev/null
+++ b/bench/tb_lowlevel_adder32.v
@@ -0,0 +1,175 @@
+//------------------------------------------------------------------------------
+//
+// tb_lowlevel_adder32.v
+// -----------------------------------------------------------------------------
+// Testbench for 32-bit adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+
+module tb_lowlevel_adder32;
+
+   //
+   // Inputs
+   //
+   reg				clk;
+   reg [31: 0] 			a;
+   reg [31: 0] 			b;
+   reg				c_in;
+
+   //
+   // Outputs
+   //
+   wire [31: 0] 		s;
+   wire				c_out;
+
+   //
+   // Test Vectors {a, b, c_in}
+   //
+   wire [64: 0] 		vec_0		= {32'h00000000, 32'h00000000, 1'b0};	// all zeroes, no carry
+   wire [64: 0] 		vec_1		= {32'h00000000, 32'h00000000, 1'b1};	// all zeroes with carry
+   wire [64: 0] 		vec_2		= {32'h00000000, 32'hFFFFFFFF, 1'b0};	// zeroes and ones, no carry
+   wire [64: 0] 		vec_3		= {32'h00000000, 32'hFFFFFFFF, 1'b1};	// zeroes and ones with carry
+   wire [64: 0] 		vec_4		= {32'hFFFFFFFF, 32'h00000000, 1'b0};	// ones and zeroes, no carry
+   wire [64: 0] 		vec_5		= {32'hFFFFFFFF, 32'h00000000, 1'b1};	// ones and zeroes with carry
+   wire [64: 0] 		vec_6		= {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b0};	// all ones, no carry
+   wire [64: 0] 		vec_7		= {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b1};	// all ones with carry
+
+   wire [64: 0] 		vec_8		= {32'hd898c296, 32'h37bf51f5, 1'b0};	// random values, no carry
+   wire [64: 0] 		vec_9		= {32'hf4a13945, 32'hcbb64068, 1'b0};	// random values, no carry
+   wire [64: 0] 		vec_10	= {32'h2deb33a0, 32'h6b315ece, 1'b0};	// random values, no carry
+   wire [64: 0] 		vec_11	= {32'h77037d81, 32'h2bce3357, 1'b0};	// random values, no carry
+   wire [64: 0] 		vec_12	= {32'h63a440f2, 32'h7c0f9e16, 1'b1};	// random values with carry
+   wire [64: 0] 		vec_13	= {32'hf8bce6e5, 32'h8ee7eb4a, 1'b1};	// random values with carry
+   wire [64: 0] 		vec_14	= {32'he12c4247, 32'hfe1a7f9b, 1'b1};	// random values with carry
+   wire [64: 0] 		vec_15	= {32'h6b17d1f2, 32'h4fe342e2, 1'b1};	// random values with carry
+
+
+   //
+   // UUT
+   //
+   adder32_wrapper uut
+     (
+      .clk		(clk),
+      .a			(a),
+      .b			(b),
+      .s			(s),
+      .c_in		(c_in),
+      .c_out	(c_out)
+      );
+
+
+   //
+   // Testbench Routine
+   //
+   reg 				ok = 1;
+   initial begin
+      //
+      clk = 0;
+      //
+      #100;
+      //
+      test_adder32(vec_0);
+      test_adder32(vec_1);
+      test_adder32(vec_2);
+      test_adder32(vec_3);
+      test_adder32(vec_4);
+      test_adder32(vec_5);
+      test_adder32(vec_6);
+      test_adder32(vec_7);
+      //
+      test_adder32(vec_8);
+      test_adder32(vec_9);
+      test_adder32(vec_10);
+      test_adder32(vec_11);
+      test_adder32(vec_12);
+      test_adder32(vec_13);
+      test_adder32(vec_14);
+      test_adder32(vec_15);
+      //
+      if (ok)	$display("tb_lowlevel_adder32: SUCCESS");
+      else	$display("tb_lowlevel_adder32: FAILURE");
+      //
+      $finish;
+      //
+   end
+
+
+   //
+   // Test Routine
+   //
+   reg	[31: 0]	ss;		// reference value of sum
+   reg 		cc;		// reference value of carry
+   reg 		ss_ok;	// result matches reference value
+
+   task test_adder32;
+      // Applies one test vector to the UUT, pulses clk once and compares s/c_out with a locally computed sum.
+      input	[64: 0] vec;	// packed as {a[31:0], b[31:0], c_in}
+
+      begin
+
+	 /* break down test vector */
+	 a		= vec[64:33];
+	 b		= vec[32: 1];
+	 c_in	= vec[ 0: 0];
+
+	 /* calculate reference values (33-bit sum, bit 32 is the expected carry-out) */
+	 {cc, ss} = {1'b0, a} + {1'b0, b} + {32'd0, c_in};
+
+	 /* send one clock tick */
+	 #10 clk = 1;
+	 #10 clk = 0;
+
+	 /* check outputs */
+	 ss_ok = (s == ss) && (c_out == cc);
+
+	 /* display results: report this vector's own verdict (ss_ok), not the sticky global flag */
+	 $display("test_adder32(): 0x%08X + 0x%08X + %01d = {%01d, 0x%08X} [%0s]", a, b, c_in, c_out, s, ss_ok ? "OK" : "ERROR");
+
+	 /* update global flag (once cleared it stays cleared) */
+	 ok = ok && ss_ok;
+
+      end
+
+   endtask
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_lowlevel_adder47.v b/bench/tb_lowlevel_adder47.v
new file mode 100644
index 0000000..663e8f9
--- /dev/null
+++ b/bench/tb_lowlevel_adder47.v
@@ -0,0 +1,151 @@
+//------------------------------------------------------------------------------
+//
+// tb_lowlevel_adder47.v
+// -----------------------------------------------------------------------------
+// Testbench for 47-bit adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+
+module tb_lowlevel_adder47;
+
+   //
+   // Inputs
+   //
+   reg				clk;
+   reg [46: 0] 			a;
+   reg [46: 0] 			b;
+
+   //
+   // Outputs
+   //
+   wire [46: 0] 		s;
+
+   //
+   // Test Vectors {a, b}
+   //
+   wire [93: 0] 		vec_0		= {47'h2a87ca22be8b, 47'h05378eb1c71e};
+   wire [93: 0] 		vec_1		= {47'h7320ad746e1d, 47'h3b628ba79b98};
+   wire [93: 0] 		vec_2		= {47'h59f741e08254, 47'h2a385502f25d};
+   wire [93: 0] 		vec_3		= {47'h3f55296c3a54, 47'h5e3872760ab7};
+   wire [93: 0] 		vec_4		= {47'h3617de4a9626, 47'h2c6f5d9e98bf};
+   wire [93: 0] 		vec_5		= {47'h1292dc29f8f4, 47'h1dbd289a147c};
+   wire [93: 0] 		vec_6		= {47'h69da3113b5f0, 47'h38c00a60b1ce};
+   wire [93: 0] 		vec_7		= {47'h1d7e819d7a43, 47'h1d7c90ea0e5f};
+
+   //
+   // UUT
+   //
+   adder47_wrapper uut
+     (
+      .clk		(clk),
+      .a		(a),
+      .b		(b),
+      .s		(s)
+      );
+
+
+   //
+   // Testbench Routine
+   //
+   reg 			ok = 1;
+   initial begin
+      //
+      clk = 0;
+      //
+      #100;
+      //
+      test_adder47(vec_0);
+      test_adder47(vec_1);
+      test_adder47(vec_2);
+      test_adder47(vec_3);
+      test_adder47(vec_4);
+      test_adder47(vec_5);
+      test_adder47(vec_6);
+      test_adder47(vec_7);
+      //
+      if (ok)	$display("tb_lowlevel_adder47: SUCCESS");
+      else	$display("tb_lowlevel_adder47: FAILURE");
+      //
+      $finish;
+      //
+   end
+
+
+   //
+   // Test Routine
+   //
+   reg	[46: 0]	ss;		// reference value of sum
+   reg 		cc;		// reference value of carry
+   reg 		ss_ok;	// result matches reference value
+
+   task test_adder47;
+      // Applies one test vector to the UUT, pulses clk once and compares s with a locally computed sum.
+      input	[93: 0] vec;	// packed as {a[46:0], b[46:0]}
+
+      begin
+
+	 /* break down test vector */
+	 a		= vec[93:47];
+	 b		= vec[46: 0];
+
+	 /* calculate reference values (ss is 47 bits wide, so any carry out of bit 46 is dropped) */
+	 ss = a + b;
+
+	 /* send one clock tick */
+	 #10 clk = 1;
+	 #10 clk = 0;
+
+	 /* check outputs */
+	 ss_ok = (s == ss);
+
+	 /* display results: report this vector's own verdict (ss_ok), not the sticky global flag */
+	 $display("test_adder47(): %s", ss_ok ? "OK" : "ERROR");
+
+	 /* update global flag (once cleared it stays cleared) */
+	 ok = ok && ss_ok;
+
+      end
+
+   endtask
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_lowlevel_subtractor32.v b/bench/tb_lowlevel_subtractor32.v
new file mode 100644
index 0000000..fd96000
--- /dev/null
+++ b/bench/tb_lowlevel_subtractor32.v
@@ -0,0 +1,174 @@
+//------------------------------------------------------------------------------
+//
+// tb_lowlevel_subtractor32.v
+// -----------------------------------------------------------------------------
+// Testbench for 32-bit subtractor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_lowlevel_subtractor32;
+
+   //
+   // Inputs
+   //
+   reg				clk;
+   reg [31: 0] 			a;
+   reg [31: 0] 			b;
+   reg				b_in;
+
+   //
+   // Outputs
+   //
+   wire [31: 0] 		d;
+   wire				b_out;
+
+   //
+   // Test Vectors {a, b, b_in}
+   //
+   wire [64: 0] 		vec_0		= {32'h00000000, 32'h00000000, 1'b0};	// all zeroes, no borrow
+   wire [64: 0] 		vec_1		= {32'h00000000, 32'h00000000, 1'b1};	// all zeroes with borrow
+   wire [64: 0] 		vec_2		= {32'h00000000, 32'hFFFFFFFF, 1'b0};	// zeroes and ones, no borrow
+   wire [64: 0] 		vec_3		= {32'h00000000, 32'hFFFFFFFF, 1'b1};	// zeroes and ones with borrow
+   wire [64: 0] 		vec_4		= {32'hFFFFFFFF, 32'h00000000, 1'b0};	// ones and zeroes, no borrow
+   wire [64: 0] 		vec_5		= {32'hFFFFFFFF, 32'h00000000, 1'b1};	// ones and zeroes with borrow
+   wire [64: 0] 		vec_6		= {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b0};	// all ones, no borrow
+   wire [64: 0] 		vec_7		= {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b1};	// all ones with borrow
+
+   wire [64: 0] 		vec_8		= {32'hd898c296, 32'h37bf51f5, 1'b0};	// random values, no borrow
+   wire [64: 0] 		vec_9		= {32'hf4a13945, 32'hcbb64068, 1'b0};	// random values, no borrow
+   wire [64: 0] 		vec_10	= {32'h2deb33a0, 32'h6b315ece, 1'b0};	// random values, no borrow
+   wire [64: 0] 		vec_11	= {32'h77037d81, 32'h2bce3357, 1'b0};	// random values, no borrow
+   wire [64: 0] 		vec_12	= {32'h63a440f2, 32'h7c0f9e16, 1'b1};	// random values with borrow
+   wire [64: 0] 		vec_13	= {32'hf8bce6e5, 32'h8ee7eb4a, 1'b1};	// random values with borrow
+   wire [64: 0] 		vec_14	= {32'he12c4247, 32'hfe1a7f9b, 1'b1};	// random values with borrow
+   wire [64: 0] 		vec_15	= {32'h6b17d1f2, 32'h4fe342e2, 1'b1};	// random values with borrow
+
+
+   //
+   // UUT
+   //
+   subtractor32_wrapper uut
+     (
+      .clk		(clk),
+      .a		(a),
+      .b		(b),
+      .d		(d),
+      .b_in		(b_in),
+      .b_out		(b_out)
+      );
+
+
+   //
+   // Testbench Routine
+   //
+   reg 			ok = 1;
+   initial begin
+      //
+      clk = 0;
+      //
+      #100;
+      //
+      test_subtractor32(vec_0);
+      test_subtractor32(vec_1);
+      test_subtractor32(vec_2);
+      test_subtractor32(vec_3);
+      test_subtractor32(vec_4);
+      test_subtractor32(vec_5);
+      test_subtractor32(vec_6);
+      test_subtractor32(vec_7);
+      //
+      test_subtractor32(vec_8);
+      test_subtractor32(vec_9);
+      test_subtractor32(vec_10);
+      test_subtractor32(vec_11);
+      test_subtractor32(vec_12);
+      test_subtractor32(vec_13);
+      test_subtractor32(vec_14);
+      test_subtractor32(vec_15);
+      //
+      if (ok)	$display("tb_lowlevel_subtractor32: SUCCESS");
+      else	$display("tb_lowlevel_subtractor32: FAILURE");
+      //
+      $finish;
+      //
+   end
+
+
+   //
+   // Test Routine
+   //
+   reg	[31: 0]	dd;		// reference value of difference
+   reg 		bb;		// reference value of borrow
+   reg 		dd_ok;	// result matches reference value
+
+   task test_subtractor32;
+      // Drives one {a, b, b_in} vector into the UUT, clocks it once and checks d/b_out.
+      input	[64: 0] vec;
+
+      begin
+
+	 /* unpack the vector straight onto the UUT inputs */
+	 {a, b, b_in} = vec;
+
+	 /* expected result: bit 32 of the 33-bit difference is the borrow-out */
+	 {bb, dd} = {1'b0, a} - ({1'b0, b} + {32'd0, b_in});
+
+	 /* rising clock edge after 10 ns ... */
+	 #10;
+	 clk = 1'b1;
+	 /* ... falling edge 10 ns later */
+	 #10;
+	 clk = 1'b0;
+
+	 /* compare UUT outputs against the expected values */
+	 dd_ok = (b_out == bb) && (d == dd);
+
+	 /* print this vector's verdict */
+	 $display("test_subtractor32(): 0x%08X - (0x%08X + %01d) = {%01d, 0x%08X} [%0s]", a, b, b_in, b_out, d, dd_ok ? "OK" : "ERROR");
+
+	 /* fold the verdict into the testbench-wide flag */
+	 ok = dd_ok && ok;
+
+      end
+   endtask
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_modular_adder.v b/bench/tb_modular_adder.v
new file mode 100644
index 0000000..713ff22
--- /dev/null
+++ b/bench/tb_modular_adder.v
@@ -0,0 +1,357 @@
+//------------------------------------------------------------------------------
+//
+// tb_modular_adder.v
+// -----------------------------------------------------------------------------
+// Testbench for modular multi-word adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_modular_adder_256;
+
+
+   //
+   // Test Vectors
+   //
+   localparam	[255:0]	N	= 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+   localparam	[255:0]	X_1	= 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
+   localparam	[255:0]	Y_1	= 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
+
+   localparam	[255:0]	X_2	= 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
+   localparam	[255:0]	Y_2	= 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
+
+
+   //
+   // Core Parameters
+   //
+   localparam	WORD_COUNTER_WIDTH	=  3;
+   localparam	OPERAND_NUM_WORDS	=  8;
+
+
+   //
+   // Clock (100 MHz)
+   //
+   reg clk = 1'b0;
+   always #5 clk = ~clk;
+
+
+   //
+   // Inputs, Outputs
+   //
+   reg rst_n;
+   reg ena;
+   wire rdy;
+
+
+   //
+   // Buffers (X, Y, N)
+   //
+   wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] core_s_addr;
+   wire 			 core_s_wren;
+
+   wire [                  31:0] core_x_data;
+   wire [                  31:0] core_y_data;
+   wire [                  31:0] core_n_data;
+   wire [                  31:0] core_s_data;
+
+   reg [WORD_COUNTER_WIDTH-1:0]  tb_xyn_addr;
+   reg [WORD_COUNTER_WIDTH-1:0]  tb_s_addr;
+   reg 				 tb_xyn_wren;
+
+   reg [                  31:0]  tb_x_data;
+   reg [                  31:0]  tb_y_data;
+   reg [                  31:0]  tb_n_data;
+   wire [                  31:0] tb_s_data;
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_x
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xyn_addr),
+      .a_wr		(tb_xyn_wren),
+      .a_in		(tb_x_data),
+      .a_out	(),
+
+      .b_addr	(core_xy_addr),
+      .b_out	(core_x_data)
+      );
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_y
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xyn_addr),
+      .a_wr		(tb_xyn_wren),
+      .a_in		(tb_y_data),
+      .a_out	(),
+
+      .b_addr	(core_xy_addr),
+      .b_out	(core_y_data)
+      );
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_n
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xyn_addr),
+      .a_wr		(tb_xyn_wren),
+      .a_in		(tb_n_data),
+      .a_out	(),
+
+      .b_addr	(core_n_addr),
+      .b_out	(core_n_data)
+      );
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_s
+     (
+      .clk		(clk),
+
+      .a_addr	(core_s_addr),
+      .a_wr		(core_s_wren),
+      .a_in		(core_s_data),
+      .a_out	(),
+
+      .b_addr	(tb_s_addr),
+      .b_out	(tb_s_data)
+      );
+
+
+   //
+   // UUT
+   //
+   modular_adder #
+     (
+      .WORD_COUNTER_WIDTH	(WORD_COUNTER_WIDTH),
+      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS)
+      )
+   uut
+     (
+      .clk			(clk),
+      .rst_n		(rst_n),
+
+      .ena			(ena),
+      .rdy			(rdy),
+
+      .ab_addr		(core_xy_addr),
+      .n_addr		(core_n_addr),
+      .s_addr		(core_s_addr),
+      .s_wren		(core_s_wren),
+
+      .a_din		(core_x_data),
+      .b_din		(core_y_data),
+      .n_din		(core_n_data),
+      .s_dout		(core_s_data)
+      );
+
+
+   //
+   // Testbench Routine
+   //
+   reg 				 ok = 1;
+   initial begin
+
+      /* initialize control inputs */
+      rst_n			= 0;
+      ena			= 0;
+
+      tb_xyn_wren	= 0;
+
+      /* wait for some time */
+      #200;
+
+      /* de-assert reset */
+      rst_n		= 1;
+
+      /* wait for some time */
+      #100;
+
+      /* run tests */
+      test_modular_adder(X_1, Y_1, N);
+      test_modular_adder(X_2, Y_2, N);
+      test_modular_adder(Y_1, X_1, N);
+      test_modular_adder(Y_2, X_2, N);
+
+      test_modular_adder(X_1, X_2, N);
+      test_modular_adder(X_2, X_1, N);
+      test_modular_adder(Y_1, Y_2, N);
+      test_modular_adder(Y_2, Y_1, N);
+
+      test_modular_adder(X_1, Y_2, N);
+      test_modular_adder(Y_2, X_1, N);
+      test_modular_adder(X_2, Y_1, N);
+      test_modular_adder(Y_1, X_2, N);
+
+      /* print result */
+      if (ok)	$display("tb_modular_adder_256: SUCCESS");
+      else	$display("tb_modular_adder_256: FAILURE");
+      //
+      $finish;
+      //
+   end
+
+
+   //
+   // Test Task
+   //
+   reg	[256:0]	s;
+   wire [255:0] s_dummy = s[255:0];
+   reg 		s_ok;
+
+   integer 	w;
+
+   reg [255:0] 	x_shreg;
+   reg [255:0] 	y_shreg;
+   reg [255:0] 	n_shreg;
+   reg [255:0] 	s_shreg;
+
+   task test_modular_adder;
+      // Loads x, y, n into the operand memories, pulses ena, waits for rdy, reads back the result and checks it.
+      input	[255:0]	x;
+      input [255:0] 	y;
+      input [255:0] 	n;
+
+      begin
+
+	 /* start filling memories (write enable is shared by the X, Y and N buffers) */
+	 tb_xyn_wren	= 1;
+
+	 /* initialize shift registers */
+	 x_shreg = x;
+	 y_shreg = y;
+	 n_shreg = n;
+
+	 /* write all the words, least significant 32-bit word goes to address 0 */
+	 for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+	    /* set addresses */
+	    tb_xyn_addr	= w[WORD_COUNTER_WIDTH-1:0];
+
+	    /* set data words */
+	    tb_x_data	= x_shreg[31:0];
+	    tb_y_data	= y_shreg[31:0];
+	    tb_n_data	= n_shreg[31:0];
+
+	    /* shift inputs right by one word, vacated upper word is filled with X */
+	    x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+	    y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+	    n_shreg = {{32{1'bX}}, n_shreg[255:32]};
+
+	    /* wait for 1 clock tick (clk toggles every 5 ns, so 10 ns is one period) */
+	    #10;
+
+	 end
+
+	 /* wipe addresses */
+	 tb_xyn_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+
+	 /* wipe data words */
+	 tb_x_data	= {32{1'bX}};
+	 tb_y_data	= {32{1'bX}};
+	 tb_n_data	= {32{1'bX}};
+
+	 /* stop filling memories */
+	 tb_xyn_wren	= 0;
+
+	 /* calculate reference value: 257-bit sum, then a single conditional subtraction of n */
+	 s = {1'b0, x} + {1'b0, y};
+	 if (s >= {1'b0, n})
+	   s = s - {1'b0, n};
+
+	 /* start operation */
+	 ena = 1;
+
+	 /* clear flag after 10 ns, i.e. ena is a one-period pulse */
+	 #10 ena = 0;
+
+	 /* wait for operation to complete, polling rdy every 10 ns */
+	 while (!rdy) #10;
+
+	 /* read result, one word per iteration starting from address 0 */
+	 for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+	    /* set address */
+	    tb_s_addr	= w[WORD_COUNTER_WIDTH-1:0];
+
+	    /* wait for 1 clock tick */
+	    #10;
+
+	    /* store data word: shifted in from the top, so the first word read ends up in bits [31:0] */
+	    s_shreg = {tb_s_data, s_shreg[255:32]};
+
+	 end
+
+	 /* compare against the lower 256 bits of the reference value (bit 256 of s is not checked) */
+	 s_ok = (s_shreg == s[255:0]);
+
+	 /* display results */
+	 $display("test_modular_adder(): %s", s_ok ? "OK" : "ERROR");
+
+	 /* update global flag (once cleared it stays cleared) */
+	 ok = ok && s_ok;
+
+      end
+
+   endtask
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_modular_subtractor.v b/bench/tb_modular_subtractor.v
new file mode 100644
index 0000000..6cf0e01
--- /dev/null
+++ b/bench/tb_modular_subtractor.v
@@ -0,0 +1,356 @@
+//------------------------------------------------------------------------------
+//
+// tb_modular_subtractor.v
+// -----------------------------------------------------------------------------
+// Testbench for modular multi-word subtractor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_modular_subtractor_256;
+
+
+   //
+   // Test Vectors
+   //
+   localparam	[255:0]	N	= 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+   localparam	[255:0]	X_1	= 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
+   localparam	[255:0]	Y_1	= 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
+
+   localparam	[255:0]	X_2	= 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
+   localparam	[255:0]	Y_2	= 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
+
+
+   //
+   // Core Parameters
+   //
+   localparam	WORD_COUNTER_WIDTH	=  3;
+   localparam	OPERAND_NUM_WORDS	=  8;
+
+
+   //
+   // Clock (100 MHz)
+   //
+   reg clk = 1'b0;
+   always #5 clk = ~clk;
+
+
+   //
+   // Inputs, Outputs
+   //
+   reg rst_n;
+   reg ena;
+   wire rdy;
+
+
+   //
+   // Buffers (X, Y, N, D)
+   //
+   wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] core_d_addr;
+   wire 			 core_d_wren;
+
+   wire [                  31:0] core_x_data;
+   wire [                  31:0] core_y_data;
+   wire [                  31:0] core_n_data;
+   wire [                  31:0] core_d_data;
+
+   reg [WORD_COUNTER_WIDTH-1:0]  tb_xyn_addr;
+   reg [WORD_COUNTER_WIDTH-1:0]  tb_d_addr;
+   reg 				 tb_xyn_wren;
+
+   reg [                  31:0]  tb_x_data;
+   reg [                  31:0]  tb_y_data;
+   reg [                  31:0]  tb_n_data;
+   wire [                  31:0] tb_d_data;
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_x
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xyn_addr),
+      .a_wr		(tb_xyn_wren),
+      .a_in		(tb_x_data),
+      .a_out	(),
+
+      .b_addr	(core_xy_addr),
+      .b_out	(core_x_data)
+      );
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_y
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xyn_addr),
+      .a_wr		(tb_xyn_wren),
+      .a_in		(tb_y_data),
+      .a_out	(),
+
+      .b_addr	(core_xy_addr),
+      .b_out	(core_y_data)
+      );
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_n
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xyn_addr),
+      .a_wr		(tb_xyn_wren),
+      .a_in		(tb_n_data),
+      .a_out	(),
+
+      .b_addr	(core_n_addr),
+      .b_out	(core_n_data)
+      );
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_s
+     (
+      .clk		(clk),
+
+      .a_addr	(core_d_addr),
+      .a_wr		(core_d_wren),
+      .a_in		(core_d_data),
+      .a_out	(),
+
+      .b_addr	(tb_d_addr),
+      .b_out	(tb_d_data)
+      );
+
+
+   //
+   // UUT
+   //
+   modular_subtractor #	// core under test; its ports are wired to the four buffer memories of this testbench
+     (
+      .WORD_COUNTER_WIDTH	(WORD_COUNTER_WIDTH),
+      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS)
+      )
+   uut
+     (
+      .clk			(clk),
+      .rst_n		(rst_n),	// held at 0 for the first 200 ns of the stimulus, then set to 1
+
+      .ena			(ena),		// raised for one 10 ns period by the test task
+      .rdy			(rdy),		// polled by the test task before the result is read back
+
+      .ab_addr		(core_xy_addr),	// a single address drives the read ports of both bram_x and bram_y
+      .n_addr		(core_n_addr),	// separate read address for the modulus memory bram_n
+      .d_addr		(core_d_addr),	// write address into the result memory (instance bram_s)
+      .d_wren		(core_d_wren),	// write enable of the result memory
+
+      .a_din		(core_x_data),	// X words: the minuend in the testbench reference model (x - y)
+      .b_din		(core_y_data),	// Y words: the subtrahend in the reference model
+      .n_din		(core_n_data),	// N words: added back by the reference model when x < y
+      .d_dout		(core_d_data)	// result words written into bram_s
+      );
+
+
+   //
+   // Testbench Routine
+   //
+   reg 				 ok = 1;
+   initial begin
+      // Stimulus: hold rst_n low for 200 ns, release it, idle for 100 ns, run twelve test cases, report, stop.
+      /* initialize control inputs (reset asserted, core not started) */
+      rst_n			= 0;
+      ena			= 0;
+
+      tb_xyn_wren	= 0;	// operand memories are not written until a test task starts
+
+      /* wait for some time (20 clock periods of 10 ns) */
+      #200;
+
+      /* de-assert reset */
+      rst_n		= 1;
+
+      /* wait for some time; tests start at t = 300 ns, where clk (toggled every 5 ns from 0) has a falling edge */
+      #100;
+
+      /* run tests: each X with its own Y, in both operand orders (covers x < y and x > y) */
+      test_modular_subtractor(X_1, Y_1, N);
+      test_modular_subtractor(X_2, Y_2, N);
+      test_modular_subtractor(Y_1, X_1, N);
+      test_modular_subtractor(Y_2, X_2, N);
+      // vectors of the same kind against each other, in both operand orders
+      test_modular_subtractor(X_1, X_2, N);
+      test_modular_subtractor(X_2, X_1, N);
+      test_modular_subtractor(Y_1, Y_2, N);
+      test_modular_subtractor(Y_2, Y_1, N);
+      // cross pairs (X of one set with Y of the other), in both operand orders
+      test_modular_subtractor(X_1, Y_2, N);
+      test_modular_subtractor(Y_2, X_1, N);
+      test_modular_subtractor(X_2, Y_1, N);
+      test_modular_subtractor(Y_1, X_2, N);
+
+      /* print result (ok is cleared by any test task whose comparison failed) */
+      if (ok)	$display("tb_modular_subtractor_256: SUCCESS");
+      else	$display("tb_modular_subtractor_256: FAILURE");
+      //
+      $finish;
+      //
+   end
+
+
+   //
+   // Test Task
+   //
+   reg	[256:0]	d;
+   wire [255:0] d_dummy = d[255:0];
+   reg 		d_ok;
+
+   integer 	w;
+
+   reg [255:0] 	x_shreg;
+   reg [255:0] 	y_shreg;
+   reg [255:0] 	n_shreg;
+   reg [255:0] 	d_shreg;
+
+   task test_modular_subtractor;
+      // Loads x, y, n into the operand memories, runs the core once and checks the result memory against x - y (plus n when x < y).
+      input	[255:0]	x;
+      input [255:0] 	y;
+      input [255:0] 	n;
+
+      begin
+
+	 /* start filling memories */
+	 tb_xyn_wren	= 1;
+
+	 /* initialize shift registers */
+	 x_shreg = x;
+	 y_shreg = y;
+	 n_shreg = n;
+
+	 /* write all the words, least significant 32-bit word at address 0 */
+	 for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+	    /* set addresses */
+	    tb_xyn_addr	= w[WORD_COUNTER_WIDTH-1:0];
+
+	    /* set data words */
+	    tb_x_data	= x_shreg[31:0];
+	    tb_y_data	= y_shreg[31:0];
+	    tb_n_data	= n_shreg[31:0];
+
+	    /* shift inputs right by one word; the vacated upper word is filled with X */
+	    x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+	    y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+	    n_shreg = {{32{1'bX}}, n_shreg[255:32]};
+
+	    /* wait for 1 clock tick (clk toggles every 5 ns) */
+	    #10;
+
+	 end
+
+	 /* wipe addresses */
+	 tb_xyn_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+
+	 /* wipe data words */
+	 tb_x_data	= {32{1'bX}};
+	 tb_y_data	= {32{1'bX}};
+	 tb_n_data	= {32{1'bX}};
+
+	 /* stop filling memories */
+	 tb_xyn_wren	= 0;
+
+	 /* calculate reference value: start from n when x < y (else 0), then add x and subtract y in 257 bits */
+	 d = {1'b0, (x < y) ? n : {256{1'b0}}};
+	 d = d + {1'b0, x} - {1'b0, y};
+
+	 /* start operation */
+	 ena = 1;
+
+	 /* clear flag after one clock period */
+	 #10 ena = 0;
+
+	 /* wait for operation to complete (rdy is polled once per clock period) */
+	 while (!rdy) #10;
+
+	 /* read result, word 0 first */
+	 for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+	    /* set address */
+	    tb_d_addr	= w[WORD_COUNTER_WIDTH-1:0];
+
+	    /* wait for 1 clock tick */
+	    #10;
+
+	    /* store data word: shift in from the top so word 0 ends up in bits 31:0 after the last iteration */
+	    d_shreg = {tb_d_data, d_shreg[255:32]};
+
+	 end
+
+	 /* compare with case equality: an X or Z in the read-back data gives a definite mismatch instead of an unknown flag */
+	 d_ok = (d_shreg === d[255:0]);
+
+	 /* display results */
+	 $display("test_modular_subtractor(): %s", d_ok ? "OK" : "ERROR");
+
+	 /* update global flag */
+	 ok = ok && d_ok;
+
+      end
+
+   endtask
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_mw_comparator.v b/bench/tb_mw_comparator.v
new file mode 100644
index 0000000..ed830a8
--- /dev/null
+++ b/bench/tb_mw_comparator.v
@@ -0,0 +1,322 @@
+//------------------------------------------------------------------------------
+//
+// tb_mw_comparator.v
+// -----------------------------------------------------------------------------
+// Testbench for multi-word comparator.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_mw_comparator;
+
+
+   //
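+   // Test Vectors (each literal has 256 significant bits; the 384-bit declarations zero-extend them, and test_mw_comparator truncates its arguments back to 256 bits)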
+   //
+   localparam	[383:0]	A_0	= 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
+
+   localparam	[383:0]	A_1	= 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F75;
+   localparam	[383:0]	A_2	= 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F77;
+
+   localparam	[383:0]	A_3	= 384'hBDC7B53C_616B13B5_77622510_75BA95FC_375D568B_79E730D4_18A9143C_18905F76;
+   localparam	[383:0]	A_4	= 384'hBDC7B53C_616B13B5_77622510_75BA95FC_575D568B_79E730D4_18A9143C_18905F76;
+
+   localparam	[383:0]	A_5	= 384'hBDC7B53C_616B13B5_77622510_75BA95FB_475D568B_79E730D4_18A9143C_18905F76;
+   localparam	[383:0]	A_6	= 384'hBDC7B53C_616B13B5_77622510_75BA95FD_475D568B_79E730D4_18A9143C_18905F76;
+
+   localparam	[383:0]	A_7	= 384'hADC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
+   localparam	[383:0]	A_8	= 384'hCDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
+
+   localparam	[383:0]	B_0	= 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
+
+   localparam	[383:0]	B_1	= 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E4;
+   localparam	[383:0]	B_2	= 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E6;
+
+   localparam	[383:0]	B_3	= 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_9D784F98_850046D4_10DDD64D_F6BB32E5;
+   localparam	[383:0]	B_4	= 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_BD784F98_850046D4_10DDD64D_F6BB32E5;
+
+   localparam	[383:0]	B_5	= 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3BF_AD784F98_850046D4_10DDD64D_F6BB32E5;
+   localparam	[383:0]	B_6	= 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C1_AD784F98_850046D4_10DDD64D_F6BB32E5;
+
+   localparam	[383:0]	B_7	= 384'h248A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
+   localparam	[383:0]	B_8	= 384'h448A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
+
+
+   //
+   // Core Parameters
+   //
+   localparam	WORD_COUNTER_WIDTH	=  3;
+   parameter	OPERAND_NUM_WORDS	=  8;
+
+
+   //
+   // Clock (100 MHz)
+   //
+   reg clk = 1'b0;
+   always #5 clk = ~clk;
+
+
+   //
+   // Inputs, Outputs
+   //
+   reg rst_n;
+   reg ena;
+   wire rdy;
+
+   wire	core_cmp_l;
+   wire	core_cmp_e;
+   wire	core_cmp_g;
+
+
+   //
+   // Buffers (X, Y)
+   //
+   wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
+
+   wire [                32-1:0] core_x_data;
+   wire [                32-1:0] core_y_data;
+
+   reg [WORD_COUNTER_WIDTH-1:0]  tb_xy_addr;
+   reg 				 tb_xy_wren;
+
+   reg [                32-1:0]  tb_x_data;
+   reg [                32-1:0]  tb_y_data;
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_x
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xy_addr),
+      .a_wr		(tb_xy_wren),
+      .a_in		(tb_x_data),
+      .a_out	(),
+
+      .b_addr	(core_xy_addr),
+      .b_out	(core_x_data)
+      );
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_y
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_xy_addr),
+      .a_wr		(tb_xy_wren),
+      .a_in		(tb_y_data),
+      .a_out	(),
+
+      .b_addr	(core_xy_addr),
+      .b_out	(core_y_data)
+      );
+
+
+   //
+   // UUT
+   //
+   mw_comparator #	// core under test; reads both operand memories and drives three comparison flags
+     (
+      .WORD_COUNTER_WIDTH	(WORD_COUNTER_WIDTH),
+      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS)
+      )
+   uut
+     (
+      .clk		(clk),
+      .rst_n	(rst_n),	// held at 0 for the first 200 ns of the stimulus, then set to 1
+
+      .ena		(ena),		// raised for one 10 ns period by the test task
+      .rdy		(rdy),		// polled by the test task before the flags are sampled
+
+      .xy_addr	(core_xy_addr),	// a single address drives the read ports of both bram_x and bram_y
+      .x_din	(core_x_data),	// words of the first task argument (x)
+      .y_din	(core_y_data),	// words of the second task argument (y)
+
+      .cmp_l	(core_cmp_l),	// checked against (x <  y) by the test task
+      .cmp_e	(core_cmp_e),	// checked against (x == y) by the test task
+      .cmp_g	(core_cmp_g)	// checked against (x >  y) by the test task
+      );
+
+
+   //
+   // Testbench Routine
+   //
+   reg 				 ok = 1;
+   initial begin
+      // Stimulus: hold rst_n low for 200 ns, release it, idle for 100 ns, run eighteen comparisons, report, stop.
+      /* initialize control inputs (reset asserted, core not started) */
+      rst_n		= 0;
+      ena		= 0;
+
+      tb_xy_wren	= 0;	// operand memories are not written until a test task starts
+
+      /* wait for some time (20 clock periods of 10 ns) */
+      #200;
+
+      /* de-assert reset */
+      rst_n		= 1;
+
+      /* wait for some time; tests start at t = 300 ns, where clk (toggled every 5 ns from 0) has a falling edge */
+      #100;
+
+      /* run tests: first the equal case */
+      test_mw_comparator(A_0, A_0);
+      // A_1..A_8 each differ from A_0 in one 32-bit word; odd indices are smaller than A_0, even indices larger
+      test_mw_comparator(A_0, A_1);
+      test_mw_comparator(A_0, A_2);
+      test_mw_comparator(A_0, A_3);
+      test_mw_comparator(A_0, A_4);
+      test_mw_comparator(A_0, A_5);
+      test_mw_comparator(A_0, A_6);
+      test_mw_comparator(A_0, A_7);
+      test_mw_comparator(A_0, A_8);
+      // same pattern repeated with the second base vector
+      test_mw_comparator(B_0, B_0);
+
+      test_mw_comparator(B_0, B_1);
+      test_mw_comparator(B_0, B_2);
+      test_mw_comparator(B_0, B_3);
+      test_mw_comparator(B_0, B_4);
+      test_mw_comparator(B_0, B_5);
+      test_mw_comparator(B_0, B_6);
+      test_mw_comparator(B_0, B_7);
+      test_mw_comparator(B_0, B_8);
+
+      /* print result (ok is cleared by any test task whose comparison failed) */
+      if (ok)	$display("tb_mw_comparator: SUCCESS");
+      else	$display("tb_mw_comparator: FAILURE");
+      //
+      $finish;
+      //
+   end
+
+
+   //
+   // Test Task
+   //
+   reg	cmp_l;
+   reg	cmp_e;
+   reg	cmp_g;
+   reg	cmp_ok;
+
+   integer w;
+
+   task test_mw_comparator;
+      // Loads x and y into the operand memories, runs the core once and checks its three flags against (x < y), (x == y), (x > y).
+      input [255:0]	x;
+      input [255:0] 	y;
+
+      reg [255:0] 	x_shreg;
+      reg [255:0] 	y_shreg;
+
+      begin
+
+	 /* start filling memories */
+	 tb_xy_wren	= 1;
+
+	 /* initialize shift registers */
+	 x_shreg = x;
+	 y_shreg = y;
+
+	 /* write all the words, least significant 32-bit word at address 0 */
+	 for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+	    /* set addresses */
+	    tb_xy_addr	= w[WORD_COUNTER_WIDTH-1:0];
+
+	    /* set data words */
+	    tb_x_data	= x_shreg[31:0];
+	    tb_y_data	= y_shreg[31:0];
+
+	    /* shift inputs right by one word; the vacated upper word is filled with X */
+	    x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+	    y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+
+	    /* wait for 1 clock tick (clk toggles every 5 ns) */
+	    #10;
+
+	 end
+
+	 /* wipe addresses */
+	 tb_xy_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+
+	 /* wipe data words */
+	 tb_x_data	= {32{1'bX}};
+	 tb_y_data	= {32{1'bX}};
+
+	 /* stop filling memories */
+	 tb_xy_wren	= 0;
+
+	 /* calculate reference values (exactly one of the three is 1) */
+	 cmp_l = (x <  y) ? 1 : 0;
+	 cmp_e = (x == y) ? 1 : 0;
+	 cmp_g = (x >  y) ? 1 : 0;
+
+	 /* start operation */
+	 ena = 1;
+
+	 /* clear flag after one clock period */
+	 #10 ena = 0;
+
+	 /* wait for operation to complete (rdy is polled once per clock period) */
+	 while (!rdy) #10;
+
+	 /* compare with case equality: an X or Z on any core flag gives a definite mismatch instead of an unknown result */
+	 cmp_ok = (cmp_l === core_cmp_l) && (cmp_e === core_cmp_e) && (cmp_g === core_cmp_g);
+
+	 /* display results */
+	 $display("test_mw_comparator(): %s", cmp_ok ? "OK" : "ERROR");
+
+	 /* update global flag */
+	 ok = ok && cmp_ok;
+
+      end
+
+   endtask
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_mw_mover.v b/bench/tb_mw_mover.v
new file mode 100644
index 0000000..08bdb9e
--- /dev/null
+++ b/bench/tb_mw_mover.v
@@ -0,0 +1,282 @@
+//------------------------------------------------------------------------------
+//
+// tb_mw_mover.v
+// -----------------------------------------------------------------------------
+// Testbench for multi-word data mover.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_mw_mover;
+
+
+   //
+   // Test Vectors
+   //
+   localparam	[255:0]	X_1	= 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
+   localparam	[255:0]	X_2	= 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
+   localparam	[255:0]	X_3	= 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
+   localparam	[255:0]	X_4	= 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
+
+
+   //
+   // Core Parameters
+   //
+   localparam	WORD_COUNTER_WIDTH	=  3;
+   localparam	OPERAND_NUM_WORDS	=  8;
+
+
+   //
+   // Clock (100 MHz)
+   //
+   reg clk = 1'b0;
+   always #5 clk = ~clk;
+
+
+   //
+   // Inputs, Outputs
+   //
+   reg rst_n;
+   reg ena;
+   wire rdy;
+
+
+   //
+   // Buffers (X, Y)
+   //
+   wire [WORD_COUNTER_WIDTH-1:0] core_x_addr;
+   wire [WORD_COUNTER_WIDTH-1:0] core_y_addr;
+   wire 			 core_y_wren;
+
+   wire [                32-1:0] core_x_data;
+   wire [                32-1:0] core_y_data;
+
+   reg [WORD_COUNTER_WIDTH-1:0]  tb_x_addr;
+   reg [WORD_COUNTER_WIDTH-1:0]  tb_y_addr;
+   reg 				 tb_x_wren;
+
+   reg [                32-1:0]  tb_x_data;
+   wire [                32-1:0] tb_y_data;
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_x
+     (
+      .clk		(clk),
+
+      .a_addr	(tb_x_addr),
+      .a_wr		(tb_x_wren),
+      .a_in		(tb_x_data),
+      .a_out	(),
+
+      .b_addr	(core_x_addr),
+      .b_out	(core_x_data)
+      );
+
+   bram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH			(32),
+      .MEM_ADDR_BITS		(WORD_COUNTER_WIDTH)
+      )
+   bram_d
+     (
+      .clk		(clk),
+
+      .a_addr	(core_y_addr),
+      .a_wr		(core_y_wren),
+      .a_in		(core_y_data),
+      .a_out	(),
+
+      .b_addr	(tb_y_addr),
+      .b_out	(tb_y_data)
+      );
+
+
+   //
+   // UUT
+   //
+   mw_mover #	// core under test; reads from the source memory bram_x and writes into the destination memory (instance bram_d)
+     (
+      .WORD_COUNTER_WIDTH	(WORD_COUNTER_WIDTH),
+      .OPERAND_NUM_WORDS	(OPERAND_NUM_WORDS)
+      )
+   uut
+     (
+      .clk			(clk),
+      .rst_n		(rst_n),	// held at 0 for the first 200 ns of the stimulus, then set to 1
+
+      .ena			(ena),		// raised for one 10 ns period by the test task
+      .rdy			(rdy),		// polled by the test task before the copy is read back
+
+      .x_addr		(core_x_addr),	// read address into bram_x
+      .y_addr		(core_y_addr),	// write address into bram_d
+      .y_wren		(core_y_wren),	// write enable of bram_d
+
+      .x_din		(core_x_data),	// words read from bram_x
+      .y_dout		(core_y_data)	// words written into bram_d
+      );
+
+
+   //
+   // Testbench Routine
+   //
+   reg 				 ok = 1;
+   initial begin
+      // Stimulus: hold rst_n low for 200 ns, release it, idle for 100 ns, copy four vectors, report, stop.
+      /* initialize control inputs (reset asserted, core not started) */
+      rst_n			= 0;
+      ena			= 0;
+
+      tb_x_wren	= 0;	// source memory is not written until a test task starts
+
+      /* wait for some time (20 clock periods of 10 ns) */
+      #200;
+
+      /* de-assert reset */
+      rst_n		= 1;
+
+      /* wait for some time; tests start at t = 300 ns, where clk (toggled every 5 ns from 0) has a falling edge */
+      #100;
+
+      /* run tests: each vector is written to the source memory, moved by the core and read back */
+      test_modular_mover(X_1);
+      test_modular_mover(X_2);
+      test_modular_mover(X_3);
+      test_modular_mover(X_4);
+
+      /* print result (ok is cleared by any test task whose comparison failed) */
+      if (ok)	$display("tb_modular_mover: SUCCESS");
+      else	$display("tb_modular_mover: FAILURE");
+      //
+      $finish;
+      //
+   end
+
+
+   //
+   // Test Task
+   //
+   reg	[255:0]	y;
+   reg 		y_ok;
+
+   integer 	w;
+
+   reg [255:0] 	x_shreg;
+   reg [255:0] 	y_shreg;
+
+   task test_modular_mover;
+      // Writes x into the source memory, runs the core once and checks that the destination memory reads back as x.
+      input	[255:0]	x;
+
+      begin
+
+	 /* start filling memories */
+	 tb_x_wren	= 1;
+
+	 /* initialize shift registers */
+	 x_shreg = x;
+
+	 /* write all the words, least significant 32-bit word at address 0 */
+	 for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+	    /* set addresses */
+	    tb_x_addr	= w[WORD_COUNTER_WIDTH-1:0];
+
+	    /* set data words */
+	    tb_x_data	= x_shreg[31:0];
+
+	    /* shift inputs right by one word; the vacated upper word is filled with X */
+	    x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+
+	    /* wait for 1 clock tick (clk toggles every 5 ns) */
+	    #10;
+
+	 end
+
+	 /* wipe addresses */
+	 tb_x_addr	= {WORD_COUNTER_WIDTH{1'bX}};
+
+	 /* wipe data words */
+	 tb_x_data	= {32{1'bX}};
+
+	 /* stop filling memories */
+	 tb_x_wren	= 0;
+
+	 /* start operation */
+	 ena = 1;
+
+	 /* clear flag after one clock period */
+	 #10 ena = 0;
+
+	 /* wait for operation to complete (rdy is polled once per clock period) */
+	 while (!rdy) #10;
+
+	 /* read result, word 0 first */
+	 for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+	    /* set address */
+	    tb_y_addr	= w[WORD_COUNTER_WIDTH-1:0];
+
+	    /* wait for 1 clock tick */
+	    #10;
+
+	    /* store data word: shift in from the top so word 0 ends up in bits 31:0 after the last iteration */
+	    y_shreg = {tb_y_data, y_shreg[255:32]};
+
+	 end
+
+	 /* compare with case equality: an X or Z in the read-back data gives a definite mismatch instead of an unknown flag */
+	 y_ok = (y_shreg === x);
+
+	 /* display results */
+	 $display("test_modular_mover(): %s", y_ok ? "OK" : "ERROR");
+
+	 /* update global flag */
+	 ok = ok && y_ok;
+
+      end
+
+   endtask
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/lowlevel/artix7/modexp_systolic_pe_artix7.v b/lowlevel/artix7/adder32_ce_artix7.v
similarity index 56%
copy from lowlevel/artix7/modexp_systolic_pe_artix7.v
copy to lowlevel/artix7/adder32_ce_artix7.v
index 08391f5..0f6d44d 100644
--- a/lowlevel/artix7/modexp_systolic_pe_artix7.v
+++ b/lowlevel/artix7/adder32_ce_artix7.v
@@ -1,12 +1,12 @@
 //------------------------------------------------------------------------------
 //
-// modexp_systolic_pe_artix7.v
+// adder32_ce_artix7.v
 // -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) low-level systolic array processing element.
+// Hardware (Artix-7 DSP48E1) 32-bit adder w/ clock enable.
 //
 // Authors: Pavel Shatov
 //
-// Copyright (c) 2016-2017, NORDUnet A/S
+// Copyright (c) 2016, 2018 NORDUnet A/S
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
@@ -36,88 +36,59 @@
 //
 //------------------------------------------------------------------------------
 
-module modexp_systolic_pe_artix7
-	(
-		input					clk,
-		input		[31: 0]	a,
-		input		[31: 0]	b,
-		input		[31: 0]	t,
-		input		[31: 0]	c_in,
-		output	[31: 0]	p,
-		output	[31: 0]	c_out
-	);
-	
-	reg	[31: 0]	t_dly;
-	reg	[31: 0]	c_in_dly;
-	
-	always @(posedge clk) t_dly <= t;
-	always @(posedge clk) c_in_dly <= c_in;
-	
-	wire	[31: 0]	t_c_in_s;
-	wire				t_c_in_c_out;
-	
-	reg				t_c_in_c_out_dly;
-	
-	always @(posedge clk) t_c_in_c_out_dly <= t_c_in_c_out;
-	
-	modexpa7_adder32_artix7 add_t_c_in
-	(
-		.clk		(clk),
-		.ce		(1'b1),
-		.a			(t_dly),
-		.b			(c_in_dly),
-		.c_in		(1'b0),
-		.s			(t_c_in_s),
-		.c_out	(t_c_in_c_out)
-	);
-
-	wire	[63: 0]	a_b;
-	
-	wire	[31: 0]	a_b_lsb = a_b[31: 0];
-	wire	[31: 0]	a_b_msb = a_b[63:32];
-	
-	reg	[31: 0]	a_b_msb_dly;
-	
-	always @(posedge clk) a_b_msb_dly <= a_b_msb;
-	
-	modexpa7_multiplier32_artix7 mul_a_b
-	(
-		.clk	(clk),
-		.a		(a),
-		.b		(b),
-		.p		(a_b)
-	);
-	
-	wire	[31: 0]	add_p_s;
-	wire				add_p_c_out;
-	
-	reg	[31: 0]	add_p_s_dly;
-	
-	always @(posedge clk) add_p_s_dly <= add_p_s;
-	
-	assign p = add_p_s_dly;
-	
-	modexpa7_adder32_artix7 add_p
-	(
-		.clk		(clk),
-		.ce		(1'b1),
-		.a			(a_b_lsb),
-		.b			(t_c_in_s),
-		.c_in		(1'b0),
-		.s			(add_p_s),
-		.c_out	(add_p_c_out)
-	);
-
-	modexpa7_adder32_artix7 add_c_out
-	(
-		.clk		(clk),
-		.ce		(1'b1),
-		.a			(a_b_msb_dly),
-		.b			({{31{1'b0}}, t_c_in_c_out_dly}),
-		.c_in		(add_p_c_out),
-		.s			(c_out),
-		.c_out	()
-	);
+module adder32_ce_artix7	// produces {c_out, s} from a, b and c_in through a single dsp48e1_wrapper instance
+  (
+   input 	  clk, // clock
+   input      ce, // clock enable (forwarded unchanged to the wrapper's ce port)
+   input [31: 0]  a, // operand input (zero-extended to 48 bits onto the wrapper's c port)
+   input [31: 0]  b, // operand input (split across the wrapper's a and b ports)
+   output [31: 0] s, // sum output (bits 31:0 of the wrapper's p output)
+   input 	  c_in, // carry input
+   output 	  c_out		// carry output (bit 32 of the wrapper's p output)
+   );
+
+   //
+   // Lower and higher parts of operand b: bl (18 bits) feeds wrapper port b, bh (14 bits, zero-extended to 30) feeds port a
+   //
+   wire [17: 0]   bl = b[17: 0];
+   wire [13: 0]   bh = b[31:18];
+
+
+   //
+   // DSP48E1 Slice (NOTE(review): latency from inputs to s/c_out depends on dsp48e1_wrapper, which is not visible here)
+   //
+
+   /* Operation Mode -- NOTE(review): per the Xilinx DSP48E1 documentation, ALUMODE 0000 with OPMODE 0110011 selects P = C + A:B + CARRYIN; confirm the wrapper forwards both unmodified */
+   wire [ 3: 0]   dsp48e1_alumode	= 4'b0000;
+   wire [ 6: 0]   dsp48e1_opmode		= 7'b0110011;
+
+   /* Internal Product (the wrapper's 48-bit p output; only bits 32:0 are used below) */
+   wire [47: 0]   p_int;
+
+   dsp48e1_wrapper dsp_adder
+     (
+      .clk			(clk),
+
+      .ce			(ce),
+
+      .carry		(c_in),
+
+      .alumode		(dsp48e1_alumode),
+      .opmode		(dsp48e1_opmode),
+
+      .a				({{16{1'b0}}, bh}),	// 16 zero bits + 14 bits = 30 bits
+      .b				(bl),
+      .c				({{16{1'b0}}, a}),	// 16 zero bits + 32 bits = 48 bits
+
+      .p				(p_int)
+      );
+
+   //
+   // Output Mapping: low 32 bits are the sum, bit 32 is the carry out of the 32-bit addition
+   //
+   assign s 		= p_int[31: 0];
+   assign c_out	= p_int[32];
+
 
 endmodule
 
diff --git a/lowlevel/artix7/dsp48e1_wrapper_modexp.v b/lowlevel/artix7/dsp48e1_wrapper_modexp.v
index 17d8efe..27c8bf8 100644
--- a/lowlevel/artix7/dsp48e1_wrapper_modexp.v
+++ b/lowlevel/artix7/dsp48e1_wrapper_modexp.v
@@ -36,7 +36,7 @@
 //
 //------------------------------------------------------------------------------
 
-module modexpa7_dsp48e1_wrapper_modexp #
+module dsp48e1_wrapper_modexp #
 	(
 		parameter	AREG		= 1'b0,
 		parameter	PREG		= 1'b0,
diff --git a/lowlevel/artix7/modexp_systolic_pe_artix7.v b/lowlevel/artix7/modexp_multiplier32_artix7.v
similarity index 50%
copy from lowlevel/artix7/modexp_systolic_pe_artix7.v
copy to lowlevel/artix7/modexp_multiplier32_artix7.v
index 08391f5..d4bd3f4 100644
--- a/lowlevel/artix7/modexp_systolic_pe_artix7.v
+++ b/lowlevel/artix7/modexp_multiplier32_artix7.v
@@ -1,8 +1,8 @@
 //------------------------------------------------------------------------------
 //
-// modexp_systolic_pe_artix7.v
+// modexp_multiplier32_artix7.v
 // -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) low-level systolic array processing element.
+// Hardware (Artix-7 DSP48E1) 32-bit multiplier.
 //
 // Authors: Pavel Shatov
 //
@@ -36,87 +36,132 @@
 //
 //------------------------------------------------------------------------------
 
-module modexp_systolic_pe_artix7
+module modexp_multiplier32_artix7
 	(
 		input					clk,
 		input		[31: 0]	a,
 		input		[31: 0]	b,
-		input		[31: 0]	t,
-		input		[31: 0]	c_in,
-		output	[31: 0]	p,
-		output	[31: 0]	c_out
+		output	[63: 0]	p
 	);
+
+		/* split a, b into smaller words */
+	wire	[16: 0]	a_lo = a[16: 0];
+	wire	[16: 0]	b_lo = b[16: 0];
+	wire	[14: 0]	a_hi = a[31:17];
+	wire	[14: 0]	b_hi = b[31:17];
+
+		/* smaller sub-products */
+	wire	[47: 0]	dsp1_p;
+	wire	[47: 0]	dsp2_p;
+	wire	[47: 0]	dsp4_p;
+
+		/* direct output mapping */
+	assign p[63:34] = dsp4_p[29: 0];
 	
-	reg	[31: 0]	t_dly;
-	reg	[31: 0]	c_in_dly;
-	
-	always @(posedge clk) t_dly <= t;
-	always @(posedge clk) c_in_dly <= c_in;
-	
-	wire	[31: 0]	t_c_in_s;
-	wire				t_c_in_c_out;
-	
-	reg				t_c_in_c_out_dly;
-	
-	always @(posedge clk) t_c_in_c_out_dly <= t_c_in_c_out;
+		/* delayed output mapping */
+	genvar fd;
+	generate for (fd=0; fd<17; fd=fd+1)
+		begin : gen_FD
+			FD # (.INIT( 1'b0)) FD_inst1 (.C(clk), .D(dsp1_p[fd]), .Q(p[fd +  0]));
+			FD # (.INIT( 1'b0)) FD_inst3 (.C(clk), .D(dsp2_p[fd]), .Q(p[fd + 17]));
+		end
+	endgenerate
+
+		/* product chains */
+	wire	[47: 0]	dsp1_p_chain;
+	wire	[47: 0]	dsp3_p_chain;
+	wire	[47: 0]	dsp2_p_chain;
 	
-	modexpa7_adder32_artix7 add_t_c_in
+		/* operand chains */
+	wire	[29: 0]	a_lo_chain;
+	wire	[29: 0]	a_hi_chain;  
+  
+		//
+		// a_lo * b_lo
+		//
+	dsp48e1_wrapper_modexp #
+	(
+		.AREG			(1'b1),
+		.PREG			(1'b0),
+		.A_INPUT		("DIRECT")
+	)
+	dsp1
 	(
 		.clk		(clk),
-		.ce		(1'b1),
-		.a			(t_dly),
-		.b			(c_in_dly),
-		.c_in		(1'b0),
-		.s			(t_c_in_s),
-		.c_out	(t_c_in_c_out)
+		.opmode	(7'b0110101),
+		.a			({13'd0, a_lo}),
+		.b			({1'b0, b_lo}),
+		.p			(dsp1_p),
+		.acin		(30'd0),
+		.pcin		(48'd0),
+		.acout	(a_lo_chain),
+		.pcout	(dsp1_p_chain)
 	);
-
-	wire	[63: 0]	a_b;
-	
-	wire	[31: 0]	a_b_lsb = a_b[31: 0];
-	wire	[31: 0]	a_b_msb = a_b[63:32];
-	
-	reg	[31: 0]	a_b_msb_dly;
 	
-	always @(posedge clk) a_b_msb_dly <= a_b_msb;
-	
-	modexpa7_multiplier32_artix7 mul_a_b
+		//
+		// a_hi * b_lo
+		//
+	dsp48e1_wrapper_modexp #
+	(
+		.AREG			(1'b1),
+		.PREG			(1'b0),
+		.A_INPUT		("DIRECT")
+	)
+	dsp2
 	(
-		.clk	(clk),
-		.a		(a),
-		.b		(b),
-		.p		(a_b)
+		.clk		(clk),
+		.opmode	(7'b0010101),
+		.a			({15'd0, a_hi}),
+		.b			({1'd0, b_lo}),
+		.p			(dsp2_p),
+		.acin		(30'd0),
+		.pcin		(dsp3_p_chain),
+		.acout	(a_hi_chain),
+		.pcout	(dsp2_p_chain)
 	);
 	
-	wire	[31: 0]	add_p_s;
-	wire				add_p_c_out;
-	
-	reg	[31: 0]	add_p_s_dly;
-	
-	always @(posedge clk) add_p_s_dly <= add_p_s;
-	
-	assign p = add_p_s_dly;
-	
-	modexpa7_adder32_artix7 add_p
+		//
+		// a_lo * b_hi
+		//
+	dsp48e1_wrapper_modexp #
+	(
+		.AREG			(1'b0),
+		.PREG			(1'b0),
+		.A_INPUT		("CASCADE")
+	)
+	dsp3
 	(
 		.clk		(clk),
-		.ce		(1'b1),
-		.a			(a_b_lsb),
-		.b			(t_c_in_s),
-		.c_in		(1'b0),
-		.s			(add_p_s),
-		.c_out	(add_p_c_out)
-	);
-
-	modexpa7_adder32_artix7 add_c_out
+		.opmode	(7'b1010101),
+		.a			(30'd0),
+		.b			({3'd0, b_hi}),
+		.p			(),
+		.acin		(a_lo_chain),
+		.pcin		(dsp1_p_chain),
+		.acout	(),
+		.pcout	(dsp3_p_chain)
+	);	
+	
+		//
+		// a_hi * b_hi
+		//
+	dsp48e1_wrapper_modexp #
+	(
+		.AREG			(1'b0),
+		.PREG			(1'b1),
+		.A_INPUT		("CASCADE")
+	)
+	dsp4
 	(
 		.clk		(clk),
-		.ce		(1'b1),
-		.a			(a_b_msb_dly),
-		.b			({{31{1'b0}}, t_c_in_c_out_dly}),
-		.c_in		(add_p_c_out),
-		.s			(c_out),
-		.c_out	()
+		.opmode	(7'b1010101),
+		.a			(30'd0),
+		.b			({3'd0, b_hi}),
+		.p			(dsp4_p),
+		.acin		(a_hi_chain),
+		.pcin		(dsp2_p_chain),
+		.acout	(),
+		.pcout	()
 	);
 
 endmodule
diff --git a/lowlevel/artix7/modexp_systolic_pe_artix7.v b/lowlevel/artix7/modexp_systolic_pe_artix7.v
index 08391f5..9cf01f2 100644
--- a/lowlevel/artix7/modexp_systolic_pe_artix7.v
+++ b/lowlevel/artix7/modexp_systolic_pe_artix7.v
@@ -60,10 +60,9 @@ module modexp_systolic_pe_artix7
 	
 	always @(posedge clk) t_c_in_c_out_dly <= t_c_in_c_out;
 	
-	modexpa7_adder32_artix7 add_t_c_in
+	adder32_artix7 add_t_c_in
 	(
 		.clk		(clk),
-		.ce		(1'b1),
 		.a			(t_dly),
 		.b			(c_in_dly),
 		.c_in		(1'b0),
@@ -80,7 +79,7 @@ module modexp_systolic_pe_artix7
 	
 	always @(posedge clk) a_b_msb_dly <= a_b_msb;
 	
-	modexpa7_multiplier32_artix7 mul_a_b
+	modexp_multiplier32_artix7 mul_a_b
 	(
 		.clk	(clk),
 		.a		(a),
@@ -97,10 +96,9 @@ module modexp_systolic_pe_artix7
 	
 	assign p = add_p_s_dly;
 	
-	modexpa7_adder32_artix7 add_p
+	adder32_artix7 add_p
 	(
 		.clk		(clk),
-		.ce		(1'b1),
 		.a			(a_b_lsb),
 		.b			(t_c_in_s),
 		.c_in		(1'b0),
@@ -108,10 +106,9 @@ module modexp_systolic_pe_artix7
 		.c_out	(add_p_c_out)
 	);
 
-	modexpa7_adder32_artix7 add_c_out
+	adder32_artix7 add_c_out
 	(
 		.clk		(clk),
-		.ce		(1'b1),
 		.a			(a_b_msb_dly),
 		.b			({{31{1'b0}}, t_c_in_c_out_dly}),
 		.c_in		(add_p_c_out),
diff --git a/lowlevel/artix7/modexp_systolic_pe_artix7.v b/lowlevel/artix7/subtractor32_ce_artix7.v
similarity index 56%
copy from lowlevel/artix7/modexp_systolic_pe_artix7.v
copy to lowlevel/artix7/subtractor32_ce_artix7.v
index 08391f5..c0238ea 100644
--- a/lowlevel/artix7/modexp_systolic_pe_artix7.v
+++ b/lowlevel/artix7/subtractor32_ce_artix7.v
@@ -1,12 +1,12 @@
 //------------------------------------------------------------------------------
 //
-// modexp_systolic_pe_artix7.v
+// subtractor32_ce_artix7.v
 // -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) low-level systolic array processing element.
+// Hardware (Artix-7 DSP48E1) 32-bit subtractor w/ clock enable.
 //
 // Authors: Pavel Shatov
 //
-// Copyright (c) 2016-2017, NORDUnet A/S
+// Copyright (c) 2016, NORDUnet A/S
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
@@ -36,88 +36,57 @@
 //
 //------------------------------------------------------------------------------
 
-module modexp_systolic_pe_artix7
-	(
-		input					clk,
-		input		[31: 0]	a,
-		input		[31: 0]	b,
-		input		[31: 0]	t,
-		input		[31: 0]	c_in,
-		output	[31: 0]	p,
-		output	[31: 0]	c_out
-	);
-	
-	reg	[31: 0]	t_dly;
-	reg	[31: 0]	c_in_dly;
-	
-	always @(posedge clk) t_dly <= t;
-	always @(posedge clk) c_in_dly <= c_in;
-	
-	wire	[31: 0]	t_c_in_s;
-	wire				t_c_in_c_out;
-	
-	reg				t_c_in_c_out_dly;
-	
-	always @(posedge clk) t_c_in_c_out_dly <= t_c_in_c_out;
-	
-	modexpa7_adder32_artix7 add_t_c_in
-	(
-		.clk		(clk),
-		.ce		(1'b1),
-		.a			(t_dly),
-		.b			(c_in_dly),
-		.c_in		(1'b0),
-		.s			(t_c_in_s),
-		.c_out	(t_c_in_c_out)
-	);
+module subtractor32_ce_artix7 // 32-bit subtractor with borrow in/out, built on a single dsp48e1_wrapper instance
+  (
+   input 	  clk, // clock, forwarded to the DSP wrapper
+   input      ce, // clock enable, forwarded to the DSP wrapper
+   input [31: 0]  a, // operand routed (zero-extended) to the wrapper's c port
+   input [31: 0]  b, // operand split across the wrapper's a (upper 14 bits) and b (lower 18 bits) ports
+   output [31: 0] d, // difference: low 32 bits of the wrapper's p output
+   input 	  b_in, // borrow input, wired to the wrapper's carry port
+   output 	  b_out // borrow output: bit 32 of the wrapper's p output
+   );
 
-	wire	[63: 0]	a_b;
-	
-	wire	[31: 0]	a_b_lsb = a_b[31: 0];
-	wire	[31: 0]	a_b_msb = a_b[63:32];
-	
-	reg	[31: 0]	a_b_msb_dly;
-	
-	always @(posedge clk) a_b_msb_dly <= a_b_msb;
-	
-	modexpa7_multiplier32_artix7 mul_a_b
-	(
-		.clk	(clk),
-		.a		(a),
-		.b		(b),
-		.p		(a_b)
-	);
-	
-	wire	[31: 0]	add_p_s;
-	wire				add_p_c_out;
-	
-	reg	[31: 0]	add_p_s_dly;
-	
-	always @(posedge clk) add_p_s_dly <= add_p_s;
-	
-	assign p = add_p_s_dly;
-	
-	modexpa7_adder32_artix7 add_p
-	(
-		.clk		(clk),
-		.ce		(1'b1),
-		.a			(a_b_lsb),
-		.b			(t_c_in_s),
-		.c_in		(1'b0),
-		.s			(add_p_s),
-		.c_out	(add_p_c_out)
-	);
+   //
+   // Lower (18-bit) and higher (14-bit) parts of operand b
+   //
+   wire [17: 0]   bl = b[17: 0]; // feeds the wrapper's b port
+   wire [13: 0]   bh = b[31:18]; // feeds the wrapper's a port after zero-extension
 
-	modexpa7_adder32_artix7 add_c_out
-	(
-		.clk		(clk),
-		.ce		(1'b1),
-		.a			(a_b_msb_dly),
-		.b			({{31{1'b0}}, t_c_in_c_out_dly}),
-		.c_in		(add_p_c_out),
-		.s			(c_out),
-		.c_out	()
-	);
+   //
+   // DSP48E1 Slice
+   //
+
+   /* Operation Mode (constant; never changes at run time) */
+   wire [ 3: 0]   dsp48e1_alumode	= 4'b0011; // DSP48E1 ALUMODE 0011: P = Z - (X + Y + CIN), per Xilinx UG479
+   wire [ 6: 0]   dsp48e1_opmode		= 7'b0110011; // DSP48E1 OPMODE: Z = C (011), Y = 0 (00), X = A:B (11), per Xilinx UG479
+
+   /* Internal Product (48-bit p output of the wrapper) */
+   wire [47: 0]   p_int;
+
+   dsp48e1_wrapper dsp_subtractor // NOTE(review): wrapper is defined elsewhere; 1:1 port-to-primitive mapping is assumed -- confirm
+     (
+      .clk			(clk),
+
+      .ce			(ce),
+
+      .carry		(b_in), // NOTE(review): presumably drives the primitive's CARRYIN -- confirm in dsp48e1_wrapper
+
+      .alumode		(dsp48e1_alumode),
+      .opmode		(dsp48e1_opmode),
+
+      .a				({{16{1'b0}}, bh}), // 16 zero bits + 14-bit bh = 30 bits
+      .b				(bl), // 18 bits
+      .c				({{16{1'b0}}, a}), // 16 zero bits + 32-bit a = 48 bits
+
+      .p				(p_int)
+      );
+
+   //
+   // Output Mapping
+   //
+   assign d 		= p_int[31: 0]; // low 32 bits of the result
+   assign b_out	= p_int[32]; // first bit above the 32-bit result; presumably 1 when a - b - b_in underflows -- confirm
 
 endmodule
 
diff --git a/lowlevel/cryptech_primitive_switch.vh b/lowlevel/cryptech_primitive_switch.vh
index a4aad45..44b0852 100644
--- a/lowlevel/cryptech_primitive_switch.vh
+++ b/lowlevel/cryptech_primitive_switch.vh
@@ -49,19 +49,29 @@
     //
     // Generic Math Primitives
     //
-`define CRYPTECH_PRIMITIVE_MAC16_GENERIC    mac16_generic
-`define CRYPTECH_PRIMITIVE_ADD32_GENERIC    adder32_generic
-`define CRYPTECH_PRIMITIVE_ADD47_GENERIC    adder47_generic
-`define CRYPTECH_PRIMITIVE_SUB32_GENERIC    subtractor32_generic
+`define CRYPTECH_PRIMITIVE_MAC16_GENERIC mac16_generic
+`define CRYPTECH_PRIMITIVE_ADD32_GENERIC adder32_generic
+`define CRYPTECH_PRIMITIVE_ADD47_GENERIC adder47_generic
+`define CRYPTECH_PRIMITIVE_SUB32_GENERIC subtractor32_generic
+
+`define CRYPTECH_PRIMITIVE_ADD32_CE_GENERIC adder32_ce_generic
+`define CRYPTECH_PRIMITIVE_SUB32_CE_GENERIC subtractor32_ce_generic
+
+`define CRYPTECH_PRIMITIVE_MODEXP_SYSTOLIC_PE_GENERIC modexp_systolic_pe_generic
     
 
     //
     // Xilinx Math Primitives for Artix-7 Family
     //
-`define CRYPTECH_PRIMITIVE_MAC16_VENDOR     mac16_artix7
-`define CRYPTECH_PRIMITIVE_ADD32_VENDOR     adder32_artix7
-`define CRYPTECH_PRIMITIVE_ADD47_VENDOR     adder47_artix7
-`define CRYPTECH_PRIMITIVE_SUB32_VENDOR     subtractor32_artix7
+`define CRYPTECH_PRIMITIVE_MAC16_VENDOR  mac16_artix7
+`define CRYPTECH_PRIMITIVE_ADD32_VENDOR  adder32_artix7
+`define CRYPTECH_PRIMITIVE_ADD47_VENDOR  adder47_artix7
+`define CRYPTECH_PRIMITIVE_SUB32_VENDOR  subtractor32_artix7
+
+`define CRYPTECH_PRIMITIVE_ADD32_CE_VENDOR adder32_ce_artix7
+`define CRYPTECH_PRIMITIVE_SUB32_CE_VENDOR subtractor32_ce_artix7
+
+`define CRYPTECH_PRIMITIVE_MODEXP_SYSTOLIC_PE_VENDOR modexp_systolic_pe_artix7
 
 
 
@@ -78,6 +88,11 @@
 `define CRYPTECH_PRIMITIVE_ADD47 `CRYPTECH_PRIMITIVE_ADD47_GENERIC
 `define CRYPTECH_PRIMITIVE_SUB32 `CRYPTECH_PRIMITIVE_SUB32_GENERIC
 
+`define CRYPTECH_PRIMITIVE_ADD32_CE `CRYPTECH_PRIMITIVE_ADD32_CE_GENERIC
+`define CRYPTECH_PRIMITIVE_SUB32_CE `CRYPTECH_PRIMITIVE_SUB32_CE_GENERIC
+
+`define CRYPTECH_PRIMITIVE_MODEXP_SYSTOLIC_PE `CRYPTECH_PRIMITIVE_MODEXP_SYSTOLIC_PE_GENERIC
+
 `else
 
     // vendor-specific primitives
@@ -86,6 +101,11 @@
 `define CRYPTECH_PRIMITIVE_ADD32 `CRYPTECH_PRIMITIVE_ADD32_VENDOR
 `define CRYPTECH_PRIMITIVE_SUB32 `CRYPTECH_PRIMITIVE_SUB32_VENDOR
 
+`define CRYPTECH_PRIMITIVE_ADD32_CE `CRYPTECH_PRIMITIVE_ADD32_CE_VENDOR
+`define CRYPTECH_PRIMITIVE_SUB32_CE `CRYPTECH_PRIMITIVE_SUB32_CE_VENDOR
+
+`define CRYPTECH_PRIMITIVE_MODEXP_SYSTOLIC_PE `CRYPTECH_PRIMITIVE_MODEXP_SYSTOLIC_PE_VENDOR
+
 `endif
 
 
diff --git a/lowlevel/artix7/modexp_systolic_pe_artix7.v b/lowlevel/generic/adder32_ce_generic.v
similarity index 56%
copy from lowlevel/artix7/modexp_systolic_pe_artix7.v
copy to lowlevel/generic/adder32_ce_generic.v
index 08391f5..329bcbf 100644
--- a/lowlevel/artix7/modexp_systolic_pe_artix7.v
+++ b/lowlevel/generic/adder32_ce_generic.v
@@ -1,12 +1,12 @@
 //------------------------------------------------------------------------------
 //
-// modexp_systolic_pe_artix7.v
+// adder32_ce_generic.v
 // -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) low-level systolic array processing element.
+// Generic 32-bit adder w/ clock enable.
 //
 // Authors: Pavel Shatov
 //
-// Copyright (c) 2016-2017, NORDUnet A/S
+// Copyright (c) 2016, 2018 NORDUnet A/S
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
@@ -36,88 +36,30 @@
 //
 //------------------------------------------------------------------------------
 
-module modexp_systolic_pe_artix7
-	(
-		input					clk,
-		input		[31: 0]	a,
-		input		[31: 0]	b,
-		input		[31: 0]	t,
-		input		[31: 0]	c_in,
-		output	[31: 0]	p,
-		output	[31: 0]	c_out
-	);
-	
-	reg	[31: 0]	t_dly;
-	reg	[31: 0]	c_in_dly;
-	
-	always @(posedge clk) t_dly <= t;
-	always @(posedge clk) c_in_dly <= c_in;
-	
-	wire	[31: 0]	t_c_in_s;
-	wire				t_c_in_c_out;
-	
-	reg				t_c_in_c_out_dly;
-	
-	always @(posedge clk) t_c_in_c_out_dly <= t_c_in_c_out;
-	
-	modexpa7_adder32_artix7 add_t_c_in
-	(
-		.clk		(clk),
-		.ce		(1'b1),
-		.a			(t_dly),
-		.b			(c_in_dly),
-		.c_in		(1'b0),
-		.s			(t_c_in_s),
-		.c_out	(t_c_in_c_out)
-	);
+module adder32_ce_generic // registered 32-bit adder: s and c_out change only on a rising clk edge while ce is high
+  (
+   input 	  clk, // clock (rising edge)
+   input      ce, // clock enable: the result register holds its value while low
+   input [31: 0]  a, // first addend
+   input [31: 0]  b, // second addend
+   output [31: 0] s, // sum output: low 32 bits of the registered a + b + c_in
+   input 	  c_in, // carry input, added as a single bit
+   output 	  c_out		// carry output: bit 32 of the registered sum
+   );
 
-	wire	[63: 0]	a_b;
-	
-	wire	[31: 0]	a_b_lsb = a_b[31: 0];
-	wire	[31: 0]	a_b_msb = a_b[63:32];
-	
-	reg	[31: 0]	a_b_msb_dly;
-	
-	always @(posedge clk) a_b_msb_dly <= a_b_msb;
-	
-	modexpa7_multiplier32_artix7 mul_a_b
-	(
-		.clk	(clk),
-		.a		(a),
-		.b		(b),
-		.p		(a_b)
-	);
-	
-	wire	[31: 0]	add_p_s;
-	wire				add_p_c_out;
-	
-	reg	[31: 0]	add_p_s_dly;
-	
-	always @(posedge clk) add_p_s_dly <= add_p_s;
-	
-	assign p = add_p_s_dly;
-	
-	modexpa7_adder32_artix7 add_p
-	(
-		.clk		(clk),
-		.ce		(1'b1),
-		.a			(a_b_lsb),
-		.b			(t_c_in_s),
-		.c_in		(1'b0),
-		.s			(add_p_s),
-		.c_out	(add_p_c_out)
-	);
+   //
+   // Sum (33 bits wide: bit 32 captures the carry out)
+   //
+   reg [32: 0] 	  s_int; // no reset and no initial value: undefined until the first enabled clock edge
 
-	modexpa7_adder32_artix7 add_c_out
-	(
-		.clk		(clk),
-		.ce		(1'b1),
-		.a			(a_b_msb_dly),
-		.b			({{31{1'b0}}, t_c_in_c_out_dly}),
-		.c_in		(add_p_c_out),
-		.s			(c_out),
-		.c_out	()
-	);
+   always @(posedge clk)
+     if (ce) s_int <= {1'b0, a} + {1'b0, b} + {{32{1'b0}}, c_in}; // all operands zero-extended to 33 bits so the carry is kept
+
+   //
+   // Output (driven straight from the result register)
+   //
+   assign s = s_int[31:0]; // low 32 bits of the sum
+   assign c_out = s_int[32]; // 33rd bit of the sum
 
 endmodule
 
diff --git a/lowlevel/artix7/modexp_systolic_pe_artix7.v b/lowlevel/generic/subtractor32_ce_generic.v
similarity index 56%
copy from lowlevel/artix7/modexp_systolic_pe_artix7.v
copy to lowlevel/generic/subtractor32_ce_generic.v
index 08391f5..da97e76 100644
--- a/lowlevel/artix7/modexp_systolic_pe_artix7.v
+++ b/lowlevel/generic/subtractor32_ce_generic.v
@@ -1,12 +1,12 @@
 //------------------------------------------------------------------------------
 //
-// modexp_systolic_pe_artix7.v
+// subtractor32_ce_generic.v
 // -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) low-level systolic array processing element.
+// Generic 32-bit subtractor w/ clock enable.
 //
 // Authors: Pavel Shatov
 //
-// Copyright (c) 2016-2017, NORDUnet A/S
+// Copyright (c) 2016, NORDUnet A/S
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
@@ -36,88 +36,30 @@
 //
 //------------------------------------------------------------------------------
 
-module modexp_systolic_pe_artix7
-	(
-		input					clk,
-		input		[31: 0]	a,
-		input		[31: 0]	b,
-		input		[31: 0]	t,
-		input		[31: 0]	c_in,
-		output	[31: 0]	p,
-		output	[31: 0]	c_out
-	);
-	
-	reg	[31: 0]	t_dly;
-	reg	[31: 0]	c_in_dly;
-	
-	always @(posedge clk) t_dly <= t;
-	always @(posedge clk) c_in_dly <= c_in;
-	
-	wire	[31: 0]	t_c_in_s;
-	wire				t_c_in_c_out;
-	
-	reg				t_c_in_c_out_dly;
-	
-	always @(posedge clk) t_c_in_c_out_dly <= t_c_in_c_out;
-	
-	modexpa7_adder32_artix7 add_t_c_in
-	(
-		.clk		(clk),
-		.ce		(1'b1),
-		.a			(t_dly),
-		.b			(c_in_dly),
-		.c_in		(1'b0),
-		.s			(t_c_in_s),
-		.c_out	(t_c_in_c_out)
-	);
+module subtractor32_ce_generic // registered 32-bit subtractor: d and b_out change only on a rising clk edge while ce is high
+  (
+   input 	  clk, // clock (rising edge)
+   input      ce, // clock enable: the result register holds its value while low
+   input [31: 0]  a, // minuend
+   input [31: 0]  b, // subtrahend
+   output [31: 0] d, // difference output: low 32 bits of the registered a - b - b_in
+   input 	  b_in, // borrow input, subtracted as a single bit
+   output 	  b_out // borrow output: bit 32 of the registered difference
+   );
 
-	wire	[63: 0]	a_b;
-	
-	wire	[31: 0]	a_b_lsb = a_b[31: 0];
-	wire	[31: 0]	a_b_msb = a_b[63:32];
-	
-	reg	[31: 0]	a_b_msb_dly;
-	
-	always @(posedge clk) a_b_msb_dly <= a_b_msb;
-	
-	modexpa7_multiplier32_artix7 mul_a_b
-	(
-		.clk	(clk),
-		.a		(a),
-		.b		(b),
-		.p		(a_b)
-	);
-	
-	wire	[31: 0]	add_p_s;
-	wire				add_p_c_out;
-	
-	reg	[31: 0]	add_p_s_dly;
-	
-	always @(posedge clk) add_p_s_dly <= add_p_s;
-	
-	assign p = add_p_s_dly;
-	
-	modexpa7_adder32_artix7 add_p
-	(
-		.clk		(clk),
-		.ce		(1'b1),
-		.a			(a_b_lsb),
-		.b			(t_c_in_s),
-		.c_in		(1'b0),
-		.s			(add_p_s),
-		.c_out	(add_p_c_out)
-	);
+   //
+   // Difference (33 bits wide: bit 32 captures the borrow out)
+   //
+   reg [32: 0] 	  d_int; // no reset and no initial value: undefined until the first enabled clock edge
 
-	modexpa7_adder32_artix7 add_c_out
-	(
-		.clk		(clk),
-		.ce		(1'b1),
-		.a			(a_b_msb_dly),
-		.b			({{31{1'b0}}, t_c_in_c_out_dly}),
-		.c_in		(add_p_c_out),
-		.s			(c_out),
-		.c_out	()
-	);
+   always @(posedge clk)
+     if (ce) d_int <= {1'b0, a} - {1'b0, b} - {{32{1'b0}}, b_in}; // 33-bit arithmetic: bit 32 becomes 1 when a < b + b_in
+
+   //
+   // Output (driven straight from the result register)
+   //
+   assign d = d_int[31:0]; // low 32 bits of the difference
+   assign b_out = d_int[32]; // 33rd bit of the difference
 
 endmodule
 

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the Commits mailing list