[Cryptech-Commits] [core/lib] branch master updated: Added primitives with clock enable ports. Added primitives from ModExp.
git at cryptech.is
git at cryptech.is
Thu Dec 20 10:45:33 UTC 2018
This is an automated email from the git hooks/post-receive script.
meisterpaul1 at yandex.ru pushed a commit to branch master
in repository core/lib.
The following commit(s) were added to refs/heads/master by this push:
new fbcbd42 Added primitives with clock enable ports. Added primitives from ModExp.
fbcbd42 is described below
commit fbcbd4218e2711da279d8097620a5b26637bf45b
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Wed Dec 19 15:27:04 2018 +0300
Added primitives with clock enable ports.
Added primitives from ModExp.
---
bench/tb_lowlevel_adder32.v | 175 ++++++++++
bench/tb_lowlevel_adder47.v | 151 +++++++++
bench/tb_lowlevel_subtractor32.v | 174 ++++++++++
bench/tb_modular_adder.v | 357 +++++++++++++++++++++
bench/tb_modular_subtractor.v | 356 ++++++++++++++++++++
bench/tb_mw_comparator.v | 322 +++++++++++++++++++
bench/tb_mw_mover.v | 282 ++++++++++++++++
...xp_systolic_pe_artix7.v => adder32_ce_artix7.v} | 141 ++++----
lowlevel/artix7/dsp48e1_wrapper_modexp.v | 2 +-
...ic_pe_artix7.v => modexp_multiplier32_artix7.v} | 175 ++++++----
lowlevel/artix7/modexp_systolic_pe_artix7.v | 11 +-
...stolic_pe_artix7.v => subtractor32_ce_artix7.v} | 135 +++-----
lowlevel/cryptech_primitive_switch.vh | 36 ++-
.../adder32_ce_generic.v} | 108 ++-----
.../subtractor32_ce_generic.v} | 108 ++-----
15 files changed, 2118 insertions(+), 415 deletions(-)
diff --git a/bench/tb_lowlevel_adder32.v b/bench/tb_lowlevel_adder32.v
new file mode 100644
index 0000000..70fbb15
--- /dev/null
+++ b/bench/tb_lowlevel_adder32.v
@@ -0,0 +1,175 @@
+//------------------------------------------------------------------------------
+//
+// tb_lowlevel_adder32.v
+// -----------------------------------------------------------------------------
+// Testbench for 32-bit adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+
+module tb_lowlevel_adder32;
+
+ //
+ // Inputs
+ //
+ reg clk;
+ reg [31: 0] a;
+ reg [31: 0] b;
+ reg c_in;
+
+ //
+ // Outputs
+ //
+ wire [31: 0] s;
+ wire c_out;
+
+ //
+ // Test Vectors {a, b, c_in}
+ //
+ wire [64: 0] vec_0 = {32'h00000000, 32'h00000000, 1'b0}; // all zeroes, no carry
+ wire [64: 0] vec_1 = {32'h00000000, 32'h00000000, 1'b1}; // all zeroes with carry
+ wire [64: 0] vec_2 = {32'h00000000, 32'hFFFFFFFF, 1'b0}; // zeroes and ones, no carry
+ wire [64: 0] vec_3 = {32'h00000000, 32'hFFFFFFFF, 1'b1}; // zeroes and ones with carry
+ wire [64: 0] vec_4 = {32'hFFFFFFFF, 32'h00000000, 1'b0}; // ones and zeroes, no carry
+ wire [64: 0] vec_5 = {32'hFFFFFFFF, 32'h00000000, 1'b1}; // ones and zeroes with carry
+ wire [64: 0] vec_6 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b0}; // all ones, no carry
+ wire [64: 0] vec_7 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b1}; // all ones with carry
+
+ wire [64: 0] vec_8 = {32'hd898c296, 32'h37bf51f5, 1'b0}; // random values, no carry
+ wire [64: 0] vec_9 = {32'hf4a13945, 32'hcbb64068, 1'b0}; // random values, no carry
+ wire [64: 0] vec_10 = {32'h2deb33a0, 32'h6b315ece, 1'b0}; // random values, no carry
+ wire [64: 0] vec_11 = {32'h77037d81, 32'h2bce3357, 1'b0}; // random values, no carry
+ wire [64: 0] vec_12 = {32'h63a440f2, 32'h7c0f9e16, 1'b1}; // random values with carry
+ wire [64: 0] vec_13 = {32'hf8bce6e5, 32'h8ee7eb4a, 1'b1}; // random values with carry
+ wire [64: 0] vec_14 = {32'he12c4247, 32'hfe1a7f9b, 1'b1}; // random values with carry
+ wire [64: 0] vec_15 = {32'h6b17d1f2, 32'h4fe342e2, 1'b1}; // random values with carry
+
+
+ //
+ // UUT
+ //
+ adder32_wrapper uut
+ (
+ .clk (clk),
+ .a (a),
+ .b (b),
+ .s (s),
+ .c_in (c_in),
+ .c_out (c_out)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+ //
+ clk = 0;
+ //
+ #100;
+ //
+ test_adder32(vec_0);
+ test_adder32(vec_1);
+ test_adder32(vec_2);
+ test_adder32(vec_3);
+ test_adder32(vec_4);
+ test_adder32(vec_5);
+ test_adder32(vec_6);
+ test_adder32(vec_7);
+ //
+ test_adder32(vec_8);
+ test_adder32(vec_9);
+ test_adder32(vec_10);
+ test_adder32(vec_11);
+ test_adder32(vec_12);
+ test_adder32(vec_13);
+ test_adder32(vec_14);
+ test_adder32(vec_15);
+ //
+ if (ok) $display("tb_lowlevel_adder32: SUCCESS");
+ else $display("tb_lowlevel_adder32: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Routine
+ //
+ reg [31: 0] ss; // reference value of sum
+ reg cc; // reference value of carry
+ reg ss_ok; // result matches reference value
+
+ task test_adder32;
+
+ input [64: 0] vec;
+
+ begin
+
+ /* break down test vector */
+ a = vec[64:33];
+ b = vec[32: 1];
+ c_in = vec[ 0: 0];
+
+ /* calculate reference values */
+ {cc, ss} = {1'b0, a} + {1'b0, b} + {32'd0, c_in};
+
+ /* send one clock tick */
+ #10 clk = 1;
+ #10 clk = 0;
+
+ /* check outputs */
+ ss_ok = (s == ss) && (c_out == cc);
+
+ /* display results */
+ $display("test_adder32(): 0x%08X + 0x%08X + %01d = {%01d, 0x%08X} [%0s]", a, b, c_in, c_out, s, ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && ss_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_lowlevel_adder47.v b/bench/tb_lowlevel_adder47.v
new file mode 100644
index 0000000..663e8f9
--- /dev/null
+++ b/bench/tb_lowlevel_adder47.v
@@ -0,0 +1,151 @@
+//------------------------------------------------------------------------------
+//
+// tb_lowlevel_adder47.v
+// -----------------------------------------------------------------------------
+// Testbench for 47-bit adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+
+module tb_lowlevel_adder47;
+
+ //
+ // Inputs
+ //
+ reg clk;
+ reg [46: 0] a;
+ reg [46: 0] b;
+
+ //
+ // Outputs
+ //
+ wire [46: 0] s;
+
+ //
+ // Test Vectors {a, b}
+ //
+ wire [93: 0] vec_0 = {47'h2a87ca22be8b, 47'h05378eb1c71e};
+ wire [93: 0] vec_1 = {47'h7320ad746e1d, 47'h3b628ba79b98};
+ wire [93: 0] vec_2 = {47'h59f741e08254, 47'h2a385502f25d};
+ wire [93: 0] vec_3 = {47'h3f55296c3a54, 47'h5e3872760ab7};
+ wire [93: 0] vec_4 = {47'h3617de4a9626, 47'h2c6f5d9e98bf};
+ wire [93: 0] vec_5 = {47'h1292dc29f8f4, 47'h1dbd289a147c};
+ wire [93: 0] vec_6 = {47'h69da3113b5f0, 47'h38c00a60b1ce};
+ wire [93: 0] vec_7 = {47'h1d7e819d7a43, 47'h1d7c90ea0e5f};
+
+ //
+ // UUT
+ //
+ adder47_wrapper uut
+ (
+ .clk (clk),
+ .a (a),
+ .b (b),
+ .s (s)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+ //
+ clk = 0;
+ //
+ #100;
+ //
+ test_adder47(vec_0);
+ test_adder47(vec_1);
+ test_adder47(vec_2);
+ test_adder47(vec_3);
+ test_adder47(vec_4);
+ test_adder47(vec_5);
+ test_adder47(vec_6);
+ test_adder47(vec_7);
+ //
+ if (ok) $display("tb_lowlevel_adder47: SUCCESS");
+ else $display("tb_lowlevel_adder47: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Routine
+ //
+ reg [46: 0] ss; // reference value of sum
+ reg cc; // reference value of carry
+ reg ss_ok; // result matches reference value
+
+ task test_adder47;
+
+ input [93: 0] vec;
+
+ begin
+
+ /* break down test vector */
+ a = vec[93:47];
+ b = vec[46: 0];
+
+ /* calculate reference values */
+ ss = a + b;
+
+ /* send one clock tick */
+ #10 clk = 1;
+ #10 clk = 0;
+
+ /* check outputs */
+ ss_ok = (s == ss);
+
+ /* display results */
+ $display("test_adder47(): %s", ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && ss_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_lowlevel_subtractor32.v b/bench/tb_lowlevel_subtractor32.v
new file mode 100644
index 0000000..fd96000
--- /dev/null
+++ b/bench/tb_lowlevel_subtractor32.v
@@ -0,0 +1,174 @@
+//------------------------------------------------------------------------------
+//
+// tb_lowlevel_subtractor32.v
+// -----------------------------------------------------------------------------
+// Testbench for 32-bit subtractor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_lowlevel_subtractor32;
+
+ //
+ // Inputs
+ //
+ reg clk;
+ reg [31: 0] a;
+ reg [31: 0] b;
+ reg b_in;
+
+ //
+ // Outputs
+ //
+ wire [31: 0] d;
+ wire b_out;
+
+ //
+ // Test Vectors {a, b, b_in}
+ //
+ wire [64: 0] vec_0 = {32'h00000000, 32'h00000000, 1'b0}; // all zeroes, no borrow
+ wire [64: 0] vec_1 = {32'h00000000, 32'h00000000, 1'b1}; // all zeroes with borrow
+ wire [64: 0] vec_2 = {32'h00000000, 32'hFFFFFFFF, 1'b0}; // zeroes and ones, no borrow
+ wire [64: 0] vec_3 = {32'h00000000, 32'hFFFFFFFF, 1'b1}; // zeroes and ones with borrow
+ wire [64: 0] vec_4 = {32'hFFFFFFFF, 32'h00000000, 1'b0}; // ones and zeroes, no borrow
+ wire [64: 0] vec_5 = {32'hFFFFFFFF, 32'h00000000, 1'b1}; // ones and zeroes with borrow
+ wire [64: 0] vec_6 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b0}; // all ones, no borrow
+ wire [64: 0] vec_7 = {32'hFFFFFFFF, 32'hFFFFFFFF, 1'b1}; // all ones with borrow
+
+ wire [64: 0] vec_8 = {32'hd898c296, 32'h37bf51f5, 1'b0}; // random values, no borrow
+ wire [64: 0] vec_9 = {32'hf4a13945, 32'hcbb64068, 1'b0}; // random values, no borrow
+ wire [64: 0] vec_10 = {32'h2deb33a0, 32'h6b315ece, 1'b0}; // random values, no borrow
+ wire [64: 0] vec_11 = {32'h77037d81, 32'h2bce3357, 1'b0}; // random values, no borrow
+ wire [64: 0] vec_12 = {32'h63a440f2, 32'h7c0f9e16, 1'b1}; // random values with borrow
+ wire [64: 0] vec_13 = {32'hf8bce6e5, 32'h8ee7eb4a, 1'b1}; // random values with borrow
+ wire [64: 0] vec_14 = {32'he12c4247, 32'hfe1a7f9b, 1'b1}; // random values with borrow
+ wire [64: 0] vec_15 = {32'h6b17d1f2, 32'h4fe342e2, 1'b1}; // random values with borrow
+
+
+ //
+ // UUT
+ //
+ subtractor32_wrapper uut
+ (
+ .clk (clk),
+ .a (a),
+ .b (b),
+ .d (d),
+ .b_in (b_in),
+ .b_out (b_out)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+ //
+ clk = 0;
+ //
+ #100;
+ //
+ test_subtractor32(vec_0);
+ test_subtractor32(vec_1);
+ test_subtractor32(vec_2);
+ test_subtractor32(vec_3);
+ test_subtractor32(vec_4);
+ test_subtractor32(vec_5);
+ test_subtractor32(vec_6);
+ test_subtractor32(vec_7);
+ //
+ test_subtractor32(vec_8);
+ test_subtractor32(vec_9);
+ test_subtractor32(vec_10);
+ test_subtractor32(vec_11);
+ test_subtractor32(vec_12);
+ test_subtractor32(vec_13);
+ test_subtractor32(vec_14);
+ test_subtractor32(vec_15);
+ //
+ if (ok) $display("tb_lowlevel_subtractor32: SUCCESS");
+ else $display("tb_lowlevel_subtractor32: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Routine
+ //
+ reg [31: 0] dd; // reference value of difference
+ reg bb; // reference value of borrow
+ reg dd_ok; // result matches reference value
+
+ task test_subtractor32;
+
+ input [64: 0] vec;
+
+ begin
+
+ /* break down test vector */
+ a = vec[64:33];
+ b = vec[32: 1];
+ b_in = vec[ 0: 0];
+
+ /* calculate reference values */
+ {bb, dd} = {1'b0, a} - {1'b0, b} - {32'd0, b_in};
+
+ /* send one clock tick */
+ #10 clk = 1;
+ #10 clk = 0;
+
+ /* check outputs */
+ dd_ok = (d == dd) && (b_out == bb);
+
+ /* display results */
+ $display("test_subtractor32(): 0x%08X - (0x%08X + %01d) = {%01d, 0x%08X} [%0s]", a, b, b_in, b_out, d, dd_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && dd_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_modular_adder.v b/bench/tb_modular_adder.v
new file mode 100644
index 0000000..713ff22
--- /dev/null
+++ b/bench/tb_modular_adder.v
@@ -0,0 +1,357 @@
+//------------------------------------------------------------------------------
+//
+// tb_modular_adder_256.v
+// -----------------------------------------------------------------------------
+// Testbench for modular multi-word adder.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_modular_adder_256;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [255:0] N = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+ localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
+ localparam [255:0] Y_1 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
+
+ localparam [255:0] X_2 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
+ localparam [255:0] Y_2 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (X, Y, N)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_s_addr;
+ wire core_s_wren;
+
+ wire [ 31:0] core_x_data;
+ wire [ 31:0] core_y_data;
+ wire [ 31:0] core_n_data;
+ wire [ 31:0] core_s_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xyn_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_s_addr;
+ reg tb_xyn_wren;
+
+ reg [ 31:0] tb_x_data;
+ reg [ 31:0] tb_y_data;
+ reg [ 31:0] tb_n_data;
+ wire [ 31:0] tb_s_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_y
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_y_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_y_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_n
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_n_data),
+ .a_out (),
+
+ .b_addr (core_n_addr),
+ .b_out (core_n_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_s
+ (
+ .clk (clk),
+
+ .a_addr (core_s_addr),
+ .a_wr (core_s_wren),
+ .a_in (core_s_data),
+ .a_out (),
+
+ .b_addr (tb_s_addr),
+ .b_out (tb_s_data)
+ );
+
+
+ //
+ // UUT
+ //
+ modular_adder #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .ab_addr (core_xy_addr),
+ .n_addr (core_n_addr),
+ .s_addr (core_s_addr),
+ .s_wren (core_s_wren),
+
+ .a_din (core_x_data),
+ .b_din (core_y_data),
+ .n_din (core_n_data),
+ .s_dout (core_s_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_xyn_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_adder(X_1, Y_1, N);
+ test_modular_adder(X_2, Y_2, N);
+ test_modular_adder(Y_1, X_1, N);
+ test_modular_adder(Y_2, X_2, N);
+
+ test_modular_adder(X_1, X_2, N);
+ test_modular_adder(X_2, X_1, N);
+ test_modular_adder(Y_1, Y_2, N);
+ test_modular_adder(Y_2, Y_1, N);
+
+ test_modular_adder(X_1, Y_2, N);
+ test_modular_adder(Y_2, X_1, N);
+ test_modular_adder(X_2, Y_1, N);
+ test_modular_adder(Y_1, X_2, N);
+
+ /* print result */
+ if (ok) $display("tb_modular_adder_256: SUCCESS");
+ else $display("tb_modular_adder_256: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg [256:0] s;
+ wire [255:0] s_dummy = s[255:0];
+ reg s_ok;
+
+ integer w;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+ reg [255:0] n_shreg;
+ reg [255:0] s_shreg;
+
+ task test_modular_adder;
+
+ input [255:0] x;
+ input [255:0] y;
+ input [255:0] n;
+
+ begin
+
+ /* start filling memories */
+ tb_xyn_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+ y_shreg = y;
+ n_shreg = n;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xyn_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+ tb_y_data = y_shreg[31:0];
+ tb_n_data = n_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+ y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+ n_shreg = {{32{1'bX}}, n_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xyn_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+ tb_y_data = {32{1'bX}};
+ tb_n_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xyn_wren = 0;
+
+ /* calculate reference value */
+ s = {1'b0, x} + {1'b0, y};
+ if (s >= {1'b0, n})
+ s = s - {1'b0, n};
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_s_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ s_shreg = {tb_s_data, s_shreg[255:32]};
+
+ end
+
+ /* compare */
+ s_ok = (s_shreg == s[255:0]);
+
+ /* display results */
+ $display("test_modular_adder(): %s", s_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && s_ok;
+
+ end
+
+ endtask
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_modular_subtractor.v b/bench/tb_modular_subtractor.v
new file mode 100644
index 0000000..6cf0e01
--- /dev/null
+++ b/bench/tb_modular_subtractor.v
@@ -0,0 +1,356 @@
+//------------------------------------------------------------------------------
+//
+// tb_modular_subtractor_256.v
+// -----------------------------------------------------------------------------
+// Testbench for modular multi-word subtractor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_modular_subtractor_256;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [255:0] N = 256'hffffffff00000001000000000000000000000000ffffffffffffffffffffffff;
+
+ localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
+ localparam [255:0] Y_1 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
+
+ localparam [255:0] X_2 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
+ localparam [255:0] Y_2 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (X, Y, N)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_n_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_d_addr;
+ wire core_d_wren;
+
+ wire [ 31:0] core_x_data;
+ wire [ 31:0] core_y_data;
+ wire [ 31:0] core_n_data;
+ wire [ 31:0] core_d_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xyn_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_d_addr;
+ reg tb_xyn_wren;
+
+ reg [ 31:0] tb_x_data;
+ reg [ 31:0] tb_y_data;
+ reg [ 31:0] tb_n_data;
+ wire [ 31:0] tb_d_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_y
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_y_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_y_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_n
+ (
+ .clk (clk),
+
+ .a_addr (tb_xyn_addr),
+ .a_wr (tb_xyn_wren),
+ .a_in (tb_n_data),
+ .a_out (),
+
+ .b_addr (core_n_addr),
+ .b_out (core_n_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_s
+ (
+ .clk (clk),
+
+ .a_addr (core_d_addr),
+ .a_wr (core_d_wren),
+ .a_in (core_d_data),
+ .a_out (),
+
+ .b_addr (tb_d_addr),
+ .b_out (tb_d_data)
+ );
+
+
+ //
+ // UUT
+ //
+ modular_subtractor #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .ab_addr (core_xy_addr),
+ .n_addr (core_n_addr),
+ .d_addr (core_d_addr),
+ .d_wren (core_d_wren),
+
+ .a_din (core_x_data),
+ .b_din (core_y_data),
+ .n_din (core_n_data),
+ .d_dout (core_d_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_xyn_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_subtractor(X_1, Y_1, N);
+ test_modular_subtractor(X_2, Y_2, N);
+ test_modular_subtractor(Y_1, X_1, N);
+ test_modular_subtractor(Y_2, X_2, N);
+
+ test_modular_subtractor(X_1, X_2, N);
+ test_modular_subtractor(X_2, X_1, N);
+ test_modular_subtractor(Y_1, Y_2, N);
+ test_modular_subtractor(Y_2, Y_1, N);
+
+ test_modular_subtractor(X_1, Y_2, N);
+ test_modular_subtractor(Y_2, X_1, N);
+ test_modular_subtractor(X_2, Y_1, N);
+ test_modular_subtractor(Y_1, X_2, N);
+
+ /* print result */
+ if (ok) $display("tb_modular_subtractor_256: SUCCESS");
+ else $display("tb_modular_subtractor_256: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg [256:0] d;
+ wire [255:0] d_dummy = d[255:0];
+ reg d_ok;
+
+ integer w;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+ reg [255:0] n_shreg;
+ reg [255:0] d_shreg;
+
+ task test_modular_subtractor;
+
+ input [255:0] x;
+ input [255:0] y;
+ input [255:0] n;
+
+ begin
+
+ /* start filling memories */
+ tb_xyn_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+ y_shreg = y;
+ n_shreg = n;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xyn_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+ tb_y_data = y_shreg[31:0];
+ tb_n_data = n_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+ y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+ n_shreg = {{32{1'bX}}, n_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xyn_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+ tb_y_data = {32{1'bX}};
+ tb_n_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xyn_wren = 0;
+
+ /* calculate reference value */
+ d = {1'b0, (x < y) ? n : {256{1'b0}}};
+ d = d + {1'b0, x} - {1'b0, y};
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_d_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ d_shreg = {tb_d_data, d_shreg[255:32]};
+
+ end
+
+ /* compare */
+ d_ok = (d_shreg == d[255:0]);
+
+ /* display results */
+ $display("test_modular_subtractor(): %s", d_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && d_ok;
+
+ end
+
+ endtask
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_mw_comparator.v b/bench/tb_mw_comparator.v
new file mode 100644
index 0000000..ed830a8
--- /dev/null
+++ b/bench/tb_mw_comparator.v
@@ -0,0 +1,322 @@
+//------------------------------------------------------------------------------
+//
+// tb_mw_comparator.v
+// -----------------------------------------------------------------------------
+// Testbench for multi-word comparator.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_mw_comparator;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [383:0] A_0 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
+
+ localparam [383:0] A_1 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F75;
+ localparam [383:0] A_2 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F77;
+
+ localparam [383:0] A_3 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_375D568B_79E730D4_18A9143C_18905F76;
+ localparam [383:0] A_4 = 384'hBDC7B53C_616B13B5_77622510_75BA95FC_575D568B_79E730D4_18A9143C_18905F76;
+
+ localparam [383:0] A_5 = 384'hBDC7B53C_616B13B5_77622510_75BA95FB_475D568B_79E730D4_18A9143C_18905F76;
+ localparam [383:0] A_6 = 384'hBDC7B53C_616B13B5_77622510_75BA95FD_475D568B_79E730D4_18A9143C_18905F76;
+
+ localparam [383:0] A_7 = 384'hADC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
+ localparam [383:0] A_8 = 384'hCDC7B53C_616B13B5_77622510_75BA95FC_475D568B_79E730D4_18A9143C_18905F76;
+
+ localparam [383:0] B_0 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
+
+ localparam [383:0] B_1 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E4;
+ localparam [383:0] B_2 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E6;
+
+ localparam [383:0] B_3 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_9D784F98_850046D4_10DDD64D_F6BB32E5;
+ localparam [383:0] B_4 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C0_BD784F98_850046D4_10DDD64D_F6BB32E5;
+
+ localparam [383:0] B_5 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3BF_AD784F98_850046D4_10DDD64D_F6BB32E5;
+ localparam [383:0] B_6 = 384'h348A6D1F_7C66D21E_8D1490D9_AA6AE3C1_AD784F98_850046D4_10DDD64D_F6BB32E5;
+
+ localparam [383:0] B_7 = 384'h248A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
+ localparam [383:0] B_8 = 384'h448A6D1F_7C66D21E_8D1490D9_AA6AE3C0_AD784F98_850046D4_10DDD64D_F6BB32E5;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ parameter OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+ wire core_cmp_l;
+ wire core_cmp_e;
+ wire core_cmp_g;
+
+
+ //
+ // Buffers (X, Y)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_xy_addr;
+
+ wire [ 32-1:0] core_x_data;
+ wire [ 32-1:0] core_y_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_xy_addr;
+ reg tb_xy_wren;
+
+ reg [ 32-1:0] tb_x_data;
+ reg [ 32-1:0] tb_y_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_xy_addr),
+ .a_wr (tb_xy_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_y
+ (
+ .clk (clk),
+
+ .a_addr (tb_xy_addr),
+ .a_wr (tb_xy_wren),
+ .a_in (tb_y_data),
+ .a_out (),
+
+ .b_addr (core_xy_addr),
+ .b_out (core_y_data)
+ );
+
+
+ //
+ // UUT
+ //
+ mw_comparator #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .xy_addr (core_xy_addr),
+ .x_din (core_x_data),
+ .y_din (core_y_data),
+
+ .cmp_l (core_cmp_l),
+ .cmp_e (core_cmp_e),
+ .cmp_g (core_cmp_g)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_xy_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_mw_comparator(A_0, A_0);
+
+ test_mw_comparator(A_0, A_1);
+ test_mw_comparator(A_0, A_2);
+ test_mw_comparator(A_0, A_3);
+ test_mw_comparator(A_0, A_4);
+ test_mw_comparator(A_0, A_5);
+ test_mw_comparator(A_0, A_6);
+ test_mw_comparator(A_0, A_7);
+ test_mw_comparator(A_0, A_8);
+
+ test_mw_comparator(B_0, B_0);
+
+ test_mw_comparator(B_0, B_1);
+ test_mw_comparator(B_0, B_2);
+ test_mw_comparator(B_0, B_3);
+ test_mw_comparator(B_0, B_4);
+ test_mw_comparator(B_0, B_5);
+ test_mw_comparator(B_0, B_6);
+ test_mw_comparator(B_0, B_7);
+ test_mw_comparator(B_0, B_8);
+
+ /* print result */
+ if (ok) $display("tb_mw_comparator: SUCCESS");
+ else $display("tb_mw_comparator: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg cmp_l;
+ reg cmp_e;
+ reg cmp_g;
+ reg cmp_ok;
+
+ integer w;
+
+ task test_mw_comparator;
+
+ input [255:0] x;
+ input [255:0] y;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+
+ begin
+
+ /* start filling memories */
+ tb_xy_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+ y_shreg = y;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_xy_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+ tb_y_data = y_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+ y_shreg = {{32{1'bX}}, y_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_xy_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+ tb_y_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_xy_wren = 0;
+
+ /* calculate reference values */
+ cmp_l = (x < y) ? 1 : 0;
+ cmp_e = (x == y) ? 1 : 0;
+ cmp_g = (x > y) ? 1 : 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* compare */
+ cmp_ok = (cmp_l == core_cmp_l) && (cmp_e == core_cmp_e) && (cmp_g == core_cmp_g);
+
+ /* display results */
+ $display("test_mw_comparator(): %s", cmp_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && cmp_ok;
+
+ end
+
+ endtask
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/bench/tb_mw_mover.v b/bench/tb_mw_mover.v
new file mode 100644
index 0000000..08bdb9e
--- /dev/null
+++ b/bench/tb_mw_mover.v
@@ -0,0 +1,282 @@
+//------------------------------------------------------------------------------
+//
+// tb_modular_mover.v
+// -----------------------------------------------------------------------------
+// Testbench for multi-word data mover.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+`timescale 1ns / 1ps
+//------------------------------------------------------------------------------
+
+module tb_mw_mover;
+
+
+ //
+ // Test Vectors
+ //
+ localparam [255:0] X_1 = 256'h1ddbd0769df27bab1e234019dad09dccce1e87e2193b417ffa1a3465d7439ecd;
+ localparam [255:0] X_2 = 256'h1f67cdc34bac91a072945d212f0a03442fc4855788583ecb7b2e375ad3848210;
+ localparam [255:0] X_3 = 256'hff563f653b1392a6fa6b0295a280f7a904a11e22d8ae468e220301d8ac232fcf;
+ localparam [255:0] X_4 = 256'hf6f53c4b57b25453b68e923fb118e4f753d74af01fc58476dd15a80933453899;
+
+
+ //
+ // Core Parameters
+ //
+ localparam WORD_COUNTER_WIDTH = 3;
+ localparam OPERAND_NUM_WORDS = 8;
+
+
+ //
+ // Clock (100 MHz)
+ //
+ reg clk = 1'b0;
+ always #5 clk = ~clk;
+
+
+ //
+ // Inputs, Outputs
+ //
+ reg rst_n;
+ reg ena;
+ wire rdy;
+
+
+ //
+ // Buffers (X, Y)
+ //
+ wire [WORD_COUNTER_WIDTH-1:0] core_x_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] core_y_addr;
+ wire core_y_wren;
+
+ wire [ 32-1:0] core_x_data;
+ wire [ 32-1:0] core_y_data;
+
+ reg [WORD_COUNTER_WIDTH-1:0] tb_x_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] tb_y_addr;
+ reg tb_x_wren;
+
+ reg [ 32-1:0] tb_x_data;
+ wire [ 32-1:0] tb_y_data;
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_x
+ (
+ .clk (clk),
+
+ .a_addr (tb_x_addr),
+ .a_wr (tb_x_wren),
+ .a_in (tb_x_data),
+ .a_out (),
+
+ .b_addr (core_x_addr),
+ .b_out (core_x_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH)
+ )
+ bram_d
+ (
+ .clk (clk),
+
+ .a_addr (core_y_addr),
+ .a_wr (core_y_wren),
+ .a_in (core_y_data),
+ .a_out (),
+
+ .b_addr (tb_y_addr),
+ .b_out (tb_y_data)
+ );
+
+
+ //
+ // UUT
+ //
+ mw_mover #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ uut
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (ena),
+ .rdy (rdy),
+
+ .x_addr (core_x_addr),
+ .y_addr (core_y_addr),
+ .y_wren (core_y_wren),
+
+ .x_din (core_x_data),
+ .y_dout (core_y_data)
+ );
+
+
+ //
+ // Testbench Routine
+ //
+ reg ok = 1;
+ initial begin
+
+ /* initialize control inputs */
+ rst_n = 0;
+ ena = 0;
+
+ tb_x_wren = 0;
+
+ /* wait for some time */
+ #200;
+
+ /* de-assert reset */
+ rst_n = 1;
+
+ /* wait for some time */
+ #100;
+
+ /* run tests */
+ test_modular_mover(X_1);
+ test_modular_mover(X_2);
+ test_modular_mover(X_3);
+ test_modular_mover(X_4);
+
+ /* print result */
+ if (ok) $display("tb_modular_mover: SUCCESS");
+ else $display("tb_modular_mover: FAILURE");
+ //
+ $finish;
+ //
+ end
+
+
+ //
+ // Test Task
+ //
+ reg [255:0] y;
+ reg y_ok;
+
+ integer w;
+
+ reg [255:0] x_shreg;
+ reg [255:0] y_shreg;
+
+ task test_modular_mover;
+
+ input [255:0] x;
+
+ begin
+
+ /* start filling memories */
+ tb_x_wren = 1;
+
+ /* initialize shift registers */
+ x_shreg = x;
+
+ /* write all the words */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set addresses */
+ tb_x_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* set data words */
+ tb_x_data = x_shreg[31:0];
+
+ /* shift inputs */
+ x_shreg = {{32{1'bX}}, x_shreg[255:32]};
+
+ /* wait for 1 clock tick */
+ #10;
+
+ end
+
+ /* wipe addresses */
+ tb_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
+
+ /* wipe data words */
+ tb_x_data = {32{1'bX}};
+
+ /* stop filling memories */
+ tb_x_wren = 0;
+
+ /* start operation */
+ ena = 1;
+
+ /* clear flag */
+ #10 ena = 0;
+
+ /* wait for operation to complete */
+ while (!rdy) #10;
+
+ /* read result */
+ for (w=0; w<OPERAND_NUM_WORDS; w=w+1) begin
+
+ /* set address */
+ tb_y_addr = w[WORD_COUNTER_WIDTH-1:0];
+
+ /* wait for 1 clock tick */
+ #10;
+
+ /* store data word */
+ y_shreg = {tb_y_data, y_shreg[255:32]};
+
+ end
+
+ /* compare */
+ y_ok = (y_shreg == x);
+
+ /* display results */
+ $display("test_modular_mover(): %s", y_ok ? "OK" : "ERROR");
+
+ /* update global flag */
+ ok = ok && y_ok;
+
+ end
+
+ endtask
+
+
+endmodule
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/lowlevel/artix7/modexp_systolic_pe_artix7.v b/lowlevel/artix7/adder32_ce_artix7.v
similarity index 56%
copy from lowlevel/artix7/modexp_systolic_pe_artix7.v
copy to lowlevel/artix7/adder32_ce_artix7.v
index 08391f5..0f6d44d 100644
--- a/lowlevel/artix7/modexp_systolic_pe_artix7.v
+++ b/lowlevel/artix7/adder32_ce_artix7.v
@@ -1,12 +1,12 @@
//------------------------------------------------------------------------------
//
-// modexp_systolic_pe_artix7.v
+// adder32_ce_artix7.v
// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) low-level systolic array processing element.
+// Hardware (Artix-7 DSP48E1) 32-bit adder w/ clock enable.
//
// Authors: Pavel Shatov
//
-// Copyright (c) 2016-2017, NORDUnet A/S
+// Copyright (c) 2016, 2018 NORDUnet A/S
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
@@ -36,88 +36,59 @@
//
//------------------------------------------------------------------------------
-module modexp_systolic_pe_artix7
- (
- input clk,
- input [31: 0] a,
- input [31: 0] b,
- input [31: 0] t,
- input [31: 0] c_in,
- output [31: 0] p,
- output [31: 0] c_out
- );
-
- reg [31: 0] t_dly;
- reg [31: 0] c_in_dly;
-
- always @(posedge clk) t_dly <= t;
- always @(posedge clk) c_in_dly <= c_in;
-
- wire [31: 0] t_c_in_s;
- wire t_c_in_c_out;
-
- reg t_c_in_c_out_dly;
-
- always @(posedge clk) t_c_in_c_out_dly <= t_c_in_c_out;
-
- modexpa7_adder32_artix7 add_t_c_in
- (
- .clk (clk),
- .ce (1'b1),
- .a (t_dly),
- .b (c_in_dly),
- .c_in (1'b0),
- .s (t_c_in_s),
- .c_out (t_c_in_c_out)
- );
-
- wire [63: 0] a_b;
-
- wire [31: 0] a_b_lsb = a_b[31: 0];
- wire [31: 0] a_b_msb = a_b[63:32];
-
- reg [31: 0] a_b_msb_dly;
-
- always @(posedge clk) a_b_msb_dly <= a_b_msb;
-
- modexpa7_multiplier32_artix7 mul_a_b
- (
- .clk (clk),
- .a (a),
- .b (b),
- .p (a_b)
- );
-
- wire [31: 0] add_p_s;
- wire add_p_c_out;
-
- reg [31: 0] add_p_s_dly;
-
- always @(posedge clk) add_p_s_dly <= add_p_s;
-
- assign p = add_p_s_dly;
-
- modexpa7_adder32_artix7 add_p
- (
- .clk (clk),
- .ce (1'b1),
- .a (a_b_lsb),
- .b (t_c_in_s),
- .c_in (1'b0),
- .s (add_p_s),
- .c_out (add_p_c_out)
- );
-
- modexpa7_adder32_artix7 add_c_out
- (
- .clk (clk),
- .ce (1'b1),
- .a (a_b_msb_dly),
- .b ({{31{1'b0}}, t_c_in_c_out_dly}),
- .c_in (add_p_c_out),
- .s (c_out),
- .c_out ()
- );
+module adder32_ce_artix7
+ (
+ input clk, // clock
+ input ce, // clock enable
+ input [31: 0] a, // operand input
+ input [31: 0] b, // operand input
+ output [31: 0] s, // sum output
+ input c_in, // carry input
+ output c_out // carry output
+ );
+
+ //
+ // Lower and higher parts of operand
+ //
+ wire [17: 0] bl = b[17: 0];
+ wire [13: 0] bh = b[31:18];
+
+
+ //
+ // DSP48E1 Slice
+ //
+
+ /* Operation Mode */
+ wire [ 3: 0] dsp48e1_alumode = 4'b0000;
+ wire [ 6: 0] dsp48e1_opmode = 7'b0110011;
+
+ /* Internal Product */
+ wire [47: 0] p_int;
+
+ dsp48e1_wrapper dsp_adder
+ (
+ .clk (clk),
+
+ .ce (ce),
+
+ .carry (c_in),
+
+ .alumode (dsp48e1_alumode),
+ .opmode (dsp48e1_opmode),
+
+ .a ({{16{1'b0}}, bh}),
+ .b (bl),
+ .c ({{16{1'b0}}, a}),
+
+ .p (p_int)
+ );
+
+ //
+ // Output Mapping
+ //
+ assign s = p_int[31: 0];
+ assign c_out = p_int[32];
+
endmodule
diff --git a/lowlevel/artix7/dsp48e1_wrapper_modexp.v b/lowlevel/artix7/dsp48e1_wrapper_modexp.v
index 17d8efe..27c8bf8 100644
--- a/lowlevel/artix7/dsp48e1_wrapper_modexp.v
+++ b/lowlevel/artix7/dsp48e1_wrapper_modexp.v
@@ -36,7 +36,7 @@
//
//------------------------------------------------------------------------------
-module modexpa7_dsp48e1_wrapper_modexp #
+module dsp48e1_wrapper_modexp #
(
parameter AREG = 1'b0,
parameter PREG = 1'b0,
diff --git a/lowlevel/artix7/modexp_systolic_pe_artix7.v b/lowlevel/artix7/modexp_multiplier32_artix7.v
similarity index 50%
copy from lowlevel/artix7/modexp_systolic_pe_artix7.v
copy to lowlevel/artix7/modexp_multiplier32_artix7.v
index 08391f5..d4bd3f4 100644
--- a/lowlevel/artix7/modexp_systolic_pe_artix7.v
+++ b/lowlevel/artix7/modexp_multiplier32_artix7.v
@@ -1,8 +1,8 @@
//------------------------------------------------------------------------------
//
-// modexp_systolic_pe_artix7.v
+// modexp_multiplier32_artix7.v
// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) low-level systolic array processing element.
+// Hardware (Artix-7 DSP48E1) 32-bit multiplier.
//
// Authors: Pavel Shatov
//
@@ -36,87 +36,132 @@
//
//------------------------------------------------------------------------------
-module modexp_systolic_pe_artix7
+module modexp_multiplier32_artix7
(
input clk,
input [31: 0] a,
input [31: 0] b,
- input [31: 0] t,
- input [31: 0] c_in,
- output [31: 0] p,
- output [31: 0] c_out
+ output [63: 0] p
);
+
+ /* split a, b into smaller words */
+ wire [16: 0] a_lo = a[16: 0];
+ wire [16: 0] b_lo = b[16: 0];
+ wire [14: 0] a_hi = a[31:17];
+ wire [14: 0] b_hi = b[31:17];
+
+ /* smaller sub-products */
+ wire [47: 0] dsp1_p;
+ wire [47: 0] dsp2_p;
+ wire [47: 0] dsp4_p;
+
+ /* direct output mapping */
+ assign p[63:34] = dsp4_p[29: 0];
- reg [31: 0] t_dly;
- reg [31: 0] c_in_dly;
-
- always @(posedge clk) t_dly <= t;
- always @(posedge clk) c_in_dly <= c_in;
-
- wire [31: 0] t_c_in_s;
- wire t_c_in_c_out;
-
- reg t_c_in_c_out_dly;
-
- always @(posedge clk) t_c_in_c_out_dly <= t_c_in_c_out;
+ /* delayed output mapping */
+ genvar fd;
+ generate for (fd=0; fd<17; fd=fd+1)
+ begin : gen_FD
+ FD # (.INIT( 1'b0)) FD_inst1 (.C(clk), .D(dsp1_p[fd]), .Q(p[fd + 0]));
+ FD # (.INIT( 1'b0)) FD_inst3 (.C(clk), .D(dsp2_p[fd]), .Q(p[fd + 17]));
+ end
+ endgenerate
+
+ /* product chains */
+ wire [47: 0] dsp1_p_chain;
+ wire [47: 0] dsp3_p_chain;
+ wire [47: 0] dsp2_p_chain;
- modexpa7_adder32_artix7 add_t_c_in
+ /* operand chains */
+ wire [29: 0] a_lo_chain;
+ wire [29: 0] a_hi_chain;
+
+ //
+ // a_lo * b_lo
+ //
+ dsp48e1_wrapper_modexp #
+ (
+ .AREG (1'b1),
+ .PREG (1'b0),
+ .A_INPUT ("DIRECT")
+ )
+ dsp1
(
.clk (clk),
- .ce (1'b1),
- .a (t_dly),
- .b (c_in_dly),
- .c_in (1'b0),
- .s (t_c_in_s),
- .c_out (t_c_in_c_out)
+ .opmode (7'b0110101),
+ .a ({13'd0, a_lo}),
+ .b ({1'b0, b_lo}),
+ .p (dsp1_p),
+ .acin (30'd0),
+ .pcin (48'd0),
+ .acout (a_lo_chain),
+ .pcout (dsp1_p_chain)
);
-
- wire [63: 0] a_b;
-
- wire [31: 0] a_b_lsb = a_b[31: 0];
- wire [31: 0] a_b_msb = a_b[63:32];
-
- reg [31: 0] a_b_msb_dly;
- always @(posedge clk) a_b_msb_dly <= a_b_msb;
-
- modexpa7_multiplier32_artix7 mul_a_b
+ //
+ // a_hi * b_lo
+ //
+ dsp48e1_wrapper_modexp #
+ (
+ .AREG (1'b1),
+ .PREG (1'b0),
+ .A_INPUT ("DIRECT")
+ )
+ dsp2
(
- .clk (clk),
- .a (a),
- .b (b),
- .p (a_b)
+ .clk (clk),
+ .opmode (7'b0010101),
+ .a ({15'd0, a_hi}),
+ .b ({1'd0, b_lo}),
+ .p (dsp2_p),
+ .acin (30'd0),
+ .pcin (dsp3_p_chain),
+ .acout (a_hi_chain),
+ .pcout (dsp2_p_chain)
);
- wire [31: 0] add_p_s;
- wire add_p_c_out;
-
- reg [31: 0] add_p_s_dly;
-
- always @(posedge clk) add_p_s_dly <= add_p_s;
-
- assign p = add_p_s_dly;
-
- modexpa7_adder32_artix7 add_p
+ //
+ // a_lo * b_hi
+ //
+ dsp48e1_wrapper_modexp #
+ (
+ .AREG (1'b0),
+ .PREG (1'b0),
+ .A_INPUT ("CASCADE")
+ )
+ dsp3
(
.clk (clk),
- .ce (1'b1),
- .a (a_b_lsb),
- .b (t_c_in_s),
- .c_in (1'b0),
- .s (add_p_s),
- .c_out (add_p_c_out)
- );
-
- modexpa7_adder32_artix7 add_c_out
+ .opmode (7'b1010101),
+ .a (30'd0),
+ .b ({3'd0, b_hi}),
+ .p (),
+ .acin (a_lo_chain),
+ .pcin (dsp1_p_chain),
+ .acout (),
+ .pcout (dsp3_p_chain)
+ );
+
+ //
+ // a_hi * b_hi
+ //
+ dsp48e1_wrapper_modexp #
+ (
+ .AREG (1'b0),
+ .PREG (1'b1),
+ .A_INPUT ("CASCADE")
+ )
+ dsp4
(
.clk (clk),
- .ce (1'b1),
- .a (a_b_msb_dly),
- .b ({{31{1'b0}}, t_c_in_c_out_dly}),
- .c_in (add_p_c_out),
- .s (c_out),
- .c_out ()
+ .opmode (7'b1010101),
+ .a (30'd0),
+ .b ({3'd0, b_hi}),
+ .p (dsp4_p),
+ .acin (a_hi_chain),
+ .pcin (dsp2_p_chain),
+ .acout (),
+ .pcout ()
);
endmodule
diff --git a/lowlevel/artix7/modexp_systolic_pe_artix7.v b/lowlevel/artix7/modexp_systolic_pe_artix7.v
index 08391f5..9cf01f2 100644
--- a/lowlevel/artix7/modexp_systolic_pe_artix7.v
+++ b/lowlevel/artix7/modexp_systolic_pe_artix7.v
@@ -60,10 +60,9 @@ module modexp_systolic_pe_artix7
always @(posedge clk) t_c_in_c_out_dly <= t_c_in_c_out;
- modexpa7_adder32_artix7 add_t_c_in
+ adder32_artix7 add_t_c_in
(
.clk (clk),
- .ce (1'b1),
.a (t_dly),
.b (c_in_dly),
.c_in (1'b0),
@@ -80,7 +79,7 @@ module modexp_systolic_pe_artix7
always @(posedge clk) a_b_msb_dly <= a_b_msb;
- modexpa7_multiplier32_artix7 mul_a_b
+ modexp_multiplier32_artix7 mul_a_b
(
.clk (clk),
.a (a),
@@ -97,10 +96,9 @@ module modexp_systolic_pe_artix7
assign p = add_p_s_dly;
- modexpa7_adder32_artix7 add_p
+ adder32_artix7 add_p
(
.clk (clk),
- .ce (1'b1),
.a (a_b_lsb),
.b (t_c_in_s),
.c_in (1'b0),
@@ -108,10 +106,9 @@ module modexp_systolic_pe_artix7
.c_out (add_p_c_out)
);
- modexpa7_adder32_artix7 add_c_out
+ adder32_artix7 add_c_out
(
.clk (clk),
- .ce (1'b1),
.a (a_b_msb_dly),
.b ({{31{1'b0}}, t_c_in_c_out_dly}),
.c_in (add_p_c_out),
diff --git a/lowlevel/artix7/modexp_systolic_pe_artix7.v b/lowlevel/artix7/subtractor32_ce_artix7.v
similarity index 56%
copy from lowlevel/artix7/modexp_systolic_pe_artix7.v
copy to lowlevel/artix7/subtractor32_ce_artix7.v
index 08391f5..c0238ea 100644
--- a/lowlevel/artix7/modexp_systolic_pe_artix7.v
+++ b/lowlevel/artix7/subtractor32_ce_artix7.v
@@ -1,12 +1,12 @@
//------------------------------------------------------------------------------
//
-// modexp_systolic_pe_artix7.v
+// subtractor32_ce_artix7.v
// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) low-level systolic array processing element.
+// Hardware (Artix-7 DSP48E1) 32-bit subtractor w/ clock enable.
//
// Authors: Pavel Shatov
//
-// Copyright (c) 2016-2017, NORDUnet A/S
+// Copyright (c) 2016, NORDUnet A/S
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
@@ -36,88 +36,57 @@
//
//------------------------------------------------------------------------------
-module modexp_systolic_pe_artix7
- (
- input clk,
- input [31: 0] a,
- input [31: 0] b,
- input [31: 0] t,
- input [31: 0] c_in,
- output [31: 0] p,
- output [31: 0] c_out
- );
-
- reg [31: 0] t_dly;
- reg [31: 0] c_in_dly;
-
- always @(posedge clk) t_dly <= t;
- always @(posedge clk) c_in_dly <= c_in;
-
- wire [31: 0] t_c_in_s;
- wire t_c_in_c_out;
-
- reg t_c_in_c_out_dly;
-
- always @(posedge clk) t_c_in_c_out_dly <= t_c_in_c_out;
-
- modexpa7_adder32_artix7 add_t_c_in
- (
- .clk (clk),
- .ce (1'b1),
- .a (t_dly),
- .b (c_in_dly),
- .c_in (1'b0),
- .s (t_c_in_s),
- .c_out (t_c_in_c_out)
- );
+module subtractor32_ce_artix7
+ (
+ input clk,
+ input ce,
+ input [31: 0] a,
+ input [31: 0] b,
+ output [31: 0] d,
+ input b_in,
+ output b_out
+ );
- wire [63: 0] a_b;
-
- wire [31: 0] a_b_lsb = a_b[31: 0];
- wire [31: 0] a_b_msb = a_b[63:32];
-
- reg [31: 0] a_b_msb_dly;
-
- always @(posedge clk) a_b_msb_dly <= a_b_msb;
-
- modexpa7_multiplier32_artix7 mul_a_b
- (
- .clk (clk),
- .a (a),
- .b (b),
- .p (a_b)
- );
-
- wire [31: 0] add_p_s;
- wire add_p_c_out;
-
- reg [31: 0] add_p_s_dly;
-
- always @(posedge clk) add_p_s_dly <= add_p_s;
-
- assign p = add_p_s_dly;
-
- modexpa7_adder32_artix7 add_p
- (
- .clk (clk),
- .ce (1'b1),
- .a (a_b_lsb),
- .b (t_c_in_s),
- .c_in (1'b0),
- .s (add_p_s),
- .c_out (add_p_c_out)
- );
+ //
+ // Lower and higher parts of operand
+ //
+ wire [17: 0] bl = b[17: 0];
+ wire [13: 0] bh = b[31:18];
- modexpa7_adder32_artix7 add_c_out
- (
- .clk (clk),
- .ce (1'b1),
- .a (a_b_msb_dly),
- .b ({{31{1'b0}}, t_c_in_c_out_dly}),
- .c_in (add_p_c_out),
- .s (c_out),
- .c_out ()
- );
+ //
+ // DSP48E1 Slice
+ //
+
+ /* Operation Mode */
+ wire [ 3: 0] dsp48e1_alumode = 4'b0011;
+ wire [ 6: 0] dsp48e1_opmode = 7'b0110011;
+
+ /* Internal Product */
+ wire [47: 0] p_int;
+
+ dsp48e1_wrapper dsp_subtractor
+ (
+ .clk (clk),
+
+ .ce (ce),
+
+ .carry (b_in),
+
+ .alumode (dsp48e1_alumode),
+ .opmode (dsp48e1_opmode),
+
+ .a ({{16{1'b0}}, bh}),
+ .b (bl),
+ .c ({{16{1'b0}}, a}),
+
+ .p (p_int)
+ );
+
+ //
+ // Output Mapping
+ //
+ assign d = p_int[31: 0];
+ assign b_out = p_int[32];
endmodule
diff --git a/lowlevel/cryptech_primitive_switch.vh b/lowlevel/cryptech_primitive_switch.vh
index a4aad45..44b0852 100644
--- a/lowlevel/cryptech_primitive_switch.vh
+++ b/lowlevel/cryptech_primitive_switch.vh
@@ -49,19 +49,29 @@
//
// Generic Math Primitives
//
-`define CRYPTECH_PRIMITIVE_MAC16_GENERIC mac16_generic
-`define CRYPTECH_PRIMITIVE_ADD32_GENERIC adder32_generic
-`define CRYPTECH_PRIMITIVE_ADD47_GENERIC adder47_generic
-`define CRYPTECH_PRIMITIVE_SUB32_GENERIC subtractor32_generic
+`define CRYPTECH_PRIMITIVE_MAC16_GENERIC mac16_generic
+`define CRYPTECH_PRIMITIVE_ADD32_GENERIC adder32_generic
+`define CRYPTECH_PRIMITIVE_ADD47_GENERIC adder47_generic
+`define CRYPTECH_PRIMITIVE_SUB32_GENERIC subtractor32_generic
+
+`define CRYPTECH_PRIMITIVE_ADD32_CE_GENERIC adder32_ce_generic
+`define CRYPTECH_PRIMITIVE_SUB32_CE_GENERIC subtractor32_ce_generic
+
+`define CRYPTECH_PRIMITIVE_MODEXP_SYSTOLIC_PE_GENERIC modexp_systolic_pe_generic
//
// Xilinx Math Primitives for Artix-7 Family
//
-`define CRYPTECH_PRIMITIVE_MAC16_VENDOR mac16_artix7
-`define CRYPTECH_PRIMITIVE_ADD32_VENDOR adder32_artix7
-`define CRYPTECH_PRIMITIVE_ADD47_VENDOR adder47_artix7
-`define CRYPTECH_PRIMITIVE_SUB32_VENDOR subtractor32_artix7
+`define CRYPTECH_PRIMITIVE_MAC16_VENDOR mac16_artix7
+`define CRYPTECH_PRIMITIVE_ADD32_VENDOR adder32_artix7
+`define CRYPTECH_PRIMITIVE_ADD47_VENDOR adder47_artix7
+`define CRYPTECH_PRIMITIVE_SUB32_VENDOR subtractor32_artix7
+
+`define CRYPTECH_PRIMITIVE_ADD32_CE_VENDOR adder32_ce_artix7
+`define CRYPTECH_PRIMITIVE_SUB32_CE_VENDOR subtractor32_ce_artix7
+
+`define CRYPTECH_PRIMITIVE_MODEXP_SYSTOLIC_PE_VENDOR modexp_systolic_pe_artix7
@@ -78,6 +88,11 @@
`define CRYPTECH_PRIMITIVE_ADD47 `CRYPTECH_PRIMITIVE_ADD47_GENERIC
`define CRYPTECH_PRIMITIVE_SUB32 `CRYPTECH_PRIMITIVE_SUB32_GENERIC
+`define CRYPTECH_PRIMITIVE_ADD32_CE `CRYPTECH_PRIMITIVE_ADD32_CE_GENERIC
+`define CRYPTECH_PRIMITIVE_SUB32_CE `CRYPTECH_PRIMITIVE_SUB32_CE_GENERIC
+
+`define CRYPTECH_PRIMITIVE_MODEXP_SYSTOLIC_PE `CRYPTECH_PRIMITIVE_MODEXP_SYSTOLIC_PE_GENERIC
+
`else
// vendor-specific primitives
@@ -86,6 +101,11 @@
`define CRYPTECH_PRIMITIVE_ADD32 `CRYPTECH_PRIMITIVE_ADD32_VENDOR
`define CRYPTECH_PRIMITIVE_SUB32 `CRYPTECH_PRIMITIVE_SUB32_VENDOR
+`define CRYPTECH_PRIMITIVE_ADD32_CE `CRYPTECH_PRIMITIVE_ADD32_CE_VENDOR
+`define CRYPTECH_PRIMITIVE_SUB32_CE `CRYPTECH_PRIMITIVE_SUB32_CE_VENDOR
+
+`define CRYPTECH_PRIMITIVE_MODEXP_SYSTOLIC_PE `CRYPTECH_PRIMITIVE_MODEXP_SYSTOLIC_PE_VENDOR
+
`endif
diff --git a/lowlevel/artix7/modexp_systolic_pe_artix7.v b/lowlevel/generic/adder32_ce_generic.v
similarity index 56%
copy from lowlevel/artix7/modexp_systolic_pe_artix7.v
copy to lowlevel/generic/adder32_ce_generic.v
index 08391f5..329bcbf 100644
--- a/lowlevel/artix7/modexp_systolic_pe_artix7.v
+++ b/lowlevel/generic/adder32_ce_generic.v
@@ -1,12 +1,12 @@
//------------------------------------------------------------------------------
//
-// modexp_systolic_pe_artix7.v
+// adder32_ce_generic.v
// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) low-level systolic array processing element.
+// Generic 32-bit adder w/ clock enable.
//
// Authors: Pavel Shatov
//
-// Copyright (c) 2016-2017, NORDUnet A/S
+// Copyright (c) 2016, 2018 NORDUnet A/S
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
@@ -36,88 +36,30 @@
//
//------------------------------------------------------------------------------
-module modexp_systolic_pe_artix7
- (
- input clk,
- input [31: 0] a,
- input [31: 0] b,
- input [31: 0] t,
- input [31: 0] c_in,
- output [31: 0] p,
- output [31: 0] c_out
- );
-
- reg [31: 0] t_dly;
- reg [31: 0] c_in_dly;
-
- always @(posedge clk) t_dly <= t;
- always @(posedge clk) c_in_dly <= c_in;
-
- wire [31: 0] t_c_in_s;
- wire t_c_in_c_out;
-
- reg t_c_in_c_out_dly;
-
- always @(posedge clk) t_c_in_c_out_dly <= t_c_in_c_out;
-
- modexpa7_adder32_artix7 add_t_c_in
- (
- .clk (clk),
- .ce (1'b1),
- .a (t_dly),
- .b (c_in_dly),
- .c_in (1'b0),
- .s (t_c_in_s),
- .c_out (t_c_in_c_out)
- );
+module adder32_ce_generic
+ (
+ input clk, // clock
+ input ce, // clock enable
+ input [31: 0] a, // operand input
+ input [31: 0] b, // operand input
+ output [31: 0] s, // sum output
+ input c_in, // carry input
+ output c_out // carry output
+ );
- wire [63: 0] a_b;
-
- wire [31: 0] a_b_lsb = a_b[31: 0];
- wire [31: 0] a_b_msb = a_b[63:32];
-
- reg [31: 0] a_b_msb_dly;
-
- always @(posedge clk) a_b_msb_dly <= a_b_msb;
-
- modexpa7_multiplier32_artix7 mul_a_b
- (
- .clk (clk),
- .a (a),
- .b (b),
- .p (a_b)
- );
-
- wire [31: 0] add_p_s;
- wire add_p_c_out;
-
- reg [31: 0] add_p_s_dly;
-
- always @(posedge clk) add_p_s_dly <= add_p_s;
-
- assign p = add_p_s_dly;
-
- modexpa7_adder32_artix7 add_p
- (
- .clk (clk),
- .ce (1'b1),
- .a (a_b_lsb),
- .b (t_c_in_s),
- .c_in (1'b0),
- .s (add_p_s),
- .c_out (add_p_c_out)
- );
+ //
+ // Sum
+ //
+ reg [32: 0] s_int;
- modexpa7_adder32_artix7 add_c_out
- (
- .clk (clk),
- .ce (1'b1),
- .a (a_b_msb_dly),
- .b ({{31{1'b0}}, t_c_in_c_out_dly}),
- .c_in (add_p_c_out),
- .s (c_out),
- .c_out ()
- );
+ always @(posedge clk)
+ if (ce) s_int <= {1'b0, a} + {1'b0, b} + {{32{1'b0}}, c_in};
+
+ //
+ // Output
+ //
+ assign s = s_int[31:0];
+ assign c_out = s_int[32];
endmodule
diff --git a/lowlevel/artix7/modexp_systolic_pe_artix7.v b/lowlevel/generic/subtractor32_ce_generic.v
similarity index 56%
copy from lowlevel/artix7/modexp_systolic_pe_artix7.v
copy to lowlevel/generic/subtractor32_ce_generic.v
index 08391f5..da97e76 100644
--- a/lowlevel/artix7/modexp_systolic_pe_artix7.v
+++ b/lowlevel/generic/subtractor32_ce_generic.v
@@ -1,12 +1,12 @@
//------------------------------------------------------------------------------
//
-// modexp_systolic_pe_artix7.v
+// subtractor32_ce_generic.v
// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) low-level systolic array processing element.
+// Generic 32-bit subtractor w/ clock enable.
//
// Authors: Pavel Shatov
//
-// Copyright (c) 2016-2017, NORDUnet A/S
+// Copyright (c) 2016, NORDUnet A/S
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
@@ -36,88 +36,30 @@
//
//------------------------------------------------------------------------------
-module modexp_systolic_pe_artix7
- (
- input clk,
- input [31: 0] a,
- input [31: 0] b,
- input [31: 0] t,
- input [31: 0] c_in,
- output [31: 0] p,
- output [31: 0] c_out
- );
-
- reg [31: 0] t_dly;
- reg [31: 0] c_in_dly;
-
- always @(posedge clk) t_dly <= t;
- always @(posedge clk) c_in_dly <= c_in;
-
- wire [31: 0] t_c_in_s;
- wire t_c_in_c_out;
-
- reg t_c_in_c_out_dly;
-
- always @(posedge clk) t_c_in_c_out_dly <= t_c_in_c_out;
-
- modexpa7_adder32_artix7 add_t_c_in
- (
- .clk (clk),
- .ce (1'b1),
- .a (t_dly),
- .b (c_in_dly),
- .c_in (1'b0),
- .s (t_c_in_s),
- .c_out (t_c_in_c_out)
- );
+module subtractor32_ce_generic
+ (
+ input clk,
+ input ce,
+ input [31: 0] a,
+ input [31: 0] b,
+ output [31: 0] d,
+ input b_in,
+ output b_out
+ );
- wire [63: 0] a_b;
-
- wire [31: 0] a_b_lsb = a_b[31: 0];
- wire [31: 0] a_b_msb = a_b[63:32];
-
- reg [31: 0] a_b_msb_dly;
-
- always @(posedge clk) a_b_msb_dly <= a_b_msb;
-
- modexpa7_multiplier32_artix7 mul_a_b
- (
- .clk (clk),
- .a (a),
- .b (b),
- .p (a_b)
- );
-
- wire [31: 0] add_p_s;
- wire add_p_c_out;
-
- reg [31: 0] add_p_s_dly;
-
- always @(posedge clk) add_p_s_dly <= add_p_s;
-
- assign p = add_p_s_dly;
-
- modexpa7_adder32_artix7 add_p
- (
- .clk (clk),
- .ce (1'b1),
- .a (a_b_lsb),
- .b (t_c_in_s),
- .c_in (1'b0),
- .s (add_p_s),
- .c_out (add_p_c_out)
- );
+ //
+ // Difference
+ //
+ reg [32: 0] d_int;
- modexpa7_adder32_artix7 add_c_out
- (
- .clk (clk),
- .ce (1'b1),
- .a (a_b_msb_dly),
- .b ({{31{1'b0}}, t_c_in_c_out_dly}),
- .c_in (add_p_c_out),
- .s (c_out),
- .c_out ()
- );
+ always @(posedge clk)
+ if (ce) d_int <= {1'b0, a} - {1'b0, b} - {{32{1'b0}}, b_in};
+
+ //
+ // Output
+ //
+ assign d = d_int[31:0];
+ assign b_out = d_int[32];
endmodule
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
More information about the Commits
mailing list