[Cryptech-Commits] [core/math/modexpng] 39/92: Redesigned the testbench. Core clock does not necessarily need to be twice as fast as the bus clock now. It can be the same, or, say, four times faster.
git at cryptech.is
git at cryptech.is
Sat Mar 14 18:19:18 UTC 2020
This is an automated email from the git hooks/post-receive script.
paul at psgd.org pushed a commit to branch master
in repository core/math/modexpng.
commit 72902f5b40ac695786f5103d2a5a456c6c7ee83f
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Mon Oct 21 13:04:07 2019 +0300
Redesigned the testbench. Core clock does not necessarily need to be twice
as fast as the bus clock now. It can be the same, or, say, four times faster.
---
bench/tb_core_full_512.v | 294 +++++++++++++--------
rtl/modexpng_dsp48e1.vh | 1 +
rtl/modexpng_dsp_array_block.v | 84 +++++-
rtl/modexpng_dsp_slice_primitive.vh | 9 +
...pper.v => modexpng_dsp_slice_wrapper_generic.v} | 100 +++++--
...apper.v => modexpng_dsp_slice_wrapper_xilinx.v} | 4 +-
rtl/modexpng_io_block.v | 19 +-
rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v | 66 +++--
rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v | 59 ++---
rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v | 43 +++
rtl/modexpng_uop_rom.v | 20 ++
11 files changed, 482 insertions(+), 217 deletions(-)
diff --git a/bench/tb_core_full_512.v b/bench/tb_core_full_512.v
index 221a2c6..75accff 100644
--- a/bench/tb_core_full_512.v
+++ b/bench/tb_core_full_512.v
@@ -31,6 +31,7 @@ module tb_core_full_512;
reg [31:0] Q_FACTOR[0:TB_NUM_WORDS_PQ-1];
reg [31:0] P_COEFF[0:TB_NUM_WORDS_PQ];
reg [31:0] Q_COEFF[0:TB_NUM_WORDS_PQ];
+ reg [31:0] D[0:TB_NUM_WORDS_N-1];
reg [31:0] DP[0:TB_NUM_WORDS_PQ-1];
reg [31:0] DQ[0:TB_NUM_WORDS_PQ-1];
reg [31:0] QINV[0:TB_NUM_WORDS_PQ-1];
@@ -81,6 +82,10 @@ module tb_core_full_512;
Q_COEFF[ 0] = 32'h5eee9ecd; Q_COEFF[ 1] = 32'h085153b0; Q_COEFF[ 2] = 32'h85326da6; Q_COEFF[ 3] = 32'h7521931a;
Q_COEFF[ 4] = 32'h99e0eef1; Q_COEFF[ 5] = 32'ha219917b; Q_COEFF[ 6] = 32'he8e9087a; Q_COEFF[ 7] = 32'h5239d12b;
Q_COEFF[ 8] = 32'h0000ed92;
+ D[ 0] = 32'hf127ca41; D[ 1] = 32'hc4975ff0; D[ 2] = 32'h69ebbe13; D[ 3] = 32'h66fe0018;
+ D[ 4] = 32'hf2089237; D[ 5] = 32'hfa3f05ab; D[ 6] = 32'h2ab183c4; D[ 7] = 32'h1e4b3c04;
+ D[ 8] = 32'ha67974e8; D[ 9] = 32'ha6714d63; D[ 10] = 32'hfe5cd801; D[ 11] = 32'h13f2071a;
+ D[ 12] = 32'h0b978309; D[ 13] = 32'hb0ddb4a0; D[ 14] = 32'ha437a2cc; D[ 15] = 32'h2391b2fb;
DP[ 0] = 32'h3891ed91; DP[ 1] = 32'h775046c2; DP[ 2] = 32'h60180c26; DP[ 3] = 32'h5130700a;
DP[ 4] = 32'hb13c8216; DP[ 5] = 32'h833fcf78; DP[ 6] = 32'h7ab89b12; DP[ 7] = 32'hb976758c;
DQ[ 0] = 32'h28cc59ad; DQ[ 1] = 32'h3ce6ed45; DQ[ 2] = 32'ha1f53aeb; DQ[ 3] = 32'h06ca05e1;
@@ -100,26 +105,45 @@ module tb_core_full_512;
S[ 8] = 32'h2854a51a; S[ 9] = 32'h0245619b; S[ 10] = 32'hfb67ef8f; S[ 11] = 32'hcc5bdd4f;
S[ 12] = 32'ha70f58bd; S[ 13] = 32'h31f15702; S[ 14] = 32'hd6f36259; S[ 15] = 32'h280e67e0;
end
-
//
// Clocks
//
- `define CLK_FREQUENCY_MHZ (100.0)
- `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ)
- `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS)
+ `define CLK_FREQUENCY_MHZ (100.0)
+ `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ)
+ `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS)
+ `define CLK_PERIOD_QUARTER_NS (0.5 * `CLK_PERIOD_HALF_NS)
- `define CLK_BUS_FREQUENCY_MHZ (50.0)
- `define CLK_BUS_PERIOD_NS (1000.0 / `CLK_BUS_FREQUENCY_MHZ)
- `define CLK_BUS_PERIOD_HALF_NS (0.5 * `CLK_BUS_PERIOD_NS)
-
- reg clk = 1'b1;
- reg clk_bus = 1'b0;
+ `define CLK_BUS_FREQUENCY_MHZ (25.0)
+ `define CLK_BUS_PERIOD_NS (1000.0 / `CLK_BUS_FREQUENCY_MHZ)
+ `define CLK_BUS_PERIOD_HALF_NS (0.5 * `CLK_BUS_PERIOD_NS)
+
+ reg clk = 1'b1;
+ reg clk_dly = 1'b0;
+ wire clk_idle = clk & clk_dly;
+
+ reg clk_bus = 1'b1;
+ reg clk_bus_dly = 1'b0;
+ wire clk_bus_idle = clk_bus & clk_bus_dly;
- always #`CLK_PERIOD_HALF_NS clk = ~clk;
+ always #`CLK_PERIOD_HALF_NS clk <= ~clk;
+ always #`CLK_BUS_PERIOD_HALF_NS clk_bus <= ~clk_bus;
+
+ always @(clk ) clk_dly <= #(`CLK_PERIOD_HALF_NS - `CLK_PERIOD_QUARTER_NS) clk;
+ always @(clk_bus) clk_bus_dly <= #(`CLK_BUS_PERIOD_HALF_NS - `CLK_PERIOD_QUARTER_NS) clk_bus;
+
+
+ //
+ // Clock Sync
+ //
+ task sync_clk;
+ while (clk_idle !== 1) _wait_quarter_clk_tick;
+ endtask
- always #`CLK_BUS_PERIOD_HALF_NS clk_bus = ~clk_bus;
+ task sync_clk_bus;
+ while (clk_bus_idle !== 1) _wait_quarter_clk_tick;
+ endtask
//
@@ -143,7 +167,6 @@ module tb_core_full_512;
//
// System Bus
//
- reg bus_ready;
reg bus_cs = 1'b0;
reg bus_we = 1'b0;
reg [11:0] bus_addr;
@@ -185,67 +208,102 @@ module tb_core_full_512;
//
- // Routine (Bus)
+ // Bus Init Routine
//
- initial begin
-
- bus_ready = 1'b0;
+ task core_set_input;
+ begin
+ core_set_input_1;
+ core_set_input_2;
+ wait_clk_bus_ticks(10);
+ $display("Core input banks written.");
+ end
+ endtask
- while (rst) wait_clock_bus_tick;
- wait_clock_bus_ticks(10);
- $display("Core came out of reset.");
-
- set_input_1;
- set_input_2;
-
- wait_clock_bus_ticks(10);
- bus_ready = 1'b1;
- end
+ //
+ // Script
+ //
+ initial main;
//
- // Routine (Control/Status, Bus)
+ // Main Routine (Control/Status, Bus)
//
- initial begin
-
- _wait_half_clock_tick;
- wait_clock_ticks(100);
- rst = 1'b0;
-
- while (!bus_ready) wait_clock_tick;
- wait_clock_ticks(10);
- $display("Core input banks written.");
-
- word_index_last_n = CORE_NUM_WORDS_N - 1;
- word_index_last_pq = CORE_NUM_WORDS_PQ - 1;
-
- bit_index_last_n = TB_MODULUS_LENGTH_N - 1;
- bit_index_last_pq = TB_MODULUS_LENGTH_N / 2 - 1;
+ task main;
+ begin
- core_crt_mode = 1'b1;
+ sync_clk; // switch to fast core clock
+ core_reset; // reset core
+ core_set_params; // set parameters (modulus width, exponent length)
- core_next = 1'b1;
- wait_clock_tick;
- core_next = 1'b0;
- $display("Pulsed 'next' control signal.");
+ sync_clk_bus; // switch to slow bus clock
+ core_set_input; // write to core input banks
+ /*
+ sync_clk; // switch to fast core clock
+ core_set_crt_mode(1); // enable CRT signing
+ core_pulse_next; // assert 'next' bit for one cycle
+ core_wait_valid; // wait till 'valid' bit gets asserted
+
+ sync_clk_bus; // switch to slow bus clock
+ core_get_output; // read from core output banks
+ core_verify_output; // check, whether core output matches precomputed known good reference values
+ */
+ sync_clk; // switch to fast core clock
+ core_set_crt_mode(0); // disable CRT signing
+ core_pulse_next; // assert 'next' bit for one cycle
+ core_wait_valid; // wait till 'valid' bit gets asserted
+
+ sync_clk_bus; // switch to slow bus clock
+ core_get_output; // read from core output banks
+ core_verify_output; // check, whether core output matches precomputed known good reference values
+ end
+ endtask
- while (!core_valid) wait_clock_tick;
- wait_clock_ticks(10);
-
- $display("Detected high 'valid' status signal.");
- core_crt_mode = 1'bX;
-
- wait_clock_ticks(10);
- get_output;
- wait_clock_ticks(10);
-
- $display("Core output banks read.");
-
- verify;
+ task core_reset;
+ begin
+ wait_clk_ticks(100);
+ rst = 1'b0;
+ wait_clk_ticks(10);
+ $display("Core reset finished.");
+ end
+ endtask
- end
+ task core_set_params;
+ begin
+ word_index_last_n = CORE_NUM_WORDS_N - 1;
+ word_index_last_pq = CORE_NUM_WORDS_PQ - 1;
+ bit_index_last_n = TB_MODULUS_LENGTH_N - 1;
+ bit_index_last_pq = TB_MODULUS_LENGTH_N / 2 - 1;
+ $display("Core parameters set.");
+ end
+ endtask
+ task core_set_crt_mode;
+ input _crt;
+ begin
+ core_crt_mode = _crt;
+ if (_crt) $display("Enabled CRT mode.");
+ else $display("Disabled CRT mode.");
+ end
+ endtask
+
+ task core_pulse_next;
+ begin
+ core_next = 1'b1;
+ wait_clk_tick;
+ core_next = 1'b0;
+ $display("Pulsed core 'next' control signal.");
+ end
+ endtask
+
+ task core_wait_valid;
+ begin
+ while (!core_valid) wait_clk_tick;
+ wait_clk_ticks(10);
+ $display("Detected high core 'valid' status signal.");
+ end
+ endtask
+
//
// Variables
@@ -254,58 +312,61 @@ module tb_core_full_512;
//
- // set_input_1;
+ // core_set_input_1
//
- task set_input_1;
- reg [9:0] _tn;
+ task core_set_input_1;
+ reg [9:0] _tn;
+ reg [31:0] zzz;
begin
_tn = BANK_IN_1_N_COEFF * 2 ** BUS_OP_ADDR_W + TB_NUM_WORDS_N; // trick to write extra trailer word
- for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_M, _w[6:0], M[_w]);
- for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N, _w[6:0], N[_w]);
- for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N_FACTOR, _w[6:0], N_FACTOR[_w]);
- for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N_COEFF, _w[6:0], N_COEFF[_w]);
- bus_write(2'd0, _tn[9:7], _tn[6:0], N_COEFF[TB_NUM_WORDS_N]);
- for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_X, _w[6:0], X[_w]);
- for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_Y, _w[6:0], Y[_w]);
+ for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd1, BANK_IN_1_M, _w[6:0], M[_w]);
+ for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd1, BANK_IN_1_N, _w[6:0], N[_w]);
+ for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd1, BANK_IN_1_N_FACTOR, _w[6:0], N_FACTOR[_w]);
+ for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd1, BANK_IN_1_N_COEFF, _w[6:0], N_COEFF[_w]);
+ bus_write(2'd1, _tn[9:7], _tn[6:0], N_COEFF[TB_NUM_WORDS_N]);
+ for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd1, BANK_IN_1_X, _w[6:0], X[_w]);
+ for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd1, BANK_IN_1_Y, _w[6:0], Y[_w]);
end
endtask
//
- // set_input_2;
+ // core_set_input_2
//
- task set_input_2;
+ task core_set_input_2;
begin
-// for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_M, _w[6:0], M[_w]);
- for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P, {1'b0, _w[5:0]}, P [_w]);
- for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P, {1'b1, _w[5:0]}, DP [_w]);
- for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P_FACTOR, { _w[6:0]}, P_FACTOR[_w]);
- for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P_COEFF, { _w[6:0]}, P_COEFF [_w]);
- for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q, {1'b0, _w[5:0]}, Q [_w]);
- for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q, {1'b1, _w[5:0]}, DQ [_w]);
- for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q_FACTOR, { _w[6:0]}, Q_FACTOR[_w]);
- for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q_COEFF, { _w[6:0]}, Q_COEFF [_w]);
- for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_QINV, { _w[6:0]}, QINV [_w]);
+ //for (_w=0; _w< TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd2, BANK_IN_2_D, { _w[6:0]}, D [_w]);
+ for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_P, {1'b0, _w[5:0]}, P [_w]);
+ for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_P, {1'b1, _w[5:0]}, DP [_w]);
+ for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_P_FACTOR, { _w[6:0]}, P_FACTOR[_w]);
+ for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_P_COEFF, { _w[6:0]}, P_COEFF [_w]);
+ for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_Q, {1'b0, _w[5:0]}, Q [_w]);
+ for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_Q, {1'b1, _w[5:0]}, DQ [_w]);
+ for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_Q_FACTOR, { _w[6:0]}, Q_FACTOR[_w]);
+ for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_Q_COEFF, { _w[6:0]}, Q_COEFF [_w]);
+ for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd2, BANK_IN_2_QINV, { _w[6:0]}, QINV [_w]);
end
endtask
//
- // get_output;
+ // core_get_output
//
- task get_output;
+ task core_get_output;
begin
- for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_XM, _w[6:0], XM_READBACK[_w]);
- for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_YM, _w[6:0], YM_READBACK[_w]);
- for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_S, _w[6:0], S_READBACK[_w]);
+ for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd3, BANK_OUT_XM, _w[6:0], XM_READBACK[_w]);
+ for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd3, BANK_OUT_YM, _w[6:0], YM_READBACK[_w]);
+ for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd3, BANK_OUT_S, _w[6:0], S_READBACK[_w]);
+ wait_clk_bus_ticks(10);
+ $display("Core output banks read.");
end
endtask
//
- // verify;
+ // core_verify_output
//
- task verify;
+ task core_verify_output;
//
reg xm_ok;
reg ym_ok;
@@ -387,7 +448,7 @@ module tb_core_full_512;
input [31:0] data;
begin
_bus_drive(1'b1, 1'b1, {sel, bank, addr}, data);
- wait_clock_bus_tick;
+ wait_clk_bus_tick;
_bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX);
end
endtask
@@ -403,7 +464,7 @@ module tb_core_full_512;
output [31:0] data;
begin
_bus_drive(1'b1, 1'b0, {sel, bank, addr}, 32'hXXXXXXXX);
- wait_clock_bus_tick;
+ wait_clk_bus_tick;
data = bus_data_rd;
_bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX);
end
@@ -411,48 +472,61 @@ module tb_core_full_512;
//
- // _wait_half_clock_tick()
+ // _wait_quarter_clk_tick()
//
- task _wait_half_clock_tick;
- #`CLK_PERIOD_HALF_NS;
+ task _wait_quarter_clk_tick;
+ #`CLK_PERIOD_QUARTER_NS;
endtask
+
//
- // wait_clock_tick()
+ // _wait_half_clk_tick()
//
- task wait_clock_tick;
+ task _wait_half_clk_tick;
begin
- _wait_half_clock_tick;
- _wait_half_clock_tick;
+ _wait_quarter_clk_tick;
+ _wait_quarter_clk_tick;
end
endtask
//
- // wait_clock_bus_tick()
+ // wait_clk_tick()
//
- task wait_clock_bus_tick;
+ task wait_clk_tick;
+ begin
+ _wait_half_clk_tick;
+ _wait_half_clk_tick;
+ end
+ endtask
+
+
+ //
+ // wait_clk_bus_tick()
+ //
+ task wait_clk_bus_tick;
#`CLK_BUS_PERIOD_NS;
endtask
//
- // wait_clock_ticks()
+ // wait_clk_ticks()
//
- task wait_clock_ticks;
+ task wait_clk_ticks;
input integer num_ticks;
for (_n=0; _n<num_ticks; _n=_n+1)
- wait_clock_tick;
+ wait_clk_tick;
endtask
-
+
//
- // wait_clock_bus_ticks()
+ // wait_clk_bus_ticks()
//
- task wait_clock_bus_ticks;
+ task wait_clk_bus_ticks;
input integer num_ticks;
for (_n=0; _n<num_ticks; _n=_n+1)
- wait_clock_bus_tick;
+ wait_clk_bus_tick;
endtask
-
+
+
endmodule
diff --git a/rtl/modexpng_dsp48e1.vh b/rtl/modexpng_dsp48e1.vh
index bc3d55c..410ad41 100644
--- a/rtl/modexpng_dsp48e1.vh
+++ b/rtl/modexpng_dsp48e1.vh
@@ -6,3 +6,4 @@ localparam DSP48E1_P_W = 48;
localparam DSP48E1_INMODE_W = 5;
localparam DSP48E1_OPMODE_W = 7;
localparam DSP48E1_ALUMODE_W = 4;
+
diff --git a/rtl/modexpng_dsp_array_block.v b/rtl/modexpng_dsp_array_block.v
index 8c4e844..6b4ad3c 100644
--- a/rtl/modexpng_dsp_array_block.v
+++ b/rtl/modexpng_dsp_array_block.v
@@ -6,8 +6,9 @@ module modexpng_dsp_array_block
a, b, p
);
- `include "modexpng_dsp48e1.vh"
`include "modexpng_parameters.vh"
+ `include "modexpng_dsp48e1.vh"
+ `include "modexpng_dsp_slice_primitive.vh"
input clk;
@@ -38,14 +39,87 @@ module modexpng_dsp_array_block
ce_a2 <= ce_a1;
ce_b1 <= ce_b0;
end
-
+
+ ///
+ wire [46:0] p_debug_direct;
+ wire [17:0] casc_a_debug_direct;
+ wire [15:0] casc_b_debug_direct;
+
+ wire [46:0] p_debug_cascade;
+
+ wire [46:0] p_ref_direct = p[ 0 +: MAC_W];
+ wire [46:0] p_ref_cascade = p[MAC_W +: MAC_W];
+
+ modexpng_dsp_slice_wrapper_xilinx #
+ (
+ .AB_INPUT("DIRECT"),
+ .B_REG(2)
+ )
+ dsp_debug_direct
+ (
+ .clk (clk),
+
+ .ce_a1 (ce_a0),
+ .ce_b1 (ce_b0),
+ .ce_a2 (ce_a1),
+ .ce_b2 (ce_b1),
+ .ce_m (ce_m),
+ .ce_p (ce_p),
+ .ce_mode (ce_mode),
+
+ .a (a[0 +: 18]),
+ .b (b),
+ .p (p_debug_direct),
+
+ .inmode ({DSP48E1_INMODE_W{1'b0}}),
+ .opmode ({1'b0, mode_z[0], 1'b0, 2'b01, 2'b01}),
+ .alumode ({DSP48E1_ALUMODE_W{1'b0}}),
+
+ .casc_a_in (WORD_EXT_ZERO),
+ .casc_b_in (WORD_ZERO),
+
+ .casc_a_out (casc_a_debug_direct),
+ .casc_b_out (casc_b_debug_direct)
+ );
+
+ modexpng_dsp_slice_wrapper_xilinx #
+ (
+ .AB_INPUT("CASCADE"),
+ .B_REG(1)
+ )
+ dsp_debug_cascade
+ (
+ .clk (clk),
+
+ .ce_a1 (ce_a1),
+ .ce_b1 (1'b0),
+ .ce_a2 (ce_a2),
+ .ce_b2 (ce_b1),
+ .ce_m (ce_m),
+ .ce_p (ce_p),
+ .ce_mode (ce_mode),
+
+ .a (a[0 +: 18]),
+ .b (b),
+ .p (p_debug_cascade),
+
+ .inmode ({DSP48E1_INMODE_W{1'b0}}),
+ .opmode ({1'b0, mode_z[1], 1'b0, 2'b01, 2'b01}),
+ .alumode ({DSP48E1_ALUMODE_W{1'b0}}),
+
+ .casc_a_in (casc_a_debug_direct),
+ .casc_b_in (casc_b_debug_direct),
+
+ .casc_a_out (),
+ .casc_b_out ()
+ );
genvar z;
generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
//
begin : gen_DSP48E1
//
- modexpng_dsp_slice_wrapper #
+ `MODEXPNG_DSP_SLICE #
(
.AB_INPUT("DIRECT"),
.B_REG(2)
@@ -77,7 +151,7 @@ module modexpng_dsp_array_block
.casc_b_out (casc_b[z])
);
//
- modexpng_dsp_slice_wrapper #
+ `MODEXPNG_DSP_SLICE #
(
.AB_INPUT("CASCADE"),
.B_REG(1)
@@ -113,7 +187,7 @@ module modexpng_dsp_array_block
//
endgenerate
- modexpng_dsp_slice_wrapper #
+ `MODEXPNG_DSP_SLICE #
(
.AB_INPUT("DIRECT"),
.B_REG(2)
diff --git a/rtl/modexpng_dsp_slice_primitive.vh b/rtl/modexpng_dsp_slice_primitive.vh
new file mode 100644
index 0000000..02d9a5d
--- /dev/null
+++ b/rtl/modexpng_dsp_slice_primitive.vh
@@ -0,0 +1,9 @@
+`ifndef MODEXPNG_ENABLE_DEBUG
+
+`define MODEXPNG_DSP_SLICE modexpng_dsp_slice_wrapper_xilinx
+
+`else
+
+`define MODEXPNG_DSP_SLICE modexpng_dsp_slice_wrapper_generic
+
+`endif
diff --git a/rtl/modexpng_dsp_slice_wrapper.v b/rtl/modexpng_dsp_slice_wrapper_generic.v
similarity index 59%
copy from rtl/modexpng_dsp_slice_wrapper.v
copy to rtl/modexpng_dsp_slice_wrapper_generic.v
index 3d13570..7183d74 100644
--- a/rtl/modexpng_dsp_slice_wrapper.v
+++ b/rtl/modexpng_dsp_slice_wrapper_generic.v
@@ -1,4 +1,4 @@
-module modexpng_dsp_slice_wrapper #
+module modexpng_dsp_slice_wrapper_generic #
(
AB_INPUT = "DIRECT",
B_REG = 2
@@ -16,29 +16,81 @@ module modexpng_dsp_slice_wrapper #
`include "modexpng_parameters.vh"
`include "modexpng_dsp48e1.vh"
- input clk;
- input ce_a1;
- input ce_b1;
- input ce_a2;
- input ce_b2;
- input ce_m;
- input ce_p;
- input ce_mode;
- input [ WORD_EXT_W -1:0] a;
- input [ WORD_W -1:0] b;
- output [ MAC_W -1:0] p;
- input [ DSP48E1_INMODE_W -1:0] inmode;
- input [ DSP48E1_OPMODE_W -1:0] opmode;
- input [DSP48E1_ALUMODE_W -1:0] alumode;
- input [ WORD_EXT_W -1:0] casc_a_in;
- input [ WORD_W -1:0] casc_b_in;
- output [ WORD_EXT_W -1:0] casc_a_out;
- output [ WORD_W -1:0] casc_b_out;
+ input clk; //
+ input ce_a1; //
+ input ce_b1; //
+ input ce_a2; //
+ input ce_b2; //
+ input ce_m; //
+ input ce_p; //
+ input ce_mode; //
+ input [ WORD_EXT_W -1:0] a; //
+ input [ WORD_W -1:0] b; //
+ output [ MAC_W -1:0] p; //
+ input [ DSP48E1_INMODE_W -1:0] inmode; //
+ input [ DSP48E1_OPMODE_W -1:0] opmode; //
+ input [DSP48E1_ALUMODE_W -1:0] alumode; //
+ input [ WORD_EXT_W -1:0] casc_a_in; //
+ input [ WORD_W -1:0] casc_b_in; //
+ output [ WORD_EXT_W -1:0] casc_a_out; //
+ output [ WORD_W -1:0] casc_b_out; //
- wire [DSP48E1_A_W - WORD_EXT_W -1:0] casc_a_dummy;
- wire [DSP48E1_B_W - WORD_W -1:0] casc_b_dummy;
- wire [DSP48E1_P_W - MAC_W -1:0] p_dummy;
+ //
+ // A Port
+ //
+ wire [WORD_EXT_W -1:0] a_mux = AB_INPUT == "DIRECT" ? a : casc_a_in;
+ reg [WORD_EXT_W -1:0] a_reg1;
+ reg [WORD_EXT_W -1:0] a_reg2;
+ assign casc_a_out = a_reg1;
+
+ always @(posedge clk) begin
+ if (ce_a1) a_reg1 <= a_mux;
+ if (ce_a2) a_reg2 <= a_reg1;
+ end
+
+ //
+ // B Port
+ //
+ wire [WORD_W -1:0] b_mux = AB_INPUT == "DIRECT" ? b : casc_b_in;
+ reg [WORD_W -1:0] b_reg1;
+ reg [WORD_W -1:0] b_reg2;
+
+ assign casc_b_out = b_reg1;
+
+ always @(posedge clk) begin
+ if (ce_b1) b_reg1 <= b_mux;
+ if (ce_b2) b_reg2 <= B_REG == 2 ? b_reg1 : b_mux;
+ end
+
+ //
+ // OPMODE Port
+ //
+ reg [DSP48E1_OPMODE_W -1:0] opmode_reg;
+
+ always @(posedge clk) begin
+ if (ce_mode) opmode_reg <= opmode;
+ end
+
+ //
+ // M, P
+ //
+ reg [MAC_W-1:0] m_reg;
+ reg [MAC_W-1:0] p_reg;
+
+ wire [MAC_W-1:0] a_pad = {{MAC_W-WORD_EXT_W{1'b0}}, a_reg2};
+ wire [MAC_W-1:0] b_pad = {{MAC_W-WORD_W{1'b0}}, b_reg2};
+ wire [MAC_W-1:0] p_pad = opmode_reg[5] ? p_reg : {MAC_W{1'b0}};
+
+ assign p = p_reg;
+
+ always @(posedge clk) begin
+ if (ce_m) m_reg <= a_pad * b_pad;
+ if (ce_p) p_reg <= m_reg + p_pad;
+ end
+
+
+ /*
DSP48E1 #
(
.AREG (2),
@@ -130,6 +182,8 @@ module modexpng_dsp_slice_wrapper #
.MULTSIGNIN (1'b0),
.MULTSIGNOUT ()
- );
+ );
+ */
+
endmodule
diff --git a/rtl/modexpng_dsp_slice_wrapper.v b/rtl/modexpng_dsp_slice_wrapper_xilinx.v
similarity index 98%
rename from rtl/modexpng_dsp_slice_wrapper.v
rename to rtl/modexpng_dsp_slice_wrapper_xilinx.v
index 3d13570..9c1a60d 100644
--- a/rtl/modexpng_dsp_slice_wrapper.v
+++ b/rtl/modexpng_dsp_slice_wrapper_xilinx.v
@@ -1,4 +1,4 @@
-module modexpng_dsp_slice_wrapper #
+module modexpng_dsp_slice_wrapper_xilinx #
(
AB_INPUT = "DIRECT",
B_REG = 2
@@ -130,6 +130,6 @@ module modexpng_dsp_slice_wrapper #
.MULTSIGNIN (1'b0),
.MULTSIGNOUT ()
- );
+ );
endmodule
diff --git a/rtl/modexpng_io_block.v b/rtl/modexpng_io_block.v
index 06ce2b1..6d008aa 100644
--- a/rtl/modexpng_io_block.v
+++ b/rtl/modexpng_io_block.v
@@ -82,12 +82,12 @@ module modexpng_io_block
wire [ BUS_DATA_W -1:0] bus_data_rd_input_1;
wire [ BUS_DATA_W -1:0] bus_data_rd_output;
- wire bus_we_input_1 = bus_we && (bus_addr_msb == 2'd0);
- wire bus_we_input_2 = bus_we && (bus_addr_msb == 2'd1);
+ wire bus_we_input_1 = bus_we && (bus_addr_msb == 2'd1);
+ wire bus_we_input_2 = bus_we && (bus_addr_msb == 2'd2);
- wire bus_cs_input_1 = bus_cs && (bus_addr_msb == 2'b00);
- wire bus_cs_input_2 = bus_cs && (bus_addr_msb == 2'b01);
- wire bus_cs_output = bus_cs && (bus_addr_msb == 2'b10);
+ wire bus_cs_input_1 = bus_cs && (bus_addr_msb == 2'd1);
+ wire bus_cs_input_2 = bus_cs && (bus_addr_msb == 2'd2);
+ wire bus_cs_output = bus_cs && (bus_addr_msb == 2'd3);
/* INPUT_1 */
`MODEXPNG_TDP_36K_X16_X32 bram_input_1
@@ -125,7 +125,6 @@ module modexpng_io_block
.doutb (in_2_dout) //
);
-
/* OUTPUT */
`MODEXPNG_SDP_36K_X32_X16 bram_output
(
@@ -152,10 +151,10 @@ module modexpng_io_block
//
case (bus_addr_msb_dly)
//
- 2'd0: bus_data_rd_mux = bus_data_rd_input_1;
- 2'd1: bus_data_rd_mux = 32'hDEADC0DE;
- 2'd2: bus_data_rd_mux = bus_data_rd_output;
- 2'd3: bus_data_rd_mux = 32'hDEADC0DE;
+ 2'd0: bus_data_rd_mux = 32'hDEADC0DE;
+ 2'd1: bus_data_rd_mux = bus_data_rd_input_1;
+ 2'd2: bus_data_rd_mux = 32'hDEADC0DE;
+ 2'd3: bus_data_rd_mux = bus_data_rd_output;
//
endcase
diff --git a/rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v b/rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v
index 034b00b..3797e41 100644
--- a/rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v
+++ b/rtl/modexpng_sdp_36k_x16_x32_wrapper_generic.v
@@ -34,42 +34,40 @@ module modexpng_sdp_36k_x16_x32_wrapper_generic
//
- // BRAM_SDP_MACRO
+ // Memory
//
- BRAM_SDP_MACRO #
- (
- .DEVICE ("7SERIES"),
-
- .BRAM_SIZE ("36Kb"),
-
- .WRITE_WIDTH (BUS_DATA_W),
- .READ_WIDTH (WORD_W),
-
- .DO_REG (1),
- .WRITE_MODE ("READ_FIRST"),
-
- .SRVAL (72'h000000000000000000),
- .INIT (72'h000000000000000000),
-
- .INIT_FILE ("NONE"),
- .SIM_COLLISION_CHECK ("NONE")
- )
- BRAM_SDP_MACRO_inst
- (
- .RST (1'b0),
+ reg [BUS_DATA_W -1:0] mem[0:2**(BANK_ADDR_W+BUS_OP_ADDR_W)-1];
+
+ //
+ // Write Port
+ //
+ always @(posedge clk_bus)
+ //
+ if (ena && wea)
+ mem[addra] <= dina;
+
+ //
+ // Read Port
+ //
+ reg [WORD_W -1:0] doutb_reg1;
+ reg [WORD_W -1:0] doutb_reg2;
+
+ assign doutb = doutb_reg2;
+
+ wire [BUS_DATA_W -1:0] mem_addrb = mem[addrb[BANK_ADDR_W + OP_ADDR_W -1:1]];
+
+ wire [ WORD_W -1:0] mem_addrb_msb = mem_addrb[ BUS_DATA_W -1:WORD_W];
+ wire [ WORD_W -1:0] mem_addrb_lsb = mem_addrb[ WORD_W -1: 0];
- .WRCLK (clk_bus),
- .WREN (ena),
- .WE ({4{wea}}),
- .WRADDR (addra),
- .DI (dina),
-
- .RDCLK (clk),
- .RDEN (enb),
- .REGCE (regceb),
- .RDADDR (addrb),
- .DO (doutb)
- );
+ always @(posedge clk)
+ //
+ if (enb)
+ doutb_reg1 <= addrb[0] ? mem_addrb_msb : mem_addrb_lsb;
+
+ always @(posedge clk)
+ //
+ if (regceb)
+ doutb_reg2 <= doutb_reg1;
endmodule
diff --git a/rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v b/rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v
index c74daac..586cadf 100644
--- a/rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v
+++ b/rtl/modexpng_sdp_36k_x32_x16_wrapper_generic.v
@@ -33,41 +33,34 @@ module modexpng_sdp_36k_x32_x16_wrapper_generic
//
- // BRAM_SDP_MACRO
+ // Memory
//
- BRAM_SDP_MACRO #
- (
- .DEVICE ("7SERIES"),
-
- .BRAM_SIZE ("36Kb"),
-
- .WRITE_WIDTH (WORD_W),
- .READ_WIDTH (BUS_DATA_W),
-
- .DO_REG (0),
- .WRITE_MODE ("READ_FIRST"),
-
- .SRVAL (72'h000000000000000000),
- .INIT (72'h000000000000000000),
-
- .INIT_FILE ("NONE"),
- .SIM_COLLISION_CHECK ("NONE")
- )
- BRAM_SDP_MACRO_inst
- (
- .RST (1'b0),
+ reg [BUS_DATA_W -1:0] mem[0:2**(BANK_ADDR_W+BUS_OP_ADDR_W)-1];
+
+ //
+ // Write Port
+ //
+ wire [BANK_ADDR_W + BUS_OP_ADDR_W -2:0] addra_msb = addra[BANK_ADDR_W + BUS_OP_ADDR_W -1:1];
+ wire addra_lsb = addra[0];
+
+ always @(posedge clk)
+ //
+ if (ena && wea) begin
+ if (addra_lsb) mem[addra_msb][BUS_DATA_W-1:WORD_W] <= dina;
+ else mem[addra_msb][ WORD_W-1: 0] <= dina;
+ end
+
+ //
+ // Read Port
+ //
+ reg [BUS_DATA_W -1:0] doutb_reg;
- .WRCLK (clk),
- .WREN (ena),
- .WE ({2{wea}}),
- .WRADDR (addra),
- .DI (dina),
+ assign doutb = doutb_reg;
- .RDCLK (clk_bus),
- .RDEN (enb),
- .REGCE (1'b0),
- .RDADDR (addrb),
- .DO (doutb)
- );
+ always @(posedge clk_bus)
+ //
+ if (enb)
+ doutb_reg <= mem[addrb];
+
endmodule
diff --git a/rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v b/rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v
index 5e69bef..fda7cf6 100644
--- a/rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v
+++ b/rtl/modexpng_tdp_36k_x16_x32_wrapper_generic.v
@@ -34,6 +34,48 @@ module modexpng_tdp_36k_x16_x32_wrapper_generic
output [ WORD_W -1:0] doutb;
+ //
+ // Memory
+ //
+ reg [BUS_DATA_W -1:0] mem[0:2**(BANK_ADDR_W+BUS_OP_ADDR_W)-1];
+
+ //
+ // Read-Write Port
+ //
+ reg [BUS_DATA_W -1:0] douta_reg;
+
+ assign douta = douta_reg;
+
+ always @(posedge clk_bus)
+ //
+ if (ena) begin
+ if (wea) mem[addra] <= dina;
+ douta_reg <= mem[addra];
+ end
+
+ //
+ // Read Port
+ //
+ reg [WORD_W -1:0] doutb_reg1;
+ reg [WORD_W -1:0] doutb_reg2;
+
+ assign doutb = doutb_reg2;
+
+ wire [BUS_DATA_W -1:0] mem_addrb = mem[addrb[BANK_ADDR_W + OP_ADDR_W -1:1]];
+
+ wire [ WORD_W -1:0] mem_addrb_msb = mem_addrb[ BUS_DATA_W -1:WORD_W];
+ wire [ WORD_W -1:0] mem_addrb_lsb = mem_addrb[ WORD_W -1: 0];
+
+ always @(posedge clk)
+ //
+ if (enb)
+ doutb_reg1 <= addrb[0] ? mem_addrb_msb : mem_addrb_lsb;
+
+ always @(posedge clk)
+ //
+ if (regceb)
+ doutb_reg2 <= doutb_reg1;
+/*
//
// BRAM_TDP_MACRO
//
@@ -84,5 +126,6 @@ module modexpng_tdp_36k_x16_x32_wrapper_generic
.DIB ({WORD_W{1'b0}}),
.DOB (doutb)
);
+ */
endmodule
diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v
index 522e9ca..5d6308c 100644
--- a/rtl/modexpng_uop_rom.v
+++ b/rtl/modexpng_uop_rom.v
@@ -15,6 +15,8 @@ module modexpng_uop_rom
always @(posedge clk)
//
case (addr)
+ //
+ // CRT mode
//
7'd000: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
7'd001: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
@@ -107,9 +109,27 @@ module modexpng_uop_rom
7'd058: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_A, BANK_DNC, BANK_NARROW_A }; //
//
7'd059: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_A, BANK_DNC, BANK_OUT_S }; //
+ //
+ // Non-CRT Mode (i.e. only when "D" is known)
+ //
+ 7'd064: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
+ 7'd065: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
+ 7'd066: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; //
+ 7'd067: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; //
+ 7'd068: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
+ 7'd069: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
+ //
+ 7'd070: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 7'd071: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 7'd072: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 7'd073: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 7'd074: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
+ 7'd075: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
//
default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_DNC, BANK_DNC }; //
//
+
+
endcase
endmodule
More information about the Commits mailing list