[Cryptech-Commits] [core/pkey/ed25519] 02/04: Started porting modular multiplier from ECDSA.

git at cryptech.is git at cryptech.is
Wed Sep 26 11:35:05 UTC 2018


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository core/pkey/ed25519.

commit f2239775210e3cd6a373987e7a872328a30501a9
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Tue Sep 25 15:04:27 2018 +0300

    Started porting modular multiplier from ECDSA.
---
 .../ed25519_modular_multiplier.v                   | 410 ++++++++++++
 rtl/modular_multiplier/ed25519_modular_reductor.v  | 699 +++++++++++++++++++++
 2 files changed, 1109 insertions(+)

diff --git a/rtl/modular_multiplier/ed25519_modular_multiplier.v b/rtl/modular_multiplier/ed25519_modular_multiplier.v
new file mode 100644
index 0000000..9f8ead7
--- /dev/null
+++ b/rtl/modular_multiplier/ed25519_modular_multiplier.v
@@ -0,0 +1,410 @@
+//------------------------------------------------------------------------------
+//
+// ed25519_modular_multiplier.v
+// -----------------------------------------------------------------------------
+// Curve25519 Modular Multiplier.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, 2018 NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module ed25519_modular_multiplier
+(
+    clk, rst_n,
+    ena, rdy,
+    a_addr, b_addr, p_addr, p_wren,
+    a_din, b_din, p_dout
+);
+
+
+    //
+    // Constants
+    //
+    localparam integer OPERAND_NUM_WORDS    = 8;    // 32-bit words per operand (8 * 32 = 256 bits)
+    localparam integer WORD_COUNTER_WIDTH   = 3;    // width of a word index; 2**3 covers OPERAND_NUM_WORDS
+
+
+    //
+    // Handy Numbers: first and last valid word index, sized to the index width
+    //
+    localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
+    localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+    //
+    // Handy Functions
+    //
+    function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
+    input    [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+    begin   // step forward one word; wrap to the first word after the last one
+        WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT >= WORD_INDEX_LAST) ?
+            WORD_INDEX_ZERO : (WORD_INDEX_CURRENT + 1'b1);
+    end
+    endfunction
+
+    function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST;
+    input    [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+    begin   // step back one word; wrap to the last word before the first one
+        WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT <= WORD_INDEX_ZERO) ?
+            WORD_INDEX_LAST : (WORD_INDEX_CURRENT - 1'b1);
+    end
+    endfunction
+
+
+    //
+    // Ports
+    //
+    input   clk;    // system clock
+    input   rst_n;  // active-low async reset
+
+    input   ena;    // enable input: sampled while rdy is high; a high level starts the FSM
+    output  rdy;    // ready output: bit 0 of the FSM shift register, high while idle
+
+    output  [WORD_COUNTER_WIDTH-1:0]    a_addr;    // index of current A word (driven by index_a)
+    output  [WORD_COUNTER_WIDTH-1:0]    b_addr;    // index of current B word (driven by index_b)
+    output  [WORD_COUNTER_WIDTH-1:0]    p_addr;    // index of current P word; NOTE(review): no live driver in this revision
+    
+    output  p_wren;    // store current P word now; NOTE(review): no live driver in this revision
+
+    input   [31:0]  a_din;      // current word of A
+    input   [31:0]  b_din;      // current word of B
+    output  [31:0]  p_dout;     // current word of P; NOTE(review): no live driver in this revision
+
+
+    //
+    // Word Indices
+    //
+    reg [WORD_COUNTER_WIDTH-1:0] index_a;   // NOTE(review): never assigned in live code; update logic is still commented out
+    reg [WORD_COUNTER_WIDTH-1:0] index_b;   // NOTE(review): never assigned in live code; update logic is still commented out
+
+    /* map registers to output ports */
+    assign a_addr    = index_a;
+    assign b_addr    = index_b;
+
+
+    //
+    // FSM
+    //
+    localparam FSM_SHREG_WIDTH =    1 * OPERAND_NUM_WORDS + 1 +
+                                    2 * OPERAND_NUM_WORDS + 1 +
+                                    2 * OPERAND_NUM_WORDS + 2 +
+                                    0 * OPERAND_NUM_WORDS + 2 +
+                                    0 * OPERAND_NUM_WORDS + 1;  // = 5 * OPERAND_NUM_WORDS + 7 (47 for 8 words)
+
+    localparam [FSM_SHREG_WIDTH-1:0] FSM_SHREG_INIT = {{(FSM_SHREG_WIDTH-1){1'b0}}, 1'b1}; // idle pattern: only bit 0 set
+
+    reg [FSM_SHREG_WIDTH-1:0] fsm_shreg = FSM_SHREG_INIT;  // initial value equals the reset value
+
+    assign rdy = fsm_shreg[0];  // ready flag is bit 0 of the shift register
+
+    wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a  = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; // top OPERAND_NUM_WORDS bits
+    //wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+    //wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b  = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+    //wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+    //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)];
+    //wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si     = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)];
+    //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)];
+    //wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)];
+    //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)];
+    //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop  = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)];
+
+    wire                         inc_index_a        = |fsm_shreg_inc_index_a;
+//    wire                         store_word_a        = |fsm_shreg_store_word_a;
+//    wire                         inc_index_b        = |fsm_shreg_inc_index_b;
+//    wire                         clear_mac_ab        = |fsm_shreg_inc_index_b;
+//    wire                         shift_wide_a        = |fsm_shreg_inc_index_b;
+//    wire                         enable_mac_ab        = |fsm_shreg_inc_index_b;
+//    wire                         store_si_msb        = |fsm_shreg_store_si_msb;
+//    wire                         store_si_lsb        =  fsm_shreg_store_si_lsb;
+//    wire                         shift_si        = |fsm_shreg_shift_si;
+//    wire                         mask_cw1_sum        =  fsm_shreg_mask_cw1_sum;
+//    wire                         store_c_word        = |fsm_shreg_store_c_word;
+//    wire                         reduce_start        =  fsm_shreg_reduce_start;
+//    wire                         reduce_stop        =  fsm_shreg_reduce_stop;
+
+
+    //
+    // FSM Logic
+    //
+//    wire                         reduce_done;
+
+    // One-hot sequencer: when started, a single set bit enters at the MSB and
+    // moves one position towards bit 0 (the rdy flag) on every clock edge.
+    always @(posedge clk or negedge rst_n) begin
+        if (!rst_n)
+            // asynchronous reset: only bit 0 set, i.e. idle with rdy high
+            fsm_shreg <= FSM_SHREG_INIT;
+        else if (rdy)
+            // idle: ena injects the bit at the MSB, otherwise bit 0 stays set
+            fsm_shreg <= {ena, {(FSM_SHREG_WIDTH-2){1'b0}}, ~ena};
+        else
+            // busy: shift right by one, filling the MSB with zero
+            // planned stall condition: /*if (!reduce_stop || reduce_done)*/
+            fsm_shreg <= fsm_shreg >> 1;
+    end
+
+
+//
+// Word Index Increment Logic
+//
+
+/*
+reg    index_b_ff;
+
+always @(posedge clk)
+//
+if (inc_index_b) index_b_ff <= ~index_b_ff;
+else index_b_ff <= 1'b0;
+
+always @(posedge clk)
+//
+if (rdy) begin
+//
+index_a        <= WORD_INDEX_ZERO;
+index_b        <= WORD_INDEX_LAST;
+//
+end else begin
+//
+if (inc_index_a)        index_a    <= WORD_INDEX_NEXT_OR_ZERO(index_a);
+if (inc_index_b && !index_b_ff)    index_b    <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
+//
+end
+
+
+//
+// Wide Operand Buffer
+//
+reg    [255:0]    buf_a_wide;
+
+always @(posedge clk)
+//
+if (store_word_a)
+buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]};
+else if (shift_wide_a)
+buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
+
+
+//
+// Multiplier Array
+//
+wire     mac_inhibit;    // control signal to pause all accumulators
+
+wire [46: 0] mac[0:15];    // outputs of all accumulators
+reg [15: 0]     mac_clear;    // individual per-accumulator clear flag
+
+assign mac_inhibit = ~enable_mac_ab;
+
+always @(posedge clk)
+//
+if (!clear_mac_ab)
+mac_clear <= {16{1'b1}};
+else begin
+
+if (mac_clear == {16{1'b1}})
+mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}};
+else
+mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}};
+
+
+end
+
+//
+// Array of parallel multipliers
+//
+genvar i;
+generate for (i=0; i<16; i=i+1)
+begin : gen_mac_array
+//
+mac16_wrapper mac16_inst
+(
+.clk        (clk),
+.ce        (~mac_inhibit),
+
+.clr        (mac_clear[i]),
+
+.a        (buf_a_wide[16*i+:16]),
+.b        (index_b_ff ? b_din[15:0] : b_din[31:16]),
+.s        (mac[i])
+);
+//
+end
+endgenerate
+
+//
+// Intermediate Words
+//
+reg    [47*(2*OPERAND_NUM_WORDS-1)-1:0]    si_msb;
+reg    [47*(2*OPERAND_NUM_WORDS-0)-1:0]    si_lsb;
+
+
+wire    [47*(2*OPERAND_NUM_WORDS-1)-1:0]    si_msb_new;
+wire    [47*(2*OPERAND_NUM_WORDS-0)-1:0]    si_lsb_new;
+
+generate for (i=0; i<16; i=i+1)
+begin : gen_si_lsb_new
+assign si_lsb_new[47*i+:47] = mac[15-i];
+end
+endgenerate
+
+generate for (i=1; i<16; i=i+1)
+begin : gen_si_msb_new
+assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47];
+end
+endgenerate
+
+always @(posedge clk) begin
+//
+if (shift_si) begin
+si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]};
+si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]};
+end else begin
+
+if (store_si_msb)
+si_msb <= si_msb_new;
+
+if (store_si_lsb)
+si_lsb <= si_lsb_new;
+end
+
+end
+
+
+//
+// Accumulators
+//
+wire    [46: 0]    add47_cw0_s;
+wire    [46: 0]    add47_cw1_s;
+
+
+//
+// cw0, b, cw1, b
+//
+reg    [30: 0]    si_prev_dly;
+reg    [15: 0]    si_next_dly;
+
+always @(posedge clk)
+//
+if (shift_si)
+si_prev_dly <= si_lsb[93:63];
+else
+si_prev_dly <= {31{1'b0}};
+
+always @(posedge clk)
+//
+si_next_dly <= si_lsb[62:47];
+
+wire    [46: 0]    add47_cw0_a = si_lsb[46:0];
+wire    [46: 0]    add47_cw0_b = {{16{1'b0}}, si_prev_dly};
+
+wire    [46: 0]    add47_cw1_a = add47_cw0_s;
+wire    [46: 0]    add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}};
+
+adder47_wrapper add47_cw0_inst
+(
+.clk    (clk),
+.a        (add47_cw0_a),
+.b        (add47_cw0_b),
+.s        (add47_cw0_s)
+);
+
+adder47_wrapper add47_cw1_inst
+(
+.clk    (clk),
+.a        (add47_cw1_a),
+.b        (add47_cw1_b),
+.s        (add47_cw1_s)
+);
+
+
+
+//
+// Full-Size Product
+//
+reg    [WORD_COUNTER_WIDTH:0]    bram_c_addr;
+
+wire    [WORD_COUNTER_WIDTH:0]    reduce_c_addr;
+wire    [                31:0]    reduce_c_word;
+
+always @(posedge clk)
+//
+if (store_c_word)
+bram_c_addr <= bram_c_addr + 1'b1;
+else
+bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}};
+
+bram_1rw_1ro_readfirst #
+(
+.MEM_WIDTH        (32),
+.MEM_ADDR_BITS    (WORD_COUNTER_WIDTH + 1)
+)
+bram_c_inst
+(
+.clk        (clk),
+
+.a_addr        (bram_c_addr),
+.a_wr        (store_c_word),
+.a_in        (add47_cw1_s[31:0]),
+.a_out        (),
+
+.b_addr        (reduce_c_addr),
+.b_out        (reduce_c_word)
+);
+
+
+//
+// Reduction Stage
+//
+modular_reductor_256 reduce_256_inst
+(
+.clk        (clk),
+.rst_n        (rst_n),
+
+.ena        (reduce_start),
+.rdy        (reduce_done),
+
+.x_addr        (reduce_c_addr),
+.n_addr        (n_addr),
+.p_addr        (p_addr),
+.p_wren        (p_wren),
+
+.x_din        (reduce_c_word),
+.n_din        (n_din),
+.p_dout        (p_dout)
+);
+*/
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular_multiplier/ed25519_modular_reductor.v b/rtl/modular_multiplier/ed25519_modular_reductor.v
new file mode 100644
index 0000000..5b50cb3
--- /dev/null
+++ b/rtl/modular_multiplier/ed25519_modular_reductor.v
@@ -0,0 +1,699 @@
+//------------------------------------------------------------------------------
+//
+// ed25519_modular_reductor.v
+// -----------------------------------------------------------------------------
+// Curve 25519 Modular Reductor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2018, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module ed25519_modular_reductor
+(
+    clk, rst_n,
+    ena, rdy,
+    x_addr, y_addr, y_wren,
+    x_din, y_dout
+);
+
+
+    //
+    // Constants
+    //
+    localparam integer OPERAND_NUM_WORDS    = 8;
+    localparam integer WORD_COUNTER_WIDTH   = 3;
+
+
+    /*
+//
+// Handy Numbers
+//
+localparam  [WORD_COUNTER_WIDTH:0]  WORD_INDEX_ZERO = 0;
+localparam  [WORD_COUNTER_WIDTH:0]  WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1;
+    */
+/*
+
+    //
+    // Include Handy Functions
+    //
+function    [WORD_COUNTER_WIDTH:0]  WORD_INDEX_PREVIOUS_OR_LAST;
+input   [WORD_COUNTER_WIDTH:0]  WORD_INDEX_CURRENT;
+begin
+WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+end
+endfunction
+*/
+
+    //
+    // Ports
+    //
+    input   clk;    // system clock
+    input   rst_n;  // active-low async reset
+
+    input   ena;    // enable input
+    output  rdy;    // ready output
+
+    output  [WORD_COUNTER_WIDTH  :0]    x_addr; // index of current X word
+    output  [WORD_COUNTER_WIDTH-1:0]    p_addr; // index of current P word
+    
+    output  y_wren;     // store current Y word now
+
+    input   [31:0]  x_din;  // current word of X
+    output  [31:0]  y_dout; // current word of Y
+
+/*
+//
+// Word Indices
+//
+reg [WORD_COUNTER_WIDTH:0]              index_x;
+
+
+// map registers to output ports 
+assign x_addr   = index_x;
+*/
+
+    //
+    // FSM
+    //
+    localparam integer FSM_SHREG_WIDTH = 2;//(2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1;
+
+    localparam FSM_SHREG_INIT = {{(FSM_SHREG_WIDTH-1){1'b0}}, 1'b1};
+    
+    reg [FSM_SHREG_WIDTH-1:0] fsm_shreg = FSM_SHREG_INIT;
+
+    assign rdy = fsm_shreg[0];
+
+    /*
+wire [2 * OPERAND_NUM_WORDS - 1:0]          fsm_shreg_inc_index_x   = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS];
+wire [2 * OPERAND_NUM_WORDS - 1:0]          fsm_shreg_store_word_z  = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS];
+wire [2 *                 5 - 1:0]          fsm_shreg_reduce_stages = fsm_shreg[                                        1 +: 2 *                 5];
+
+wire [5-1:0]                    fsm_shreg_reduce_stage_start;
+wire [5-1:0]                    fsm_shreg_reduce_stage_stop;
+
+genvar                      s;
+generate for (s=0; s<5; s=s+1)
+begin : gen_fsm_shreg_reduce_stages
+assign fsm_shreg_reduce_stage_start[5 - (s + 1)]    = fsm_shreg_reduce_stages[2 * (5 - s) - 1];
+assign fsm_shreg_reduce_stage_stop[5 - (s + 1)]     = fsm_shreg_reduce_stages[2 * (5 - s) - 2];
+end
+endgenerate
+
+wire inc_index_x    = |fsm_shreg_inc_index_x;
+wire store_word_z   = |fsm_shreg_store_word_z;
+wire reduce_start   = |fsm_shreg_reduce_stage_start;
+wire reduce_stop    = |fsm_shreg_reduce_stage_stop;
+wire store_p        =  fsm_shreg_reduce_stage_stop[0];
+
+
+wire    reduce_adder0_done;
+wire    reduce_adder1_done;
+wire    reduce_subtractor_done;
+
+wire    reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done;
+
+*/
+    always @(posedge clk or negedge rst_n)
+        //
+        if (rst_n == 1'b0)
+            //
+            fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+            //
+        else begin
+            //
+            if (rdy)
+                //
+                fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+                //
+            else /*if (!reduce_stop || reduce_done_all)*/
+                //
+                fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+                //
+        end
+
+/*
+//
+// Word Index Increment Logic
+//
+always @(posedge clk)
+//
+if (rdy)
+//
+index_x <= WORD_INDEX_LAST;
+//
+else if (inc_index_x)
+//
+index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x);
+
+
+//
+// Look-up Table
+//
+
+//
+// Take a look at the corresponding C model for more information
+// on how exactly the math behind reduction works. The first step
+// is to assemble nine 256-bit values ("z-words") from 32-bit parts
+// of the full 512-bit product ("c-word"). The problem with z5 is
+// that it contains c13 two times. This implementation scans from
+// c15 to c0 and writes current part of c-word into corresponding
+// parts of z-words. Since those 32-bit parts are stored in block
+// memories, one source word can only be written to one location in
+// every z-word at a time. The trick is to delay c13 and then write
+// the delayed value at the corresponding location in z5 instead of
+// the next c12. "z_save" flag is used to indicate that the current
+// word should be delayed and written once again during the next cycle.
+//
+
+reg [9*WORD_COUNTER_WIDTH-1:0]  z_addr; //
+reg [9                   -1:0]  z_wren; //
+reg [9                   -1:0]  z_mask; // mask input to store zero word
+reg [9                   -1:0]  z_save; // save previous word once again
+
+always @(posedge clk)
+//
+if (inc_index_x)
+//
+case (index_x)
+//
+//                     s9     s8     s7     s6     s5     s4     s3     s2     s1
+//                     ||     ||     ||     ||     ||     ||     ||     ||     ||
+4'd00:  z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd00};
+4'd01:  z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd01};
+4'd02:  z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd02};
+4'd03:  z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd03};
+4'd04:  z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd04};
+4'd05:  z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd05};
+4'd06:  z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd06};
+4'd07:  z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd07};
+4'd08:  z_addr <= {3'd02, 3'd03, 3'd04, 3'd06, 3'd07, 3'd00, 3'd00, 3'd00, 3'dxx};
+4'd09:  z_addr <= {3'd03, 3'd04, 3'd06, 3'd03, 3'd00, 3'd01, 3'd01, 3'd01, 3'dxx};
+4'd10:  z_addr <= {3'd04, 3'd05, 3'd05, 3'd07, 3'd01, 3'd02, 3'd02, 3'd02, 3'dxx};
+4'd11:  z_addr <= {3'd05, 3'd06, 3'd07, 3'd00, 3'd02, 3'd03, 3'd07, 3'd03, 3'dxx};
+4'd12:  z_addr <= {3'd06, 3'd07, 3'd00, 3'd01, 3'd06, 3'd04, 3'd03, 3'd04, 3'dxx};
+4'd13:  z_addr <= {3'd07, 3'd00, 3'd01, 3'd02, 3'd03, 3'd05, 3'd04, 3'd05, 3'dxx};
+4'd14:  z_addr <= {3'd00, 3'd01, 3'd02, 3'd04, 3'd04, 3'd06, 3'd05, 3'd06, 3'dxx};
+4'd15:  z_addr <= {3'd01, 3'd02, 3'd03, 3'd05, 3'd05, 3'd07, 3'd06, 3'd07, 3'dxx};
+//
+default:    z_addr <= {9*WORD_COUNTER_WIDTH{1'bX}};
+//
+endcase
+
+always @(posedge clk)
+//
+case (index_x)
+//
+//                     9     8     7     6     5     4     3     2     1
+//                     |     |     |     |     |     |     |     |     |
+4'd00:  z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd01:  z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd02:  z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd03:  z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd04:  z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd05:  z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd06:  z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd07:  z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd08:  z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd09:  z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd10:  z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd11:  z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd12:  z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd13:  z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd14:  z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd15:  z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+//
+default:    z_wren <= {9{1'b0}};
+//
+endcase
+
+always @(posedge clk)
+//
+if (inc_index_x)
+//
+case (index_x)
+//
+//                     9     8     7     6     5     4     3     2     1
+//                     |     |     |     |     |     |     |     |     |
+4'd00:  z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd01:  z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd02:  z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd03:  z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd04:  z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd05:  z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd06:  z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd07:  z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd08:  z_mask <= {1'b1, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+4'd09:  z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+4'd10:  z_mask <= {1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+4'd11:  z_mask <= {1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0};
+4'd12:  z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+4'd13:  z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+4'd14:  z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd15:  z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+//
+default:    z_mask <= {9{1'bX}};
+//
+endcase
+
+always @(posedge clk)
+//
+if (inc_index_x)
+//
+case (index_x)
+//
+//                     9     8     7     6     5     4     3     2     1
+//                     |     |     |     |     |     |     |     |     |
+4'd00:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd01:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd02:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd03:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd04:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd05:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd06:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd07:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd08:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd09:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd10:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd11:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd12:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd13:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd14:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd15:  z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+//
+default:    z_save <= {9{1'bX}};
+//
+endcase
+
+
+//
+// Intermediate Numbers
+//
+reg [WORD_COUNTER_WIDTH-1:0]    reduce_z_addr[1:9];
+wire [                32-1:0]   reduce_z_dout[1:9];
+
+reg [31: 0]                 x_din_dly;
+always @(posedge clk)
+//
+x_din_dly <= x_din;
+
+
+genvar              z;
+generate for (z=1; z<=9; z=z+1)
+//
+begin : gen_z_bram
+//
+bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+bram_c_inst
+(
+.clk        (clk),
+
+.a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]),
+.a_wr       (z_wren[z-1] & store_word_z),
+.a_in       (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)),
+.a_out  (),
+
+.b_addr (reduce_z_addr[z]),
+.b_out  (reduce_z_dout[z])
+);
+//
+end
+//
+endgenerate
+
+
+
+
+wire    [                32-1:0]    bram_sum0_wr_din;
+wire [WORD_COUNTER_WIDTH-1:0]   bram_sum0_wr_addr;
+wire                bram_sum0_wr_wren;
+
+wire [                32-1:0]   bram_sum1_wr_din;
+wire [WORD_COUNTER_WIDTH-1:0]   bram_sum1_wr_addr;
+wire                bram_sum1_wr_wren;
+
+wire [                32-1:0]   bram_diff_wr_din;
+wire [WORD_COUNTER_WIDTH-1:0]   bram_diff_wr_addr;
+wire                bram_diff_wr_wren;
+
+wire [                32-1:0]   bram_sum0_rd_dout;
+reg [WORD_COUNTER_WIDTH-1:0]    bram_sum0_rd_addr;
+
+wire [                32-1:0]   bram_sum1_rd_dout;
+reg [WORD_COUNTER_WIDTH-1:0]    bram_sum1_rd_addr;
+
+wire [                32-1:0]   bram_diff_rd_dout;
+reg [WORD_COUNTER_WIDTH-1:0]    bram_diff_rd_addr;
+
+
+bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+bram_sum0_inst
+(
+.clk        (clk),
+
+.a_addr (bram_sum0_wr_addr),
+.a_wr       (bram_sum0_wr_wren),
+.a_in       (bram_sum0_wr_din),
+.a_out  (),
+
+.b_addr (bram_sum0_rd_addr),
+.b_out  (bram_sum0_rd_dout)
+);
+
+bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+bram_sum1_inst
+(
+.clk        (clk),
+
+.a_addr (bram_sum1_wr_addr),
+.a_wr       (bram_sum1_wr_wren),
+.a_in       (bram_sum1_wr_din),
+.a_out  (),
+
+.b_addr (bram_sum1_rd_addr),
+.b_out  (bram_sum1_rd_dout)
+);
+
+bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+bram_diff_inst
+(
+.clk        (clk),
+
+.a_addr (bram_diff_wr_addr),
+.a_wr       (bram_diff_wr_wren),
+.a_in       (bram_diff_wr_din),
+.a_out  (),
+
+.b_addr (bram_diff_rd_addr),
+.b_out  (bram_diff_rd_dout)
+);
+
+
+wire [WORD_COUNTER_WIDTH-1:0]   adder0_ab_addr;
+wire [WORD_COUNTER_WIDTH-1:0]   adder1_ab_addr;
+wire [WORD_COUNTER_WIDTH-1:0]   subtractor_ab_addr;
+
+reg [                32-1:0]    adder0_a_din;
+reg [                32-1:0]    adder0_b_din;
+
+reg [                32-1:0]    adder1_a_din;
+reg [                32-1:0]    adder1_b_din;
+
+reg [                32-1:0]    subtractor_a_din;
+reg [                32-1:0]    subtractor_b_din;
+
+// n_addr - only 1 output, because all modules are in sync
+
+modular_adder #
+(
+.OPERAND_NUM_WORDS  (OPERAND_NUM_WORDS),
+.WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
+)
+adder_inst0
+(
+.clk            (clk),
+.rst_n      (rst_n),
+
+.ena            (reduce_start),
+.rdy            (reduce_adder0_done),
+
+.ab_addr        (adder0_ab_addr),
+.n_addr     (),
+.s_addr     (bram_sum0_wr_addr),
+.s_wren     (bram_sum0_wr_wren),
+
+.a_din      (adder0_a_din),
+.b_din      (adder0_b_din),
+.n_din      (n_din),
+.s_dout     (bram_sum0_wr_din)
+);
+
+modular_adder #
+(
+.OPERAND_NUM_WORDS  (OPERAND_NUM_WORDS),
+.WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
+)
+adder_inst1
+(
+.clk            (clk),
+.rst_n      (rst_n),
+
+.ena            (reduce_start),
+.rdy            (reduce_adder1_done),
+
+.ab_addr        (adder1_ab_addr),
+.n_addr     (),
+.s_addr     (bram_sum1_wr_addr),
+.s_wren     (bram_sum1_wr_wren),
+
+.a_din      (adder1_a_din),
+.b_din      (adder1_b_din),
+.n_din      (n_din),
+.s_dout     (bram_sum1_wr_din)
+);
+
+modular_subtractor #
+(
+.OPERAND_NUM_WORDS  (OPERAND_NUM_WORDS),
+.WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
+)
+subtractor_inst
+(
+.clk            (clk),
+.rst_n      (rst_n),
+
+.ena            (reduce_start),
+.rdy            (reduce_subtractor_done),
+
+.ab_addr        (subtractor_ab_addr),
+.n_addr     (n_addr),
+.d_addr     (bram_diff_wr_addr),
+.d_wren     (bram_diff_wr_wren),
+
+.a_din      (subtractor_a_din),
+.b_din      (subtractor_b_din),
+.n_din      (n_din),
+.d_dout     (bram_diff_wr_din)
+);
+
+
+//
+// Address (Operand) Selector
+// Combinational routing of the adder0/adder1/subtractor word addresses, keyed on the stage register.
+always @(*)
+// Every arm assigns all twelve targets (reduce_z_addr[1..9] and three read addresses); unused ones get X.
+case (fsm_shreg_reduce_stage_stop)
+// 5'b10000: adder0 -> z[2], adder1 -> z[3], subtractor -> z[6]; the sum0/sum1/diff read addresses are X.
+5'b10000: begin
+reduce_z_addr[1]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[2]    = adder0_ab_addr;
+reduce_z_addr[3]    = adder1_ab_addr;
+reduce_z_addr[4]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[5]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[6]    = subtractor_ab_addr;
+reduce_z_addr[7]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[8]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[9]    = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum0_rd_addr   = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum1_rd_addr   = {WORD_COUNTER_WIDTH{1'bX}};
+bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+end
+// 5'b01000: adder0 -> z[1], adder1 -> z[4], subtractor -> z[7]; sum0/sum1/diff are read at the adder0/adder1/subtractor address.
+5'b01000: begin
+reduce_z_addr[1]    = adder0_ab_addr;
+reduce_z_addr[2]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[3]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[4]    = adder1_ab_addr;
+reduce_z_addr[5]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[6]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[7]    = subtractor_ab_addr;
+reduce_z_addr[8]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[9]    = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum0_rd_addr   = adder0_ab_addr;
+bram_sum1_rd_addr   = adder1_ab_addr;
+bram_diff_rd_addr = subtractor_ab_addr;
+end
+// 5'b00100: adder0 -> z[5], subtractor -> z[8]; adder1 addresses no z word; read addresses as in 5'b01000.
+5'b00100: begin
+reduce_z_addr[1]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[2]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[3]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[4]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[5]    = adder0_ab_addr;
+reduce_z_addr[6]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[7]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[8]    = subtractor_ab_addr;
+reduce_z_addr[9]    = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum0_rd_addr   = adder0_ab_addr;
+bram_sum1_rd_addr   = adder1_ab_addr;
+bram_diff_rd_addr = subtractor_ab_addr;
+end
+// 5'b00010: subtractor -> z[9]; sum0 and sum1 are both read at the adder0 address, diff at the subtractor address.
+5'b00010: begin
+reduce_z_addr[1]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[2]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[3]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[4]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[5]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[6]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[7]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[8]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[9]    = subtractor_ab_addr;
+bram_sum0_rd_addr   = adder0_ab_addr;
+bram_sum1_rd_addr   = adder0_ab_addr;
+bram_diff_rd_addr = subtractor_ab_addr;
+end
+// 5'b00001: no z word is addressed; sum0 and diff are both read at the adder0 address, the sum1 read address is X.
+5'b00001: begin
+reduce_z_addr[1]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[2]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[3]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[4]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[5]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[6]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[7]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[8]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[9]    = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum0_rd_addr   = adder0_ab_addr;
+bram_sum1_rd_addr   = {WORD_COUNTER_WIDTH{1'bX}};
+bram_diff_rd_addr = adder0_ab_addr;
+end
+// Any other value of the stage register: all twelve addresses are X.
+default: begin
+reduce_z_addr[1]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[2]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[3]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[4]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[5]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[6]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[7]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[8]    = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[9]    = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum0_rd_addr   = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum1_rd_addr   = {WORD_COUNTER_WIDTH{1'bX}};
+bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+end
+//
+endcase
+
+
+//
+// adder 0
+// Combinational operand mux for adder0_a_din / adder0_b_din, keyed on fsm_shreg_reduce_stage_stop.
+always @(*) begin
+// a operand: z[2] for 5'b10000, the sum0 read data for the other four listed stages.
+case (fsm_shreg_reduce_stage_stop)
+5'b10000:   adder0_a_din = reduce_z_dout[2];
+5'b01000:   adder0_a_din = bram_sum0_rd_dout;
+5'b00100:   adder0_a_din = bram_sum0_rd_dout;
+5'b00010:   adder0_a_din = bram_sum0_rd_dout;
+5'b00001:   adder0_a_din = bram_sum0_rd_dout;
+default:        adder0_a_din = {32{1'bX}};
+endcase
+// b operand: z[2] (same word as a), z[1], z[5], then the sum1 and diff read data, in the order listed.
+case (fsm_shreg_reduce_stage_stop)
+5'b10000:   adder0_b_din = reduce_z_dout[2];
+5'b01000:   adder0_b_din = reduce_z_dout[1];
+5'b00100:   adder0_b_din = reduce_z_dout[5];
+5'b00010:   adder0_b_din = bram_sum1_rd_dout;
+5'b00001:   adder0_b_din = bram_diff_rd_dout;
+default:        adder0_b_din = {32{1'bX}};
+endcase
+// Any stage value not listed above leaves both operands at 32-bit X.
+end
+
+//
+// adder 1
+// Combinational operand mux for adder1_a_din / adder1_b_din, keyed on fsm_shreg_reduce_stage_stop.
+always @(*) begin
+// a operand: z[3] for 5'b10000, the sum1 read data for 5'b01000 and 5'b00100, X for every other value.
+case (fsm_shreg_reduce_stage_stop)
+5'b10000:   adder1_a_din = reduce_z_dout[3];
+5'b01000:   adder1_a_din = bram_sum1_rd_dout;
+5'b00100:   adder1_a_din = bram_sum1_rd_dout;
+5'b00010:   adder1_a_din = {32{1'bX}};
+5'b00001:   adder1_a_din = {32{1'bX}};
+default:        adder1_a_din = {32{1'bX}};
+endcase
+// b operand: z[3] (same word as a) for 5'b10000, z[4] for 5'b01000, an all-zero word for 5'b00100, X otherwise.
+case (fsm_shreg_reduce_stage_stop)
+5'b10000:   adder1_b_din = reduce_z_dout[3];
+5'b01000:   adder1_b_din = reduce_z_dout[4];
+5'b00100:   adder1_b_din = {32{1'b0}};
+5'b00010:   adder1_b_din = {32{1'bX}};
+5'b00001:   adder1_b_din = {32{1'bX}};
+default:        adder1_b_din = {32{1'bX}};
+endcase
+// For 5'b00010 and 5'b00001 both operands are X, the same as the default arm.
+end
+
+
+//
+// subtractor
+// Combinational operand mux for subtractor_a_din / subtractor_b_din, keyed on fsm_shreg_reduce_stage_stop.
+always @(*) begin
+// a operand: an all-zero word for 5'b10000, the diff read data for the next three listed stages, X otherwise.
+case (fsm_shreg_reduce_stage_stop)
+5'b10000:   subtractor_a_din = {32{1'b0}};
+5'b01000:   subtractor_a_din = bram_diff_rd_dout;
+5'b00100:   subtractor_a_din = bram_diff_rd_dout;
+5'b00010:   subtractor_a_din = bram_diff_rd_dout;
+5'b00001:   subtractor_a_din = {32{1'bX}};
+default:        subtractor_a_din = {32{1'bX}};
+endcase
+// b operand: z[6], z[7], z[8], z[9] for the first four listed stages, X for 5'b00001 and the default arm.
+case (fsm_shreg_reduce_stage_stop)
+5'b10000:   subtractor_b_din = reduce_z_dout[6];
+5'b01000:   subtractor_b_din = reduce_z_dout[7];
+5'b00100:   subtractor_b_din = reduce_z_dout[8];
+5'b00010:   subtractor_b_din = reduce_z_dout[9];
+5'b00001:   subtractor_b_din = {32{1'bX}};
+default:        subtractor_b_din = {32{1'bX}};
+endcase
+// For 5'b00001 both operands are X, the same as the default arm.
+end
+
+
+// NOTE(review): a block-comment terminator follows these assigns and no opener is visible nearby, so this code looks disabled -- confirm.
+// Address Mapping
+// p_addr and p_dout mirror the bram_sum0 write address/data; p_wren is that port's write enable gated by store_p.
+assign p_addr   = bram_sum0_wr_addr;
+assign p_wren   = bram_sum0_wr_wren & store_p;
+assign p_dout   = bram_sum0_wr_din;
+*/
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------



More information about the Commits mailing list