[Cryptech-Commits] [core/math/modexpng] 90/92: More elegant way to do partial product recombination: * take advantage of the cascade paths between DSP slices * decrease latency of operation

git at cryptech.is git at cryptech.is
Sat Mar 14 18:20:09 UTC 2020


This is an automated email from the git hooks/post-receive script.

paul at psgd.org pushed a commit to branch master
in repository core/math/modexpng.

commit 2791a17430c5b0c3291be3824aa8cdf07f305e92
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Tue Feb 11 15:54:22 2020 +0300

    More elegant way to do partial product recombination:
     * take advantage of the cascade paths between DSP slices
     * decrease latency of operation
---
 rtl/modexpng_recombinator_block.v |  83 +++++------------
 rtl/modexpng_recombinator_cell.v  | 185 +++++++++++++++++++-------------------
 2 files changed, 115 insertions(+), 153 deletions(-)

diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
index e3cb50f..62d84e1 100644
--- a/rtl/modexpng_recombinator_block.v
+++ b/rtl/modexpng_recombinator_block.v
@@ -496,10 +496,8 @@ module modexpng_recombinator_block
     
     reg         rcmb_xy_lsb_ce = 1'b0;
     reg         rcmb_xy_lsb_ce_aux = 1'b0;
-    reg         rcmb_xy_lsb_ce_aux_dly = 1'b0;
     reg  [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000;
-    wire        rcmb_xy_lsb_ce_combined     = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0];
-    wire        rcmb_xy_lsb_ce_combined_ext = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0] | rcmb_xy_lsb_ce_aux_dly;
+    wire        rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0];
     reg         rcmb_xy_lsb_clr;
     wire        rcmb_xy_lsb_cry = !xy_valid_latch_lsb && rcmb_xy_lsb_ce_purge[1];
 
@@ -512,9 +510,7 @@ module modexpng_recombinator_block
 
     reg         rcmb_xy_msb_ce = 1'b0;
     reg  [ 1:0] rcmb_xy_msb_ce_purge = 2'b00;
-    reg         rcmb_xy_msb_ce_purge0_rectangle_dly = 1'b0;
-    wire        rcmb_xy_msb_ce_combined     = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0];
-    wire        rcmb_xy_msb_ce_combined_ext = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0] | rcmb_xy_msb_ce_purge0_rectangle_dly;
+    wire        rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0];
     reg         rcmb_xy_msb_clr;
     
     reg  [ MAC_W -1:0] rcmb_x_msb_din;
@@ -522,46 +518,44 @@ module modexpng_recombinator_block
     wire [WORD_W -1:0] rcmb_x_msb_dout;
     wire [WORD_W -1:0] rcmb_y_msb_dout;
     
-    always @(posedge clk) rcmb_xy_lsb_ce_aux_dly <= rcmb_xy_lsb_ce_aux;
-    always @(posedge clk) rcmb_xy_msb_ce_purge0_rectangle_dly <= rcmb_mode == RCMB_MODE_RECTANGLE ? rcmb_xy_msb_ce_purge[0] : 1'b0;
-    
-    modexpng_recombinator_cell recomb_x_lsb_new
+    modexpng_recombinator_cell recomb_x_lsb
     (
         .clk   (clk),
-        .ce    (rcmb_xy_lsb_ce_combined_ext),
+        .ce    (rcmb_xy_lsb_ce_combined),
         .clr   (rcmb_xy_lsb_clr),
         .din   (rcmb_x_lsb_din),
         .dout  (rcmb_x_lsb_dout),
         .doutw (rcmb_x_lsb_doutw)
     );
-    modexpng_recombinator_cell recomb_y_lsb_new
+    modexpng_recombinator_cell recomb_y_lsb
     (
         .clk   (clk),
-        .ce    (rcmb_xy_lsb_ce_combined_ext),
+        .ce    (rcmb_xy_lsb_ce_combined),
         .clr   (rcmb_xy_lsb_clr),
         .din   (rcmb_y_lsb_din),
         .dout  (rcmb_y_lsb_dout),
         .doutw (rcmb_y_lsb_doutw)
     );
 
-    modexpng_recombinator_cell recomb_x_msb_new
+    modexpng_recombinator_cell recomb_x_msb
     (
         .clk   (clk),
-        .ce    (rcmb_xy_msb_ce_combined_ext),
+        .ce    (rcmb_xy_msb_ce_combined),
         .clr   (rcmb_xy_msb_clr),
         .din   (rcmb_x_msb_din),
         .dout  (rcmb_x_msb_dout),
         .doutw ()
     );
-    modexpng_recombinator_cell recomb_y_msb_new
+    modexpng_recombinator_cell recomb_y_msb
     (
         .clk   (clk),
-        .ce    (rcmb_xy_msb_ce_combined_ext),
+        .ce    (rcmb_xy_msb_ce_combined),
         .clr   (rcmb_xy_msb_clr),
         .din   (rcmb_y_msb_din),
         .dout  (rcmb_y_msb_dout),
         .doutw ()
     );
+    
 
     always @(posedge clk) begin
         //
@@ -596,8 +590,8 @@ module modexpng_recombinator_block
             rcmb_x_lsb_din <= dsp_x_p_latch[NUM_MULTS_AUX-1];
             rcmb_y_lsb_din <= dsp_y_p_latch[NUM_MULTS_AUX-1];
         end else if (rcmb_xy_lsb_cry) begin
-            rcmb_x_lsb_din <= rcmb_x_msb_carry_1;
-            rcmb_y_lsb_din <= rcmb_y_msb_carry_1;
+            rcmb_x_lsb_din <= {{(MAC_W-WORD_W){1'b0}}, rcmb_x_msb_carry_1};
+            rcmb_y_lsb_din <= {{(MAC_W-WORD_W){1'b0}}, rcmb_y_msb_carry_1};
         end else begin
             rcmb_x_lsb_din <= {MAC_W{1'b0}};
             rcmb_y_lsb_din <= {MAC_W{1'b0}};
@@ -759,52 +753,17 @@ module modexpng_recombinator_block
         end
 
 
-    reg rcmb_xy_lsb_ce_combined_dly1 = 1'b0;
-    reg rcmb_xy_msb_ce_combined_dly1 = 1'b0;
-    
-    reg rcmb_xy_lsb_ce_combined_dly2 = 1'b0;
-    reg rcmb_xy_msb_ce_combined_dly2 = 1'b0;
-
-    reg rcmb_xy_lsb_ce_combined_dly3 = 1'b0;
-    reg rcmb_xy_msb_ce_combined_dly3 = 1'b0;
-
-    reg rcmb_xy_lsb_ce_combined_dly4 = 1'b0;
-    reg rcmb_xy_msb_ce_combined_dly4 = 1'b0;
-
-    reg rcmb_xy_lsb_ce_combined_dly5 = 1'b0;
-    reg rcmb_xy_msb_ce_combined_dly5 = 1'b0;
-
-    reg rcmb_xy_lsb_ce_combined_dly6 = 1'b0;
-    reg rcmb_xy_msb_ce_combined_dly6 = 1'b0;
+    reg [4:1] rcmb_xy_lsb_ce_combined_dly;
+    reg [4:1] rcmb_xy_msb_ce_combined_dly;
 
     always @(posedge clk or negedge rst_n)
         //
         if (!rst_n) begin
-            rcmb_xy_lsb_ce_combined_dly1 <= 1'b0;
-            rcmb_xy_msb_ce_combined_dly1 <= 1'b0;
-            rcmb_xy_lsb_ce_combined_dly2 <= 1'b0;
-            rcmb_xy_msb_ce_combined_dly2 <= 1'b0;        
-            rcmb_xy_lsb_ce_combined_dly3 <= 1'b0;
-            rcmb_xy_msb_ce_combined_dly3 <= 1'b0;        
-            rcmb_xy_lsb_ce_combined_dly4 <= 1'b0;
-            rcmb_xy_msb_ce_combined_dly4 <= 1'b0;        
-            rcmb_xy_lsb_ce_combined_dly5 <= 1'b0;
-            rcmb_xy_msb_ce_combined_dly5 <= 1'b0;        
-            rcmb_xy_lsb_ce_combined_dly6 <= 1'b0;
-            rcmb_xy_msb_ce_combined_dly6 <= 1'b0;        
+            rcmb_xy_lsb_ce_combined_dly <= 4'b0000;
+            rcmb_xy_msb_ce_combined_dly <= 4'b0000;        
         end else begin
-            rcmb_xy_lsb_ce_combined_dly1 <= rcmb_xy_lsb_ce_combined;
-            rcmb_xy_msb_ce_combined_dly1 <= rcmb_xy_msb_ce_combined;
-            rcmb_xy_lsb_ce_combined_dly2 <= rcmb_xy_lsb_ce_combined_dly1;
-            rcmb_xy_msb_ce_combined_dly2 <= rcmb_xy_msb_ce_combined_dly1;
-            rcmb_xy_lsb_ce_combined_dly3 <= rcmb_xy_lsb_ce_combined_dly2;
-            rcmb_xy_msb_ce_combined_dly3 <= rcmb_xy_msb_ce_combined_dly2;
-            rcmb_xy_lsb_ce_combined_dly4 <= rcmb_xy_lsb_ce_combined_dly3;
-            rcmb_xy_msb_ce_combined_dly4 <= rcmb_xy_msb_ce_combined_dly3;
-            rcmb_xy_lsb_ce_combined_dly5 <= rcmb_xy_lsb_ce_combined_dly4;
-            rcmb_xy_msb_ce_combined_dly5 <= rcmb_xy_msb_ce_combined_dly4;
-            rcmb_xy_lsb_ce_combined_dly6 <= rcmb_xy_lsb_ce_combined_dly5;
-            rcmb_xy_msb_ce_combined_dly6 <= rcmb_xy_msb_ce_combined_dly5;
+            rcmb_xy_lsb_ce_combined_dly <= {rcmb_xy_lsb_ce_combined_dly[3:1], rcmb_xy_lsb_ce_combined};
+            rcmb_xy_msb_ce_combined_dly <= {rcmb_xy_msb_ce_combined_dly[3:1], rcmb_xy_msb_ce_combined};
         end        
 
     reg rcmb_xy_lsb_valid = 1'b0;
@@ -816,8 +775,8 @@ module modexpng_recombinator_block
             rcmb_xy_lsb_valid <= 1'b0;
             rcmb_xy_msb_valid <= 1'b0;        
         end else begin
-            rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly6;
-            rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly6;
+            rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly[4];
+            rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly[4];
         end        
 
 
diff --git a/rtl/modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v
index 0c9ab00..28d17f2 100644
--- a/rtl/modexpng_recombinator_cell.v
+++ b/rtl/modexpng_recombinator_cell.v
@@ -58,39 +58,51 @@ module modexpng_recombinator_cell
     
     
     //
-    // din <=> {z[13:0], y[15:0], x[15:0]}
+    // Pipelined Clock Enable, Clear, Data Input 
     //
-    wire [WORD_W -1:0] din_z = {2'b00, din[3 * WORD_W -3 : 2 * WORD_W]}; // [47:46][45:32]
-    wire [WORD_W -1:0] din_y = {       din[2 * WORD_W -1 :     WORD_W]}; // [31:16]
-    wire [WORD_W -1:0] din_x = {       din[    WORD_W -1 :          0]}; // [15: 0]
+    reg             ce_pipe = 1'b0;
+    reg             clr_pipe;
+    reg [MAC_W-1:0] din_pipe;
+    
+    always @(posedge clk)
+        {ce_pipe, clr_pipe, din_pipe} <= {ce, clr, din};
 
     
     //
-    // Delayed Clock Enables
+    // din_pipe <=> {z[13:0], y[15:0], x[15:0]}
     //
-    reg ce_dly1 = 1'b0, ce_dly2 = 1'b0, ce_dly3 = 1'b0, ce_dly4 = 1'b0, ce_dly5 = 1'b0, ce_dly6 = 1'b0; 
-    always @(posedge clk) {ce_dly1, ce_dly2, ce_dly3, ce_dly4, ce_dly5, ce_dly6} <= {ce, ce_dly1, ce_dly2, ce_dly3, ce_dly4, ce_dly5};
+    wire [WORD_W -1:0] din_z = {2'b00, din_pipe[3 * WORD_W -3 : 2 * WORD_W]}; // (47:46)[45:32]
+    wire [WORD_W -1:0] din_y = {       din_pipe[2 * WORD_W -1 :     WORD_W]}; //        [31:16]
+    wire [WORD_W -1:0] din_x = {       din_pipe[    WORD_W -1 :          0]}; //        [15: 0]
 
-
-    //
-    // Delayed Clear
-    //
-    reg clr_dly1, clr_dly2, clr_dly3, clr_dly4;
-    always @(posedge clk) {clr_dly1, clr_dly2, clr_dly3, clr_dly4} <= {clr, clr_dly1, clr_dly2, clr_dly3};
     
-
     //
     // Phase Flip-Flop
     //
-    reg phase_ff, phase_ff_dly1, phase_ff_dly2, phase_ff_dly3, phase_ff_dly4, phase_ff_dly5;
+    reg phase_ff = 1'b0;
+    
     always @(posedge clk)
-        if      (ce)  phase_ff <= ~phase_ff;
-        else if (clr) phase_ff <= 1'b0;
+        phase_ff <= ce_pipe ? ~phase_ff : 1'b0;
 
-    always @(posedge clk)
-        {phase_ff_dly1, phase_ff_dly2, phase_ff_dly3, phase_ff_dly4, phase_ff_dly5} <= {phase_ff, phase_ff_dly1, phase_ff_dly2, phase_ff_dly3, phase_ff_dly4};
-        
+
+    //
+    // Delayed Clock Enables
+    //
+    wire master_ce_0;
+    reg  master_ce_1 = 1'b0;
+    wire slave_ce_1;
+    reg  slave_ce_2 = 1'b0;
+    reg  dout_ce_3 = 1'b0;
+    reg  dout_ce_4 = 1'b0;
+    
+    assign master_ce_0 = ce_pipe;
+    assign slave_ce_1  = master_ce_1; 
     
+    always @(posedge clk) master_ce_1 <= ce_pipe & ~phase_ff;
+    always @(posedge clk) slave_ce_2  <= slave_ce_1;
+    always @(posedge clk) {dout_ce_3, dout_ce_4} <= {slave_ce_2, dout_ce_3}; 
+
+
     //
     // Shift Registers
     //
@@ -101,11 +113,11 @@ module modexpng_recombinator_cell
 
     always @(posedge clk) begin
         //
-        if      (ce)  {din_x_dly1, din_y_dly1, din_z_dly1} <= {din_x,     din_y,     din_z};
-        else if (clr) {din_x_dly1, din_y_dly1, din_z_dly1} <= {WORD_ZERO, WORD_ZERO, WORD_ZERO};
+        if (ce_pipe)       {din_x_dly1, din_y_dly1, din_z_dly1} <= {din_x,     din_y,     din_z};
+        else if (clr_pipe) {din_x_dly1, din_y_dly1, din_z_dly1} <= {WORD_ZERO, WORD_ZERO, WORD_ZERO};
         //
-        if      (ce)  {din_z_dly2} <= {din_z_dly1};
-        else if (clr) {din_z_dly2} <= {WORD_ZERO};
+        if (ce_pipe)       {din_z_dly2} <= {din_z_dly1};
+        else if (clr_pipe) {din_z_dly2} <= {WORD_ZERO};
         //
     end
     
@@ -113,82 +125,65 @@ module modexpng_recombinator_cell
     //
     // DSP Input Registers
     //
-    reg [2 * WORD_W-1:0] master_ab_reg;
-    reg [2 * WORD_W-1:0] master_c_reg;
-
-    reg [    WORD_W+1:0] slave_ab_reg;
-    reg [    WORD_W+1:0] slave_ab_next_reg;
-
+    wire [2 * WORD_W-1:0] master_ab;
+    wire [2 * WORD_W-1:0] master_c;
+    wire [2 * WORD_W-1:0] slave_ab;
+    reg                   slave_c;
+    
+    assign master_ab = {din_y,      din_y_dly1};
+    assign master_c  = {din_z_dly1, din_z_dly2};
+    assign slave_ab  = {din_x,      din_x_dly1};
+        
 
     //
     // DSP Cascade Bus
     //
     wire [DSP48E1_P_W-1:0] master_slave_p_int;
-
+    
 
     //
     // DSP Output Buses
     //
-    wire [DSP48E1_P_W-1:0] master_p_int;
+    wire                   master_carry_out_int;
     wire [DSP48E1_P_W-1:0] slave_p_int;
-
-    
-    //
-    // DSP Input Mapping
-    //
-    wire [DSP48E1_C_W-1:0] master_ab_int = {{(DSP48E1_C_W - 2 * WORD_W){1'b0}}, master_ab_reg};
-    wire [DSP48E1_C_W-1:0] master_c_int  = {{(DSP48E1_C_W - 2 * WORD_W){1'b0}}, master_c_reg};
-
-    wire [DSP48E1_C_W-1:0] slave_ab_int = {{(DSP48E1_C_W - (WORD_W+3)){1'b0}}, slave_ab_reg[WORD_W+1:WORD_W], 1'b1, slave_ab_reg[WORD_W-1:0]};
-    wire [DSP48E1_C_W-1:0] slave_c_int  = {DSP48E1_C_W{1'b0}};
+    wire                   slave_carry_out_int;
 
 
     //
-    // Master DSP Input Logic
+    // Custom Carry Cascade
     //
     always @(posedge clk)
         //
-        if (ce) begin
-            master_ab_reg <= !phase_ff ? {din_y,      din_y_dly1} : {din_x,    din_x_dly1}; 
-            master_c_reg  <= !phase_ff ? {din_z_dly1, din_z_dly2} : {WORD_DNC, WORD_DNC};
-        end else begin
-            master_ab_reg <= {WORD_DNC, WORD_DNC};
-            master_c_reg  <= {WORD_DNC, WORD_DNC};
-        end
-    
+        if (slave_ce_2) slave_c <= master_carry_out_int;
+
     
     //
-    // Slave DSP Input Logic
+    // DSP Input Mapping
     //
-    always @(posedge clk) begin
-        //
-        slave_ab_reg      <= {(WORD_W+2){1'bX}};
-        slave_ab_next_reg <= {(WORD_W+2){1'bX}};
-        //
-        if (ce_dly3 && phase_ff_dly3) slave_ab_next_reg <= {master_p_int[2*WORD_W+1:WORD_W]};
-        //
-        if (ce_dly3 && phase_ff_dly3) slave_ab_reg <= {2'b00, master_p_int[WORD_W-1:0]};
-        if (ce_dly4 && phase_ff_dly4) slave_ab_reg <= slave_ab_next_reg; 
-        //
-    end
-    
+    wire [DSP48E1_C_W-1:0] master_ab_int = {master_ab, {(DSP48E1_C_W - 2*WORD_W){1'b0}}};
+    wire [DSP48E1_C_W-1:0] master_c_int  = {master_c,  {(DSP48E1_C_W - 2*WORD_W){1'b0}}};
+
+    wire [DSP48E1_C_W-1:0] slave_ab_int = {slave_ab, {(DSP48E1_C_W - 2*WORD_W){1'b0}}};
+    wire [DSP48E1_C_W-1:0] slave_c_int  = {{(2*WORD_W-1){1'b0}}, slave_c, {(DSP48E1_C_W-2*WORD_W){1'b1}}};
+
     
     //
-    // OPMODE Logic
+    // DSP Modes
     //
-    reg [DSP48E1_OPMODE_W-1:0] master_opmode;
-    reg [DSP48E1_OPMODE_W-1:0] slave_opmode;
+    wire [DSP48E1_OPMODE_W    -1:0] master_opmode;
+    wire [DSP48E1_CARRYINSEL_W-1:0] master_carryinsel;
+    
+    reg  [DSP48E1_OPMODE_W    -1:0] slave_opmode;
+    reg  [DSP48E1_CARRYINSEL_W-1:0] slave_carryinsel;
+    
+    assign master_opmode     = DSP48E1_OPMODE_Z0_YC_XAB;
+    assign master_carryinsel = DSP48E1_CARRYINSEL_CARRYIN;
     
     always @(posedge clk) begin
-        //
-        if (ce) master_opmode <= !phase_ff ? DSP48E1_OPMODE_Z0_YC_XAB : DSP48E1_OPMODE_ZP_Y0_XAB;
-        else    master_opmode <= DSP48E1_OPMODE_DNC;
-        //
-        if (ce_dly4) slave_opmode <= clr_dly4 ? DSP48E1_OPMODE_Z0_Y0_XAB : DSP48E1_OPMODE_ZP17_Y0_XAB;
-        else         slave_opmode <= DSP48E1_OPMODE_DNC;
-        //
+        slave_opmode     <= clr_pipe ? DSP48E1_OPMODE_ZPCIN_Y0_XAB : DSP48E1_OPMODE_ZPCIN_YC_XAB;
+        slave_carryinsel <= clr_pipe ? DSP48E1_CARRYINSEL_CARRYIN  : DSP48E1_CARRYINSEL_CARRYCASCOUT;
     end
-        
+
     
     //
     // DSP Slice Instances
@@ -196,42 +191,50 @@ module modexpng_recombinator_cell
     `MODEXPNG_DSP_SLICE_ADDSUB dsp_master_inst
     (
         .clk            (clk),
-        .ce_abc         (ce_dly1),
-        .ce_p           (ce_dly2),
-        .ce_ctrl        (ce_dly1),
+        .ce_abc         (master_ce_0),
+        .ce_p           (master_ce_1),
+        .ce_ctrl        (master_ce_0),
         .ab             (master_ab_int),
         .c              (master_c_int),
-        .p              (master_p_int),
+        .p              (),
         .op_mode        (master_opmode),
         .alu_mode       (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN),
-        .carry_in_sel   (DSP48E1_CARRYINSEL_CARRYIN),
+        .carry_in_sel   (master_carryinsel),
         .casc_p_in      (),
-        .casc_p_out     (),
-        .carry_out      ()
+        .casc_p_out     (master_slave_p_int),
+        .carry_out      (master_carry_out_int)
     );
     
     `MODEXPNG_DSP_SLICE_ADDSUB dsp_slave_inst
     (
         .clk            (clk),
-        .ce_abc         (ce_dly5),
-        .ce_p           (ce_dly6),
-        .ce_ctrl        (ce_dly5),
+        .ce_abc         (slave_ce_1),
+        .ce_p           (slave_ce_2),
+        .ce_ctrl        (slave_ce_1),
         .ab             (slave_ab_int),
         .c              (slave_c_int),
         .p              (slave_p_int),
         .op_mode        (slave_opmode),
         .alu_mode       (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN),
-        .carry_in_sel   (DSP48E1_CARRYINSEL_CARRYIN),
-        .casc_p_in      (),
+        .carry_in_sel   (slave_carryinsel),
+        .casc_p_in      (master_slave_p_int),
         .casc_p_out     (),
-        .carry_out      ()
+        .carry_out      (slave_carry_out_int)
     );
 
 
     //
     // Output Register
-    //    
-    assign dout  = {slave_p_int[WORD_W-1:0]};
-    assign doutw = {slave_p_int[WORD_W+1], dout}; 
+    //
+    reg [WORD_W:0] doutx_reg;
+    
+    assign dout  = doutx_reg[WORD_W-1:0];
+    assign doutw = doutx_reg;
+
+    always @(posedge clk) begin
+        doutx_reg <= {1'bX, WORD_DNC};
+        if (dout_ce_4) doutx_reg <= {slave_carry_out_int, slave_p_int[DSP48E1_P_W - 0*WORD_W -1 -: WORD_W]};
+        if (dout_ce_3) doutx_reg <= {1'b0,                slave_p_int[DSP48E1_P_W - 1*WORD_W -1 -: WORD_W]};
+    end
     
 endmodule



More information about the Commits mailing list