[Cryptech-Commits] [user/shatov/modexpng] 04/21: This commit modifies the REGULAR_ADD_UNEVEN micro-operation to use DSP slices for addition instead of fabric logic. This opcode is only necessary when in CRT mode and is executed once per entire exponentiation to recombine the two "easier" exponentiations. This was the final change necessary to get rid of using fabric math in the general worker module.

git at cryptech.is git at cryptech.is
Mon Jan 20 21:18:05 UTC 2020


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexpng.

commit ab061afd20523bdb0342613f4eb343daee6571c6
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Thu Jan 16 21:47:01 2020 +0300

    This commit modifies the REGULAR_ADD_UNEVEN micro-operation to use DSP slices
    for addition instead of fabric logic. This opcode is only necessary when in
    CRT mode and is executed once per entire exponentiation to recombine the two
    "easier" exponentiations. This was the final change necessary to get rid of
    using fabric math in the general worker module.
---
 rtl/modexpng_general_worker.v | 310 ++++++++++++------------------------------
 1 file changed, 88 insertions(+), 222 deletions(-)

diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index 6652f14..6618b5f 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -30,7 +30,7 @@
 //
 //======================================================================
 
-module modexpng_general_worker
+module modexpng_general_worker_new
 (
     clk, rst_n,
     ena, rdy,
@@ -387,7 +387,8 @@ module modexpng_general_worker
             case (opcode)
                 //
                 UOP_OPCODE_PROPAGATE_CARRIES,
-                UOP_OPCODE_MODULAR_SUBTRACT_X:
+                UOP_OPCODE_MODULAR_SUBTRACT_X,
+                UOP_OPCODE_REGULAR_ADD_UNEVEN:
                     //
                     case (wrk_fsm_state)
                         WRK_FSM_STATE_BUSY2,
@@ -395,8 +396,7 @@ module modexpng_general_worker
                         WRK_FSM_STATE_LATENCY_POST4: enable_narrow_wr_en;
                     endcase
                 //                
-                UOP_OPCODE_MERGE_LH,
-                UOP_OPCODE_REGULAR_ADD_UNEVEN:
+                UOP_OPCODE_MERGE_LH:
                     //
                     case (wrk_fsm_state)
                         WRK_FSM_STATE_BUSY1,
@@ -754,7 +754,8 @@ module modexpng_general_worker
         case (opcode)
             //
             UOP_OPCODE_PROPAGATE_CARRIES,
-            UOP_OPCODE_MODULAR_SUBTRACT_X:
+            UOP_OPCODE_MODULAR_SUBTRACT_X,
+            UOP_OPCODE_REGULAR_ADD_UNEVEN:
                 //
                 case (wrk_fsm_state)
                     WRK_FSM_STATE_BUSY2,
@@ -762,8 +763,7 @@ module modexpng_general_worker
                     WRK_FSM_STATE_LATENCY_POST4: update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_addr_x_dly[4], rd_narrow_addr_y_dly[4]);
                 endcase
             //
-            UOP_OPCODE_MERGE_LH,
-            UOP_OPCODE_REGULAR_ADD_UNEVEN:
+            UOP_OPCODE_MERGE_LH:
                 //
                 case (wrk_fsm_state)
                     WRK_FSM_STATE_BUSY1,
@@ -802,6 +802,22 @@ module modexpng_general_worker
         endcase
         //
     end
+    
+    
+    //
+    // UOP_OPCODE_REGULAR_ADD_UNEVEN
+    //
+    reg regular_add_uneven_flag;
+    
+    always @(posedge clk)
+        //
+        case (opcode)
+            UOP_OPCODE_REGULAR_ADD_UNEVEN:
+                case (wrk_fsm_state)
+                    WRK_FSM_STATE_LATENCY_PRE4:                                regular_add_uneven_flag <= 1'b0;
+                    WRK_FSM_STATE_BUSY2: if (rd_wide_addr_is_last_half_dly[2]) regular_add_uneven_flag <= 1'b1;
+                endcase
+        endcase
 
 
     //
@@ -853,7 +869,8 @@ module modexpng_general_worker
             //
             UOP_OPCODE_PROPAGATE_CARRIES,
             UOP_OPCODE_MODULAR_SUBTRACT_X,
-            UOP_OPCODE_MODULAR_SUBTRACT_Y: {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2};
+            UOP_OPCODE_MODULAR_SUBTRACT_Y,
+            UOP_OPCODE_REGULAR_ADD_UNEVEN: {dsp_ce_x, dsp_ce_y} <= {rd_narrow_ena_x_dly2, rd_narrow_ena_y_dly2};
             default:                       {dsp_ce_x, dsp_ce_y} <= {1'b0, 1'b0};
             //
         endcase
@@ -929,6 +946,30 @@ module modexpng_general_worker
                 //
             end
             //
+            UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
+                //
+                if (rd_narrow_ena_x_dly2) begin
+                    if (rd_narrow_addr_x_dly[1] == OP_ADDR_ZERO) dsp_op_mode_x      <= DSP48E1_OPMODE_Z0_YC_XAB;
+                    else begin
+                        if (!regular_add_uneven_flag)            dsp_op_mode_x      <= DSP48E1_OPMODE_ZP17_YC_XAB;
+                        else                                     dsp_op_mode_x      <= DSP48E1_OPMODE_ZP17_YC_X0;
+                    end                 
+                                                                 dsp_alu_mode_x     <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
+                                                                 dsp_carry_in_sel_x <= DSP48E1_CARRYINSEL_CARRYIN;
+                end
+                //
+                if (rd_narrow_ena_y_dly2) begin
+                    if (rd_narrow_addr_y_dly[1] == OP_ADDR_ZERO) dsp_op_mode_y      <= DSP48E1_OPMODE_Z0_YC_XAB;
+                    else begin
+                        if (!regular_add_uneven_flag)            dsp_op_mode_y      <= DSP48E1_OPMODE_ZP17_YC_XAB;
+                        else                                     dsp_op_mode_y      <= DSP48E1_OPMODE_ZP17_YC_X0;
+                    end                 
+                                                                 dsp_alu_mode_y     <= DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN;
+                                                                 dsp_carry_in_sel_y <= DSP48E1_CARRYINSEL_CARRYIN;
+                end
+                //
+            end
+            //            
         endcase
         //
     end
@@ -988,14 +1029,32 @@ module modexpng_general_worker
                     dsp_x_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
                     dsp_x_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]};
                     dsp_y_x_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
-                    dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_y_din_x[WORD_W-1:0]};
+                    dsp_y_x_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_x[WORD_W-1:0]};
                 end
                 //
                 if (rd_narrow_ena_y_dly2) begin
                     dsp_x_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
                     dsp_x_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]};
                     dsp_y_y_y <= {{(DSP48E1_C_W-WORD_W){1'b1}}, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
-                    dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_y_din_y[WORD_W-1:0]};
+                    dsp_y_y_x <= {{(DSP48E1_C_W-WORD_W){1'b0}}, wrk_rd_wide_x_din_y[WORD_W-1:0]};
+                end
+                //
+            end
+            //
+            UOP_OPCODE_REGULAR_ADD_UNEVEN: begin
+                //
+                if (rd_narrow_ena_x_dly2) begin
+                    dsp_x_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
+                    dsp_x_x_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_x_din_x  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_x_din_x  [WORD_W-1:0]};
+                    dsp_y_x_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_x[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
+                    dsp_y_x_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_y_din_x  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_y_din_x  [WORD_W-1:0]};
+                end
+                //
+                if (rd_narrow_ena_y_dly2) begin
+                    dsp_x_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_x_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
+                    dsp_x_y_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_x_din_y  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_x_din_y  [WORD_W-1:0]};
+                    dsp_y_y_y <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_narrow_y_din_y[WORD_EXT_W-1:WORD_W], 1'b1, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
+                    dsp_y_y_x <= {{(DSP48E1_C_W-(WORD_EXT_W+1)){1'b0}}, wrk_rd_wide_y_din_y  [WORD_EXT_W-1:WORD_W], 1'b0, wrk_rd_wide_y_din_y  [WORD_W-1:0]};
                 end
                 //
             end
@@ -1022,7 +1081,7 @@ module modexpng_general_worker
         .carry_in_sel   (dsp_carry_in_sel_x),
         .casc_p_in      (),
         .casc_p_out     (),
-        .carryout       (dsp_carry_out_x)
+        .carry_out      (dsp_carry_out_x)
     );
     
     `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_x
@@ -1039,7 +1098,7 @@ module modexpng_general_worker
         .carry_in_sel   (dsp_carry_in_sel_x),
         .casc_p_in      (),
         .casc_p_out     (),
-        .carryout       ()
+        .carry_out      ()
     );
     
     `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_x_y
@@ -1056,7 +1115,7 @@ module modexpng_general_worker
         .carry_in_sel   (dsp_carry_in_sel_y),
         .casc_p_in      (),
         .casc_p_out     (),
-        .carryout       (dsp_carry_out_y)
+        .carry_out      (dsp_carry_out_y)
     );
     
     `MODEXPNG_DSP_SLICE_ADDSUB dst_inst_y_y
@@ -1073,140 +1132,43 @@ module modexpng_general_worker
         .carry_in_sel   (dsp_carry_in_sel_y),
         .casc_p_in      (),
         .casc_p_out     (),
-        .carryout       ()
+        .carry_out      ()
     );
     
     
     //
     // UOP_OPCODE_MODULAR_SUBTRACT_X
     //    
-    reg modular_subtract_x_brw_flag;
-    reg modular_subtract_y_brw_flag;
 
     //
     // IMPORTANT: DSP48E1 turns out to have a very non-obvious feature: when doing _subtraction_,
     //            the CARRYOUT[3] is _NOT_ equivalent to the borrow flag! See "CARRYOUT/CARRYCASCOUT"
     //            section of Appendix A on pp. 55-56 of UG479 for more details.
     //
-    always @(posedge clk)
-        //
-        case (opcode)
-            UOP_OPCODE_MODULAR_SUBTRACT_X:
-                case (wrk_fsm_state)
-                    WRK_FSM_STATE_LATENCY_POST4:
-                    //{modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {1'bX, 1'bZ};
-                    {modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {~dsp_carry_out_x, ~dsp_carry_out_y};
-                endcase
-            endcase
-
-    
-    //reg modular_subtract_x_brw_r;
-    //reg modular_subtract_y_brw_r;
-    
-    //reg modular_subtract_x_cry_r;
-    //reg modular_subtract_y_cry_r;
-
-    //wire [WORD_W:0] modular_subtract_x_w_brw = rd_narrow_x_din_x_dly1[WORD_W:0] - rd_narrow_y_din_x_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_x_brw_r};
-    //wire [WORD_W:0] modular_subtract_y_w_brw = rd_narrow_x_din_y_dly1[WORD_W:0] - rd_narrow_y_din_y_dly1[WORD_W:0] - {{WORD_W{1'b0}}, modular_subtract_y_brw_r};
-
-    //wire [WORD_W:0] modular_subtract_x_w_cry = rd_narrow_x_din_x_dly1[WORD_W:0] + rd_wide_x_din_x_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_x_cry_r};
-    //wire [WORD_W:0] modular_subtract_y_w_cry = rd_narrow_x_din_y_dly1[WORD_W:0] + rd_wide_x_din_y_dly1[WORD_W:0] + {{WORD_W{1'b0}}, modular_subtract_y_cry_r};
-
-    //reg [WORD_W:0] modular_subtract_x_w_brw_r;
-    //reg [WORD_W:0] modular_subtract_y_w_brw_r;
-
-    //reg [WORD_W:0] modular_subtract_x_w_cry_r;
-    //reg [WORD_W:0] modular_subtract_y_w_cry_r;
-    
-    //wire modular_subtract_x_w_brw_msb = modular_subtract_x_w_brw_r[WORD_W];
-    //wire modular_subtract_y_w_brw_msb = modular_subtract_y_w_brw_r[WORD_W];
-
-    //wire modular_subtract_x_w_cry_msb = modular_subtract_x_w_cry_r[WORD_W];
-    //wire modular_subtract_y_w_cry_msb = modular_subtract_y_w_cry_r[WORD_W];
-    
-    //wire [WORD_W -1:0] modular_subtract_x_w_brw_lsb = modular_subtract_x_w_brw_r[WORD_W -1:0];
-    //wire [WORD_W -1:0] modular_subtract_y_w_brw_lsb = modular_subtract_y_w_brw_r[WORD_W -1:0];
-
-    //wire [WORD_W -1:0] modular_subtract_x_w_cry_lsb = modular_subtract_x_w_cry_r[WORD_W -1:0];
-    //wire [WORD_W -1:0] modular_subtract_y_w_cry_lsb = modular_subtract_y_w_cry_r[WORD_W -1:0];
 
-    //wire [WORD_EXT_W -1:0] modular_subtract_x_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_brw_lsb};
-    //wire [WORD_EXT_W -1:0] modular_subtract_y_w_brw_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_brw_lsb};
+    reg modular_subtract_x_brw_flag;
+    reg modular_subtract_y_brw_flag;
 
-    //wire [WORD_EXT_W -1:0] modular_subtract_x_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_w_cry_lsb};
-    //wire [WORD_EXT_W -1:0] modular_subtract_y_w_cry_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_w_cry_lsb};
-    
     reg  [WORD_EXT_W -1:0] modular_subtract_x_mux;
     reg  [WORD_EXT_W -1:0] modular_subtract_y_mux;
     
     wire [WORD_EXT_W -1:0] modular_subtract_x_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_x_mux[WORD_W-1:0]};
     wire [WORD_EXT_W -1:0] modular_subtract_y_mux_reduced = {{CARRY_W{1'b0}}, modular_subtract_y_mux[WORD_W-1:0]};
-    
-    //task _modular_subtract_update_brw;
-        //input x_brw, y_brw;
-        //{modular_subtract_x_brw_r, modular_subtract_y_brw_r} <= {x_brw, y_brw};
-    //endtask
-    
-    //task _modular_subtract_update_cry;
-        //input x_cry, y_cry;
-        //{modular_subtract_x_cry_r, modular_subtract_y_cry_r} <= {x_cry, y_cry};
-    //endtask
-    
-    //task modular_subtract_clear_brw; _modular_subtract_update_brw(                        1'b0,                         1'b0); endtask
-    //task modular_subtract_store_brw; _modular_subtract_update_brw(modular_subtract_x_w_brw_msb, modular_subtract_y_w_brw_msb); endtask
-
-    //task modular_subtract_clear_cry; _modular_subtract_update_cry(                        1'b0,                         1'b0); endtask
-    //task modular_subtract_store_cry; _modular_subtract_update_cry(modular_subtract_x_w_cry_msb, modular_subtract_y_w_cry_msb); endtask
-    
-    //task _modular_subtract_update_diff_w_brw;
-        //input [WORD_W:0] x_diff_w_brw, y_diff_w_brw;
-        //{modular_subtract_x_w_brw_r, modular_subtract_y_w_brw_r} <= {x_diff_w_brw, y_diff_w_brw};
-    //endtask
-
-    //task _modular_subtract_update_sum_w_cry;
-        //input [WORD_W:0] x_sum_w_cry, y_sum_w_cry;
-        //{modular_subtract_x_w_cry_r, modular_subtract_y_w_cry_r} <= {x_sum_w_cry, y_sum_w_cry};
-    //endtask
-    
-    //task modular_subtract_store_diff_w_brw; _modular_subtract_update_diff_w_brw(modular_subtract_x_w_brw, modular_subtract_y_w_brw); endtask
 
-    //task modular_subtract_store_sum_w_cry; _modular_subtract_update_sum_w_cry(modular_subtract_x_w_cry, modular_subtract_y_w_cry); endtask
-    
     always @(posedge clk)
         //
         case (opcode)
-            //
-            //UOP_OPCODE_MODULAR_SUBTRACT_X:
-                //
-                //case (wrk_fsm_state)
-                    //
-                    //WRK_FSM_STATE_LATENCY_PRE3:  modular_subtract_clear_brw;
-                    //WRK_FSM_STATE_BUSY1,
-                    //WRK_FSM_STATE_LATENCY_POST1,
-                    //WRK_FSM_STATE_LATENCY_POST3: modular_subtract_store_brw; // we need the very last borrow here too!
-                    //
-                    //WRK_FSM_STATE_LATENCY_PRE4,
-                    //WRK_FSM_STATE_BUSY2,
-                    //WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_diff_w_brw;
-                    //
-                //endcase
-            //
-            //UOP_OPCODE_MODULAR_SUBTRACT_Y:
-                //
-                //case (wrk_fsm_state)
-                    //
-                    //WRK_FSM_STATE_LATENCY_PRE3:  modular_subtract_clear_cry;
-                    //WRK_FSM_STATE_BUSY1,
-                    //WRK_FSM_STATE_LATENCY_POST1: modular_subtract_store_cry;
-                    //
-                    //WRK_FSM_STATE_LATENCY_PRE4,
-                    //WRK_FSM_STATE_BUSY2,
-                    //WRK_FSM_STATE_LATENCY_POST2: modular_subtract_store_sum_w_cry;
-                    //
-                //endcase
-            //
+            UOP_OPCODE_MODULAR_SUBTRACT_X:
+                case (wrk_fsm_state)
+                    WRK_FSM_STATE_LATENCY_POST4:
+                        {modular_subtract_x_brw_flag, modular_subtract_y_brw_flag} <= {~dsp_carry_out_x, ~dsp_carry_out_y};
+                endcase
+            endcase    
+           
+    always @(posedge clk)
+        //
+        case (opcode)
             UOP_OPCODE_MODULAR_SUBTRACT_Z:
-                //
                 case (wrk_fsm_state)
                     // 
                     WRK_FSM_STATE_LATENCY_PRE4,
@@ -1215,96 +1177,10 @@ module modexpng_general_worker
                         //
                         begin modular_subtract_x_mux <= !modular_subtract_x_brw_flag ? rd_narrow_x_din_x_dly1 : rd_wide_x_din_x_dly1;
                               modular_subtract_y_mux <= !modular_subtract_y_brw_flag ? rd_narrow_x_din_y_dly1 : rd_wide_x_din_y_dly1; end
-                    //
-                endcase            
-            //
+                endcase
         endcase
 
 
-    //
-    // UOP_OPCODE_REGULAR_ADD_UNEVEN
-    //
-    reg [CARRY_W -1:0] regular_add_uneven_x_x_cry_r;
-    reg [CARRY_W -1:0] regular_add_uneven_y_x_cry_r;
-    reg [CARRY_W -1:0] regular_add_uneven_x_y_cry_r;
-    reg [CARRY_W -1:0] regular_add_uneven_y_y_cry_r;
-    
-    wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_msb_w_cry = rd_narrow_x_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_x_cry_r};
-    wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_msb_w_cry = rd_narrow_y_din_x_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_x_cry_r};
-    wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_msb_w_cry = rd_narrow_x_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_x_y_cry_r};
-    wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_msb_w_cry = rd_narrow_y_din_y_dly1 + {{WORD_W{1'b0}}, regular_add_uneven_y_y_cry_r};
-    
-    wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_lsb_w_cry = regular_add_uneven_x_x_msb_w_cry + rd_wide_x_din_x_dly1;
-    wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_lsb_w_cry = regular_add_uneven_y_x_msb_w_cry + rd_wide_y_din_x_dly1;
-    wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_lsb_w_cry = regular_add_uneven_x_y_msb_w_cry + rd_wide_x_din_y_dly1;
-    wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_lsb_w_cry = regular_add_uneven_y_y_msb_w_cry + rd_wide_y_din_y_dly1;
-    
-    reg [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_r;
-    reg [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_r;
-    reg [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_r;
-    reg [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_r;
-    
-    wire [CARRY_W -1:0] regular_add_uneven_x_x_w_cry_msb = regular_add_uneven_x_x_w_cry_r[WORD_EXT_W -1:WORD_W];
-    wire [CARRY_W -1:0] regular_add_uneven_y_x_w_cry_msb = regular_add_uneven_y_x_w_cry_r[WORD_EXT_W -1:WORD_W];
-    wire [CARRY_W -1:0] regular_add_uneven_x_y_w_cry_msb = regular_add_uneven_x_y_w_cry_r[WORD_EXT_W -1:WORD_W];
-    wire [CARRY_W -1:0] regular_add_uneven_y_y_w_cry_msb = regular_add_uneven_y_y_w_cry_r[WORD_EXT_W -1:WORD_W];
-    
-    wire [WORD_W -1:0] regular_add_uneven_x_x_w_cry_lsb = regular_add_uneven_x_x_w_cry_r[WORD_W -1:0];
-    wire [WORD_W -1:0] regular_add_uneven_y_x_w_cry_lsb = regular_add_uneven_y_x_w_cry_r[WORD_W -1:0];
-    wire [WORD_W -1:0] regular_add_uneven_x_y_w_cry_lsb = regular_add_uneven_x_y_w_cry_r[WORD_W -1:0];
-    wire [WORD_W -1:0] regular_add_uneven_y_y_w_cry_lsb = regular_add_uneven_y_y_w_cry_r[WORD_W -1:0];
-    
-    wire [WORD_EXT_W -1:0] regular_add_uneven_x_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_x_w_cry_lsb};
-    wire [WORD_EXT_W -1:0] regular_add_uneven_y_x_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_x_w_cry_lsb};
-    wire [WORD_EXT_W -1:0] regular_add_uneven_x_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_x_y_w_cry_lsb};
-    wire [WORD_EXT_W -1:0] regular_add_uneven_y_y_w_cry_reduced = {{CARRY_W{1'b0}}, regular_add_uneven_y_y_w_cry_lsb};
-    
-    reg regular_add_uneven_store_lsb_now;
-    
-    task _regular_add_uneven_update_cry;
-        input [CARRY_W-1:0]     x_x_cry,                      y_x_cry,                      x_y_cry,                      y_y_cry;
-        {  regular_add_uneven_x_x_cry_r, regular_add_uneven_y_x_cry_r, regular_add_uneven_x_y_cry_r, regular_add_uneven_y_y_cry_r} <=
-        {                       x_x_cry,                      y_x_cry,                      x_y_cry,                      y_y_cry};
-    endtask
-    
-    task regular_add_uneven_clear_cry; _regular_add_uneven_update_cry(                      CARRY_ZERO,                       CARRY_ZERO,                       CARRY_ZERO,                       CARRY_ZERO); endtask
-    task regular_add_uneven_store_cry; _regular_add_uneven_update_cry(regular_add_uneven_x_x_w_cry_msb, regular_add_uneven_y_x_w_cry_msb, regular_add_uneven_x_y_w_cry_msb, regular_add_uneven_y_y_w_cry_msb); endtask
-        
-    task _regular_add_uneven_update_sum_w_cry;
-        input [WORD_EXT_W-1:0] x_x_sum_w_cry,                  y_x_sum_w_cry,                  x_y_sum_w_cry,                  y_y_sum_w_cry;
-        {     regular_add_uneven_x_x_w_cry_r, regular_add_uneven_y_x_w_cry_r, regular_add_uneven_x_y_w_cry_r, regular_add_uneven_y_y_w_cry_r} <=
-        {                      x_x_sum_w_cry,                  y_x_sum_w_cry,                  x_y_sum_w_cry,                  y_y_sum_w_cry};
-    endtask
-    
-    task regular_add_uneven_store_sum_lsb_w_cry; _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_lsb_w_cry, regular_add_uneven_y_x_lsb_w_cry, regular_add_uneven_x_y_lsb_w_cry, regular_add_uneven_y_y_lsb_w_cry); endtask
-    
-    task regular_add_uneven_store_sum_msb_w_cry; _regular_add_uneven_update_sum_w_cry(regular_add_uneven_x_x_msb_w_cry, regular_add_uneven_y_x_msb_w_cry, regular_add_uneven_x_y_msb_w_cry, regular_add_uneven_y_y_msb_w_cry); endtask
-
-    always @(posedge clk)
-        //
-           case (wrk_fsm_state)
-                //
-                WRK_FSM_STATE_LATENCY_PRE3: regular_add_uneven_store_lsb_now <= 1'b1;
-                WRK_FSM_STATE_BUSY1: if (rd_wide_addr_is_last_half_dly[3]) regular_add_uneven_store_lsb_now <= 1'b0;         
-                //
-            endcase
-            
-    always @(posedge clk)
-        //
-           case (wrk_fsm_state)
-                //
-                WRK_FSM_STATE_LATENCY_PRE3:  regular_add_uneven_clear_cry;
-                WRK_FSM_STATE_BUSY1,
-                WRK_FSM_STATE_LATENCY_POST1: regular_add_uneven_store_cry;
-                //
-                WRK_FSM_STATE_LATENCY_PRE4:                                        regular_add_uneven_store_sum_lsb_w_cry;
-                WRK_FSM_STATE_BUSY2:         if (regular_add_uneven_store_lsb_now) regular_add_uneven_store_sum_lsb_w_cry;
-                                             else                                  regular_add_uneven_store_sum_msb_w_cry;
-                WRK_FSM_STATE_LATENCY_POST2:                                       regular_add_uneven_store_sum_msb_w_cry;
-                //
-            endcase
-
-
     //
     // FSM Process
     //
@@ -1424,7 +1300,8 @@ module modexpng_general_worker
         case (opcode)
             //
             UOP_OPCODE_PROPAGATE_CARRIES,
-            UOP_OPCODE_MODULAR_SUBTRACT_X:
+            UOP_OPCODE_MODULAR_SUBTRACT_X,
+            UOP_OPCODE_REGULAR_ADD_UNEVEN:
                 //
                 case (wrk_fsm_state)
                     //
@@ -1524,17 +1401,6 @@ module modexpng_general_worker
                         // 
                 endcase
             //
-            UOP_OPCODE_REGULAR_ADD_UNEVEN:
-                //
-                case (wrk_fsm_state)
-                    //
-                    WRK_FSM_STATE_BUSY1,
-                    WRK_FSM_STATE_LATENCY_POST1,
-                    WRK_FSM_STATE_LATENCY_POST3:
-                        //
-                        update_narrow_dout(regular_add_uneven_x_x_w_cry_reduced, regular_add_uneven_y_x_w_cry_reduced, regular_add_uneven_x_y_w_cry_reduced, regular_add_uneven_y_y_w_cry_reduced);
-                    // 
-                endcase
         endcase
         //
     end



More information about the Commits mailing list