[Cryptech-Commits] [core/math/modexp] 01/03: Adding files from master that does not break the testcases.

git at cryptech.is git at cryptech.is
Tue Jun 30 14:16:14 UTC 2015


This is an automated email from the git hooks/post-receive script.

joachim at secworks.se pushed a commit to branch debug_bigoperands
in repository core/math/modexp.

commit 30403ce8b26755f840b358784504608f39db29b2
Author: Joachim Strömbergson <joachim at secworks.se>
Date:   Tue Jun 30 10:35:15 2015 +0200

    Adding files from master that does not break the testcases.
---
 src/rtl/adder.v    |  64 +++++
 src/rtl/montprod.v | 777 ++++++++++++++++++++++++++++-------------------------
 src/rtl/residue.v  | 402 ++++++++++++++-------------
 src/rtl/shl.v      |  56 ++++
 src/rtl/shr.v      |  56 ++++
 toolruns/Makefile  |   2 +-
 6 files changed, 782 insertions(+), 575 deletions(-)

diff --git a/src/rtl/adder.v b/src/rtl/adder.v
new file mode 100644
index 0000000..fa8ed8c
--- /dev/null
+++ b/src/rtl/adder.v
@@ -0,0 +1,64 @@
+//======================================================================
+//
+// adder.v
+// -------
+// Adder with separate carry in and carry out. Used in the montprod
+// and residue modules of the modexp core.
+//
+//
+// Author: Peter Magnusson, Joachim Strömbergson
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module adder #(parameter OPW = 32)
+              (
+               input      [(OPW - 1) : 0]  a,
+               input      [(OPW - 1) : 0]  b,
+               input                       carry_in,
+
+               output wire [(OPW - 1) : 0] sum,
+               output wire                 carry_out
+              );
+
+  // Operands are zero-extended by one bit so that the carry out of
+  // the most significant bit is retained as bit OPW of the total.
+  wire [OPW : 0] a_ext   = {1'b0, a};
+  wire [OPW : 0] b_ext   = {1'b0, b};
+  wire [OPW : 0] cin_ext = {{OPW{1'b0}}, carry_in};
+  wire [OPW : 0] total   = a_ext + b_ext + cin_ext;
+
+  // Low OPW bits form the sum; the extra top bit is the carry out.
+  assign {carry_out, sum} = total;
+
+endmodule // adder
+
+//======================================================================
+// EOF adder.v
+//======================================================================
diff --git a/src/rtl/montprod.v b/src/rtl/montprod.v
index 6b525f7..3a310d7 100644
--- a/src/rtl/montprod.v
+++ b/src/rtl/montprod.v
@@ -4,6 +4,9 @@
 // ---------
 // Montgomery product calculator for the modular exponentiantion core.
 //
+// parameter OPW is operand word width in bits.
+// parameter ADW is address width in bits.
+//
 //
 // Author: Peter Magnusson, Joachim Strombergson
 // Copyright (c) 2015, NORDUnet A/S All rights reserved.
@@ -36,135 +39,158 @@
 //
 //======================================================================
 
-module montprod(
-                input wire           clk,
-                input wire           reset_n,
+module montprod #(parameter OPW = 32, parameter ADW = 8)
+               (
+                input wire                  clk,
+                input wire                  reset_n,
 
-                input wire           calculate,
-                output wire          ready,
+                input wire                  calculate,
+                output wire                 ready,
 
-                input   [7 : 0]      length,
+                input wire [(ADW - 1) : 0]  length,
 
-                output wire [7 : 0]  opa_addr,
-                input wire [31 : 0]  opa_data,
+                output wire [(ADW - 1) : 0] opa_addr,
+                input wire [(OPW - 1) : 0]  opa_data,
 
-                output wire [7 : 0]  opb_addr,
-                input wire [31 : 0]  opb_data,
+                output wire [(ADW - 1) : 0] opb_addr,
+                input wire [(OPW - 1) : 0]  opb_data,
 
-                output wire [7 : 0]  opm_addr,
-                input wire [31 : 0]  opm_data,
+                output wire [(ADW - 1) : 0] opm_addr,
+                input wire [(OPW - 1) : 0]  opm_data,
 
-                output wire [7 : 0]  result_addr,
-                output wire [31 : 0] result_data,
-                output wire          result_we
+                output wire [(ADW - 1) : 0] result_addr,
+                output wire [(OPW - 1) : 0] result_data,
+                output wire                 result_we
                );
 
 
   //----------------------------------------------------------------
   // Internal constant and parameter definitions.
   //----------------------------------------------------------------
-  localparam DEBUG = 0;
-
-  localparam CTRL_IDLE           = 4'h0;
-  localparam CTRL_INIT_S         = 4'h1;
-  localparam CTRL_LOOP_INIT      = 4'h2;
-  localparam CTRL_LOOP_ITER      = 4'h3;
-  localparam CTRL_LOOP_BQ        = 4'h4;
-  localparam CTRL_L_CALC_SM      = 4'h5;
-  localparam CTRL_L_STALLPIPE_SM = 4'h6;
-  localparam CTRL_L_CALC_SA      = 4'h7;
-  localparam CTRL_L_STALLPIPE_SA = 4'h8;
-  localparam CTRL_L_CALC_SDIV2   = 4'h9;
-  localparam CTRL_L_STALLPIPE_D2 = 4'hA;
-  localparam CTRL_L_STALLPIPE_ES = 4'hB;
-  localparam CTRL_EMIT_S         = 4'hC;
-  localparam CTRL_DONE           = 4'hD;
-
-  localparam SMUX_0            = 2'h0;
-  localparam SMUX_ADD_SM       = 2'h1;
-  localparam SMUX_ADD_SA       = 2'h2;
-  localparam SMUX_SHR          = 2'h3;
+  localparam CTRL_IDLE            = 4'h0;
+  localparam CTRL_LOOP_ITER       = 4'h1;
+  localparam CTRL_LOOP_BQ         = 4'h2;
+  localparam CTRL_CALC_ADD        = 4'h3;
+  localparam CTRL_STALLPIPE_ADD   = 4'h4;
+  localparam CTRL_CALC_SDIV2      = 4'h5;
+  localparam CTRL_STALLPIPE_SDIV2 = 4'h6;
+  localparam CTRL_L_STALLPIPE_ES  = 4'h7;
+  localparam CTRL_EMIT_S          = 4'h8;
+
+  localparam SMUX_ZERO            = 2'h0;
+  localparam SMUX_ADD             = 2'h1;
+  localparam SMUX_SHR             = 2'h2;
+
 
   //----------------------------------------------------------------
   // Registers including update variables and write enable.
   //----------------------------------------------------------------
+  reg                  ready_reg;
+  reg                  ready_new;
+  reg                  ready_we;
 
-  reg [07 : 0] opa_addr_reg;
-  reg [07 : 0] opb_addr_reg;
-  reg [07 : 0] opm_addr_reg;
+  reg [3 : 0]          montprod_ctrl_reg;
+  reg [3 : 0]          montprod_ctrl_new;
+  reg                  montprod_ctrl_we;
 
-  reg [07 : 0] result_addr_reg;
-  reg [31 : 0] result_data_reg;
+  reg  [1 : 0]         s_mux_new;
+  reg  [1 : 0]         s_mux_reg;
 
-  reg          ready_reg;
-  reg          ready_new;
-  reg          ready_we;
+  reg                  s_mem_we_reg;
+  reg                  s_mem_we_new;
 
-  reg [3 : 0]  montprod_ctrl_reg;
-  reg [3 : 0]  montprod_ctrl_new;
-  reg          montprod_ctrl_we;
+  reg [(ADW - 1) : 0]  s_mem_read_addr_reg;
 
-  reg  [1 : 0] s_mux_new;
-  reg  [1 : 0] s_mux_reg;
+  reg                  q_new;
+  reg                  q_reg;
+  reg                  b_new;
+  reg                  b_reg;
+  reg                  bq_we;
 
-  reg [31 : 0] s_mem_new;
-  reg          s_mem_we;
-  reg          s_mem_we_new;
-  reg [07 : 0] s_mem_addr;
-  reg [07 : 0] s_mem_wr_addr;
-  wire [31 : 0] s_mem_read_data;
+  reg [12 : 0]         loop_ctr_reg;
+  reg [12 : 0]         loop_ctr_new;
+  reg                  loop_ctr_we;
+  reg                  loop_ctr_set;
+  reg                  loop_ctr_dec;
 
-  reg          q; //q = (s - b * A) & 1
-  reg          q_reg;
-  reg          b; //b: bit of B
-  reg          b_reg;
+  reg [(13 - ADW - 1) : 0] b_bit_index_reg;
+  reg [(13 - ADW - 1) : 0] b_bit_index_new;
+  reg                      b_bit_index_we;
 
-  reg [12 : 0] loop_counter;
-  reg [12 : 0] loop_counter_new;
-  reg [12 : 0] loop_counter_dec;
-  reg [07 : 0] B_word_index; //loop counter as a word index
-  reg [04 : 0] B_bit_index; //loop counter as a bit index
-  reg [04 : 0] B_bit_index_reg; //loop counter as a bit index
+  reg [(ADW - 1) : 0]  word_index_reg;
+  reg [(ADW - 1) : 0]  word_index_new;
+  reg                  word_index_we;
+  reg [(ADW - 1) : 0]  word_index_prev_reg;
+  reg                  reset_word_index_lsw;
+  reg                  reset_word_index_msw;
+  reg                  inc_word_index;
+  reg                  dec_word_index;
 
-  reg [07 : 0] word_index; //register of what word is being read
-  reg [07 : 0] word_index_new; //calculation of what word to be read
-  reg [07 : 0] word_index_prev; //register of what word was read previously (result address to emit)
-  reg [07 : 0] length_m1;
+  reg                  add_carry_in_sa_reg;
+  reg                  add_carry_in_sa_new;
+  reg                  add_carry_in_sm_reg;
+  reg                  add_carry_in_sm_new;
 
-  reg          add_carry_in_sa;
-  reg          add_carry_new_sa;
-  reg          add_carry_in_sm;
-  reg          add_carry_new_sm;
+  reg                  shr_carry_in_reg;
+  reg                  shr_carry_in_new;
 
-  reg          shr_carry_in;
-  reg          shr_carry_new;
+  reg                  first_iteration_reg;
+  reg                  first_iteration_new;
+  reg                  first_iteration_we;
 
-  reg          reset_word_index_LSW;
-  reg          reset_word_index_MSW;
+  reg                  test_reg;
+  reg                  test_new;
+
+  reg [(OPW - 2) : 0]  shr_data_out_reg;
+  reg                  shr_carry_out_reg;
+  reg                  shr_carry_out_new;
 
 
   //----------------------------------------------------------------
   // Wires.
   //----------------------------------------------------------------
-  reg           tmp_result_we;
-  wire [31 : 0] add_result_sa;
-  wire          add_carry_out_sa;
-  wire [31 : 0] add_result_sm;
-  wire          add_carry_out_sm;
+  wire [(OPW - 1) : 0] add_result_sa;
+  wire                 add_carry_out_sa;
+  wire [(OPW - 1) : 0] add_result_sm;
+  wire                 add_carry_out_sm;
+
+  reg [(ADW - 1) : 0]  b_word_index; //loop counter as a word index
+
+  reg  [(OPW - 1) : 0] shr_data_in;
+  wire                 shr_carry_out;
+  wire [(OPW - 1) : 0] shr_data_out;
+
+  reg [(ADW - 1) : 0]  tmp_opa_addr;
+  reg                  tmp_result_we;
+
+  reg [(ADW - 1) : 0]  s_mem_read_addr;
+  wire [(OPW - 1) : 0] s_mem_read_data;
+  reg [(ADW - 1) : 0]  s_mem_write_addr;
+  reg [(OPW - 1) : 0]  s_mem_write_data;
+  reg [(OPW - 1) : 0]  tmp_s_mem_write_data;
+
+  reg [(OPW - 1) : 0]  sa_adder_data_in;
+  reg [(OPW - 1) : 0]  muxed_s_mem_read_data;
+  reg [(OPW - 1) : 0]  shifted_s_mem_write_data;
+
+  wire [(ADW - 1) : 0] length_m1;
 
-  wire          shr_carry_out;
-  wire [31 : 0] shr_adiv2;
+  // Temporary debug wires.
+  reg [1 : 0] state_trace;
+  reg [1 : 0] mux_trace;
 
 
   //----------------------------------------------------------------
   // Concurrent connectivity for ports etc.
   //----------------------------------------------------------------
-  assign opa_addr    = opa_addr_reg;
-  assign opb_addr    = opb_addr_reg;
-  assign opm_addr    = opm_addr_reg;
+  assign length_m1   = length - 1'b1;
 
-  assign result_addr = result_addr_reg;
-  assign result_data = result_data_reg;
+  assign opa_addr    = tmp_opa_addr;
+  assign opb_addr    = b_word_index;
+  assign opm_addr    = word_index_reg;
+
+  assign result_addr = word_index_prev_reg;
+  assign result_data = s_mem_read_data;
   assign result_we   = tmp_result_we;
 
   assign ready       = ready_reg;
@@ -173,55 +199,38 @@ module montprod(
   //----------------------------------------------------------------
   // Instantions
   //----------------------------------------------------------------
-
-  blockmem1r1w s_mem(
-                     .clk(clk),
-                     .read_addr(s_mem_addr),
-                     .read_data(s_mem_read_data),
-                     .wr(s_mem_we),
-                     .write_addr(s_mem_wr_addr),
-                     .write_data(s_mem_new)
-                    );
-
-
-  adder32 s_adder_sa(
-    .a(s_mem_read_data),
-    .b(opa_data),
-    .carry_in(add_carry_in_sa),
-    .sum(add_result_sa),
-    .carry_out(add_carry_out_sa)
-  );
-
-  adder32 s_adder_sm(
-    .a(s_mem_read_data),
-    .b(opm_data),
-    .carry_in(add_carry_in_sm),
-    .sum(add_result_sm),
-    .carry_out(add_carry_out_sm)
-  );
-
-  shr32 shifter(
-     .a( s_mem_read_data ),
-     .carry_in( shr_carry_in ),
-     .adiv2( shr_adiv2 ),
-     .carry_out( shr_carry_out )
-  );
-
-  always @*
-    begin : s_mux
-      case (s_mux_reg)
-        SMUX_0:
-          s_mem_new = 32'b0;
-        SMUX_ADD_SA:
-          s_mem_new = add_result_sa;
-        SMUX_ADD_SM:
-          s_mem_new = add_result_sm;
-        SMUX_SHR:
-          s_mem_new = shr_adiv2;
-      endcase
-      if (DEBUG)
-        $display("SMUX%x: %x", s_mux_reg, s_mem_new);
-    end
+  blockmem1r1w #(.OPW(OPW), .ADW(ADW)) s_mem(
+                                             .clk(clk),
+                                             .read_addr(s_mem_read_addr),
+                                             .read_data(s_mem_read_data),
+                                             .wr(s_mem_we_reg),
+                                             .write_addr(s_mem_write_addr),
+                                             .write_data(s_mem_write_data)
+                                            );
+
+  adder #(.OPW(OPW)) s_adder_sm(
+                                .a(muxed_s_mem_read_data),
+                                .b(opm_data),
+                                .carry_in(add_carry_in_sm_reg),
+                                .sum(add_result_sm),
+                                .carry_out(add_carry_out_sm)
+                               );
+
+
+  adder #(.OPW(OPW)) s_adder_sa(
+                                .a(sa_adder_data_in),
+                                .b(opa_data),
+                                .carry_in(add_carry_in_sa_reg),
+                                .sum(add_result_sa),
+                                .carry_out(add_carry_out_sa)
+                               );
+
+  shr #(.OPW(OPW)) shifter(
+                           .a(shr_data_in),
+                           .carry_in(shr_carry_in_reg),
+                           .adiv2(shr_data_out),
+                           .carry_out(shr_carry_out)
+                          );
 
 
   //----------------------------------------------------------------
@@ -235,205 +244,246 @@ module montprod(
     begin : reg_update
       if (!reset_n)
         begin
-          ready_reg         <= 1'b0;
-          loop_counter      <= 13'h0;
-          word_index        <= 8'h0;
-          word_index_prev   <= 8'h0;
-          add_carry_in_sa   <= 1'b0;
-          add_carry_in_sm   <= 1'b0;
-          shr_carry_in      <= 1'b0;
-          montprod_ctrl_reg <= CTRL_IDLE;
-          b_reg             <= 1'b0;
-          q_reg             <= 1'b0;
-          s_mux_reg         <= SMUX_0;
-          s_mem_we          <= 1'b0;
-          s_mem_wr_addr     <= 8'h0;
-          B_bit_index_reg   <= 5'h0;
+          test_reg            <= 1'b1;
+          ready_reg           <= 1'b1;
+          loop_ctr_reg        <= 13'h0;
+          word_index_reg      <= {ADW{1'b0}};
+          word_index_prev_reg <= {ADW{1'b0}};
+          add_carry_in_sa_reg <= 1'b0;
+          add_carry_in_sm_reg <= 1'b0;
+          shr_data_out_reg    <= {(OPW - 1){1'b0}};
+          shr_carry_in_reg    <= 1'b0;
+          b_reg               <= 1'b0;
+          q_reg               <= 1'b0;
+          s_mux_reg           <= SMUX_ZERO;
+          s_mem_we_reg        <= 1'b0;
+          s_mem_read_addr_reg <= {ADW{1'b0}};
+          b_bit_index_reg     <= {(13 - ADW){1'b0}};
+          first_iteration_reg <= 1'b0;
+          montprod_ctrl_reg   <= CTRL_IDLE;
         end
       else
         begin
-          if (ready_we)
-            ready_reg <= ready_new;
+          test_reg            <= test_new;
 
-          if (montprod_ctrl_we)
-            begin
-               montprod_ctrl_reg <= montprod_ctrl_new;
-             end
+          s_mem_read_addr_reg <= s_mem_read_addr;
+          s_mem_we_reg        <= s_mem_we_new;
+          s_mux_reg           <= s_mux_new;
 
-          s_mem_wr_addr <= s_mem_addr;
+          word_index_prev_reg <= word_index_reg;
 
-          s_mem_we <= s_mem_we_new;
+          shr_carry_in_reg    <= shr_carry_in_new;
+          add_carry_in_sa_reg <= add_carry_in_sa_new;
+          add_carry_in_sm_reg <= add_carry_in_sm_new;
+          shr_data_out_reg    <= shr_data_out[(OPW - 2) : 0];
 
-          word_index <= word_index_new;
-          word_index_prev <= word_index;
+          if (word_index_we)
+            word_index_reg <= word_index_new;
 
-          loop_counter <= loop_counter_new;
-          shr_carry_in <= shr_carry_new;
-          add_carry_in_sa <= add_carry_new_sa;
-          add_carry_in_sm <= add_carry_new_sm;
+          if (first_iteration_we)
+            first_iteration_reg <= first_iteration_new;
 
-          B_bit_index_reg <= B_bit_index;
-          q_reg <= q;
-          b_reg <= b;
+          if (b_bit_index_we)
+            b_bit_index_reg <= b_bit_index_new;
 
-          s_mux_reg <= s_mux_new;
-      end
-    end // reg_update
+          if (bq_we)
+            begin
+              b_reg <= b_new;
+              q_reg <= q_new;
+            end
 
-  always @*
-   begin : bq_process
-      b = b_reg;
-      q = q_reg;
-      if (montprod_ctrl_reg == CTRL_LOOP_BQ)
-         begin
-           b = opb_data[ B_bit_index_reg ];
-           //opa_addr will point to length-1 to get A LSB.
-           //s_read_addr will point to length-1
-           q = s_mem_read_data[0] ^ (opa_data[0] & b);
-           if (DEBUG)
-             $display("s_mem_read_data: %x opa_data %x b %x q %x B_bit_index_reg %x", s_mem_read_data, opa_data, b, q, B_bit_index_reg);
+          if (ready_we)
+            ready_reg <= ready_new;
+
+          if (loop_ctr_we)
+            loop_ctr_reg <= loop_ctr_new;
+
+          if (montprod_ctrl_we)
+            begin
+               montprod_ctrl_reg <= montprod_ctrl_new;
+             end
         end
-   end
+    end // reg_update
 
 
   //----------------------------------------------------------------
-  // Process for iterating the loop counter and setting related B indexes
+  // s_logic
+  //
+  // Logic to calculate S memory updates including address
+  // and write enable. This is the main montprod datapath.
   //----------------------------------------------------------------
   always @*
-   begin : loop_counter_process
-     loop_counter_new  = loop_counter;
-      length_m1        = length - 1'b1;
-      loop_counter_dec = loop_counter - 1'b1;
-      B_word_index     = loop_counter[12:5];
-      B_bit_index      = B_bit_index_reg;
+    begin : s_logic
+      shr_carry_in_new      = 1'b0;
+      muxed_s_mem_read_data = {OPW{1'b0}};
+      sa_adder_data_in      = {OPW{1'b0}};
+      add_carry_in_sa_new   = 1'b0;
+      add_carry_in_sm_new   = 1'b0;
+      s_mem_read_addr       = word_index_reg;
+      s_mem_write_addr      = s_mem_read_addr_reg;
+      s_mem_write_data      = {OPW{1'b0}};
+      s_mem_we_new          = 1'b0;
+      state_trace           = 0;
+      mux_trace             = 0;
+      tmp_s_mem_write_data  = {OPW{1'b0}};
+      test_new              = 1'b0;
 
       case (montprod_ctrl_reg)
-        CTRL_LOOP_INIT:
-          loop_counter_new = {length, 5'b00000} - 1'b1;
-
         CTRL_LOOP_ITER:
           begin
-            B_word_index     = loop_counter[12:5];
-            B_bit_index      = 5'h1f - loop_counter[4:0];
+            s_mem_read_addr = length_m1;
           end
 
-        CTRL_L_STALLPIPE_D2:
-            loop_counter_new = loop_counter_dec;
-
-        default:
-          loop_counter_new = loop_counter;
-      endcase
-    end
-
-
-  //----------------------------------------------------------------
-  // prodcalc
-  //----------------------------------------------------------------
-  always @*
-    begin : prodcalc
-
-      case (montprod_ctrl_reg)
-        CTRL_LOOP_ITER:
-          //q = (s[length-1] ^ A[length-1]) & 1;
-          opa_addr_reg = length_m1;
-
-        default:
-          opa_addr_reg = word_index;
-       endcase
+        CTRL_CALC_ADD:
+          begin
+            //s = (s + q*M + b*A) >>> 1;, if(b==1) S+= A. Takes (1..length) cycles.
+            s_mem_we_new = b_reg | q_reg | first_iteration_reg;
+            state_trace  = 1;
+            test_new     = 1'b1;
+          end
 
-       opb_addr_reg = B_word_index;
-       opm_addr_reg = word_index;
+        CTRL_CALC_SDIV2:
+          begin
+            //s = (s + q*M + b*A) >>> 1; s>>=1.  Takes (1..length) cycles.
+            s_mem_we_new = 1'b1;
+          end
 
-      case (montprod_ctrl_reg)
-        CTRL_LOOP_ITER:
-          s_mem_addr = length_m1;
         default:
-          s_mem_addr = word_index;
+          begin
+          end
       endcase
 
 
+      case (s_mux_reg)
+        SMUX_ADD:
+          begin
+            mux_trace = 1;
 
+            if (first_iteration_reg)
+              muxed_s_mem_read_data = {OPW{1'b0}};
+            else
+              muxed_s_mem_read_data = s_mem_read_data;
 
-      result_addr_reg  = word_index_prev;
-      result_data_reg  = s_mem_read_data;
 
-      case (montprod_ctrl_reg)
-        CTRL_EMIT_S:
-           tmp_result_we = 1'b1;
-        default:
-           tmp_result_we = 1'b0;
-      endcase
+            if (q_reg)
+                sa_adder_data_in = add_result_sm;
+            else
+              sa_adder_data_in = muxed_s_mem_read_data;
 
 
-      if (reset_word_index_LSW == 1'b1)
-        word_index_new = length_m1;
-      else if (reset_word_index_MSW == 1'b1)
-        word_index_new = 8'h0;
-      else if (montprod_ctrl_reg == CTRL_L_CALC_SDIV2)
-        word_index_new = word_index + 1'b1;
-      else
-        word_index_new = word_index - 1'b1;
-    end // prodcalc
+            if (b_reg)
+              tmp_s_mem_write_data = add_result_sa;
+            else if (q_reg)
+              tmp_s_mem_write_data = add_result_sm;
+            else if (first_iteration_reg)
+              tmp_s_mem_write_data = {OPW{1'b0}};
 
+            s_mem_write_data = tmp_s_mem_write_data;
+            add_carry_in_sa_new = add_carry_out_sa;
+            add_carry_in_sm_new = add_carry_out_sm;
 
-  always @*
-    begin : s_writer_process
-      shr_carry_new    = 1'b0;
-      s_mux_new        = SMUX_0;
 
-      s_mem_we_new  = 1'b0;
-      case (montprod_ctrl_reg)
-        CTRL_INIT_S:
-          begin
-            s_mem_we_new = 1'b1;
-            s_mux_new    = SMUX_0; // write 0
+            // Experimental integration of shift in add.
+            shr_data_in              = tmp_s_mem_write_data;
+            shifted_s_mem_write_data = {shr_carry_out, shr_data_out_reg};
           end
 
-        CTRL_L_CALC_SM:
-          begin
-            //s = (s + q*M + b*A) >>> 1;, if(q==1) S+= M. Takes (1..length) cycles.
-            s_mem_we_new     = q_reg;
-            s_mux_new        = SMUX_ADD_SM;
-          end
-
-        CTRL_L_CALC_SA:
-          begin
-            //s = (s + q*M + b*A) >>> 1;, if(b==1) S+= A. Takes (1..length) cycles.
-            s_mem_we_new     = b_reg;
-            s_mux_new        = SMUX_ADD_SA;
-          end
 
-        CTRL_L_CALC_SDIV2:
+        SMUX_SHR:
           begin
-            //s = (s + q*M + b*A) >>> 1; s>>=1.  Takes (1..length) cycles.
-            s_mux_new     = SMUX_SHR;
-            s_mem_we_new  = 1'b1;
+            shr_data_in      = s_mem_read_data;
+            s_mem_write_data = shr_data_out;
+            shr_carry_in_new = shr_carry_out;
           end
 
         default:
           begin
           end
       endcase
+    end // s_logic
 
-      add_carry_new_sa = 1'b0;
-      add_carry_new_sm = 1'b0;
 
-      case (s_mux_reg)
-        SMUX_ADD_SM:
-          add_carry_new_sm = add_carry_out_sm;
+  //----------------------------------------------------------------
+  // bq
+  //
+  // Extract b and q bits.
+  // b: current bit of B used.
+  // q = (s - b * A) & 1
+  // update the b bit and word indices based on loop counter.
+  //----------------------------------------------------------------
+  always @*
+   begin : bq
+     b_new = opb_data[b_bit_index_reg];
 
-        SMUX_ADD_SA:
-          add_carry_new_sa = add_carry_out_sa;
+     if (first_iteration_reg)
+       q_new = 1'b0 ^ (opa_data[0] & b_new);
+     else
+       q_new = s_mem_read_data[0] ^ (opa_data[0] & b_new);
 
-        SMUX_SHR:
-          shr_carry_new = shr_carry_out;
+     // B_bit_index      = 5'h1f - loop_counter[4:0];
+     b_bit_index_new = ((2**(13 - ADW)) - 1'b1) - loop_ctr_reg[(13 - ADW - 1) : 0];
+     b_word_index    = loop_ctr_reg[12 : (13 - ADW)];
+   end // bq
 
-        default:
-          begin
-          end
-      endcase
 
-    end // prodcalc
+  //----------------------------------------------------------------
+  // word_index
+  //
+  // Logic that implements the word index used to drive
+  // addresses for operands.
+  //----------------------------------------------------------------
+  always @*
+    begin : word_index
+      // Requests are ranked dec > inc > msw reset > lsw reset, so when
+      // several are raised at once the highest-ranked one decides the
+      // next index. With no request the write enable is deasserted.
+      word_index_we = 1'b1;
+
+      if (dec_word_index)
+        begin
+          word_index_new = word_index_reg - 1'b1;
+        end
+      else if (inc_word_index)
+        begin
+          word_index_new = word_index_reg + 1'b1;
+        end
+      else if (reset_word_index_msw)
+        begin
+          word_index_new = {ADW{1'b0}};
+        end
+      else if (reset_word_index_lsw)
+        begin
+          word_index_new = length_m1;
+        end
+      else
+        begin
+          word_index_new = {ADW{1'b0}};
+          word_index_we  = 1'b0;
+        end
+    end // word_index
+
+
+  //----------------------------------------------------------------
+  // loop_ctr
+  // Logic for updating the loop counter.
+  //----------------------------------------------------------------
+  always @*
+   begin : loop_ctr
+     // Decrement outranks set when both are requested; with neither
+     // request the write enable is deasserted and the value is zero.
+     loop_ctr_we = 1'b1;
+
+     if (loop_ctr_dec)
+       loop_ctr_new = loop_ctr_reg - 1'b1;
+     else if (loop_ctr_set)
+       // Start value: length with (13 - ADW) zero bits appended, minus one.
+       loop_ctr_new = {length, {(13 - ADW){1'b0}}} - 1'b1;
+     else
+       begin
+         loop_ctr_new = 13'h0;
+         loop_ctr_we  = 1'b0;
+       end
+   end
 
 
   //----------------------------------------------------------------
@@ -443,146 +493,131 @@ module montprod(
   //----------------------------------------------------------------
   always @*
     begin : montprod_ctrl
-      ready_new         = 1'b0;
-      ready_we          = 1'b0;
-      montprod_ctrl_new = CTRL_IDLE;
-      montprod_ctrl_we  = 1'b0;
+      ready_new            = 1'b0;
+      ready_we             = 1'b0;
+      loop_ctr_set         = 1'b0;
+      loop_ctr_dec         = 1'b0;
+      b_bit_index_we       = 1'b0;
+      bq_we                = 1'b0;
+      s_mux_new            = SMUX_ZERO;
+      dec_word_index       = 1'b0;
+      inc_word_index       = 1'b0;
+      reset_word_index_lsw = 1'b0;
+      reset_word_index_msw = 1'b0;
+      first_iteration_new  = 1'b0;
+      first_iteration_we   = 1'b0;
+      tmp_opa_addr         = word_index_reg;
+      tmp_result_we        = 1'b0;
+      montprod_ctrl_new    = CTRL_IDLE;
+      montprod_ctrl_we     = 1'b0;
 
-      reset_word_index_LSW = 1'b0;
-      reset_word_index_MSW = 1'b0;
 
       case (montprod_ctrl_reg)
         CTRL_IDLE:
           begin
             if (calculate)
               begin
-                ready_new = 1'b0;
-                ready_we  = 1'b1;
-                montprod_ctrl_new = CTRL_INIT_S;
-                montprod_ctrl_we = 1'b1;
-                reset_word_index_LSW = 1'b1;
-              end
-            else
-              begin
-                ready_new = 1'b1;
-                ready_we  = 1'b1;
+                first_iteration_new  = 1'b1;
+                first_iteration_we   = 1'b1;
+                ready_new            = 1'b0;
+                ready_we             = 1'b1;
+                reset_word_index_lsw = 1'b1;
+                loop_ctr_set         = 1'b1;
+                montprod_ctrl_new    = CTRL_LOOP_ITER;
+                montprod_ctrl_we     = 1'b1;
               end
           end
 
-        CTRL_INIT_S:
-          begin
-            if (word_index == 8'h0)
-              begin
-                 montprod_ctrl_new = CTRL_LOOP_INIT;
-                 montprod_ctrl_we = 1'b1;
-              end
-          end
-
-
-        CTRL_LOOP_INIT:
-          begin
-            montprod_ctrl_new = CTRL_LOOP_ITER;
-            montprod_ctrl_we  = 1'b1;
-          end
-
         //calculate q = (s - b * A) & 1;.
         // Also abort loop if done.
         CTRL_LOOP_ITER:
           begin
-            reset_word_index_LSW = 1'b1;
-            montprod_ctrl_new = CTRL_LOOP_BQ;
-            montprod_ctrl_we  = 1'b1;
+            tmp_opa_addr         = length_m1;
+            b_bit_index_we       = 1'b1;
+            montprod_ctrl_new    = CTRL_LOOP_BQ;
+            montprod_ctrl_we     = 1'b1;
           end
 
         CTRL_LOOP_BQ:
           begin
-            reset_word_index_LSW = 1'b1;
-            montprod_ctrl_new = CTRL_L_CALC_SM;
-            montprod_ctrl_we  = 1'b1;
+            reset_word_index_lsw = 1'b1;
+            bq_we                = 1'b1;
+            montprod_ctrl_new    = CTRL_CALC_ADD;
+            montprod_ctrl_we     = 1'b1;
           end
 
-        CTRL_L_CALC_SM:
+        CTRL_CALC_ADD:
           begin
-            if (word_index == 8'h0)
+            s_mux_new      = SMUX_ADD;
+
+            if (word_index_reg == 0)
               begin
-                reset_word_index_LSW  = 1'b1;
-                montprod_ctrl_we  = 1'b1;
-                montprod_ctrl_new = CTRL_L_STALLPIPE_SM;
+                reset_word_index_lsw = 1'b1;
+                montprod_ctrl_new    = CTRL_STALLPIPE_ADD;
+                montprod_ctrl_we     = 1'b1;
               end
-          end
-
-        CTRL_L_STALLPIPE_SM:
-          begin
-            montprod_ctrl_new = CTRL_L_CALC_SA;
-            montprod_ctrl_we = 1'b1;
-            reset_word_index_LSW = 1'b1;
-          end
-
-        CTRL_L_CALC_SA:
-          begin
-            if (word_index == 8'h0)
+            else
               begin
-                reset_word_index_LSW  = 1'b1;
-                montprod_ctrl_new = CTRL_L_STALLPIPE_SA;
-                montprod_ctrl_we = 1'b1;
+                dec_word_index = 1'b1;
               end
           end
 
-        CTRL_L_STALLPIPE_SA:
+        CTRL_STALLPIPE_ADD:
           begin
-            montprod_ctrl_new = CTRL_L_CALC_SDIV2;
-            montprod_ctrl_we = 1'b1;
-            reset_word_index_MSW = 1'b1;
+            first_iteration_new  = 1'b0;
+            first_iteration_we   = 1'b1;
+            reset_word_index_msw = 1'b1;
+            montprod_ctrl_new    = CTRL_CALC_SDIV2;
+            montprod_ctrl_we     = 1'b1;
           end
 
-        CTRL_L_CALC_SDIV2:
+        CTRL_CALC_SDIV2:
           begin
-            if (word_index == length_m1)
+            s_mux_new      = SMUX_SHR;
+
+            if (word_index_reg == length_m1)
               begin
-                montprod_ctrl_new = CTRL_L_STALLPIPE_D2;
-                montprod_ctrl_we = 1'b1;
-                //reset_word_index = 1'b1;
+                montprod_ctrl_new = CTRL_STALLPIPE_SDIV2;
+                montprod_ctrl_we  = 1'b1;
               end
+            else
+              inc_word_index = 1'b1;
           end
 
-        CTRL_L_STALLPIPE_D2:
+        CTRL_STALLPIPE_SDIV2:
           begin
-            montprod_ctrl_new = CTRL_LOOP_ITER; //loop
-            montprod_ctrl_we = 1'b1;
-            reset_word_index_LSW = 1'b1;
-            if (loop_counter == 0)
+            loop_ctr_dec         = 1'b1;
+            montprod_ctrl_new    = CTRL_LOOP_ITER;
+            montprod_ctrl_we     = 1'b1;
+            reset_word_index_lsw = 1'b1;
+
+            if (loop_ctr_reg == 0)
               begin
                 montprod_ctrl_new = CTRL_L_STALLPIPE_ES;
-                montprod_ctrl_we = 1'b1;
+                montprod_ctrl_we  = 1'b1;
               end
           end
 
         CTRL_L_STALLPIPE_ES:
           begin
             montprod_ctrl_new = CTRL_EMIT_S;
-            montprod_ctrl_we = 1'b1;
-            //reset_word_index_LSW = 1'b1;
+            montprod_ctrl_we  = 1'b1;
           end
 
         CTRL_EMIT_S:
            begin
-             if (DEBUG)
-               $display("EMIT_S word_index: %d", word_index);
-             if (word_index_prev == 8'h0)
+             dec_word_index = 1'b1;
+             tmp_result_we  = 1'b1;
+
+             if (word_index_prev_reg == 0)
                begin
-                 montprod_ctrl_new = CTRL_DONE;
+                 ready_new         = 1'b1;
+                 ready_we          = 1'b1;
+                 montprod_ctrl_new = CTRL_IDLE;
                  montprod_ctrl_we  = 1'b1;
                end
            end
 
-        CTRL_DONE:
-          begin
-            ready_new         = 1'b1;
-            ready_we          = 1'b1;
-            montprod_ctrl_new = CTRL_IDLE;
-            montprod_ctrl_we  = 1'b1;
-          end
-
         default:
           begin
           end
diff --git a/src/rtl/residue.v b/src/rtl/residue.v
index 3fa1666..f3d114c 100644
--- a/src/rtl/residue.v
+++ b/src/rtl/residue.v
@@ -45,119 +45,109 @@
 //
 //======================================================================
 
-module residue(
-  input wire clk,
-  input wire reset_n,
+module residue #(parameter OPW = 32, parameter ADW = 8)
+              (
+               input wire                  clk,
+               input wire                  reset_n,
 
-  input wire  calculate,
-  output wire ready,
+               input wire                  calculate,
+               output wire                 ready,
 
-  input wire  [14 : 0] nn, //MAX(2*N)=8192*2 (14 bit)
-  input wire  [07 : 0] length,
+               input wire  [14 : 0]        nn, //MAX(2*N)=8192*2 (15 bit)
+               input wire  [(ADW - 1) : 0] length,
 
-  output wire [07 : 0] opa_rd_addr,
-  input wire  [31 : 0] opa_rd_data,
-  output wire [07 : 0] opa_wr_addr,
-  output wire [31 : 0] opa_wr_data,
-  output wire          opa_wr_we,
+               output wire [(ADW - 1) : 0] opa_rd_addr,
+               input wire  [(OPW - 1) : 0] opa_rd_data,
+               output wire [(ADW - 1) : 0] opa_wr_addr,
+               output wire [(OPW - 1) : 0] opa_wr_data,
+               output wire                 opa_wr_we,
 
-  output wire [07 : 0] opm_addr,
-  input wire  [31 : 0] opm_data
+               output wire [(ADW - 1) : 0] opm_addr,
+               input wire  [(OPW - 1) : 0] opm_data
+              );
 
-);
-
-//----------------------------------------------------------------
-// Internal constant and parameter definitions.
-//----------------------------------------------------------------
+  //----------------------------------------------------------------
+  // Internal constant and parameter definitions.
+  //----------------------------------------------------------------
+  localparam CTRL_IDLE          = 4'h0;
+  localparam CTRL_INIT          = 4'h1;
+  localparam CTRL_INIT_STALL    = 4'h2;
+  localparam CTRL_SHL           = 4'h3;
+  localparam CTRL_SHL_STALL     = 4'h4;
+  localparam CTRL_COMPARE       = 4'h5;
+  localparam CTRL_COMPARE_STALL = 4'h6;
+  localparam CTRL_SUB           = 4'h7;
+  localparam CTRL_SUB_STALL     = 4'h8;
+  localparam CTRL_LOOP          = 4'h9;
 
 
-// m_residue_2_2N_array( N, M, Nr)
-//   Nr = 00...01 ; Nr = 1 == 2**(2N-2N)
-//   for (int i = 0; i < 2 * N; i++)
-//     Nr = Nr shift left 1
-//     if (Nr less than M) continue;
-//     Nr = Nr - M
-// return Nr
-//
-localparam CTRL_IDLE          = 4'h0;
-localparam CTRL_INIT          = 4'h1; // Nr = 00...01 ; Nr = 1 == 2**(2N-2N)
-localparam CTRL_INIT_STALL    = 4'h2;
-localparam CTRL_SHL           = 4'h3; // Nr = Nr shift left 1
-localparam CTRL_SHL_STALL     = 4'h4;
-localparam CTRL_COMPARE       = 4'h5; //if (Nr less than M) continue;
-localparam CTRL_COMPARE_STALL = 4'h6;
-localparam CTRL_SUB           = 4'h7; //Nr = Nr - M
-localparam CTRL_SUB_STALL     = 4'h8;
-localparam CTRL_LOOP          = 4'h9; //for (int i = 0; i < 2 * N; i++)
-
-//----------------------------------------------------------------
-// Registers including update variables and write enable.
-//----------------------------------------------------------------
-
-reg [07 : 0] opa_rd_addr_reg;
-reg [07 : 0] opa_wr_addr_reg;
-reg [31 : 0] opa_wr_data_reg;
-reg          opa_wr_we_reg;
-reg [07 : 0] opm_addr_reg;
-reg          ready_reg;
-reg          ready_new;
-reg          ready_we;
-reg [03 : 0] residue_ctrl_reg;
-reg [03 : 0] residue_ctrl_new;
-reg          residue_ctrl_we;
-reg          reset_word_index;
-reg          reset_n_counter;
-reg [14 : 0] loop_counter_1_to_nn_reg; //for i = 1 to nn (2*N)
-reg [14 : 0] loop_counter_1_to_nn_new;
-reg          loop_counter_1_to_nn_we;
-reg [14 : 0] nn_reg;
-reg          nn_we;
-reg [07 : 0] length_m1_reg;
-reg [07 : 0] length_m1_new;
-reg          length_m1_we;
-reg [07 : 0] word_index_reg;
-reg [07 : 0] word_index_new;
-reg          word_index_we;
-
-reg  [31 : 0] one_data;
-wire [31 : 0] sub_data;
-wire [31 : 0] shl_data;
-reg           sub_carry_in_new;
-reg           sub_carry_in_reg;
-wire          sub_carry_out;
-reg           shl_carry_in_new;
-reg           shl_carry_in_reg;
-wire          shl_carry_out;
-
-//----------------------------------------------------------------
-// Concurrent connectivity for ports etc.
-//----------------------------------------------------------------
-assign opa_rd_addr = opa_rd_addr_reg;
-assign opa_wr_addr = opa_wr_addr_reg;
-assign opa_wr_data = opa_wr_data_reg;
-assign opa_wr_we   = opa_wr_we_reg;
-assign opm_addr    = opm_addr_reg;
-assign ready       = ready_reg;
+  //----------------------------------------------------------------
+  // Registers including update variables and write enable.
+  //----------------------------------------------------------------
+  reg [(ADW - 1) : 0]  opa_rd_addr_reg;
+  reg [(ADW - 1) : 0]  opa_wr_addr_reg;
+  reg [(OPW - 1) : 0]  opa_wr_data_reg;
+  reg                  opa_wr_we_reg;
+  reg [(ADW - 1) : 0]  opm_addr_reg;
+  reg                  ready_reg;
+  reg                  ready_new;
+  reg                  ready_we;
+  reg [03 : 0]         residue_ctrl_reg;
+  reg [03 : 0]         residue_ctrl_new;
+  reg                  residue_ctrl_we;
+  reg                  reset_word_index;
+  reg                  reset_n_counter;
+  reg [14 : 0]         loop_counter_1_to_nn_reg; //for i = 1 to nn (2*N)
+  reg [14 : 0]         loop_counter_1_to_nn_new;
+  reg                  loop_counter_1_to_nn_we;
+  reg [14 : 0]         nn_reg;
+  reg                  nn_we;
+  reg [(ADW - 1) : 0]  length_m1_reg;
+  reg [(ADW - 1) : 0]  length_m1_new;
+  reg                  length_m1_we;
+  reg [(ADW - 1) : 0]  word_index_reg;
+  reg [(ADW - 1) : 0]  word_index_new;
+  reg                  word_index_we;
+
+  reg  [(OPW - 1) : 0] one_data;
+  wire [(OPW - 1) : 0] sub_data;
+  wire [(OPW - 1) : 0] shl_data;
+  reg                  sub_carry_in_new;
+  reg                  sub_carry_in_reg;
+  wire                 sub_carry_out;
+  reg                  shl_carry_in_new;
+  reg                  shl_carry_in_reg;
+  wire                 shl_carry_out;
 
 
   //----------------------------------------------------------------
-  // Instantions
+  // Concurrent connectivity for ports etc.
   //----------------------------------------------------------------
-  adder32 subcmp(
-    .a(opa_rd_data),
-    .b( ~ opm_data),
-    .carry_in(sub_carry_in_reg),
-    .sum(sub_data),
-    .carry_out(sub_carry_out)
-  );
+  assign opa_rd_addr = opa_rd_addr_reg;
+  assign opa_wr_addr = opa_wr_addr_reg;
+  assign opa_wr_data = opa_wr_data_reg;
+  assign opa_wr_we   = opa_wr_we_reg;
+  assign opm_addr    = opm_addr_reg;
+  assign ready       = ready_reg;
 
-  shl32 shl(
-    .a(opa_rd_data),
-    .carry_in(shl_carry_in_reg),
-    .amul2(shl_data),
-    .carry_out(shl_carry_out)
-  );
 
+  //----------------------------------------------------------------
+  // Instantiations
+  //----------------------------------------------------------------
+  adder #(.OPW(OPW)) add_inst(
+                              .a(opa_rd_data),
+                              .b( ~ opm_data),
+                              .carry_in(sub_carry_in_reg),
+                              .sum(sub_data),
+                              .carry_out(sub_carry_out)
+                             );
+
+  shl #(.OPW(OPW)) shl_inst(
+                            .a(opa_rd_data),
+                            .carry_in(shl_carry_in_reg),
+                            .amul2(shl_data),
+                            .carry_out(shl_carry_out)
+                           );
 
 
   //----------------------------------------------------------------
@@ -167,14 +157,14 @@ assign ready       = ready_reg;
     begin
       if (!reset_n)
         begin
-          residue_ctrl_reg <= CTRL_IDLE;
-          word_index_reg   <= 8'h0;
-          length_m1_reg    <= 8'h0;
-          nn_reg           <= 15'h0;
+          residue_ctrl_reg         <= CTRL_IDLE;
+          word_index_reg           <= {ADW{1'b1}};
+          length_m1_reg            <= {ADW{1'b1}};
+          nn_reg                   <= 15'h0;
           loop_counter_1_to_nn_reg <= 15'h0;
-          ready_reg        <= 1'b1;
-          sub_carry_in_reg <= 1'b0;
-          shl_carry_in_reg <= 1'b0;
+          ready_reg                <= 1'b1;
+          sub_carry_in_reg         <= 1'b0;
+          shl_carry_in_reg         <= 1'b0;
         end
       else
         begin
@@ -229,22 +219,24 @@ assign ready       = ready_reg;
         loop_counter_1_to_nn_we  = 1'b1;
     end
 
+
   //----------------------------------------------------------------
   // implements looping over words in a multiword operation
   //----------------------------------------------------------------
   always @*
     begin : word_index_process
-      word_index_new = word_index_reg - 8'h1;
+      word_index_new = word_index_reg - 1'b1;
       word_index_we  = 1'b1;
 
       if (reset_word_index)
         word_index_new = length_m1_reg;
 
       if (residue_ctrl_reg == CTRL_IDLE)
-        word_index_new = length_m1_new; //reduce a pipeline stage with early read
-
+        //reduce a pipeline stage with early read
+        word_index_new = length_m1_new;
     end
 
+
   //----------------------------------------------------------------
   // writer process. implements:
   //   Nr = 00...01 ; Nr = 1 == 2**(2N-2N)
@@ -299,6 +291,7 @@ assign ready       = ready_reg;
       opm_addr_reg    = word_index_new;
     end
 
+
   //----------------------------------------------------------------
   // carry process. "Ripple carry awesomeness!"
   //----------------------------------------------------------------
@@ -321,6 +314,7 @@ assign ready       = ready_reg;
       endcase
     end
 
+
   //----------------------------------------------------------------
   // Nr = 00...01 ; Nr = 1 == 2**(2N-2N)
   //----------------------------------------------------------------
@@ -329,128 +323,130 @@ assign ready       = ready_reg;
       one_data = 32'h0;
       if (residue_ctrl_reg == CTRL_INIT)
         if (word_index_reg == length_m1_reg)
-          one_data = 32'h1;
+          one_data = {{(OPW - 1){1'b0}}, 1'b1};
     end
 
-//----------------------------------------------------------------
-// residue_ctrl
-//
-// Control FSM for residue
-//----------------------------------------------------------------
-always @*
-  begin : residue_ctrl
-    ready_new = 1'b0;
-    ready_we  = 1'b0;
-
-    residue_ctrl_new = CTRL_IDLE;
-    residue_ctrl_we  = 1'b0;
 
-    reset_word_index = 1'b0;
-    reset_n_counter  = 1'b0;
+  //----------------------------------------------------------------
+  // residue_ctrl
+  //
+  // Control FSM for residue
+  //----------------------------------------------------------------
+  always @*
+    begin : residue_ctrl
+      ready_new        = 1'b0;
+      ready_we         = 1'b0;
+      reset_word_index = 1'b0;
+      reset_n_counter  = 1'b0;
+      length_m1_new    = length - 1'b1;
+      length_m1_we     = 1'b0;
+      nn_we            = 1'b0;
+      residue_ctrl_new = CTRL_IDLE;
+      residue_ctrl_we  = 1'b0;
 
-    length_m1_new  = length - 8'h1;
-    length_m1_we   = 1'b0;
+      case (residue_ctrl_reg)
+        CTRL_IDLE:
+          if (calculate)
+            begin
+              ready_new        = 1'b0;
+              ready_we         = 1'b1;
+              reset_word_index = 1'b1;
+              length_m1_we     = 1'b1;
+              nn_we            = 1'b1;
+              residue_ctrl_new = CTRL_INIT;
+              residue_ctrl_we  = 1'b1;
+            end
 
-    nn_we = 1'b0;
+        // Nr = 00...01 ; Nr = 1 == 2**(2N-2N)
+        CTRL_INIT:
+          if (word_index_reg == 0)
+            begin
+              residue_ctrl_new = CTRL_INIT_STALL;
+              residue_ctrl_we  = 1'b1;
+            end
 
-    case (residue_ctrl_reg)
-      CTRL_IDLE:
-        if (calculate)
+        CTRL_INIT_STALL:
           begin
-            ready_new = 1'b0;
-            ready_we  = 1'b1;
-            residue_ctrl_new = CTRL_INIT;
-            residue_ctrl_we  = 1'b1;
             reset_word_index = 1'b1;
-            length_m1_we     = 1'b1;
-            nn_we            = 1'b1;
+            reset_n_counter  = 1'b1;
+            residue_ctrl_new = CTRL_SHL;
+            residue_ctrl_we  = 1'b1;
           end
 
-      CTRL_INIT:
-        if (word_index_reg == 8'h0)
+        // Nr = Nr shift left 1
+        CTRL_SHL:
           begin
-            residue_ctrl_new = CTRL_INIT_STALL;
-            residue_ctrl_we  = 1'b1;
+            if (word_index_reg == 0)
+              begin
+                residue_ctrl_new = CTRL_SHL_STALL;
+                residue_ctrl_we  = 1'b1;
+              end
           end
 
-      CTRL_INIT_STALL:
-        begin
-          reset_word_index = 1'b1;
-          reset_n_counter  = 1'b1;
-          residue_ctrl_new = CTRL_SHL;
-          residue_ctrl_we  = 1'b1;
-        end
-
-      CTRL_SHL:
-        begin
-        if (word_index_reg == 8'h0)
+        CTRL_SHL_STALL:
           begin
-            residue_ctrl_new = CTRL_SHL_STALL;
+            reset_word_index = 1'b1;
+            residue_ctrl_new = CTRL_COMPARE;
             residue_ctrl_we  = 1'b1;
           end
-        end
 
-      CTRL_SHL_STALL:
-        begin
-          reset_word_index = 1'b1;
-          residue_ctrl_new = CTRL_COMPARE;
-          residue_ctrl_we  = 1'b1;
-        end
+        //if (Nr less than M) continue
+        CTRL_COMPARE:
+          if (word_index_reg == 0)
+            begin
+              residue_ctrl_new = CTRL_COMPARE_STALL;
+              residue_ctrl_we  = 1'b1;
+            end
 
-      CTRL_COMPARE:
-        if (word_index_reg == 8'h0)
+        CTRL_COMPARE_STALL:
           begin
-            residue_ctrl_new = CTRL_COMPARE_STALL;
+            reset_word_index = 1'b1;
             residue_ctrl_we  = 1'b1;
+            if (sub_carry_in_reg == 1'b1)
+              //TODO: Bug! CF is checked to detect "less than", but ZF is not checked to detect "equal to".
+              residue_ctrl_new = CTRL_SUB;
+            else
+              residue_ctrl_new = CTRL_LOOP;
           end
 
-      CTRL_COMPARE_STALL:
-        begin
-          reset_word_index = 1'b1;
-          residue_ctrl_we  = 1'b1;
-          if (sub_carry_in_reg == 1'b1)
-            //TODO: Bug! detect CF to detect less than, but no detect ZF to detect equal to.
-            residue_ctrl_new = CTRL_SUB;
-          else
-            residue_ctrl_new = CTRL_LOOP;
-        end
+        //Nr = Nr - M
+        CTRL_SUB:
+          if (word_index_reg == 0)
+            begin
+              residue_ctrl_new = CTRL_SUB_STALL;
+              residue_ctrl_we  = 1'b1;
+            end
 
-      CTRL_SUB:
-        if (word_index_reg == 8'h0)
+        CTRL_SUB_STALL:
           begin
-            residue_ctrl_new = CTRL_SUB_STALL;
+            residue_ctrl_new = CTRL_LOOP;
             residue_ctrl_we  = 1'b1;
           end
 
-      CTRL_SUB_STALL:
-        begin
-          residue_ctrl_new = CTRL_LOOP;
-          residue_ctrl_we  = 1'b1;
-        end
-
-      CTRL_LOOP:
-        begin
-          if (loop_counter_1_to_nn_reg == nn_reg)
-            begin
-              ready_new = 1'b1;
-              ready_we  = 1'b1;
-              residue_ctrl_new = CTRL_IDLE;
-              residue_ctrl_we  = 1'b1;
-            end
-          else
-            begin
-              reset_word_index = 1'b1;
-              residue_ctrl_new = CTRL_SHL;
-              residue_ctrl_we  = 1'b1;
-            end
-        end
+        //for (int i = 0; i < 2 * N; i++)
+        CTRL_LOOP:
+          begin
+            if (loop_counter_1_to_nn_reg == nn_reg)
+              begin
+                ready_new = 1'b1;
+                ready_we  = 1'b1;
+                residue_ctrl_new = CTRL_IDLE;
+                residue_ctrl_we  = 1'b1;
+              end
+            else
+              begin
+                reset_word_index = 1'b1;
+                residue_ctrl_new = CTRL_SHL;
+                residue_ctrl_we  = 1'b1;
+              end
+          end
 
-      default:
-        begin
-        end
+        default:
+          begin
+          end
 
-    endcase
-  end
+      endcase
+    end
 
 endmodule // residue
 
diff --git a/src/rtl/shl.v b/src/rtl/shl.v
new file mode 100644
index 0000000..bed83e8
--- /dev/null
+++ b/src/rtl/shl.v
@@ -0,0 +1,56 @@
+//======================================================================
+//
+// shl.v
+// -----
+// One bit left shift of words with carry in and carry out. Used in
+// the residue module of the modexp core.
+//
+//
+// Author: Peter Magnusson, Joachim Strömbergson
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module shl #(parameter OPW = 32)
+          (
+           input  wire [(OPW - 1) : 0] a,         // word to shift left
+           input  wire                 carry_in,  // becomes the new LSB
+
+           output wire [(OPW - 1) : 0] amul2,     // a shifted left one bit
+           output wire                 carry_out  // old MSB of a
+          );
+
+   // {a, carry_in} is split so the MSB leaves and the low OPW bits form amul2.
+   assign {carry_out, amul2} = {a, carry_in};
+
+endmodule // shl
+
+//======================================================================
+// EOF shl.v
+//======================================================================
diff --git a/src/rtl/shr.v b/src/rtl/shr.v
new file mode 100644
index 0000000..40ef111
--- /dev/null
+++ b/src/rtl/shr.v
@@ -0,0 +1,56 @@
+//======================================================================
+//
+// shr.v
+// -----
+// One bit right shift with carry in and carry out.
+// Used in the montprod module of the modexp core.
+//
+//
+// Author: Peter Magnusson, Joachim Strömbergson
+// Copyright (c) 2015, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module shr #(parameter OPW = 32)
+          (
+           input wire  [(OPW - 1) : 0] a,         // word to shift right
+           input wire                  carry_in,  // becomes the new MSB
+
+           output wire [(OPW - 1) : 0] adiv2,     // a shifted right one bit
+           output wire                 carry_out  // old LSB of a
+          );
+
+  // {carry_in, a} is split so the top OPW bits form adiv2 and bit 0 leaves.
+  assign {adiv2, carry_out} = {carry_in, a};
+
+endmodule // shr
+
+//======================================================================
+// EOF shr.v
+//======================================================================
diff --git a/toolruns/Makefile b/toolruns/Makefile
index 268af7f..bc84cd6 100755
--- a/toolruns/Makefile
+++ b/toolruns/Makefile
@@ -44,7 +44,7 @@ LINT = verilator
 LINTFLAGS = --lint-only -Wall
 
 # Sources.
-COMMON_SRC = ../src/rtl/adder32.v ../src/rtl/shl32.v ../src/rtl/shr32.v \
+COMMON_SRC = ../src/rtl/adder.v ../src/rtl/adder32.v ../src/rtl/shl.v ../src/rtl/shl32.v ../src/rtl/shr.v \
 	../src/rtl/blockmem1r1w.v ../src/rtl/blockmem2r1w.v \
 	../src/rtl/blockmem2r1wptr.v  ../src/rtl/blockmem2rptr1w.v
 



More information about the Commits mailing list