[Cryptech-Commits] [core/cipher/chacha] branch cleanup updated: (1) Cleanup of top and core code with no functional changes. The code is now much more compact. (2) Fixed how the QR modules are used in parallel to actually work in parallel. This increases performance. (3) Changed registers into arrays and cleaned up how operands and data are accessed. This decreased total design size.

git at cryptech.is git at cryptech.is
Wed Dec 28 09:56:16 UTC 2016


This is an automated email from the git hooks/post-receive script.

joachim at secworks.se pushed a commit to branch cleanup
in repository core/cipher/chacha.

The following commit(s) were added to refs/heads/cleanup by this push:
     new f4731e8  (1) Cleanup of top and core code with no functional changes. The code is now much more compact. (2) Fixed how the QR modules are used in parallel to actually work in parallel. This increases performance. (3) Changed registers into arrays and cleaned up how operands and data are accessed. This decreased total design size.
f4731e8 is described below

commit f4731e83511a3b35f05e4a6222ba27af5920fcd8
Author: Joachim Strömbergson <joachim at secworks.se>
AuthorDate: Wed Dec 28 10:55:30 2016 +0100

    (1) Cleanup of top and core code with no functional changes. The code is now much more compact. (2) Fixed how the QR modules are used in parallel to actually work in parallel. This increases performance. (3) Changed registers into arrays and cleaned up how operands and data are accessed. This decreased total design size.
---
 src/rtl/chacha.v        |  793 ++++----------------------------
 src/rtl/chacha_core.v   | 1176 +++++++++++------------------------------------
 src/tb/tb_chacha.v      |  258 ++++++-----
 src/tb/tb_chacha_core.v |  137 +++---
 4 files changed, 564 insertions(+), 1800 deletions(-)

diff --git a/src/rtl/chacha.v b/src/rtl/chacha.v
index 497f51d..bc891f7 100644
--- a/src/rtl/chacha.v
+++ b/src/rtl/chacha.v
@@ -38,15 +38,10 @@
 //======================================================================
 
 module chacha(
-              // Clock and reset.
               input wire           clk,
               input wire           reset_n,
-
-              // Control.
               input wire           cs,
               input wire           we,
-
-              // Data ports.
               input wire  [7 : 0]  address,
               input wire  [31 : 0] write_data,
               output wire [31 : 0] read_data,
@@ -74,156 +69,51 @@ module chacha(
   localparam ROUNDS_LOW_BIT   = 0;
 
   localparam ADDR_KEY0        = 8'h10;
-  localparam ADDR_KEY1        = 8'h11;
-  localparam ADDR_KEY2        = 8'h12;
-  localparam ADDR_KEY3        = 8'h13;
-  localparam ADDR_KEY4        = 8'h14;
-  localparam ADDR_KEY5        = 8'h15;
-  localparam ADDR_KEY6        = 8'h16;
   localparam ADDR_KEY7        = 8'h17;
 
   localparam ADDR_IV0         = 8'h20;
   localparam ADDR_IV1         = 8'h21;
 
   localparam ADDR_DATA_IN0    = 8'h40;
-  localparam ADDR_DATA_IN1    = 8'h41;
-  localparam ADDR_DATA_IN2    = 8'h42;
-  localparam ADDR_DATA_IN3    = 8'h43;
-  localparam ADDR_DATA_IN4    = 8'h44;
-  localparam ADDR_DATA_IN5    = 8'h45;
-  localparam ADDR_DATA_IN6    = 8'h46;
-  localparam ADDR_DATA_IN7    = 8'h47;
-  localparam ADDR_DATA_IN8    = 8'h48;
-  localparam ADDR_DATA_IN9    = 8'h49;
-  localparam ADDR_DATA_IN10   = 8'h4a;
-  localparam ADDR_DATA_IN11   = 8'h4b;
-  localparam ADDR_DATA_IN12   = 8'h4c;
-  localparam ADDR_DATA_IN13   = 8'h4d;
-  localparam ADDR_DATA_IN14   = 8'h4e;
   localparam ADDR_DATA_IN15   = 8'h4f;
 
   localparam ADDR_DATA_OUT0   = 8'h80;
-  localparam ADDR_DATA_OUT1   = 8'h81;
-  localparam ADDR_DATA_OUT2   = 8'h82;
-  localparam ADDR_DATA_OUT3   = 8'h83;
-  localparam ADDR_DATA_OUT4   = 8'h84;
-  localparam ADDR_DATA_OUT5   = 8'h85;
-  localparam ADDR_DATA_OUT6   = 8'h86;
-  localparam ADDR_DATA_OUT7   = 8'h87;
-  localparam ADDR_DATA_OUT8   = 8'h88;
-  localparam ADDR_DATA_OUT9   = 8'h89;
-  localparam ADDR_DATA_OUT10  = 8'h8a;
-  localparam ADDR_DATA_OUT11  = 8'h8b;
-  localparam ADDR_DATA_OUT12  = 8'h8c;
-  localparam ADDR_DATA_OUT13  = 8'h8d;
-  localparam ADDR_DATA_OUT14  = 8'h8e;
   localparam ADDR_DATA_OUT15  = 8'h8f;
 
-  localparam DEFAULT_CTR_INIT = 64'h0000000000000000;
+  localparam CORE_NAME0       = 32'h63686163; // "chac"
+  localparam CORE_NAME1       = 32'h68612020; // "ha  "
+  localparam CORE_VERSION     = 32'h302e3831; // "0.81"
 
-  localparam CORE_NAME0          = 32'h63686163; // "chac"
-  localparam CORE_NAME1          = 32'h68612020; // "ha  "
-  localparam CORE_VERSION        = 32'h302e3830; // "0.80"
+  localparam DEFAULT_CTR_INIT = 64'h0;
 
 
   //----------------------------------------------------------------
   // Registers including update variables and write enable.
   //----------------------------------------------------------------
-  reg init_reg;
-  reg next_reg;
-  reg ctrl_we;
-
-  reg ready_reg;
-
-  reg keylen_reg;
-  reg keylen_we;
-
-  reg [4 : 0] rounds_reg;
-  reg         rounds_we;
-
-  reg data_out_valid_reg;
-
-  reg [31 : 0] key0_reg;
-  reg          key0_we;
-  reg [31 : 0] key1_reg;
-  reg          key1_we;
-  reg [31 : 0] key2_reg;
-  reg          key2_we;
-  reg [31 : 0] key3_reg;
-  reg          key3_we;
-  reg [31 : 0] key4_reg;
-  reg          key4_we;
-  reg [31 : 0] key5_reg;
-  reg          key5_we;
-  reg [31 : 0] key6_reg;
-  reg          key6_we;
-  reg [31 : 0] key7_reg;
-  reg          key7_we;
-
-  reg [31 : 0] iv0_reg;
-  reg          iv0_we;
-  reg [31 : 0] iv1_reg;
-  reg          iv1_we;
-
-  reg [31 : 0] data_in0_reg;
-  reg          data_in0_we;
-  reg [31 : 0] data_in1_reg;
-  reg          data_in1_we;
-  reg [31 : 0] data_in2_reg;
-  reg          data_in2_we;
-  reg [31 : 0] data_in3_reg;
-  reg          data_in3_we;
-  reg [31 : 0] data_in4_reg;
-  reg          data_in4_we;
-  reg [31 : 0] data_in5_reg;
-  reg          data_in5_we;
-  reg [31 : 0] data_in6_reg;
-  reg          data_in6_we;
-  reg [31 : 0] data_in7_reg;
-  reg          data_in7_we;
-  reg [31 : 0] data_in8_reg;
-  reg          data_in8_we;
-  reg [31 : 0] data_in9_reg;
-  reg          data_in9_we;
-  reg [31 : 0] data_in10_reg;
-  reg          data_in10_we;
-  reg [31 : 0] data_in11_reg;
-  reg          data_in11_we;
-  reg [31 : 0] data_in12_reg;
-  reg          data_in12_we;
-  reg [31 : 0] data_in13_reg;
-  reg          data_in13_we;
-  reg [31 : 0] data_in14_reg;
-  reg          data_in14_we;
-  reg [31 : 0] data_in15_reg;
-  reg          data_in15_we;
-
-  reg [31 : 0] data_out0_reg;
-  reg [31 : 0] data_out1_reg;
-  reg [31 : 0] data_out2_reg;
-  reg [31 : 0] data_out3_reg;
-  reg [31 : 0] data_out4_reg;
-  reg [31 : 0] data_out5_reg;
-  reg [31 : 0] data_out6_reg;
-  reg [31 : 0] data_out7_reg;
-  reg [31 : 0] data_out8_reg;
-  reg [31 : 0] data_out9_reg;
-  reg [31 : 0] data_out10_reg;
-  reg [31 : 0] data_out11_reg;
-  reg [31 : 0] data_out12_reg;
-  reg [31 : 0] data_out13_reg;
-  reg [31 : 0] data_out14_reg;
-  reg [31 : 0] data_out15_reg;
+  reg          init_reg;
+  reg          next_reg;
+  reg          ctrl_we;
+
+  reg          keylen_reg;
+  reg          keylen_we;
+
+  reg [4 : 0]  rounds_reg;
+  reg          rounds_we;
+
+  reg [31 : 0] key_reg [0 : 7];
+  reg          key_we;
+
+  reg [31 : 0] iv_reg[0 : 1];
+  reg          iv_we;
+
+  reg [31 : 0] data_in_reg [0 : 15];
+  reg          data_in_we;
 
 
   //----------------------------------------------------------------
   // Wires.
   //----------------------------------------------------------------
-  wire           core_init;
-  wire           core_next;
   wire [255 : 0] core_key;
-  wire           core_keylen;
-  wire [4 : 0]   core_rounds;
   wire [63 : 0]  core_iv;
   wire           core_ready;
   wire [511 : 0] core_data_in;
@@ -231,32 +121,24 @@ module chacha(
   wire           core_data_out_valid;
 
   reg [31 : 0]   tmp_read_data;
-  reg            tmp_error;
 
 
   //----------------------------------------------------------------
   // Concurrent connectivity for ports etc.
   //----------------------------------------------------------------
-  assign core_init    = init_reg;
-
-  assign core_next    = next_reg;
-
-  assign core_keylen  = keylen_reg;
+  assign core_key     = {key_reg[0], key_reg[1], key_reg[2], key_reg[3],
+                         key_reg[4], key_reg[5], key_reg[6], key_reg[7]};
 
-  assign core_rounds  = rounds_reg;
+  assign core_iv      = {iv_reg[0], iv_reg[1]};
 
-  assign core_key     = {key0_reg, key1_reg, key2_reg, key3_reg,
-                         key4_reg, key5_reg, key6_reg, key7_reg};
+  assign core_data_in = {data_in_reg[00], data_in_reg[01], data_in_reg[02], data_in_reg[03],
+                         data_in_reg[04], data_in_reg[05], data_in_reg[06], data_in_reg[07],
+                         data_in_reg[08], data_in_reg[09], data_in_reg[10], data_in_reg[11],
+                         data_in_reg[12], data_in_reg[13], data_in_reg[14], data_in_reg[15]};
 
-  assign core_iv      = {iv0_reg, iv1_reg};
+  assign read_data     = tmp_read_data;
 
-  assign core_data_in = {data_in0_reg, data_in1_reg, data_in2_reg, data_in3_reg,
-                         data_in4_reg, data_in5_reg, data_in6_reg, data_in7_reg,
-                         data_in8_reg, data_in9_reg, data_in10_reg, data_in11_reg,
-                         data_in12_reg, data_in13_reg, data_in14_reg, data_in15_reg};
-
-  assign read_data = tmp_read_data;
-  assign error     = tmp_error;
+  assign error         = 1'b0;
 
 
   //----------------------------------------------------------------
@@ -265,20 +147,15 @@ module chacha(
   chacha_core core (
                     .clk(clk),
                     .reset_n(reset_n),
-
-                    .init(core_init),
-                    .next(core_next),
-
+                    .init(init_reg),
+                    .next(next_reg),
                     .key(core_key),
-                    .keylen(core_keylen),
+                    .keylen(keylen_reg),
                     .iv(core_iv),
                     .ctr(DEFAULT_CTR_INIT),
-                    .rounds(core_rounds),
-
+                    .rounds(rounds_reg),
                     .data_in(core_data_in),
-
                     .ready(core_ready),
-
                     .data_out(core_data_out),
                     .data_out_valid(core_data_out_valid)
                    );
@@ -286,72 +163,31 @@ module chacha(
 
   //----------------------------------------------------------------
   // reg_update
+  //
   // Update functionality for all registers in the core.
   // All registers are positive edge triggered with asynchronous
-  // active low reset.
+  // active low reset. All registers have write enable.
   //----------------------------------------------------------------
-  always @ (posedge clk or negedge reset_n)
-    begin
+  always @ (posedge clk)
+    begin : reg_update
+     integer i;
       if (!reset_n)
         begin
-          init_reg           <= 0;
-          next_reg           <= 0;
-          ready_reg          <= 0;
-          keylen_reg         <= 0;
-          rounds_reg         <= 5'b00000;
-          data_out_valid_reg <= 0;
-
-          key0_reg           <= 32'h00000000;
-          key1_reg           <= 32'h00000000;
-          key2_reg           <= 32'h00000000;
-          key3_reg           <= 32'h00000000;
-          key4_reg           <= 32'h00000000;
-          key5_reg           <= 32'h00000000;
-          key6_reg           <= 32'h00000000;
-          key7_reg           <= 32'h00000000;
-
-          iv0_reg            <= 32'h00000000;
-          iv1_reg            <= 32'h00000000;
-
-          data_in0_reg       <= 32'h00000000;
-          data_in1_reg       <= 32'h00000000;
-          data_in2_reg       <= 32'h00000000;
-          data_in3_reg       <= 32'h00000000;
-          data_in4_reg       <= 32'h00000000;
-          data_in5_reg       <= 32'h00000000;
-          data_in6_reg       <= 32'h00000000;
-          data_in7_reg       <= 32'h00000000;
-          data_in8_reg       <= 32'h00000000;
-          data_in9_reg       <= 32'h00000000;
-          data_in10_reg      <= 32'h00000000;
-          data_in11_reg      <= 32'h00000000;
-          data_in12_reg      <= 32'h00000000;
-          data_in13_reg      <= 32'h00000000;
-          data_in14_reg      <= 32'h00000000;
-          data_in15_reg      <= 32'h00000000;
-
-          data_out0_reg      <= 32'h00000000;
-          data_out1_reg      <= 32'h00000000;
-          data_out2_reg      <= 32'h00000000;
-          data_out3_reg      <= 32'h00000000;
-          data_out4_reg      <= 32'h00000000;
-          data_out5_reg      <= 32'h00000000;
-          data_out6_reg      <= 32'h00000000;
-          data_out7_reg      <= 32'h00000000;
-          data_out8_reg      <= 32'h00000000;
-          data_out9_reg      <= 32'h00000000;
-          data_out10_reg     <= 32'h00000000;
-          data_out11_reg     <= 32'h00000000;
-          data_out12_reg     <= 32'h00000000;
-          data_out13_reg     <= 32'h00000000;
-          data_out14_reg     <= 32'h00000000;
-          data_out15_reg     <= 32'h00000000;
+          init_reg   <= 0;
+          next_reg   <= 0;
+          keylen_reg <= 0;
+          rounds_reg <= 5'h0;
+          iv_reg[0]  <= 32'h0;
+          iv_reg[1]  <= 32'h0;
+
+          for (i = 0 ; i < 8 ; i = i + 1)
+            key_reg[i] <= 32'h0;
+
+          for (i = 0 ; i < 16 ; i = i + 1)
+            data_in_reg[i] <= 32'h0;
         end
       else
         begin
-          ready_reg          <= core_ready;
-          data_out_valid_reg <= core_data_out_valid;
-
           if (ctrl_we)
             begin
               init_reg <= write_data[CTRL_INIT_BIT];
@@ -359,164 +195,19 @@ module chacha(
             end
 
           if (keylen_we)
-            begin
-              keylen_reg <= write_data[KEYLEN_BIT];
-            end
+            keylen_reg <= write_data[KEYLEN_BIT];
 
           if (rounds_we)
-            begin
-              rounds_reg <= write_data[ROUNDS_HIGH_BIT : ROUNDS_LOW_BIT];
-            end
+            rounds_reg <= write_data[ROUNDS_HIGH_BIT : ROUNDS_LOW_BIT];
 
-          if (key0_we)
-            begin
-              key0_reg <= write_data;
-            end
+          if (key_we)
+            key_reg[address[2 : 0]] <= write_data;
 
-          if (key1_we)
-            begin
-              key1_reg <= write_data;
-            end
-
-          if (key2_we)
-            begin
-              key2_reg <= write_data;
-            end
-
-          if (key3_we)
-            begin
-              key3_reg <= write_data;
-            end
-
-          if (key4_we)
-            begin
-              key4_reg <= write_data;
-            end
-
-          if (key5_we)
-            begin
-              key5_reg <= write_data;
-            end
-
-          if (key6_we)
-            begin
-              key6_reg <= write_data;
-            end
-
-          if (key7_we)
-            begin
-              key7_reg <= write_data;
-            end
-
-          if (iv0_we)
-            begin
-              iv0_reg <= write_data;
-            end
-
-          if (iv1_we)
-            begin
-              iv1_reg <= write_data;
-            end
-
-          if (data_in0_we)
-            begin
-              data_in0_reg <= write_data;
-            end
-
-          if (data_in1_we)
-            begin
-              data_in1_reg <= write_data;
-            end
-
-          if (data_in2_we)
-            begin
-              data_in2_reg <= write_data;
-            end
-
-          if (data_in3_we)
-            begin
-              data_in3_reg <= write_data;
-            end
-
-          if (data_in4_we)
-            begin
-              data_in4_reg <= write_data;
-            end
-
-          if (data_in5_we)
-            begin
-              data_in5_reg <= write_data;
-            end
-
-          if (data_in6_we)
-            begin
-              data_in6_reg <= write_data;
-            end
+          if (iv_we)
+            iv_reg[address[0]] <= write_data;
 
-          if (data_in7_we)
-            begin
-              data_in7_reg <= write_data;
-            end
-
-          if (data_in8_we)
-            begin
-              data_in8_reg <= write_data;
-            end
-
-          if (data_in9_we)
-            begin
-              data_in9_reg <= write_data;
-            end
-
-          if (data_in10_we)
-            begin
-              data_in10_reg <= write_data;
-            end
-
-          if (data_in11_we)
-            begin
-              data_in11_reg <= write_data;
-            end
-
-          if (data_in12_we)
-            begin
-              data_in12_reg <= write_data;
-            end
-
-          if (data_in13_we)
-            begin
-              data_in13_reg <= write_data;
-            end
-
-          if (data_in14_we)
-            begin
-              data_in14_reg <= write_data;
-            end
-
-          if (data_in15_we)
-            begin
-              data_in15_reg <= write_data;
-            end
-
-          if (core_data_out_valid)
-            begin
-              data_out0_reg  <= core_data_out[511 : 480];
-              data_out1_reg  <= core_data_out[479 : 448];
-              data_out2_reg  <= core_data_out[447 : 416];
-              data_out3_reg  <= core_data_out[415 : 384];
-              data_out4_reg  <= core_data_out[383 : 352];
-              data_out5_reg  <= core_data_out[351 : 320];
-              data_out6_reg  <= core_data_out[319 : 288];
-              data_out7_reg  <= core_data_out[287 : 256];
-              data_out8_reg  <= core_data_out[255 : 224];
-              data_out9_reg  <= core_data_out[223 : 192];
-              data_out10_reg <= core_data_out[191 : 160];
-              data_out11_reg <= core_data_out[159 : 128];
-              data_out12_reg <= core_data_out[127 :  96];
-              data_out13_reg <= core_data_out[95  :  64];
-              data_out14_reg <= core_data_out[63  :  32];
-              data_out15_reg <= core_data_out[31  :   0];
-            end
+          if (data_in_we)
+            data_in_reg[address[3 : 0]] <= write_data;
         end
     end // reg_update
 
@@ -526,356 +217,58 @@ module chacha(
   //----------------------------------------------------------------
   always @*
     begin : addr_decoder
-      ctrl_we      = 0;
-      keylen_we    = 0;
-      rounds_we    = 0;
-
-      key0_we      = 0;
-      key1_we      = 0;
-      key2_we      = 0;
-      key3_we      = 0;
-      key4_we      = 0;
-      key5_we      = 0;
-      key6_we      = 0;
-      key7_we      = 0;
-
-      iv0_we       = 0;
-      iv1_we       = 0;
-
-      data_in0_we  = 0;
-      data_in1_we  = 0;
-      data_in2_we  = 0;
-      data_in3_we  = 0;
-      data_in4_we  = 0;
-      data_in5_we  = 0;
-      data_in6_we  = 0;
-      data_in7_we  = 0;
-      data_in8_we  = 0;
-      data_in9_we  = 0;
-      data_in10_we = 0;
-      data_in11_we = 0;
-      data_in12_we = 0;
-      data_in13_we = 0;
-      data_in14_we = 0;
-      data_in15_we = 0;
-
-      tmp_read_data = 32'h00000000;
-      tmp_error     = 0;
+      ctrl_we       = 0;
+      keylen_we     = 0;
+      rounds_we     = 0;
+      key_we        = 0;
+      iv_we         = 0;
+      data_in_we    = 0;
+      tmp_read_data = 32'h0;
 
       if (cs)
         begin
           if (we)
             begin
-              case (address)
-                ADDR_CTRL:
-                  begin
-                    ctrl_we  = 1;
-                  end
-
-                ADDR_KEYLEN:
-                  begin
-                    keylen_we = 1;
-                  end
-
-                ADDR_ROUNDS:
-                  begin
-                    rounds_we  = 1;
-                  end
-
-                ADDR_KEY0:
-                  begin
-                    key0_we  = 1;
-                  end
-
-                ADDR_KEY1:
-                  begin
-                    key1_we  = 1;
-                  end
-
-                ADDR_KEY2:
-                  begin
-                    key2_we  = 1;
-                  end
-
-                ADDR_KEY3:
-                  begin
-                    key3_we  = 1;
-                  end
-
-                ADDR_KEY4:
-                  begin
-                    key4_we  = 1;
-                  end
-
-                ADDR_KEY5:
-                  begin
-                    key5_we  = 1;
-                  end
-
-                ADDR_KEY6:
-                  begin
-                    key6_we  = 1;
-                  end
-
-                ADDR_KEY7:
-                  begin
-                    key7_we  = 1;
-                  end
-
-                ADDR_IV0:
-                  begin
-                    iv0_we = 1;
-                  end
-
-                ADDR_IV1:
-                  begin
-                    iv1_we = 1;
-                  end
+              if (address == ADDR_CTRL)
+                ctrl_we = 1;
 
-                ADDR_DATA_IN0:
-                  begin
-                    data_in0_we = 1;
-                  end
-
-                ADDR_DATA_IN1:
-                  begin
-                    data_in1_we = 1;
-                  end
-
-                ADDR_DATA_IN2:
-                  begin
-                    data_in2_we = 1;
-                  end
-
-                ADDR_DATA_IN3:
-                  begin
-                    data_in3_we = 1;
-                  end
+              if (address == ADDR_KEYLEN)
+                keylen_we = 1;
 
-                ADDR_DATA_IN4:
-                  begin
-                    data_in4_we = 1;
-                  end
+              if (address == ADDR_ROUNDS)
+                rounds_we = 1;
 
-                ADDR_DATA_IN5:
-                  begin
-                    data_in5_we = 1;
-                  end
-
-                ADDR_DATA_IN6:
-                  begin
-                    data_in6_we = 1;
-                  end
+              if ((address >= ADDR_KEY0) && (address <= ADDR_KEY7))
+                key_we = 1;
 
-                ADDR_DATA_IN7:
-                  begin
-                    data_in7_we = 1;
-                  end
-
-                ADDR_DATA_IN8:
-                  begin
-                    data_in8_we = 1;
-                  end
-
-                ADDR_DATA_IN9:
-                  begin
-                    data_in9_we = 1;
-                  end
-
-                ADDR_DATA_IN10:
-                  begin
-                    data_in10_we = 1;
-                  end
-
-                ADDR_DATA_IN11:
-                  begin
-                    data_in11_we = 1;
-                  end
-
-                ADDR_DATA_IN12:
-                  begin
-                    data_in12_we = 1;
-                  end
+              if ((address >= ADDR_IV0) && (address <= ADDR_IV1))
+                iv_we = 1;
 
-                ADDR_DATA_IN13:
-                  begin
-                    data_in13_we = 1;
-                  end
-
-                ADDR_DATA_IN14:
-                  begin
-                    data_in14_we = 1;
-                  end
-
-                ADDR_DATA_IN15:
-                  begin
-                    data_in15_we = 1;
-                  end
-
-                default:
-                  begin
-                    tmp_error = 1;
-                  end
-              endcase // case (address)
+              if ((address >= ADDR_DATA_IN0) && (address <= ADDR_DATA_IN15))
+                data_in_we = 1;
             end // if (we)
 
           else
             begin
-              case (address)
-                ADDR_CTRL:
-                  begin
-                    tmp_read_data = {28'h0000000, 2'b00, next_reg, init_reg};
-                  end
-
-                ADDR_STATUS:
-                  begin
-                    tmp_read_data = {28'h0000000, 2'b00,
-                                    {data_out_valid_reg, ready_reg}};
-                  end
+              if ((address >= ADDR_KEY0) && (address <= ADDR_KEY7))
+                tmp_read_data = key_reg[address[2 : 0]];
 
-                ADDR_KEYLEN:
-                  begin
-                    tmp_read_data = {28'h0000000, 3'b000, keylen_reg};
-                  end
+              if ((address >= ADDR_DATA_OUT0) && (address <= ADDR_DATA_OUT15))
+                tmp_read_data = core_data_out[(15 - (address - ADDR_DATA_OUT0)) * 32 +: 32];
 
-                ADDR_ROUNDS:
-                  begin
-                    tmp_read_data = {24'h000000, 3'b000, rounds_reg};
-                  end
-
-                ADDR_KEY0:
-                  begin
-                    tmp_read_data = key0_reg;
-                  end
-
-                ADDR_KEY1:
-                  begin
-                    tmp_read_data = key1_reg;
-                  end
-
-                ADDR_KEY2:
-                  begin
-                    tmp_read_data = key2_reg;
-                  end
-
-                ADDR_KEY3:
-                  begin
-                    tmp_read_data = key3_reg;
-                  end
-
-                ADDR_KEY4:
-                  begin
-                    tmp_read_data = key4_reg;
-                  end
-
-                ADDR_KEY5:
-                  begin
-                    tmp_read_data = key5_reg;
-                  end
-
-                ADDR_KEY6:
-                  begin
-                    tmp_read_data = key6_reg;
-                  end
-
-                ADDR_KEY7:
-                  begin
-                    tmp_read_data = key7_reg;
-                  end
-
-                ADDR_IV0:
-                  begin
-                    tmp_read_data = iv0_reg;
-                  end
-
-                ADDR_IV1:
-                  begin
-                    tmp_read_data = iv1_reg;
-                  end
-
-                ADDR_DATA_OUT0:
-                  begin
-                    tmp_read_data = data_out0_reg;
-                  end
-
-                ADDR_DATA_OUT1:
-                  begin
-                    tmp_read_data = data_out1_reg;
-                  end
-
-                ADDR_DATA_OUT2:
-                  begin
-                    tmp_read_data = data_out2_reg;
-                  end
-
-                ADDR_DATA_OUT3:
-                  begin
-                    tmp_read_data = data_out3_reg;
-                  end
-
-                ADDR_DATA_OUT4:
-                  begin
-                    tmp_read_data = data_out4_reg;
-                  end
-
-                ADDR_DATA_OUT5:
-                  begin
-                    tmp_read_data = data_out5_reg;
-                  end
-
-                ADDR_DATA_OUT6:
-                  begin
-                    tmp_read_data = data_out6_reg;
-                  end
-
-                ADDR_DATA_OUT7:
-                  begin
-                    tmp_read_data = data_out7_reg;
-                  end
-
-                ADDR_DATA_OUT8:
-                  begin
-                    tmp_read_data = data_out8_reg;
-                  end
-
-                ADDR_DATA_OUT9:
-                  begin
-                    tmp_read_data = data_out9_reg;
-                  end
-
-                ADDR_DATA_OUT10:
-                  begin
-                    tmp_read_data = data_out10_reg;
-                  end
-
-                ADDR_DATA_OUT11:
-                  begin
-                    tmp_read_data = data_out11_reg;
-                  end
-
-                ADDR_DATA_OUT12:
-                  begin
-                    tmp_read_data = data_out12_reg;
-                  end
-
-                ADDR_DATA_OUT13:
-                  begin
-                    tmp_read_data = data_out13_reg;
-                  end
-
-                ADDR_DATA_OUT14:
-                  begin
-                    tmp_read_data = data_out14_reg;
-                  end
-
-                ADDR_DATA_OUT15:
-                  begin
-                    tmp_read_data = data_out15_reg;
-                  end
+              case (address)
+                ADDR_NAME0:   tmp_read_data = CORE_NAME0;
+                ADDR_NAME1:   tmp_read_data = CORE_NAME1;
+                ADDR_VERSION: tmp_read_data = CORE_VERSION;
+                ADDR_CTRL:    tmp_read_data = {30'h0, next_reg, init_reg};
+                ADDR_STATUS:  tmp_read_data = {30'h0, core_data_out_valid, core_ready};
+                ADDR_KEYLEN:  tmp_read_data = {31'h0, keylen_reg};
+                ADDR_ROUNDS:  tmp_read_data = {27'h0, rounds_reg};
+                ADDR_IV0:     tmp_read_data = iv_reg[0];
+                ADDR_IV1:     tmp_read_data = iv_reg[1];
 
                 default:
                   begin
-                    tmp_error = 1;
                   end
               endcase // case (address)
             end
diff --git a/src/rtl/chacha_core.v b/src/rtl/chacha_core.v
index 0e1158b..5f496a4 100644
--- a/src/rtl/chacha_core.v
+++ b/src/rtl/chacha_core.v
@@ -7,7 +7,7 @@
 //
 //
 // Author: Joachim Strombergson
-// Copyright (c) 2014, NORDUnet A/S All rights reserved.
+// Copyright (c) 2011, NORDUnet A/S All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -63,202 +63,89 @@ module chacha_core(
   // Internal constant and parameter definitions.
   //----------------------------------------------------------------
   // Datapath quartterround states names.
-  parameter STATE_QR0 = 1'b0;
-  parameter STATE_QR1 = 1'b1;
+  localparam QR0 = 0;
+  localparam QR1 = 1;
 
-  parameter NUM_ROUNDS = 4'h8;
+  localparam NUM_ROUNDS = 4'h8;
 
-  parameter TAU0 = 32'h61707865;
-  parameter TAU1 = 32'h3120646e;
-  parameter TAU2 = 32'h79622d36;
-  parameter TAU3 = 32'h6b206574;
+  localparam TAU0 = 32'h61707865;
+  localparam TAU1 = 32'h3120646e;
+  localparam TAU2 = 32'h79622d36;
+  localparam TAU3 = 32'h6b206574;
 
-  parameter SIGMA0 = 32'h61707865;
-  parameter SIGMA1 = 32'h3320646e;
-  parameter SIGMA2 = 32'h79622d32;
-  parameter SIGMA3 = 32'h6b206574;
+  localparam SIGMA0 = 32'h61707865;
+  localparam SIGMA1 = 32'h3320646e;
+  localparam SIGMA2 = 32'h79622d32;
+  localparam SIGMA3 = 32'h6b206574;
 
-  parameter CTRL_IDLE     = 3'h0;
-  parameter CTRL_INIT     = 3'h1;
-  parameter CTRL_ROUNDS   = 3'h2;
-  parameter CTRL_FINALIZE = 3'h3;
-  parameter CTRL_DONE     = 3'h4;
+  localparam CTRL_IDLE     = 3'h0;
+  localparam CTRL_INIT     = 3'h1;
+  localparam CTRL_ROUNDS   = 3'h2;
+  localparam CTRL_FINALIZE = 3'h3;
+  localparam CTRL_DONE     = 3'h4;
+
+
+  //----------------------------------------------------------------
+  // l2b()
+  //
+  // Swap bytes from little to big endian byte order.
+  //----------------------------------------------------------------
+  function [31 : 0] l2b(input [31 : 0] op);
+    begin
+      l2b = {op[7 : 0], op[15 : 8], op[23 : 16], op[31 : 24]};
+    end
+  endfunction // l2b
 
 
   //----------------------------------------------------------------
   // Registers including update variables and write enable.
   //----------------------------------------------------------------
-  reg [31 : 0] key0_reg;
-  reg [31 : 0] key0_new;
-  reg [31 : 0] key1_reg;
-  reg [31 : 0] key1_new;
-  reg [31 : 0] key2_reg;
-  reg [31 : 0] key2_new;
-  reg [31 : 0] key3_reg;
-  reg [31 : 0] key3_new;
-  reg [31 : 0] key4_reg;
-  reg [31 : 0] key4_new;
-  reg [31 : 0] key5_reg;
-  reg [31 : 0] key5_new;
-  reg [31 : 0] key6_reg;
-  reg [31 : 0] key6_new;
-  reg [31 : 0] key7_reg;
-  reg [31 : 0] key7_new;
-
-  reg keylen_reg;
-  reg keylen_new;
-
-  reg [31 : 0] iv0_reg;
-  reg [31 : 0] iv0_new;
-  reg [31 : 0] iv1_reg;
-  reg [31 : 0] iv1_new;
-
-  reg [31 : 0] state0_reg;
-  reg [31 : 0] state0_new;
-  reg [31 : 0] state1_reg;
-  reg [31 : 0] state1_new;
-  reg [31 : 0] state2_reg;
-  reg [31 : 0] state2_new;
-  reg [31 : 0] state3_reg;
-  reg [31 : 0] state3_new;
-  reg [31 : 0] state4_reg;
-  reg [31 : 0] state4_new;
-  reg [31 : 0] state5_reg;
-  reg [31 : 0] state5_new;
-  reg [31 : 0] state6_reg;
-  reg [31 : 0] state6_new;
-  reg [31 : 0] state7_reg;
-  reg [31 : 0] state7_new;
-  reg [31 : 0] state8_reg;
-  reg [31 : 0] state8_new;
-  reg [31 : 0] state9_reg;
-  reg [31 : 0] state9_new;
-  reg [31 : 0] state10_reg;
-  reg [31 : 0] state10_new;
-  reg [31 : 0] state11_reg;
-  reg [31 : 0] state11_new;
-  reg [31 : 0] state12_reg;
-  reg [31 : 0] state12_new;
-  reg [31 : 0] state13_reg;
-  reg [31 : 0] state13_new;
-  reg [31 : 0] state14_reg;
-  reg [31 : 0] state14_new;
-  reg [31 : 0] state15_reg;
-  reg [31 : 0] state15_new;
-  reg state_we;
-
-  reg [31 : 0] x0_reg;
-  reg [31 : 0] x0_new;
-  reg          x0_we;
-
-  reg [31 : 0] x1_reg;
-  reg [31 : 0] x1_new;
-  reg          x1_we;
-
-  reg [31 : 0] x2_reg;
-  reg [31 : 0] x2_new;
-  reg          x2_we;
-
-  reg [31 : 0] x3_reg;
-  reg [31 : 0] x3_new;
-  reg          x3_we;
-
-  reg [31 : 0] x4_reg;
-  reg [31 : 0] x4_new;
-  reg          x4_we;
-
-  reg [31 : 0] x5_reg;
-  reg [31 : 0] x5_new;
-  reg          x5_we;
-
-  reg [31 : 0] x6_reg;
-  reg [31 : 0] x6_new;
-  reg          x6_we;
-
-  reg [31 : 0] x7_reg;
-  reg [31 : 0] x7_new;
-  reg          x7_we;
-
-  reg [31 : 0] x8_reg;
-  reg [31 : 0] x8_new;
-  reg          x8_we;
-
-  reg [31 : 0] x9_reg;
-  reg [31 : 0] x9_new;
-  reg          x9_we;
-
-  reg [31 : 0] x10_reg;
-  reg [31 : 0] x10_new;
-  reg          x10_we;
-
-  reg [31 : 0] x11_reg;
-  reg [31 : 0] x11_new;
-  reg          x11_we;
-
-  reg [31 : 0] x12_reg;
-  reg [31 : 0] x12_new;
-  reg          x12_we;
-
-  reg [31 : 0] x13_reg;
-  reg [31 : 0] x13_new;
-  reg          x13_we;
-
-  reg [31 : 0] x14_reg;
-  reg [31 : 0] x14_new;
-  reg          x14_we;
-
-  reg [31 : 0] x15_reg;
-  reg [31 : 0] x15_new;
-  reg          x15_we;
-
-  reg [3 : 0] rounds_reg;
-  reg [3 : 0] rounds_new;
-
-  reg [511 : 0] data_in_reg;
-  reg           data_in_we;
+  reg [31 : 0]  state_reg [0 : 15];
+  reg [31 : 0]  state_new [0 : 15];
+  reg           state_we;
 
   reg [511 : 0] data_out_reg;
   reg [511 : 0] data_out_new;
-  reg           data_out_we;
-
-  reg  data_out_valid_reg;
-  reg  data_out_valid_new;
-  reg  data_out_valid_we;
-
-  reg  ready_reg;
-  reg  ready_new;
-  reg  ready_we;
-
-  reg         qr_ctr_reg;
-  reg         qr_ctr_new;
-  reg         qr_ctr_we;
-  reg         qr_ctr_inc;
-  reg         qr_ctr_rst;
-
-  reg [3 : 0] dr_ctr_reg;
-  reg [3 : 0] dr_ctr_new;
-  reg         dr_ctr_we;
-  reg         dr_ctr_inc;
-  reg         dr_ctr_rst;
-
-  reg [31 : 0] block0_ctr_reg;
-  reg [31 : 0] block0_ctr_new;
-  reg          block0_ctr_we;
-  reg [31 : 0] block1_ctr_reg;
-  reg [31 : 0] block1_ctr_new;
-  reg          block1_ctr_we;
-  reg          block_ctr_inc;
-  reg          block_ctr_rst;
-
-  reg [2 : 0] chacha_ctrl_reg;
-  reg [2 : 0] chacha_ctrl_new;
-  reg         chacha_ctrl_we;
+
+  reg           data_out_valid_reg;
+  reg           data_out_valid_new;
+  reg           data_out_valid_we;
+
+  reg           qr_ctr_reg;
+  reg           qr_ctr_new;
+  reg           qr_ctr_we;
+  reg           qr_ctr_inc;
+  reg           qr_ctr_rst;
+
+  reg [3 : 0]   dr_ctr_reg;
+  reg [3 : 0]   dr_ctr_new;
+  reg           dr_ctr_we;
+  reg           dr_ctr_inc;
+  reg           dr_ctr_rst;
+
+  reg [31 : 0]  block0_ctr_reg;
+  reg [31 : 0]  block0_ctr_new;
+  reg           block0_ctr_we;
+  reg [31 : 0]  block1_ctr_reg;
+  reg [31 : 0]  block1_ctr_new;
+  reg           block1_ctr_we;
+  reg           block_ctr_inc;
+  reg           block_ctr_set;
+
+  reg           ready_reg;
+  reg           ready_new;
+  reg           ready_we;
+
+  reg [2 : 0]   chacha_ctrl_reg;
+  reg [2 : 0]   chacha_ctrl_new;
+  reg           chacha_ctrl_we;
 
 
   //----------------------------------------------------------------
   // Wires.
   //----------------------------------------------------------------
-  reg sample_params;
+  reg [31 : 0] init_state_word [0 : 15];
+
   reg init_state;
   reg update_state;
   reg update_output;
@@ -356,455 +243,127 @@ module chacha_core(
   // Concurrent connectivity for ports etc.
   //----------------------------------------------------------------
   assign data_out = data_out_reg;
-
   assign data_out_valid = data_out_valid_reg;
-
   assign ready = ready_reg;
 
 
-
   //----------------------------------------------------------------
   // reg_update
   //
   // Update functionality for all registers in the core.
-  // All registers are positive edge triggered with asynchronous
+  // All registers are positive edge triggered with synchronous
   // active low reset. All registers have write enable.
   //----------------------------------------------------------------
-  always @ (posedge clk or negedge reset_n)
+  always @ (posedge clk)
     begin : reg_update
+     integer i;
+
       if (!reset_n)
         begin
-          key0_reg           <= 32'h00000000;
-          key1_reg           <= 32'h00000000;
-          key2_reg           <= 32'h00000000;
-          key3_reg           <= 32'h00000000;
-          key4_reg           <= 32'h00000000;
-          key5_reg           <= 32'h00000000;
-          key6_reg           <= 32'h00000000;
-          key7_reg           <= 32'h00000000;
-          iv0_reg            <= 32'h00000000;
-          iv1_reg            <= 32'h00000000;
-          state0_reg         <= 32'h00000000;
-          state1_reg         <= 32'h00000000;
-          state2_reg         <= 32'h00000000;
-          state3_reg         <= 32'h00000000;
-          state4_reg         <= 32'h00000000;
-          state5_reg         <= 32'h00000000;
-          state6_reg         <= 32'h00000000;
-          state7_reg         <= 32'h00000000;
-          state8_reg         <= 32'h00000000;
-          state9_reg         <= 32'h00000000;
-          state10_reg        <= 32'h00000000;
-          state11_reg        <= 32'h00000000;
-          state12_reg        <= 32'h00000000;
-          state13_reg        <= 32'h00000000;
-          state14_reg        <= 32'h00000000;
-          state15_reg        <= 32'h00000000;
-          x0_reg             <= 32'h00000000;
-          x1_reg             <= 32'h00000000;
-          x2_reg             <= 32'h00000000;
-          x3_reg             <= 32'h00000000;
-          x4_reg             <= 32'h00000000;
-          x5_reg             <= 32'h00000000;
-          x6_reg             <= 32'h00000000;
-          x7_reg             <= 32'h00000000;
-          x8_reg             <= 32'h00000000;
-          x9_reg             <= 32'h00000000;
-          x10_reg            <= 32'h00000000;
-          x11_reg            <= 32'h00000000;
-          x12_reg            <= 32'h00000000;
-          x13_reg            <= 32'h00000000;
-          x14_reg            <= 32'h00000000;
-          x15_reg            <= 32'h00000000;
-          data_in_reg        <= 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000;
-          data_out_reg       <= 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000;
-          rounds_reg         <= 4'h0;
-          ready_reg          <= 1;
+          for (i = 0 ; i < 16 ; i = i + 1)
+            state_reg[i] <= 32'h0;
+
+          data_out_reg       <= 512'h0;
           data_out_valid_reg <= 0;
-          qr_ctr_reg         <= STATE_QR0;
+          qr_ctr_reg         <= QR0;
           dr_ctr_reg         <= 0;
-          block0_ctr_reg     <= 32'h00000000;
-          block1_ctr_reg     <= 32'h00000000;
+          block0_ctr_reg     <= 32'h0;
+          block1_ctr_reg     <= 32'h0;
           chacha_ctrl_reg    <= CTRL_IDLE;
+          ready_reg          <= 1;
         end
       else
         begin
-          if (sample_params)
-            begin
-              key0_reg   <= key0_new;
-              key1_reg   <= key1_new;
-              key2_reg   <= key2_new;
-              key3_reg   <= key3_new;
-              key4_reg   <= key4_new;
-              key5_reg   <= key5_new;
-              key6_reg   <= key6_new;
-              key7_reg   <= key7_new;
-              iv0_reg    <= iv0_new;
-              iv1_reg    <= iv1_new;
-              rounds_reg <= rounds_new;
-              keylen_reg <= keylen_new;
-            end
-
-          if (data_in_we)
-            begin
-              data_in_reg <= data_in;
-            end
-
           if (state_we)
             begin
-              state0_reg  <= state0_new;
-              state1_reg  <= state1_new;
-              state2_reg  <= state2_new;
-              state3_reg  <= state3_new;
-              state4_reg  <= state4_new;
-              state5_reg  <= state5_new;
-              state6_reg  <= state6_new;
-              state7_reg  <= state7_new;
-              state8_reg  <= state8_new;
-              state9_reg  <= state9_new;
-              state10_reg <= state10_new;
-              state11_reg <= state11_new;
-              state12_reg <= state12_new;
-              state13_reg <= state13_new;
-              state14_reg <= state14_new;
-              state15_reg <= state15_new;
-            end
-
-          if (x0_we)
-            begin
-              x0_reg <= x0_new;
-            end
-
-          if (x1_we)
-            begin
-              x1_reg <= x1_new;
-            end
-
-          if (x2_we)
-            begin
-              x2_reg <= x2_new;
-            end
-
-          if (x3_we)
-            begin
-              x3_reg <= x3_new;
-            end
-
-          if (x4_we)
-            begin
-              x4_reg <= x4_new;
-            end
-
-          if (x5_we)
-            begin
-              x5_reg <= x5_new;
-            end
-
-          if (x6_we)
-            begin
-              x6_reg <= x6_new;
-            end
-
-          if (x7_we)
-            begin
-              x7_reg <= x7_new;
-            end
-
-          if (x8_we)
-            begin
-              x8_reg <= x8_new;
-            end
-
-          if (x9_we)
-            begin
-              x9_reg <= x9_new;
-            end
-
-          if (x10_we)
-            begin
-              x10_reg <= x10_new;
-            end
-
-          if (x11_we)
-            begin
-              x11_reg <= x11_new;
-            end
-
-          if (x12_we)
-            begin
-              x12_reg <= x12_new;
+              for (i = 0 ; i < 16 ; i = i + 1)
+                state_reg[i] <= state_new[i];
             end
 
-          if (x13_we)
-            begin
-              x13_reg <= x13_new;
-            end
-
-          if (x14_we)
-            begin
-              x14_reg <= x14_new;
-            end
-
-          if (x15_we)
-            begin
-              x15_reg <= x15_new;
-            end
-
-          if (data_out_we)
-            begin
-              data_out_reg <= data_out_new;
-            end
-
-          if (ready_we)
-            begin
-              ready_reg <= ready_new;
-            end
+          if (update_output)
+            data_out_reg <= data_out_new;
 
           if (data_out_valid_we)
-            begin
-              data_out_valid_reg <= data_out_valid_new;
-            end
+            data_out_valid_reg <= data_out_valid_new;
 
           if (qr_ctr_we)
-            begin
-              qr_ctr_reg <= qr_ctr_new;
-            end
+            qr_ctr_reg <= qr_ctr_new;
 
           if (dr_ctr_we)
-            begin
-              dr_ctr_reg <= dr_ctr_new;
-            end
+            dr_ctr_reg <= dr_ctr_new;
 
           if (block0_ctr_we)
-            begin
-              block0_ctr_reg <= block0_ctr_new;
-            end
+            block0_ctr_reg <= block0_ctr_new;
 
           if (block1_ctr_we)
-            begin
-              block1_ctr_reg <= block1_ctr_new;
-            end
+            block1_ctr_reg <= block1_ctr_new;
+
+          if (ready_we)
+            ready_reg <= ready_new;
 
           if (chacha_ctrl_we)
-            begin
-              chacha_ctrl_reg <= chacha_ctrl_new;
-            end
+            chacha_ctrl_reg <= chacha_ctrl_new;
         end
     end // reg_update
 
 
   //----------------------------------------------------------------
-  // data_out_logic
-  // Final output logic that combines the result from procceing
-  // with the input word. This adds a final layer of XOR gates.
+  // init_state_logic
   //
-  // Note that we also remap all the words into LSB format.
+  // Calculates the initial state for a given block.
   //----------------------------------------------------------------
   always @*
-    begin : data_out_logic
-      reg [31 : 0]  msb_block_state0;
-      reg [31 : 0]  msb_block_state1;
-      reg [31 : 0]  msb_block_state2;
-      reg [31 : 0]  msb_block_state3;
-      reg [31 : 0]  msb_block_state4;
-      reg [31 : 0]  msb_block_state5;
-      reg [31 : 0]  msb_block_state6;
-      reg [31 : 0]  msb_block_state7;
-      reg [31 : 0]  msb_block_state8;
-      reg [31 : 0]  msb_block_state9;
-      reg [31 : 0]  msb_block_state10;
-      reg [31 : 0]  msb_block_state11;
-      reg [31 : 0]  msb_block_state12;
-      reg [31 : 0]  msb_block_state13;
-      reg [31 : 0]  msb_block_state14;
-      reg [31 : 0]  msb_block_state15;
-
-      reg [31 : 0]  lsb_block_state0;
-      reg [31 : 0]  lsb_block_state1;
-      reg [31 : 0]  lsb_block_state2;
-      reg [31 : 0]  lsb_block_state3;
-      reg [31 : 0]  lsb_block_state4;
-      reg [31 : 0]  lsb_block_state5;
-      reg [31 : 0]  lsb_block_state6;
-      reg [31 : 0]  lsb_block_state7;
-      reg [31 : 0]  lsb_block_state8;
-      reg [31 : 0]  lsb_block_state9;
-      reg [31 : 0]  lsb_block_state10;
-      reg [31 : 0]  lsb_block_state11;
-      reg [31 : 0]  lsb_block_state12;
-      reg [31 : 0]  lsb_block_state13;
-      reg [31 : 0]  lsb_block_state14;
-      reg [31 : 0]  lsb_block_state15;
-
-      reg [511 : 0] lsb_block_state;
-
-      lsb_block_state = 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000;
-
-      data_out_new = 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000;
-      data_out_we = 0;
-
-      if (update_output)
+    begin : init_state_logic
+      reg [31 : 0] key0;
+      reg [31 : 0] key1;
+      reg [31 : 0] key2;
+      reg [31 : 0] key3;
+      reg [31 : 0] key4;
+      reg [31 : 0] key5;
+      reg [31 : 0] key6;
+      reg [31 : 0] key7;
+
+      key0 = l2b(key[255 : 224]);
+      key1 = l2b(key[223 : 192]);
+      key2 = l2b(key[191 : 160]);
+      key3 = l2b(key[159 : 128]);
+      key4 = l2b(key[127 :  96]);
+      key5 = l2b(key[95  :  64]);
+      key6 = l2b(key[63  :  32]);
+      key7 = l2b(key[31  :   0]);
+
+      init_state_word[04] = key0;
+      init_state_word[05] = key1;
+      init_state_word[06] = key2;
+      init_state_word[07] = key3;
+      init_state_word[12] = block0_ctr_reg;
+      init_state_word[13] = block1_ctr_reg;
+      init_state_word[14] = l2b(iv[63 : 32]);
+      init_state_word[15] = l2b(iv[31 :  0]);
+
+      if (keylen)
         begin
-          msb_block_state0  = state0_reg  + x0_reg;
-          msb_block_state1  = state1_reg  + x1_reg;
-          msb_block_state2  = state2_reg  + x2_reg;
-          msb_block_state3  = state3_reg  + x3_reg;
-          msb_block_state4  = state4_reg  + x4_reg;
-          msb_block_state5  = state5_reg  + x5_reg;
-          msb_block_state6  = state6_reg  + x6_reg;
-          msb_block_state7  = state7_reg  + x7_reg;
-          msb_block_state8  = state8_reg  + x8_reg;
-          msb_block_state9  = state9_reg  + x9_reg;
-          msb_block_state10 = state10_reg + x10_reg;
-          msb_block_state11 = state11_reg + x11_reg;
-          msb_block_state12 = state12_reg + x12_reg;
-          msb_block_state13 = state13_reg + x13_reg;
-          msb_block_state14 = state14_reg + x14_reg;
-          msb_block_state15 = state15_reg + x15_reg;
-
-          lsb_block_state0 = {msb_block_state0[7  :  0],
-                              msb_block_state0[15 :  8],
-                              msb_block_state0[23 : 16],
-                              msb_block_state0[31 : 24]};
-
-          lsb_block_state1 = {msb_block_state1[7  :  0],
-                              msb_block_state1[15 :  8],
-                              msb_block_state1[23 : 16],
-                              msb_block_state1[31 : 24]};
-
-          lsb_block_state2 = {msb_block_state2[7  :  0],
-                              msb_block_state2[15 :  8],
-                              msb_block_state2[23 : 16],
-                              msb_block_state2[31 : 24]};
-
-          lsb_block_state3 = {msb_block_state3[7  :  0],
-                              msb_block_state3[15 :  8],
-                              msb_block_state3[23 : 16],
-                              msb_block_state3[31 : 24]};
-
-          lsb_block_state4 = {msb_block_state4[7  :  0],
-                              msb_block_state4[15 :  8],
-                              msb_block_state4[23 : 16],
-                              msb_block_state4[31 : 24]};
-
-          lsb_block_state5 = {msb_block_state5[7  :  0],
-                              msb_block_state5[15 :  8],
-                              msb_block_state5[23 : 16],
-                              msb_block_state5[31 : 24]};
-
-          lsb_block_state6 = {msb_block_state6[7  :  0],
-                              msb_block_state6[15 :  8],
-                              msb_block_state6[23 : 16],
-                              msb_block_state6[31 : 24]};
-
-          lsb_block_state7 = {msb_block_state7[7  :  0],
-                              msb_block_state7[15 :  8],
-                              msb_block_state7[23 : 16],
-                              msb_block_state7[31 : 24]};
-
-          lsb_block_state8 = {msb_block_state8[7  :  0],
-                              msb_block_state8[15 :  8],
-                              msb_block_state8[23 : 16],
-                              msb_block_state8[31 : 24]};
-
-          lsb_block_state9 = {msb_block_state9[7  :  0],
-                              msb_block_state9[15 :  8],
-                              msb_block_state9[23 : 16],
-                              msb_block_state9[31 : 24]};
-
-          lsb_block_state10 = {msb_block_state10[7  :  0],
-                               msb_block_state10[15 :  8],
-                               msb_block_state10[23 : 16],
-                               msb_block_state10[31 : 24]};
-
-          lsb_block_state11 = {msb_block_state11[7  :  0],
-                               msb_block_state11[15 :  8],
-                               msb_block_state11[23 : 16],
-                               msb_block_state11[31 : 24]};
-
-          lsb_block_state12 = {msb_block_state12[7  :  0],
-                               msb_block_state12[15 :  8],
-                               msb_block_state12[23 : 16],
-                               msb_block_state12[31 : 24]};
-
-          lsb_block_state13 = {msb_block_state13[7  :  0],
-                               msb_block_state13[15 :  8],
-                               msb_block_state13[23 : 16],
-                               msb_block_state13[31 : 24]};
-
-          lsb_block_state14 = {msb_block_state14[7  :  0],
-                               msb_block_state14[15 :  8],
-                               msb_block_state14[23 : 16],
-                               msb_block_state14[31 : 24]};
-
-          lsb_block_state15 = {msb_block_state15[7  :  0],
-                               msb_block_state15[15 :  8],
-                               msb_block_state15[23 : 16],
-                               msb_block_state15[31 : 24]};
-
-          lsb_block_state = {lsb_block_state0,  lsb_block_state1,
-                             lsb_block_state2,  lsb_block_state3,
-                             lsb_block_state4,  lsb_block_state5,
-                             lsb_block_state6,  lsb_block_state7,
-                             lsb_block_state8,  lsb_block_state9,
-                             lsb_block_state10, lsb_block_state11,
-                             lsb_block_state12, lsb_block_state13,
-                             lsb_block_state14, lsb_block_state15};
-
-          data_out_new = data_in_reg ^ lsb_block_state;
-          data_out_we   = 1;
-        end // if (update_output)
-    end // data_out_logic
-
-
-  //----------------------------------------------------------------
-  // sample_parameters
-  // Logic (wires) that convert parameter input to appropriate
-  // format for processing.
-  //----------------------------------------------------------------
-  always @*
-    begin : sample_parameters
-      key0_new   = 32'h00000000;
-      key1_new   = 32'h00000000;
-      key2_new   = 32'h00000000;
-      key3_new   = 32'h00000000;
-      key4_new   = 32'h00000000;
-      key5_new   = 32'h00000000;
-      key6_new   = 32'h00000000;
-      key7_new   = 32'h00000000;
-      iv0_new    = 32'h00000000;
-      iv1_new    = 32'h00000000;
-      rounds_new = 4'h0;
-      keylen_new = 1'b0;
-
-      if (sample_params)
+          // 256 bit key.
+          init_state_word[00] = SIGMA0;
+          init_state_word[01] = SIGMA1;
+          init_state_word[02] = SIGMA2;
+          init_state_word[03] = SIGMA3;
+          init_state_word[08] = key4;
+          init_state_word[09] = key5;
+          init_state_word[10] = key6;
+          init_state_word[11] = key7;
+        end
+      else
         begin
-          key0_new = {key[231 : 224], key[239 : 232],
-                      key[247 : 240], key[255 : 248]};
-          key1_new = {key[199 : 192], key[207 : 200],
-                      key[215 : 208], key[223 : 216]};
-          key2_new = {key[167 : 160], key[175 : 168],
-                      key[183 : 176], key[191 : 184]};
-          key3_new = {key[135 : 128], key[143 : 136],
-                      key[151 : 144], key[159 : 152]};
-          key4_new = {key[103 :  96], key[111 : 104],
-                      key[119 : 112], key[127 : 120]};
-          key5_new = {key[71  :  64], key[79  :  72],
-                      key[87  :  80], key[95  :  88]};
-          key6_new = {key[39  :  32], key[47  :  40],
-                      key[55  :  48], key[63  :  56]};
-          key7_new = {key[7   :   0], key[15  :   8],
-                      key[23  :  16], key[31  :  24]};
-
-          iv0_new = {iv[39  :  32], iv[47  :  40],
-                     iv[55  :  48], iv[63  :  56]};
-          iv1_new = {iv[7   :   0], iv[15  :   8],
-                     iv[23  :  16], iv[31  :  24]};
-
-          // Div by two since we count double rounds.
-          rounds_new = rounds[4 : 1];
-
-          keylen_new = keylen;
+          // 128 bit key.
+          init_state_word[00] = TAU0;
+          init_state_word[01] = TAU1;
+          init_state_word[02] = TAU2;
+          init_state_word[03] = TAU3;
+          init_state_word[08] = key0;
+          init_state_word[09] = key1;
+          init_state_word[10] = key2;
+          init_state_word[11] = key3;
         end
     end
 
@@ -815,260 +374,110 @@ module chacha_core(
   //----------------------------------------------------------------
   always @*
     begin : state_logic
-      reg [31 : 0] new_state_word0;
-      reg [31 : 0] new_state_word1;
-      reg [31 : 0] new_state_word2;
-      reg [31 : 0] new_state_word3;
-      reg [31 : 0] new_state_word4;
-      reg [31 : 0] new_state_word5;
-      reg [31 : 0] new_state_word6;
-      reg [31 : 0] new_state_word7;
-      reg [31 : 0] new_state_word8;
-      reg [31 : 0] new_state_word9;
-      reg [31 : 0] new_state_word10;
-      reg [31 : 0] new_state_word11;
-      reg [31 : 0] new_state_word12;
-      reg [31 : 0] new_state_word13;
-      reg [31 : 0] new_state_word14;
-      reg [31 : 0] new_state_word15;
-
-      new_state_word0  = 32'h00000000;
-      new_state_word1  = 32'h00000000;
-      new_state_word2  = 32'h00000000;
-      new_state_word3  = 32'h00000000;
-      new_state_word4  = 32'h00000000;
-      new_state_word5  = 32'h00000000;
-      new_state_word6  = 32'h00000000;
-      new_state_word7  = 32'h00000000;
-      new_state_word8  = 32'h00000000;
-      new_state_word9  = 32'h00000000;
-      new_state_word10 = 32'h00000000;
-      new_state_word11 = 32'h00000000;
-      new_state_word12 = 32'h00000000;
-      new_state_word13 = 32'h00000000;
-      new_state_word14 = 32'h00000000;
-      new_state_word15 = 32'h00000000;
-
-      x0_new  = 32'h00000000;
-      x1_new  = 32'h00000000;
-      x2_new  = 32'h00000000;
-      x3_new  = 32'h00000000;
-      x4_new  = 32'h00000000;
-      x5_new  = 32'h00000000;
-      x6_new  = 32'h00000000;
-      x7_new  = 32'h00000000;
-      x8_new  = 32'h00000000;
-      x9_new  = 32'h00000000;
-      x10_new = 32'h00000000;
-      x11_new = 32'h00000000;
-      x12_new = 32'h00000000;
-      x13_new = 32'h00000000;
-      x14_new = 32'h00000000;
-      x15_new = 32'h00000000;
-      x0_we   = 0;
-      x1_we   = 0;
-      x2_we   = 0;
-      x3_we   = 0;
-      x4_we   = 0;
-      x5_we   = 0;
-      x6_we   = 0;
-      x7_we   = 0;
-      x8_we   = 0;
-      x9_we   = 0;
-      x10_we  = 0;
-      x11_we  = 0;
-      x12_we  = 0;
-      x13_we  = 0;
-      x14_we  = 0;
-      x15_we  = 0;
-
-      state0_new  = 32'h00000000;
-      state1_new  = 32'h00000000;
-      state2_new  = 32'h00000000;
-      state3_new  = 32'h00000000;
-      state4_new  = 32'h00000000;
-      state5_new  = 32'h00000000;
-      state6_new  = 32'h00000000;
-      state7_new  = 32'h00000000;
-      state8_new  = 32'h00000000;
-      state9_new  = 32'h00000000;
-      state10_new = 32'h00000000;
-      state11_new = 32'h00000000;
-      state12_new = 32'h00000000;
-      state13_new = 32'h00000000;
-      state14_new = 32'h00000000;
-      state15_new = 32'h00000000;
+      integer i;
+
+      for (i = 0 ; i < 16 ; i = i + 1)
+        state_new[i] = 32'h0;
       state_we = 0;
 
+      qr0_a = 32'h0;
+      qr0_b = 32'h0;
+      qr0_c = 32'h0;
+      qr0_d = 32'h0;
+      qr1_a = 32'h0;
+      qr1_b = 32'h0;
+      qr1_c = 32'h0;
+      qr1_d = 32'h0;
+      qr2_a = 32'h0;
+      qr2_b = 32'h0;
+      qr2_c = 32'h0;
+      qr2_d = 32'h0;
+      qr3_a = 32'h0;
+      qr3_b = 32'h0;
+      qr3_c = 32'h0;
+      qr3_d = 32'h0;
+
       if (init_state)
         begin
-          new_state_word4  = key0_reg;
-          new_state_word5  = key1_reg;
-          new_state_word6  = key2_reg;
-          new_state_word7  = key3_reg;
-
-          new_state_word12 = block0_ctr_reg;
-          new_state_word13 = block1_ctr_reg;
-
-          new_state_word14 = iv0_reg;
-          new_state_word15 = iv1_reg;
-
-          if (keylen_reg)
-            begin
-              // 256 bit key.
-              new_state_word0  = SIGMA0;
-              new_state_word1  = SIGMA1;
-              new_state_word2  = SIGMA2;
-              new_state_word3  = SIGMA3;
-              new_state_word8  = key4_reg;
-              new_state_word9  = key5_reg;
-              new_state_word10 = key6_reg;
-              new_state_word11 = key7_reg;
-            end
-          else
-            begin
-              // 128 bit key.
-              new_state_word0  = TAU0;
-              new_state_word1  = TAU1;
-              new_state_word2  = TAU2;
-              new_state_word3  = TAU3;
-              new_state_word8  = key0_reg;
-              new_state_word9  = key1_reg;
-              new_state_word10 = key2_reg;
-              new_state_word11 = key3_reg;
-            end
-
-          x0_new  = new_state_word0;
-          x1_new  = new_state_word1;
-          x2_new  = new_state_word2;
-          x3_new  = new_state_word3;
-          x4_new  = new_state_word4;
-          x5_new  = new_state_word5;
-          x6_new  = new_state_word6;
-          x7_new  = new_state_word7;
-          x8_new  = new_state_word8;
-          x9_new  = new_state_word9;
-          x10_new = new_state_word10;
-          x11_new = new_state_word11;
-          x12_new = new_state_word12;
-          x13_new = new_state_word13;
-          x14_new = new_state_word14;
-          x15_new = new_state_word15;
-          x0_we  = 1;
-          x1_we  = 1;
-          x2_we  = 1;
-          x3_we  = 1;
-          x4_we  = 1;
-          x5_we  = 1;
-          x6_we  = 1;
-          x7_we  = 1;
-          x8_we  = 1;
-          x9_we  = 1;
-          x10_we = 1;
-          x11_we = 1;
-          x12_we = 1;
-          x13_we = 1;
-          x14_we = 1;
-          x15_we = 1;
-
-          state0_new  = new_state_word0;
-          state1_new  = new_state_word1;
-          state2_new  = new_state_word2;
-          state3_new  = new_state_word3;
-          state4_new  = new_state_word4;
-          state5_new  = new_state_word5;
-          state6_new  = new_state_word6;
-          state7_new  = new_state_word7;
-          state8_new  = new_state_word8;
-          state9_new  = new_state_word9;
-          state10_new = new_state_word10;
-          state11_new = new_state_word11;
-          state12_new = new_state_word12;
-          state13_new = new_state_word13;
-          state14_new = new_state_word14;
-          state15_new = new_state_word15;
-          state_we = 1;
+          for (i = 0 ; i < 16 ; i = i + 1)
+            state_new[i] = init_state_word[i];
+          state_we   = 1;
         end // if (init_state)
 
-      else if (update_state)
+      if (update_state)
         begin
+          state_we = 1;
           case (qr_ctr_reg)
-            STATE_QR0:
+            QR0:
               begin
-                x0_new  = qr0_a_prim;
-                x4_new  = qr0_b_prim;
-                x8_new  = qr0_c_prim;
-                x12_new = qr0_d_prim;
-                x0_we   = 1;
-                x4_we   = 1;
-                x8_we   = 1;
-                x12_we  = 1;
-
-                x1_new  = qr1_a_prim;
-                x5_new  = qr1_b_prim;
-                x9_new  = qr1_c_prim;
-                x13_new = qr1_d_prim;
-                x1_we   = 1;
-                x5_we   = 1;
-                x9_we   = 1;
-                x13_we  = 1;
-
-                x2_new  = qr2_a_prim;
-                x6_new  = qr2_b_prim;
-                x10_new = qr2_c_prim;
-                x14_new = qr2_d_prim;
-                x2_we   = 1;
-                x6_we   = 1;
-                x10_we  = 1;
-                x14_we  = 1;
-
-                x3_new  = qr3_a_prim;
-                x7_new  = qr3_b_prim;
-                x11_new = qr3_c_prim;
-                x15_new = qr3_d_prim;
-                x3_we   = 1;
-                x7_we   = 1;
-                x11_we  = 1;
-                x15_we  = 1;
+                qr0_a = state_reg[00];
+                qr0_b = state_reg[04];
+                qr0_c = state_reg[08];
+                qr0_d = state_reg[12];
+                qr1_a = state_reg[01];
+                qr1_b = state_reg[05];
+                qr1_c = state_reg[09];
+                qr1_d = state_reg[13];
+                qr2_a = state_reg[02];
+                qr2_b = state_reg[06];
+                qr2_c = state_reg[10];
+                qr2_d = state_reg[14];
+                qr3_a = state_reg[03];
+                qr3_b = state_reg[07];
+                qr3_c = state_reg[11];
+                qr3_d = state_reg[15];
+                state_new[00] = qr0_a_prim;
+                state_new[04] = qr0_b_prim;
+                state_new[08] = qr0_c_prim;
+                state_new[12] = qr0_d_prim;
+                state_new[01] = qr1_a_prim;
+                state_new[05] = qr1_b_prim;
+                state_new[09] = qr1_c_prim;
+                state_new[13] = qr1_d_prim;
+                state_new[02] = qr2_a_prim;
+                state_new[06] = qr2_b_prim;
+                state_new[10] = qr2_c_prim;
+                state_new[14] = qr2_d_prim;
+                state_new[03] = qr3_a_prim;
+                state_new[07] = qr3_b_prim;
+                state_new[11] = qr3_c_prim;
+                state_new[15] = qr3_d_prim;
               end
 
-            STATE_QR1:
+            QR1:
               begin
-                x0_new  = qr0_a_prim;
-                x5_new  = qr0_b_prim;
-                x10_new = qr0_c_prim;
-                x15_new = qr0_d_prim;
-                x0_we   = 1;
-                x5_we   = 1;
-                x10_we  = 1;
-                x15_we  = 1;
-
-                x1_new  = qr1_a_prim;
-                x6_new  = qr1_b_prim;
-                x11_new = qr1_c_prim;
-                x12_new = qr1_d_prim;
-                x1_we   = 1;
-                x6_we   = 1;
-                x11_we  = 1;
-                x12_we  = 1;
-
-                x2_new  = qr2_a_prim;
-                x7_new  = qr2_b_prim;
-                x8_new  = qr2_c_prim;
-                x13_new = qr2_d_prim;
-                x2_we   = 1;
-                x7_we   = 1;
-                x8_we   = 1;
-                x13_we  = 1;
-
-                x3_new  = qr3_a_prim;
-                x4_new  = qr3_b_prim;
-                x9_new  = qr3_c_prim;
-                x14_new = qr3_d_prim;
-                x3_we   = 1;
-                x4_we   = 1;
-                x9_we   = 1;
-                x14_we  = 1;
+                qr0_a = state_reg[00];
+                qr0_b = state_reg[05];
+                qr0_c = state_reg[10];
+                qr0_d = state_reg[15];
+                qr1_a = state_reg[01];
+                qr1_b = state_reg[06];
+                qr1_c = state_reg[11];
+                qr1_d = state_reg[12];
+                qr2_a = state_reg[02];
+                qr2_b = state_reg[07];
+                qr2_c = state_reg[08];
+                qr2_d = state_reg[13];
+                qr3_a = state_reg[03];
+                qr3_b = state_reg[04];
+                qr3_c = state_reg[09];
+                qr3_d = state_reg[14];
+                state_new[00] = qr0_a_prim;
+                state_new[05] = qr0_b_prim;
+                state_new[10] = qr0_c_prim;
+                state_new[15] = qr0_d_prim;
+                state_new[01] = qr1_a_prim;
+                state_new[06] = qr1_b_prim;
+                state_new[11] = qr1_c_prim;
+                state_new[12] = qr1_d_prim;
+                state_new[02] = qr2_a_prim;
+                state_new[07] = qr2_b_prim;
+                state_new[08] = qr2_c_prim;
+                state_new[13] = qr2_d_prim;
+                state_new[03] = qr3_a_prim;
+                state_new[04] = qr3_b_prim;
+                state_new[09] = qr3_c_prim;
+                state_new[14] = qr3_d_prim;
               end
           endcase // case (quarterround_select)
         end // if (update_state)
@@ -1076,59 +485,37 @@ module chacha_core(
 
 
   //----------------------------------------------------------------
-  // quarterround_mux
-  // Quarterround muxes that selects operands for quarterrounds.
+  // data_out_logic
+  // Final output logic that combines the result from state
+  // update with the input block. This adds 16 adders and
+  // a final layer of XOR gates.
+  //
+  // Note that we also remap all the words into LSB format.
   //----------------------------------------------------------------
   always @*
-    begin : quarterround_mux
-      case (qr_ctr_reg)
-          STATE_QR0:
-            begin
-              qr0_a = x0_reg;
-              qr0_b = x4_reg;
-              qr0_c = x8_reg;
-              qr0_d = x12_reg;
-
-              qr1_a = x1_reg;
-              qr1_b = x5_reg;
-              qr1_c = x9_reg;
-              qr1_d = x13_reg;
-
-              qr2_a = x2_reg;
-              qr2_b = x6_reg;
-              qr2_c = x10_reg;
-              qr2_d = x14_reg;
-
-              qr3_a = x3_reg;
-              qr3_b = x7_reg;
-              qr3_c = x11_reg;
-              qr3_d = x15_reg;
-            end
+    begin : data_out_logic
+      integer i;
+      reg [31 : 0] msb_block_state [0 : 15];
+      reg [31 : 0] lsb_block_state [0 : 15];
+      reg [511 : 0] block_state;
 
-          STATE_QR1:
-            begin
-              qr0_a = x0_reg;
-              qr0_b = x5_reg;
-              qr0_c = x10_reg;
-              qr0_d = x15_reg;
-
-              qr1_a = x1_reg;
-              qr1_b = x6_reg;
-              qr1_c = x11_reg;
-              qr1_d = x12_reg;
-
-              qr2_a = x2_reg;
-              qr2_b = x7_reg;
-              qr2_c = x8_reg;
-              qr2_d = x13_reg;
-
-              qr3_a = x3_reg;
-              qr3_b = x4_reg;
-              qr3_c = x9_reg;
-              qr3_d = x14_reg;
-            end
-      endcase // case (quarterround_select)
-    end // quarterround_mux
+      for (i = 0 ; i < 16 ; i = i + 1)
+        begin
+          msb_block_state[i] = init_state_word[i] + state_reg[i];
+          lsb_block_state[i] = l2b(msb_block_state[i][31 : 0]);
+        end
+
+      block_state = {lsb_block_state[00], lsb_block_state[01],
+                     lsb_block_state[02], lsb_block_state[03],
+                     lsb_block_state[04], lsb_block_state[05],
+                     lsb_block_state[06], lsb_block_state[07],
+                     lsb_block_state[08], lsb_block_state[09],
+                     lsb_block_state[10], lsb_block_state[11],
+                     lsb_block_state[12], lsb_block_state[13],
+                     lsb_block_state[14], lsb_block_state[15]};
+
+      data_out_new = data_in ^ block_state;
+    end // data_out_logic
 
 
   //----------------------------------------------------------------
@@ -1186,13 +573,12 @@ module chacha_core(
   //----------------------------------------------------------------
   always @*
     begin : block_ctr
-      // Defult assignments
-      block0_ctr_new = 32'h00000000;
-      block1_ctr_new = 32'h00000000;
+      block0_ctr_new = 32'h0;
+      block1_ctr_new = 32'h0;
       block0_ctr_we = 0;
       block1_ctr_we = 0;
 
-      if (block_ctr_rst)
+      if (block_ctr_set)
         begin
           block0_ctr_new = ctr[31 : 00];
           block1_ctr_new = ctr[63 : 32];
@@ -1223,46 +609,33 @@ module chacha_core(
     begin : chacha_ctrl_fsm
       init_state         = 0;
       update_state       = 0;
-      sample_params      = 0;
       update_output      = 0;
-
       qr_ctr_inc         = 0;
       qr_ctr_rst         = 0;
-
       dr_ctr_inc         = 0;
       dr_ctr_rst         = 0;
-
       block_ctr_inc      = 0;
-      block_ctr_rst      = 0;
-
-      data_in_we         = 0;
-
+      block_ctr_set      = 0;
       ready_new          = 0;
       ready_we           = 0;
-
       data_out_valid_new = 0;
       data_out_valid_we  = 0;
-
       chacha_ctrl_new    = CTRL_IDLE;
       chacha_ctrl_we     = 0;
 
-
       case (chacha_ctrl_reg)
         CTRL_IDLE:
           begin
             if (init)
               begin
+                block_ctr_set   = 1;
                 ready_new       = 0;
                 ready_we        = 1;
-                data_in_we      = 1;
-                sample_params   = 1;
-                block_ctr_rst   = 1;
                 chacha_ctrl_new = CTRL_INIT;
                 chacha_ctrl_we  = 1;
               end
           end
 
-
         CTRL_INIT:
           begin
             init_state      = 1;
@@ -1272,15 +645,14 @@ module chacha_core(
             chacha_ctrl_we  = 1;
           end
 
-
         CTRL_ROUNDS:
           begin
             update_state = 1;
             qr_ctr_inc   = 1;
-            if (qr_ctr_reg == STATE_QR1)
+            if (qr_ctr_reg == QR1)
               begin
                 dr_ctr_inc = 1;
-                if (dr_ctr_reg == (rounds_reg - 1))
+                if (dr_ctr_reg == (rounds[4 : 1] - 1))
                   begin
                     chacha_ctrl_new = CTRL_FINALIZE;
                     chacha_ctrl_we  = 1;
@@ -1288,7 +660,6 @@ module chacha_core(
               end
           end
 
-
         CTRL_FINALIZE:
           begin
             ready_new          = 1;
@@ -1300,7 +671,6 @@ module chacha_core(
             chacha_ctrl_we     = 1;
           end
 
-
         CTRL_DONE:
           begin
             if (init)
@@ -1309,9 +679,7 @@ module chacha_core(
                 ready_we           = 1;
                 data_out_valid_new = 0;
                 data_out_valid_we  = 1;
-                data_in_we         = 1;
-                sample_params      = 1;
-                block_ctr_rst      = 1;
+                block_ctr_set      = 1;
                 chacha_ctrl_new    = CTRL_INIT;
                 chacha_ctrl_we     = 1;
               end
@@ -1321,14 +689,12 @@ module chacha_core(
                 ready_we           = 1;
                 data_out_valid_new = 0;
                 data_out_valid_we  = 1;
-                data_in_we         = 1;
                 block_ctr_inc      = 1;
                 chacha_ctrl_new    = CTRL_INIT;
                 chacha_ctrl_we     = 1;
               end
           end
 
-
         default:
           begin
 
diff --git a/src/tb/tb_chacha.v b/src/tb/tb_chacha.v
index ac2d3b0..fab2ad7 100644
--- a/src/tb/tb_chacha.v
+++ b/src/tb/tb_chacha.v
@@ -36,19 +36,15 @@
 //
 //======================================================================
 
-//------------------------------------------------------------------
-// Simulator directives.
-//------------------------------------------------------------------
-`timescale 1ns/100ps
-
 module tb_chacha();
 
   //----------------------------------------------------------------
   // Internal constant and parameter definitions.
   //----------------------------------------------------------------
-  localparam DEBUG = 0;
+  localparam DEBUG = 1;
 
-  localparam CLK_HALF_PERIOD = 2;
+  localparam CLK_HALF_PERIOD = 1;
+  localparam CLK_PERIOD = 2 * CLK_HALF_PERIOD;
 
   localparam TC1  = 1;
   localparam TC2  = 2;
@@ -111,20 +107,6 @@ module tb_chacha();
   localparam ADDR_IV1         = 8'h21;
 
   localparam ADDR_DATA_IN0    = 8'h40;
-  localparam ADDR_DATA_IN1    = 8'h41;
-  localparam ADDR_DATA_IN2    = 8'h42;
-  localparam ADDR_DATA_IN3    = 8'h43;
-  localparam ADDR_DATA_IN4    = 8'h44;
-  localparam ADDR_DATA_IN5    = 8'h45;
-  localparam ADDR_DATA_IN6    = 8'h46;
-  localparam ADDR_DATA_IN7    = 8'h47;
-  localparam ADDR_DATA_IN8    = 8'h48;
-  localparam ADDR_DATA_IN9    = 8'h49;
-  localparam ADDR_DATA_IN10   = 8'h4a;
-  localparam ADDR_DATA_IN11   = 8'h4b;
-  localparam ADDR_DATA_IN12   = 8'h4c;
-  localparam ADDR_DATA_IN13   = 8'h4d;
-  localparam ADDR_DATA_IN14   = 8'h4e;
   localparam ADDR_DATA_IN15   = 8'h4f;
 
   localparam ADDR_DATA_OUT0   = 8'h80;
@@ -170,25 +152,20 @@ module tb_chacha();
 
   reg display_cycle_ctr;
   reg display_read_write;
+  reg display_core_state;
 
 
   //----------------------------------------------------------------
   // Chacha device under test.
   //----------------------------------------------------------------
   chacha dut(
-             // Clock and reset.
              .clk(tb_clk),
              .reset_n(tb_reset_n),
-
-             // Control.
              .cs(tb_cs),
              .we(tb_write_read),
-
-             // Data ports.
              .address(tb_address),
              .write_data(tb_data_in),
-             .read_data(tb_data_out),
-             .error(tb_error)
+             .read_data(tb_data_out)
             );
 
 
@@ -218,6 +195,24 @@ module tb_chacha();
           $display("cycle = %016x:", cycle_ctr);
         end
 
+      if (display_core_state)
+        begin
+          $display("core ctrl: 0x%02x, core_qr_ctr: 0x%02x, core_dr_ctr: 0x%02x, init: 0x%01x, next: 0x%01x, core_ready: 0x%02x",
+                   dut.core.chacha_ctrl_reg, dut.core.qr_ctr_reg,
+                   dut.core.dr_ctr_reg, dut.core.init,
+                   dut.core.next, dut.core.ready_reg);
+
+          $display("state0_reg  = 0x%08x, state1_reg  = 0x%08x, state2_reg  = 0x%08x, state3_reg  = 0x%08x",
+                   dut.core.state_reg[00], dut.core.state_reg[01], dut.core.state_reg[02], dut.core.state_reg[03]);
+          $display("state4_reg  = 0x%08x, state5_reg  = 0x%08x, state6_reg  = 0x%08x, state7_reg  = 0x%08x",
+                   dut.core.state_reg[04], dut.core.state_reg[05], dut.core.state_reg[06], dut.core.state_reg[07]);
+          $display("state8_reg  = 0x%08x, state9_reg  = 0x%08x, state10_reg = 0x%08x, state11_reg = 0x%08x",
+                   dut.core.state_reg[08], dut.core.state_reg[09], dut.core.state_reg[10], dut.core.state_reg[11]);
+          $display("state12_reg = 0x%08x, state13_reg = 0x%08x, state14_reg = 0x%08x, state15_reg = 0x%08x",
+                   dut.core.state_reg[12], dut.core.state_reg[13], dut.core.state_reg[14], dut.core.state_reg[15]);
+          $display("");
+        end
+
       if (display_read_write)
         begin
 
@@ -238,31 +233,39 @@ module tb_chacha();
 
 
   //----------------------------------------------------------------
-  // set_display_prefs()
-  //
-  // Set the different monitor displays we want to see during
-  // simulation.
+  // reset_dut
   //----------------------------------------------------------------
-  task set_display_prefs(
-                         input cycles,
-                         input read_write);
+  task reset_dut;
     begin
-      display_cycle_ctr  = cycles;
-      display_read_write = read_write;
+      tb_reset_n = 0;
+      #(2 * CLK_PERIOD);
+      tb_reset_n = 1;
     end
-  endtask // set_display_prefs
+  endtask // reset_dut
 
 
   //----------------------------------------------------------------
-  // reset_dut
+  // init_sim()
+  //
+  // Set the testbench counters, DUT inputs and display flags to defined values.
   //----------------------------------------------------------------
-  task reset_dut();
+  task init_sim;
     begin
-      tb_reset_n = 0;
-      #(4 * CLK_HALF_PERIOD);
-      tb_reset_n = 1;
+      cycle_ctr     = 0;
+      error_ctr     = 0;
+      tc_ctr        = 0;
+      tb_clk        = 0;
+      tb_reset_n    = 0;
+      tb_cs         = 0;
+      tb_write_read = 0;
+      tb_address    = 8'h0;
+      tb_data_in    = 32'h0;
+
+      display_cycle_ctr  = 0;
+      display_read_write = 0;
+      display_core_state = 0;
     end
-  endtask // reset_dut
+  endtask // init_sim
 
 
   //----------------------------------------------------------------
@@ -276,11 +279,11 @@ module tb_chacha();
       tb_cs         = 1;
       tb_write_read = 0;
       tb_address    = addr;
-      #(2 * CLK_HALF_PERIOD);
+      #(CLK_PERIOD);
       tb_cs         = 0;
       tb_write_read = 0;
-      tb_address    = 8'h00;
-      tb_data_in    = 32'h00000000;
+      tb_address    = 8'h0;
+      tb_data_in    = 32'h0;
     end
   endtask // read_reg
 
@@ -296,11 +299,11 @@ module tb_chacha();
       tb_write_read = 1;
       tb_address    = addr;
       tb_data_in    = data;
-      #(2 * CLK_HALF_PERIOD);
+      #(CLK_PERIOD);
       tb_cs         = 0;
       tb_write_read = 0;
-      tb_address    = 8'h00;
-      tb_data_in    = 32'h00000000;
+      tb_address    = 8'h0;
+      tb_data_in    = 32'h0;
     end
   endtask // write_reg
 
@@ -310,36 +313,49 @@ module tb_chacha();
   //
   // Dump the internal state of the top to std out.
   //----------------------------------------------------------------
-  task dump_top_state();
+  task dump_top_state;
     begin
       $display("");
       $display("Top internal state");
       $display("------------------");
       $display("init_reg   = %01x", dut.init_reg);
       $display("next_reg   = %01x", dut.next_reg);
-      $display("ready_reg  = %01x", dut.ready_reg);
       $display("keylen_reg = %01x", dut.keylen_reg);
       $display("rounds_reg = %01x", dut.rounds_reg);
       $display("");
 
-      $display("key0_reg = %08x, key1_reg  = %08x, key2_reg = %08x, key3_reg  = %08x", dut.key0_reg, dut.key1_reg, dut.key2_reg, dut.key3_reg);
-      $display("key4_reg = %08x, key5_reg  = %08x, key6_reg = %08x, key7_reg  = %08x", dut.key4_reg, dut.key5_reg, dut.key6_reg, dut.key7_reg);
+      $display("key0_reg = %08x, key1_reg  = %08x, key2_reg = %08x, key3_reg  = %08x",
+               dut.key_reg[0], dut.key_reg[1], dut.key_reg[2], dut.key_reg[3]);
+      $display("key4_reg = %08x, key5_reg  = %08x, key6_reg = %08x, key7_reg  = %08x",
+               dut.key_reg[4], dut.key_reg[5], dut.key_reg[6], dut.key_reg[7]);
       $display("");
 
-      $display("iv0_reg = %08x, iv1_reg = %08x", dut.iv0_reg, dut.iv1_reg);
+      $display("iv0_reg = %08x, iv1_reg = %08x", dut.iv_reg[0], dut.iv_reg[1]);
       $display("");
 
-      $display("data_in0_reg  = %08x, data_in1_reg   = %08x, data_in2_reg  = %08x, data_in3_reg   = %08x", dut.data_in0_reg, dut.data_in1_reg, dut.data_in2_reg, dut.data_in3_reg);
-      $display("data_in4_reg  = %08x, data_in5_reg   = %08x, data_in6_reg  = %08x, data_in7_reg   = %08x", dut.data_in4_reg, dut.data_in5_reg, dut.data_in6_reg, dut.data_in7_reg);
-      $display("data_in8_reg  = %08x, data_in9_reg   = %08x, data_in10_reg = %08x, data_in11_reg  = %08x", dut.data_in8_reg, dut.data_in9_reg, dut.data_in10_reg, dut.data_in11_reg);
-      $display("data_in12_reg = %08x, data_in13_reg  = %08x, data_in14_reg = %08x, data_in15_reg  = %08x", dut.data_in12_reg, dut.data_in13_reg, dut.data_in14_reg, dut.data_in15_reg);
+      $display("data_in0_reg  = %08x, data_in1_reg   = %08x, data_in2_reg  = %08x, data_in3_reg   = %08x",
+               dut.data_in_reg[00], dut.data_in_reg[01], dut.data_in_reg[02], dut.data_in_reg[03]);
+      $display("data_in4_reg  = %08x, data_in5_reg   = %08x, data_in6_reg  = %08x, data_in7_reg   = %08x",
+               dut.data_in_reg[04], dut.data_in_reg[05], dut.data_in_reg[06], dut.data_in_reg[07]);
+      $display("data_in8_reg  = %08x, data_in9_reg   = %08x, data_in10_reg = %08x, data_in11_reg  = %08x",
+               dut.data_in_reg[08], dut.data_in_reg[09], dut.data_in_reg[10], dut.data_in_reg[11]);
+      $display("data_in12_reg = %08x, data_in13_reg  = %08x, data_in14_reg = %08x, data_in15_reg  = %08x",
+               dut.data_in_reg[12], dut.data_in_reg[13], dut.data_in_reg[14], dut.data_in_reg[15]);
       $display("");
 
-      $display("data_out_valid_reg = %01x", dut.data_out_valid_reg);
-      $display("data_out0_reg  = %08x, data_out1_reg   = %08x, data_out2_reg  = %08x, data_out3_reg   = %08x", dut.data_out0_reg, dut.data_out1_reg, dut.data_out2_reg, dut.data_out3_reg);
-      $display("data_out4_reg  = %08x, data_out5_reg   = %08x, data_out6_reg  = %08x, data_out7_reg   = %08x", dut.data_out4_reg, dut.data_out5_reg, dut.data_out6_reg, dut.data_out7_reg);
-      $display("data_out8_reg  = %08x, data_out9_reg   = %08x, data_out10_reg = %08x, data_out11_reg  = %08x", dut.data_out8_reg, dut.data_out9_reg, dut.data_out10_reg, dut.data_out11_reg);
-      $display("data_out12_reg = %08x, data_out13_reg  = %08x, data_out14_reg = %08x, data_out15_reg  = %08x", dut.data_out12_reg, dut.data_out13_reg, dut.data_out14_reg, dut.data_out15_reg);
+      $display("ready = 0x%01x, data_out_valid = %01x", dut.core_ready, dut.core_data_out_valid);
+      $display("data_out00 = %08x, data_out01 = %08x, data_out02 = %08x, data_out03 = %08x",
+               dut.core_data_out[511 : 480], dut.core_data_out[479 : 448],
+               dut.core_data_out[447 : 416], dut.core_data_out[415 : 384]);
+      $display("data_out04 = %08x, data_out05 = %08x, data_out06 = %08x, data_out07 = %08x",
+               dut.core_data_out[383 : 352], dut.core_data_out[351 : 320],
+               dut.core_data_out[319 : 288], dut.core_data_out[287 : 256]);
+      $display("data_out08 = %08x, data_out09 = %08x, data_out10 = %08x, data_out11 = %08x",
+               dut.core_data_out[255 : 224], dut.core_data_out[223 : 192],
+               dut.core_data_out[191 : 160], dut.core_data_out[159 : 128]);
+      $display("data_out12 = %08x, data_out13 = %08x, data_out14 = %08x, data_out15 = %08x",
+               dut.core_data_out[127 :  96], dut.core_data_out[95  :  64],
+               dut.core_data_out[63  :  32], dut.core_data_out[31  :   0]);
       $display("");
     end
   endtask // dump_top_state
@@ -350,27 +366,23 @@ module tb_chacha();
   //
   // Dump the internal state of the core to std out.
   //----------------------------------------------------------------
-  task dump_core_state();
+  task dump_core_state;
     begin
       $display("");
       $display("Core internal state");
       $display("-------------------");
-//      $display("Internal data state vector:");
-//      $display("0x%064x", dut.core.state_reg);
-      $display("");
-
-      $display("Round state X:");
-      $display("x0_reg  = %08x, x1_reg  = %08x", dut.core.x0_reg, dut.core.x1_reg);
-      $display("x2_reg  = %08x, x3_reg  = %08x", dut.core.x2_reg, dut.core.x3_reg);
-      $display("x4_reg  = %08x, x5_reg  = %08x", dut.core.x4_reg, dut.core.x5_reg);
-      $display("x6_reg  = %08x, x7_reg  = %08x", dut.core.x6_reg, dut.core.x7_reg);
-      $display("x8_reg  = %08x, x9_reg  = %08x", dut.core.x8_reg, dut.core.x9_reg);
-      $display("x10_reg = %08x, x11_reg = %08x", dut.core.x10_reg, dut.core.x11_reg);
-      $display("x12_reg = %08x, x13_reg = %08x", dut.core.x12_reg, dut.core.x13_reg);
-      $display("x14_reg = %08x, x15_reg = %08x", dut.core.x14_reg, dut.core.x15_reg);
+      $display("Round state:");
+      $display("state0_reg  = 0x%08x, state1_reg  = 0x%08x, state2_reg  = 0x%08x, state3_reg  = 0x%08x",
+               dut.core.state_reg[00], dut.core.state_reg[01], dut.core.state_reg[02], dut.core.state_reg[03]);
+      $display("state4_reg  = 0x%08x, state5_reg  = 0x%08x, state6_reg  = 0x%08x, state7_reg  = 0x%08x",
+               dut.core.state_reg[04], dut.core.state_reg[05], dut.core.state_reg[06], dut.core.state_reg[07]);
+      $display("state8_reg  = 0x%08x, state9_reg  = 0x%08x, state10_reg = 0x%08x, state11_reg = 0x%08x",
+               dut.core.state_reg[08], dut.core.state_reg[09], dut.core.state_reg[10], dut.core.state_reg[11]);
+      $display("state12_reg = 0x%08x, state13_reg = 0x%08x, state14_reg = 0x%08x, state15_reg = 0x%08x",
+               dut.core.state_reg[12], dut.core.state_reg[13], dut.core.state_reg[14], dut.core.state_reg[15]);
       $display("");
 
-      $display("rounds_reg = %01x", dut.core.rounds_reg);
+      $display("rounds = %01x", dut.core.rounds);
       $display("qr_ctr_reg = %01x, dr_ctr_reg  = %01x", dut.core.qr_ctr_reg, dut.core.dr_ctr_reg);
       $display("block0_ctr_reg = %08x, block1_ctr_reg = %08x", dut.core.block0_ctr_reg, dut.core.block1_ctr_reg);
 
@@ -379,7 +391,7 @@ module tb_chacha();
       $display("chacha_ctrl_reg = %02x", dut.core.chacha_ctrl_reg);
       $display("");
 
-      $display("data_in_reg = %064x", dut.core.data_in_reg);
+      $display("data_in = %064x", dut.core.data_in);
       $display("data_out_valid_reg = %01x", dut.core.data_out_valid_reg);
       $display("");
 
@@ -395,7 +407,7 @@ module tb_chacha();
   //
   // Display the accumulated test results.
   //----------------------------------------------------------------
-  task display_test_result();
+  task display_test_result;
     begin
       if (error_ctr == 0)
         begin
@@ -410,28 +422,6 @@ module tb_chacha();
 
 
   //----------------------------------------------------------------
-  // init_dut()
-  //
-  // Set the input to the DUT to defined values.
-  //----------------------------------------------------------------
-  task init_dut();
-    begin
-      // Set clock, reset and DUT input signals to
-      // defined values at simulation start.
-      cycle_ctr     = 0;
-      error_ctr     = 0;
-      tc_ctr        = 0;
-      tb_clk        = 0;
-      tb_reset_n    = 0;
-      tb_cs         = 0;
-      tb_write_read = 0;
-      tb_address    = 8'h00;
-      tb_data_in    = 32'h00000000;
-    end
-  endtask // init_dut
-
-
-  //----------------------------------------------------------------
   // read_write_test()
   //
   // Simple test case that tries to read and write to the
@@ -439,7 +429,7 @@ module tb_chacha();
   //
   // Note: Currently not self testing. No expected values.
   //----------------------------------------------------------------
-  task read_write_test();
+  task read_write_test;
     begin
       tc_ctr = tc_ctr + 1;
 
@@ -484,8 +474,8 @@ module tb_chacha();
       write_reg(ADDR_KEY7, key[31 :    0]);
       write_reg(ADDR_IV0, iv[63 : 32]);
       write_reg(ADDR_IV1, iv[31 : 0]);
-      write_reg(ADDR_KEYLEN, {{31'b0000000000000000000000000000000}, key_length});
-      write_reg(ADDR_ROUNDS, {{27'b000000000000000000000000000}, rounds});
+      write_reg(ADDR_KEYLEN, {{31'h0}, key_length});
+      write_reg(ADDR_ROUNDS, {{27'h0}, rounds});
     end
   endtask // write_parameters
 
@@ -499,10 +489,10 @@ module tb_chacha();
   // Note: It is the callers responsibility to call the function
   // when the dut is ready to react on the init signal.
   //----------------------------------------------------------------
-  task start_init_block();
+  task start_init_block;
     begin
       write_reg(ADDR_CTRL, 32'h00000001);
-      #(4 * CLK_HALF_PERIOD);
+      #(2 * CLK_PERIOD);
       write_reg(ADDR_CTRL, 32'h00000000);
     end
   endtask // start_init_block
@@ -517,17 +507,17 @@ module tb_chacha();
   // Note: It is the callers responsibility to call the function
   // when the dut is ready to react on the next signal.
   //----------------------------------------------------------------
-  task start_next_block();
+  task start_next_block;
     begin
       write_reg(ADDR_CTRL, 32'h00000002);
-      #(4 * CLK_HALF_PERIOD);
+      #(2 * CLK_PERIOD);
       write_reg(ADDR_CTRL, 32'h00000000);
 
       if (DEBUG)
         begin
           $display("Debug of next state.");
           dump_core_state();
-          #(4 * CLK_HALF_PERIOD);
+          #(2 * CLK_PERIOD);
           dump_core_state();
         end
     end
@@ -543,7 +533,7 @@ module tb_chacha();
   // when the dut is actively processing and will in fact at some
   // point set the flag.
   //----------------------------------------------------------------
-  task wait_ready();
+  task wait_ready;
     begin
       while (!tb_data_out[STATUS_READY_BIT])
         begin
@@ -559,7 +549,7 @@ module tb_chacha();
   // Extracts all 16 data out words and combine them into the
   // global extracted_data.
   //----------------------------------------------------------------
-  task extract_data();
+  task extract_data;
     begin
       read_reg(ADDR_DATA_OUT0);
       extracted_data[511 : 480] = tb_data_out;
@@ -598,6 +588,34 @@ module tb_chacha();
 
 
   //----------------------------------------------------------------
+  // check_name_version()
+  //
+  // Read the name and version from the DUT and display them.
+  //----------------------------------------------------------------
+  task check_name_version;
+    reg [31 : 0] name0;
+    reg [31 : 0] name1;
+    reg [31 : 0] version;
+    begin
+      $display("*** Trying to read name and version from core.");
+      read_reg(ADDR_NAME0);
+      name0 = tb_data_out;
+      read_reg(ADDR_NAME1);
+      name1 = tb_data_out;
+      read_reg(ADDR_VERSION);
+      version = tb_data_out;
+
+      $display("DUT name: %c%c%c%c%c%c%c%c",
+               name0[31 : 24], name0[23 : 16], name0[15 : 8], name0[7 : 0],
+               name1[31 : 24], name1[23 : 16], name1[15 : 8], name1[7 : 0]);
+      $display("DUT version: %c%c%c%c",
+               version[31 : 24], version[23 : 16], version[15 : 8], version[7 : 0]);
+      $display("");
+    end
+  endtask // check_name_version
+
+
+  //----------------------------------------------------------------
   // run_two_blocks_test_vector()
   //
   // Runs a test case with two blocks based on the given
@@ -616,7 +634,6 @@ module tb_chacha();
       $display("***TC%2d-%2d started", major, minor);
       $display("***-----------------");
       write_parameters(key, key_length, iv, rounds);
-
       start_init_block();
       wait_ready();
       extract_data();
@@ -690,7 +707,10 @@ module tb_chacha();
       write_parameters(key, key_length, iv, rounds);
 
       start_init_block();
+      $display("*** Started.");
       wait_ready();
+      $display("*** Ready seen.");
+      dump_top_state();
       extract_data();
 
       if (extracted_data != expected)
@@ -720,18 +740,20 @@ module tb_chacha();
   initial
     begin : chacha_test
       $display("   -- Testbench for chacha started --");
-      init_dut();
-      set_display_prefs(0, 0);
+      init_sim();
       reset_dut();
 
       $display("State at init after reset:");
       dump_top_state();
 
+      // Check name and version.
+      check_name_version();
+
       $display("TC1-1: All zero inputs. 128 bit key, 8 rounds.");
       run_test_vector(TC1, ONE,
-                    256'h0000000000000000000000000000000000000000000000000000000000000000,
+                    256'h0,
                     KEY_128_BITS,
-                    64'h0000000000000000,
+                    64'h0,
                     EIGHT_ROUNDS,
                     512'he28a5fa4a67f8c5defed3e6fb7303486aa8427d31419a729572d777953491120b64ab8e72b8deb85cd6aea7cb6089a101824beeb08814a428aab1fa2c816081b);
 
diff --git a/src/tb/tb_chacha_core.v b/src/tb/tb_chacha_core.v
index a1ddfdc..a36899f 100644
--- a/src/tb/tb_chacha_core.v
+++ b/src/tb/tb_chacha_core.v
@@ -36,17 +36,13 @@
 //
 //======================================================================
 
-//------------------------------------------------------------------
-// Simulator directives.
-//------------------------------------------------------------------
-`timescale 1ns/100ps
-
 module tb_chacha_core();
 
   //----------------------------------------------------------------
   // Internal constant and parameter definitions.
   //----------------------------------------------------------------
   parameter CLK_HALF_PERIOD = 2;
+  parameter CLK_PERIOD = 2 * CLK_HALF_PERIOD;
 
   parameter TC1  = 1;
   parameter TC2  = 2;
@@ -78,7 +74,7 @@ module tb_chacha_core();
   parameter DISABLE = 0;
   parameter ENABLE  = 1;
 
-  parameter DEFAULT_CTR_INIT = 64'h0000000000000000;
+  localparam DEFAULT_CTR_INIT = 64'h0;
 
 
   //----------------------------------------------------------------
@@ -105,7 +101,6 @@ module tb_chacha_core();
   reg            display_ctrl_and_ctrs;
   reg            display_qround;
   reg            display_state;
-  reg            display_x_state;
 
 
   //----------------------------------------------------------------
@@ -180,31 +175,24 @@ module tb_chacha_core();
       // Display the internal state register.
       if (display_state)
         begin
-//          $display("Internal state:");
-//          $display("0x%064x", dut.state_reg);
-//          $display("");
-        end
-
-      // Display the round processing state register X.
-      if (display_x_state)
-        begin
-          $display("Round state X:");
-          $display("x0_reg   = 0x%08x, x0_new   = 0x%08x, x0_we  = 0x%01x", dut.x0_reg,  dut.x0_new,  dut.x0_we);
-          $display("x1_reg   = 0x%08x, x1_new   = 0x%08x, x1_we  = 0x%01x", dut.x1_reg,  dut.x1_new,  dut.x1_we);
-          $display("x2_reg   = 0x%08x, x2_new   = 0x%08x, x2_we  = 0x%01x", dut.x2_reg,  dut.x2_new,  dut.x2_we);
-          $display("x3_reg   = 0x%08x, x3_new   = 0x%08x, x3_we  = 0x%01x", dut.x3_reg,  dut.x3_new,  dut.x3_we);
-          $display("x4_reg   = 0x%08x, x4_new   = 0x%08x, x4_we  = 0x%01x", dut.x4_reg,  dut.x4_new,  dut.x4_we);
-          $display("x5_reg   = 0x%08x, x5_new   = 0x%08x, x5_we  = 0x%01x", dut.x5_reg,  dut.x5_new,  dut.x5_we);
-          $display("x6_reg   = 0x%08x, x6_new   = 0x%08x, x6_we  = 0x%01x", dut.x6_reg,  dut.x6_new,  dut.x6_we);
-          $display("x7_reg   = 0x%08x, x7_new   = 0x%08x, x7_we  = 0x%01x", dut.x7_reg,  dut.x7_new,  dut.x7_we);
-          $display("x8_reg   = 0x%08x, x8_new   = 0x%08x, x8_we  = 0x%01x", dut.x8_reg,  dut.x8_new,  dut.x8_we);
-          $display("x9_reg   = 0x%08x, x9_new   = 0x%08x, x9_we  = 0x%01x", dut.x9_reg,  dut.x9_new,  dut.x9_we);
-          $display("x10_reg  = 0x%08x, x10_new  = 0x%08x, x10_we = 0x%01x", dut.x10_reg, dut.x10_new, dut.x10_we);
-          $display("x11_reg  = 0x%08x, x11_new  = 0x%08x, x11_we = 0x%01x", dut.x11_reg, dut.x11_new, dut.x11_we);
-          $display("x12_reg  = 0x%08x, x12_new  = 0x%08x, x12_we = 0x%01x", dut.x12_reg, dut.x12_new, dut.x12_we);
-          $display("x13_reg  = 0x%08x, x13_new  = 0x%08x, x13_we = 0x%01x", dut.x13_reg, dut.x13_new, dut.x13_we);
-          $display("x14_reg  = 0x%08x, x14_new  = 0x%08x, x14_we = 0x%01x", dut.x14_reg, dut.x14_new, dut.x14_we);
-          $display("x15_reg  = 0x%08x, x15_new  = 0x%08x, x15_we = 0x%01x", dut.x15_reg, dut.x15_new, dut.x15_we);
+          $display("Round state:");
+          $display("state0_reg  = 0x%08x, state0_new  = 0x%08x", dut.state_reg[00], dut.state_new[00]);
+          $display("state1_reg  = 0x%08x, state1_new  = 0x%08x", dut.state_reg[01], dut.state_new[01]);
+          $display("state2_reg  = 0x%08x, state2_new  = 0x%08x", dut.state_reg[02], dut.state_new[02]);
+          $display("state3_reg  = 0x%08x, state3_new  = 0x%08x", dut.state_reg[03], dut.state_new[03]);
+          $display("state4_reg  = 0x%08x, state4_new  = 0x%08x", dut.state_reg[04], dut.state_new[04]);
+          $display("state5_reg  = 0x%08x, state5_new  = 0x%08x", dut.state_reg[05], dut.state_new[05]);
+          $display("state6_reg  = 0x%08x, state6_new  = 0x%08x", dut.state_reg[06], dut.state_new[06]);
+          $display("state7_reg  = 0x%08x, state7_new  = 0x%08x", dut.state_reg[07], dut.state_new[07]);
+          $display("state8_reg  = 0x%08x, state8_new  = 0x%08x", dut.state_reg[08], dut.state_new[08]);
+          $display("state9_reg  = 0x%08x, state9_new  = 0x%08x", dut.state_reg[09], dut.state_new[09]);
+          $display("state10_reg = 0x%08x, state10_new = 0x%08x", dut.state_reg[10], dut.state_new[10]);
+          $display("state11_reg = 0x%08x, state11_new = 0x%08x", dut.state_reg[11], dut.state_new[11]);
+          $display("state12_reg = 0x%08x, state12_new = 0x%08x", dut.state_reg[12], dut.state_new[12]);
+          $display("state13_reg = 0x%08x, state13_new = 0x%08x", dut.state_reg[13], dut.state_new[13]);
+          $display("state14_reg = 0x%08x, state14_new = 0x%08x", dut.state_reg[14], dut.state_new[14]);
+          $display("state15_reg = 0x%08x, state15_new = 0x%08x", dut.state_reg[15], dut.state_new[15]);
+          $display("state_we    = 0x%01x", dut.state_we);
           $display("");
         end
 
@@ -215,7 +203,6 @@ module tb_chacha_core();
           $display("qr0_a_prim = %08x, qr0_b_prim = %08x, qr0_c_prim = %08x, qr0_d_prim = %08x", dut.qr0_a_prim, dut.qr0_b_prim, dut.qr0_c_prim, dut.qr0_d_prim);
           $display("");
         end
-
     end // dut_monitor
 
 
@@ -223,26 +210,23 @@ module tb_chacha_core();
   // dump_state()
   // Dump the internal CHACHA state to std out.
   //----------------------------------------------------------------
-  task dump_state();
+  task dump_state;
     begin
       $display("");
       $display("Internal state:");
       $display("---------------");
-//      $display("0x%064x", dut.state_reg);
-//      $display("");
-
-      $display("Round state X::");
-      $display("x0_reg  = %08x, x1_reg  = %08x", dut.x0_reg, dut.x1_reg);
-      $display("x2_reg  = %08x, x3_reg  = %08x", dut.x2_reg, dut.x3_reg);
-      $display("x4_reg  = %08x, x5_reg  = %08x", dut.x4_reg, dut.x5_reg);
-      $display("x6_reg  = %08x, x7_reg  = %08x", dut.x6_reg, dut.x7_reg);
-      $display("x8_reg  = %08x, x9_reg  = %08x", dut.x8_reg, dut.x9_reg);
-      $display("x10_reg = %08x, x11_reg = %08x", dut.x10_reg, dut.x11_reg);
-      $display("x12_reg = %08x, x13_reg = %08x", dut.x12_reg, dut.x13_reg);
-      $display("x14_reg = %08x, x15_reg = %08x", dut.x14_reg, dut.x15_reg);
+      $display("Round state:");
+      $display("state0_reg  = %08x, state1_reg  = %08x, state2_reg  = %08x, state3_reg  = %08x",
+               dut.state_reg[00],  dut.state_reg[01], dut.state_reg[02],  dut.state_reg[03]);
+      $display("state4_reg  = %08x, state5_reg  = %08x, state6_reg  = %08x, state7_reg  = %08x",
+               dut.state_reg[04],  dut.state_reg[05], dut.state_reg[06],  dut.state_reg[07]);
+      $display("state8_reg  = %08x, state9_reg  = %08x, state10_reg = %08x, state11_reg = %08x",
+               dut.state_reg[08],  dut.state_reg[09], dut.state_reg[10],  dut.state_reg[11]);
+      $display("state12_reg = %08x, state13_reg = %08x, state14_reg = %08x, state15_reg = %08x",
+               dut.state_reg[12],  dut.state_reg[13], dut.state_reg[14],  dut.state_reg[15]);
       $display("");
 
-      $display("rounds_reg = %01x", dut.rounds_reg);
+      $display("rounds = %01x", dut.rounds);
       $display("qr_ctr_reg = %01x, dr_ctr_reg  = %01x", dut.qr_ctr_reg, dut.dr_ctr_reg);
       $display("block0_ctr_reg = %08x, block1_ctr_reg = %08x", dut.block0_ctr_reg, dut.block1_ctr_reg);
 
@@ -251,7 +235,7 @@ module tb_chacha_core();
       $display("chacha_ctrl_reg = %02x", dut.chacha_ctrl_reg);
       $display("");
 
-      $display("data_in_reg = %064x", dut.data_in_reg);
+      $display("data_in = %064x", dut.data_in);
       $display("data_out_valid_reg = %01x", dut.data_out_valid_reg);
       $display("");
 
@@ -266,7 +250,7 @@ module tb_chacha_core();
   // dump_inout()
   // Dump the status for input and output ports.
   //----------------------------------------------------------------
-  task dump_inout();
+  task dump_inout;
     begin
       $display("");
       $display("State for input and output ports:");
@@ -308,7 +292,7 @@ module tb_chacha_core();
       dut.qr0_b = b;
       dut.qr0_c = c;
       dut.qr0_d = d;
-      #(2 * CLK_HALF_PERIOD);
+      #(CLK_PERIOD);
 
       $display("a0 = 0x%08x, a1 = 0x%08x", dut.qr0.qr.a0, dut.qr0.qr.a1);
       $display("b0 = 0x%08x, b1 = 0x%08x", dut.qr0.qr.b0, dut.qr0.qr.b1);
@@ -331,7 +315,7 @@ module tb_chacha_core();
   // Run some simple test on the qr logic.
   // Note: Not self testing. No expected value used.
   //----------------------------------------------------------------
-  task qr_tests();
+  task qr_tests;
     begin
       $display("*** Test of Quarterround:");
       $display("");
@@ -389,15 +373,15 @@ module tb_chacha_core();
   //
   // Cycles the reset signal on the dut.
   //----------------------------------------------------------------
-  task cycle_reset();
+  task cycle_reset;
     begin
       tb_reset_n = 0;
-      #(2 * CLK_HALF_PERIOD);
+      #(CLK_PERIOD);
 
       @(negedge tb_clk)
 
       tb_reset_n = 1;
-      #(2 * CLK_HALF_PERIOD);
+      #(CLK_PERIOD);
     end
   endtask // cycle_reset
 
@@ -425,7 +409,7 @@ module tb_chacha_core();
       set_core_key_iv_rounds(key, key_length, iv, rounds);
       set_core_init(1);
 
-      #(2 * CLK_HALF_PERIOD);
+      #(CLK_PERIOD);
       set_core_init(0);
       dump_state();
 
@@ -456,11 +440,11 @@ module tb_chacha_core();
   //
   // Display the accumulated test results.
   //----------------------------------------------------------------
-  task display_test_result();
+  task display_test_result;
     begin
       if (error_ctr == 0)
         begin
-          $display("*** All %d test cases completed successfully", tc_ctr);
+          $display("*** All test cases completed successfully");
         end
       else
         begin
@@ -475,7 +459,7 @@ module tb_chacha_core();
   //
   // Set the input to the DUT to defined values.
   //----------------------------------------------------------------
-  task init_dut();
+  task init_dut;
     begin
       cycle_ctr         = 0;
       tb_clk            = 0;
@@ -489,7 +473,7 @@ module tb_chacha_core();
 
       tb_core_init      = 0;
       tb_core_next      = 0;
-      tb_core_data_in   = 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000;
+      tb_core_data_in   = 512'h0;
     end
   endtask // init_dut
 
@@ -510,7 +494,6 @@ module tb_chacha_core();
       display_cycle_ctr     = cycles;
       display_ctrl_and_ctrs = ctrl_ctr;
       display_state         = state;
-      display_x_state       = x_state;
       display_qround        = qround;
     end
   endtask // set_display_prefs
@@ -533,64 +516,64 @@ module tb_chacha_core();
       $display("");
       dump_state();
 
-      #(4 * CLK_HALF_PERIOD);
+      #(2 * CLK_PERIOD);
       @(negedge tb_clk)
       tb_reset_n = 1;
-      #(2 * CLK_HALF_PERIOD);
+      #(CLK_PERIOD);
       $display("*** State after release of reset:");
       $display("");
       dump_state();
 
       $display("TC1-1: All zero inputs. 128 bit key, 8 rounds.");
       run_test_case(TC1, ONE,
-                    256'h0000000000000000000000000000000000000000000000000000000000000000,
+                    256'h0,
                     KEY_128_BITS,
-                    64'h0000000000000000,
+                    64'h0,
                     EIGHT_ROUNDS,
                     512'he28a5fa4a67f8c5defed3e6fb7303486aa8427d31419a729572d777953491120b64ab8e72b8deb85cd6aea7cb6089a101824beeb08814a428aab1fa2c816081b);
 
 
      $display("TC1-2: All zero inputs. 128 bit key, 12 rounds.");
       run_test_case(TC1, TWO,
-                    256'h0000000000000000000000000000000000000000000000000000000000000000,
+                    256'h0,
                     KEY_128_BITS,
-                    64'h0000000000000000,
+                    64'h0,
                     TWELWE_ROUNDS,
                     512'he1047ba9476bf8ff312c01b4345a7d8ca5792b0ad467313f1dc412b5fdce32410dea8b68bd774c36a920f092a04d3f95274fbeff97bc8491fcef37f85970b450);
 
 
      $display("TC1-3: All zero inputs. 128 bit key, 20 rounds.");
       run_test_case(TC1, THREE,
-                    256'h0000000000000000000000000000000000000000000000000000000000000000,
+                    256'h0,
                     KEY_128_BITS,
-                    64'h0000000000000000,
+                    64'h0,
                     TWENTY_ROUNDS,
                     512'h89670952608364fd00b2f90936f031c8e756e15dba04b8493d00429259b20f46cc04f111246b6c2ce066be3bfb32d9aa0fddfbc12123d4b9e44f34dca05a103f);
 
 
       $display("TC1-4: All zero inputs. 256 bit key, 8 rounds.");
       run_test_case(TC1, FOUR,
-                    256'h0000000000000000000000000000000000000000000000000000000000000000,
+                    256'h0,
                     KEY_256_BITS,
-                    64'h0000000000000000,
+                    64'h0,
                     EIGHT_ROUNDS,
                     512'h3e00ef2f895f40d67f5bb8e81f09a5a12c840ec3ce9a7f3b181be188ef711a1e984ce172b9216f419f445367456d5619314a42a3da86b001387bfdb80e0cfe42);
 
 
       $display("TC1-5: All zero inputs. 256 bit key, 12 rounds.");
       run_test_case(TC1, FIVE,
-                    256'h0000000000000000000000000000000000000000000000000000000000000000,
+                    256'h0,
                     KEY_256_BITS,
-                    64'h0000000000000000,
+                    64'h0,
                     TWELWE_ROUNDS,
                     512'h9bf49a6a0755f953811fce125f2683d50429c3bb49e074147e0089a52eae155f0564f879d27ae3c02ce82834acfa8c793a629f2ca0de6919610be82f411326be);
 
 
       $display("TC1-6: All zero inputs. 256 bit key, 20 rounds.");
       run_test_case(TC1, SIX,
-                    256'h0000000000000000000000000000000000000000000000000000000000000000,
+                    256'h0,
                     KEY_256_BITS,
-                    64'h0000000000000000,
+                    64'h0,
                     TWENTY_ROUNDS,
                     512'h76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586);
 
@@ -599,7 +582,7 @@ module tb_chacha_core();
       run_test_case(TC2, ONE,
                     256'h0100000000000000000000000000000000000000000000000000000000000000,
                     KEY_128_BITS,
-                    64'h0000000000000000,
+                    64'h0,
                     EIGHT_ROUNDS,
                     512'h03a7669888605a0765e8357475e58673f94fc8161da76c2a3aa2f3caf9fe5449e0fcf38eb882656af83d430d410927d55c972ac4c92ab9da3713e19f761eaa14);
 
@@ -608,14 +591,14 @@ module tb_chacha_core();
       run_test_case(TC2, ONE,
                     256'h0100000000000000000000000000000000000000000000000000000000000000,
                     KEY_256_BITS,
-                    64'h0000000000000000,
+                    64'h0,
                     EIGHT_ROUNDS,
                     512'hcf5ee9a0494aa9613e05d5ed725b804b12f4a465ee635acc3a311de8740489ea289d04f43c7518db56eb4433e498a1238cd8464d3763ddbb9222ee3bd8fae3c8);
 
 
       $display("TC3-1: All zero key, one bit in IV set. 128 bit key, 8 rounds.");
       run_test_case(TC3, ONE,
-                    256'h0000000000000000000000000000000000000000000000000000000000000000,
+                    256'h0,
                     KEY_128_BITS,
                     64'h0100000000000000,
                     EIGHT_ROUNDS,

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the Commits mailing list