[Cryptech-Commits] [user/shatov/gost/streebog] 01/01: Initial version of GOST 34.11-2012 (aka Streebog) hash core

git at cryptech.is git at cryptech.is
Thu May 28 09:03:53 UTC 2015


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/gost/streebog.

commit cd8f45d313fe760d7f71a425bdbb567afac219d1
Author: Pavel V. Shatov <meisterpaul1 at yandex.ru>
Date:   Thu May 28 01:51:26 2015 +0400

    Initial version of GOST 34.11-2012 (aka Streebog) hash core
---
 streebog.v                             | 233 ++++++++++++++++++
 streebog_hash/ip/adder_s6.xco          |  73 ++++++
 streebog_hash/streebog_core_adder_s6.v | 152 ++++++++++++
 streebog_hash/streebog_core_lps.v      | 405 +++++++++++++++++++++++++++++++
 streebog_hash/streebog_hash_top.v      | 421 +++++++++++++++++++++++++++++++++
 streebog_hash/streebog_rom_a_matrix.v  | 152 ++++++++++++
 streebog_hash/streebog_rom_c_array.v   |  58 +++++
 streebog_hash/streebog_rom_s_table.v   | 299 +++++++++++++++++++++++
 streebog_hash/tb/streebog_tb.v         | 198 ++++++++++++++++
 streebog_wrapper.v                     | 241 +++++++++++++++++++
 10 files changed, 2232 insertions(+)

diff --git a/streebog.v b/streebog.v
new file mode 100644
index 0000000..cd622e1
--- /dev/null
+++ b/streebog.v
@@ -0,0 +1,233 @@
+module streebog_wrapper
+	(
+		input		wire           clk,
+		input		wire           rst,
+
+		input		wire           cs,
+		input		wire           we,
+
+		input		wire  [ 7: 0]	address,
+		input		wire  [31: 0]	write_data,
+		output	wire	[31: 0]	read_data
+	);
+
+	  //----------------------------------------------------------------
+	  // Internal constant and parameter definitions.
+	  //----------------------------------------------------------------
+	localparam ADDR_NAME0		= 8'h00;
+	localparam ADDR_NAME1		= 8'h01;
+	localparam ADDR_VERSION		= 8'h02;
+
+	localparam ADDR_CTRL			= 8'h08;		// {short, final, update, init}
+	localparam ADDR_STATUS		= 8'h09;		// {valid, ready}
+	localparam ADDR_BLOCK_BITS	= 8'h0a;		// block length in bits
+
+	localparam ADDR_BLOCK0		= 8'h10;
+	localparam ADDR_BLOCK1		= 8'h11;
+	localparam ADDR_BLOCK2		= 8'h12;
+	localparam ADDR_BLOCK3		= 8'h13;
+	localparam ADDR_BLOCK4		= 8'h14;
+	localparam ADDR_BLOCK5		= 8'h15;
+	localparam ADDR_BLOCK6		= 8'h16;
+	localparam ADDR_BLOCK7		= 8'h17;
+	localparam ADDR_BLOCK8		= 8'h18;
+	localparam ADDR_BLOCK9		= 8'h19;
+	localparam ADDR_BLOCK10		= 8'h1a;
+	localparam ADDR_BLOCK11		= 8'h1b;
+	localparam ADDR_BLOCK12		= 8'h1c;
+	localparam ADDR_BLOCK13		= 8'h1d;
+	localparam ADDR_BLOCK14		= 8'h1e;
+	localparam ADDR_BLOCK15		= 8'h1f;
+
+	localparam ADDR_DIGEST0		= 8'h20;
+	localparam ADDR_DIGEST1		= 8'h21;
+	localparam ADDR_DIGEST2		= 8'h22;
+	localparam ADDR_DIGEST3		= 8'h23;
+	localparam ADDR_DIGEST4		= 8'h24;
+	localparam ADDR_DIGEST5		= 8'h25;
+	localparam ADDR_DIGEST6		= 8'h26;
+	localparam ADDR_DIGEST7		= 8'h27;
+	localparam ADDR_DIGEST8		= 8'h28;
+	localparam ADDR_DIGEST9		= 8'h29;
+	localparam ADDR_DIGEST10	= 8'h2a;
+	localparam ADDR_DIGEST11	= 8'h2b;
+	localparam ADDR_DIGEST12	= 8'h2c;
+	localparam ADDR_DIGEST13	= 8'h2d;
+	localparam ADDR_DIGEST14	= 8'h2e;
+	localparam ADDR_DIGEST15	= 8'h2f;
+
+
+	localparam CTRL_INIT_BIT		= 0;
+	localparam CTRL_UPDATE_BIT		= 1;
+	localparam CTRL_FINAL_BIT		= 2;
+	localparam CTRL_SHORT_BIT		= 3;
+
+	localparam STATUS_READY_BIT	= 0;
+	localparam STATUS_VALID_BIT	= 1;
+
+	localparam CORE_NAME0     = 32'h73747265;	// "stre"
+	localparam CORE_NAME1     = 32'h65626F67;	// "ebog"
+	localparam CORE_VERSION   = 32'h302E3130;	// "0.10"
+
+
+		//----------------------------------------------------------------
+		// Control register
+		//----------------------------------------------------------------
+	reg	[3:0]	reg_ctrl;			// core input
+	reg	[9:0]	reg_block_bits;	// input block length
+	
+
+		//----------------------------------------------------------------
+		// Init, Update and Final 1-Cycle Pulses
+		//----------------------------------------------------------------
+	reg	[3:0]	reg_ctrl_dly;		// reg_ctrl as it was one clock earlier
+	always @(posedge clk) reg_ctrl_dly <= reg_ctrl;
+	// A pulse is high only while a control bit is set and its delayed copy is still clear (rising edge).
+	wire core_init_pulse		= reg_ctrl[CTRL_INIT_BIT]   & ~reg_ctrl_dly[CTRL_INIT_BIT];
+	wire core_update_pulse	= reg_ctrl[CTRL_UPDATE_BIT] & ~reg_ctrl_dly[CTRL_UPDATE_BIT];
+	wire core_final_pulse	= reg_ctrl[CTRL_FINAL_BIT]  & ~reg_ctrl_dly[CTRL_FINAL_BIT];
+
+
+		//----------------------------------------------------------------
+		// Status register
+		//----------------------------------------------------------------
+	wire  core_ready;		// core output
+	wire  digest_valid;	// core output
+
+	wire [1:0] reg_status = {digest_valid, core_ready};
+
+
+		//----------------------------------------------------------------
+		// Block and Digest
+		//----------------------------------------------------------------
+	reg  [511 : 0] core_block;		// core input
+	wire [511 : 0] core_digest;	// core output
+
+
+		//----------------------------------------------------------------
+		// core instantiation.
+		//----------------------------------------------------------------
+	streebog_hash_top streebog
+	(
+		.clock			(clk),
+		
+		.block			(core_block),
+		.block_length	(reg_block_bits),
+		
+		.init				(core_init_pulse),
+		.update			(core_update_pulse),
+		.final			(core_final_pulse),
+		
+		.short_mode		(reg_ctrl[CTRL_SHORT_BIT]),
+		.digest			(core_digest),
+		.digest_valid	(digest_valid),
+		.ready			(core_ready)
+	);
+
+		//----------------------------------------------------------------
+		// Read Latch
+		//----------------------------------------------------------------
+	reg [31: 0] tmp_read_data;
+
+	assign read_data = tmp_read_data;
+
+
+	//----------------------------------------------------------------
+	// Read/Write Interface
+	//----------------------------------------------------------------
+	always @(posedge clk)
+		// Synchronous reset has priority; bus accesses are honoured only while cs is high.
+		if (rst) begin
+			// Every software-visible register starts from a defined value.
+			reg_ctrl			<= 4'b0000;
+			reg_block_bits	<= 10'd0;
+			core_block		<= {512{1'b0}};
+			tmp_read_data	<= 32'h00000000;
+		end else if (cs) begin
+			//
+			if (we) begin
+				//
+				// Write Handler (addresses that are not listed below are ignored)
+				//
+				case (address)
+					ADDR_CTRL:			reg_ctrl					<= write_data[3:0];
+					ADDR_BLOCK_BITS:	reg_block_bits			<= write_data[9:0];
+					ADDR_BLOCK0:		core_block[511:480]	<= write_data;
+					ADDR_BLOCK1:		core_block[479:448]	<= write_data;
+					ADDR_BLOCK2:		core_block[447:416]	<= write_data;
+					ADDR_BLOCK3:		core_block[415:384]	<= write_data;
+					ADDR_BLOCK4:		core_block[383:352]	<= write_data;
+					ADDR_BLOCK5:		core_block[351:320]	<= write_data;
+					ADDR_BLOCK6:		core_block[319:288]	<= write_data;
+					ADDR_BLOCK7:		core_block[287:256]	<= write_data;
+					ADDR_BLOCK8:		core_block[255:224]	<= write_data;
+					ADDR_BLOCK9:		core_block[223:192]	<= write_data;
+					ADDR_BLOCK10:		core_block[191:160]	<= write_data;
+					ADDR_BLOCK11:		core_block[159:128]	<= write_data;
+					ADDR_BLOCK12:		core_block[127: 96]	<= write_data;
+					ADDR_BLOCK13:		core_block[ 95: 64]	<= write_data;
+					ADDR_BLOCK14:		core_block[ 63: 32]	<= write_data;
+					ADDR_BLOCK15:		core_block[ 31:  0]	<= write_data;
+				endcase
+				//
+			end else begin
+				//
+				// Read Handler (read_data is registered; unmapped addresses read as zero)
+				//
+				case (address)
+					ADDR_NAME0:			tmp_read_data <= CORE_NAME0;
+					ADDR_NAME1:			tmp_read_data <= CORE_NAME1;
+					ADDR_VERSION:		tmp_read_data <= CORE_VERSION;
+					ADDR_CTRL:			tmp_read_data <= {{28{1'b0}}, reg_ctrl};
+					ADDR_STATUS:		tmp_read_data <= {{30{1'b0}}, reg_status};
+					ADDR_BLOCK_BITS:	tmp_read_data <= {{22{1'b0}}, reg_block_bits};
+					// read-back of the message block, word 0 is the most significant one
+					ADDR_BLOCK0:		tmp_read_data <= core_block[511:480];
+					ADDR_BLOCK1:		tmp_read_data <= core_block[479:448];
+					ADDR_BLOCK2:		tmp_read_data <= core_block[447:416];
+					ADDR_BLOCK3:		tmp_read_data <= core_block[415:384];
+					ADDR_BLOCK4:		tmp_read_data <= core_block[383:352];
+					ADDR_BLOCK5:		tmp_read_data <= core_block[351:320];
+					ADDR_BLOCK6:		tmp_read_data <= core_block[319:288];
+					ADDR_BLOCK7:		tmp_read_data <= core_block[287:256];
+					ADDR_BLOCK8:		tmp_read_data <= core_block[255:224];
+					ADDR_BLOCK9:		tmp_read_data <= core_block[223:192];
+					ADDR_BLOCK10:		tmp_read_data <= core_block[191:160];
+					ADDR_BLOCK11:		tmp_read_data <= core_block[159:128];
+					ADDR_BLOCK12:		tmp_read_data <= core_block[127: 96];
+					ADDR_BLOCK13:		tmp_read_data <= core_block[ 95: 64];
+					ADDR_BLOCK14:		tmp_read_data <= core_block[ 63: 32];
+					ADDR_BLOCK15:		tmp_read_data <= core_block[ 31:  0];
+					// digest output of the core, word 0 is the most significant one
+					ADDR_DIGEST0:		tmp_read_data <= core_digest[511:480];
+					ADDR_DIGEST1:		tmp_read_data <= core_digest[479:448];
+					ADDR_DIGEST2:		tmp_read_data <= core_digest[447:416];
+					ADDR_DIGEST3:		tmp_read_data <= core_digest[415:384];
+					ADDR_DIGEST4:		tmp_read_data <= core_digest[383:352];
+					ADDR_DIGEST5:		tmp_read_data <= core_digest[351:320];
+					ADDR_DIGEST6:		tmp_read_data <= core_digest[319:288];
+					ADDR_DIGEST7:		tmp_read_data <= core_digest[287:256];
+					ADDR_DIGEST8:		tmp_read_data <= core_digest[255:224];
+					ADDR_DIGEST9:		tmp_read_data <= core_digest[223:192];
+					ADDR_DIGEST10:		tmp_read_data <= core_digest[191:160];
+					ADDR_DIGEST11:		tmp_read_data <= core_digest[159:128];
+					ADDR_DIGEST12:		tmp_read_data <= core_digest[127: 96];
+					ADDR_DIGEST13:		tmp_read_data <= core_digest[ 95: 64];
+					ADDR_DIGEST14:		tmp_read_data <= core_digest[ 63: 32];
+					ADDR_DIGEST15:		tmp_read_data <= core_digest[ 31:  0];
+					//
+					default:				tmp_read_data <= 32'h00000000;
+					//
+				endcase
+				//
+			end
+			//
+		end
+
+
+endmodule // streebog_wrapper
+
+
+//======================================================================
+// EOF streebog_wrapper.v
+//======================================================================
diff --git a/streebog_hash/ip/adder_s6.xco b/streebog_hash/ip/adder_s6.xco
new file mode 100644
index 0000000..23b7d94
--- /dev/null
+++ b/streebog_hash/ip/adder_s6.xco
@@ -0,0 +1,73 @@
+##############################################################
+#
+# Xilinx Core Generator version 14.7
+# Date: Tue Mar 24 19:41:47 2015
+#
+##############################################################
+#
+#  This file contains the customisation parameters for a
+#  Xilinx CORE Generator IP GUI. It is strongly recommended
+#  that you do not manually alter this file as it may cause
+#  unexpected and unsupported behavior.
+#
+##############################################################
+#
+#  Generated from component: xilinx.com:ip:c_addsub:11.0
+#
+##############################################################
+#
+# BEGIN Project Options
+SET addpads = false
+SET asysymbol = true
+SET busformat = BusFormatAngleBracketNotRipped
+SET createndf = false
+SET designentry = Verilog
+SET device = xc6slx45
+SET devicefamily = spartan6
+SET flowvendor = Other
+SET formalverification = false
+SET foundationsym = false
+SET implementationfiletype = Ngc
+SET package = csg324
+SET removerpms = false
+SET simulationfiles = Behavioral
+SET speedgrade = -3
+SET verilogsim = true
+SET vhdlsim = false
+# END Project Options
+# BEGIN Select
+SELECT Adder_Subtracter xilinx.com:ip:c_addsub:11.0
+# END Select
+# BEGIN Parameters
+CSET a_type=Unsigned
+CSET a_width=32
+CSET add_mode=Add
+CSET ainit_value=0
+CSET b_constant=false
+CSET b_type=Unsigned
+CSET b_value=00000000000000000000000000000000
+CSET b_width=32
+CSET borrow_sense=Active_Low
+CSET bypass=false
+CSET bypass_ce_priority=CE_Overrides_Bypass
+CSET bypass_sense=Active_High
+CSET c_in=true
+CSET c_out=true
+CSET ce=true
+CSET component_name=adder_s6
+CSET implementation=DSP48
+CSET latency=1
+CSET latency_configuration=Manual
+CSET out_width=32
+CSET sclr=false
+CSET sinit=false
+CSET sinit_value=0
+CSET sset=false
+CSET sync_ce_priority=Sync_Overrides_CE
+CSET sync_ctrl_priority=Reset_Overrides_Set
+# END Parameters
+# BEGIN Extra information
+MISC pkg_timestamp=2013-07-22T10:35:41Z
+# END Extra information
+GENERATE
+# CRC: 13f690be
diff --git a/streebog_hash/streebog_core_adder_s6.v b/streebog_hash/streebog_core_adder_s6.v
new file mode 100644
index 0000000..3c254eb
--- /dev/null
+++ b/streebog_hash/streebog_core_adder_s6.v
@@ -0,0 +1,152 @@
+`timescale 1ns / 1ps
+
+module streebog_core_adder_s6
+	(
+		clk,
+		ena, rdy,
+		x, y, sum
+	);
+
+
+		//
+		// Ports
+		//
+	input		wire				clk;	// core clock
+	input		wire				ena;	// start addition flag
+	output	wire				rdy;	// addition done flag (sum is valid)
+	input		wire	[511:0]	x;		// item x
+	input		wire	[511:0]	y;		// item y
+	output	wire	[511:0]	sum;	// x+y
+
+
+		/*
+		 * ISE cannot synthesize adders using fabric that are more than 256 bits wide. Items X and Y are 512-bit wide, so
+		 * Spartan-6 DSP blocks are used to overcome this issue. Every DSP block is configured to add 32 bits at a time, 
+		 * so total of 512/32=16 DSP blocks are required to implement addition. Every DSP block is configured to expose
+		 * carry input and output ports. Overflow at 512-bit boundary should be ignored according to the specification,
+		 * that's why only 15 intermediate carry lines are required.
+		 *
+		 *     +-------------------+-------------------+-         -+-------------------+
+		 * [X] |         511 : 480 |         479 : 448 |    ...    |          31 :   0 |
+		 *     +------*------------+------*------------+-         -+------*------------+
+		 *            |                   |                               |
+		 *     +------|------------+------|------------+-         -+------|------------+
+		 * [Y] |      |  511 : 480 |      |  479 : 448 |    ...    |      |   31 :   0 |
+		 *     +------|-----*------+------|------------+-         -+------|------------+
+		 *            |     |             |     |                         |     |
+		 *            |     |             |     |                         |     |
+		 *            v     v             v     v                         v     v
+		 *          +---+-+---+         +---+-+---+                     +---+-+---+
+		 *          | A | | B |         | A | | B |                     | A | | B |
+		 *          +---------+         +---+-+---+                     +---+-+---+
+		 *          | DSP #15 |         | DSP #14 |                     | DSP  #0 |
+		 *          |---------|         |---------|                     |---------|
+		 *          |  Carry  |         |  Carry  |                     |  Carry  |
+		 *      X --<-Out  In-<--C[14]--<-Out  In-<--C[13]- ... -C[ 0]--<-Out  In-<-- 0
+		 *          +---------+         +---------+                     +---------+
+		 *          |    S    |         |    S    |                     |    S    |
+		 *          +---------+         +---------+                     +---------+
+		 *               |                   |                               |
+		 *               v                   v                               v
+		 *     +---------*---------+---------*---------+-         -+---------*---------+
+		 * [Z] |         511 : 480 |         479 : 448 |    ...    |          31 :   0 |
+		 *     +-------------------+-------------------+-         -+-------------------+
+		 *
+		 */
+
+
+		//
+		// Internals
+		//
+	wire	[511:0]	z;				// concatenated outputs of adders
+	wire	[14:0]	z_carry;		// carry lines
+	reg	[511:0]	sum_reg;		// output register
+	
+	assign sum = sum_reg;
+
+
+		//
+		// Shift Register
+		//
+	
+		/*
+		 * This shift register is re-loaded with "walking one" bit pattern whenever enable
+		 * input is active and adder core is ready. The most significant bit [17] acts as a
+		 * ready flag. Lower 16 bits [15:0] control DSP blocks (Clock Enable). Intermediate
+		 * bit [16] is required to compensate for 1-cycle latency of DSP blocks.
+		 *
+		 */
+	
+	reg	[17: 0]	ce_shreg	= {1'b1, 1'b0, 16'h0000};
+	
+	assign rdy = ce_shreg[17];
+	
+	
+		//
+		// Shift Register Logic
+		//
+	always @(posedge clk) begin
+		// not ready: move the walking one up by one position; ready: reload it when ena is seen
+		if (!rdy)		ce_shreg	<= ce_shreg << 1;
+		else if (ena)	ce_shreg	<= 18'h00001;
+	end
+	
+		//
+		// Output Register Logic
+		//
+	always @(posedge clk) begin
+		if (ce_shreg[16]) sum_reg <= z;	// bit [16] follows the clock enable of the MSB adder by one cycle
+	end
+		
+
+		//
+		// LSB Adder
+		//
+	adder_s6 adder_s6_lsb
+	(
+		.clk		(clk),				//
+		.ce		(ce_shreg[0]),		// clock enable [0]
+		.a			(x[ 31:  0]),		//
+		.b			(y[ 31:  0]),		//
+		.s			(z[ 31:  0]),		//
+		.c_in		(1'b0),				// carry input tied to 0
+		.c_out	(z_carry[0])		// carry[0] to next adder
+	);
+	
+	
+		//
+		// MSB Adder
+		//
+	adder_s6 adder_s6_msb
+	(
+		.clk		(clk),				//
+		.ce		(ce_shreg[15]),	// clock enable [15]
+		.a			(x[511:480]),		//
+		.b			(y[511:480]),		//
+		.s			(z[511:480]),		//
+		.c_in		(z_carry[14]),		// carry[14] from previous adder
+		.c_out	()						// carry output not connected
+	);	
+
+
+		//
+		// Intermediate Adders
+		//
+	genvar i;
+	generate for (i=1; i<=14; i=i+1)
+		begin: gen_adder_s6
+			adder_s6 adder_s6_int
+			(
+				.clk		(clk),					//
+				.ce		(ce_shreg[i]),			// clock enable [1..14]
+				.a			(x[32*i+31:32*i]),	//
+				.b			(y[32*i+31:32*i]),	//
+				.s			(z[32*i+31:32*i]),	//
+				.c_in		(z_carry[i-1]),		// carry[0..13] from previous adder
+				.c_out	(z_carry[i])			// carry[1..14] to next adder
+			);
+		end
+	endgenerate
+	
+	
+endmodule
diff --git a/streebog_hash/streebog_core_lps.v b/streebog_hash/streebog_core_lps.v
new file mode 100644
index 0000000..a668f16
--- /dev/null
+++ b/streebog_hash/streebog_core_lps.v
@@ -0,0 +1,405 @@
+`timescale 1ns / 1ps
+
+module streebog_core_lps
+	(
+		clk,
+		ena, rdy, last,
+		din, dout
+	);
+	
+	
+		//
+		// Parameters
+		//
+	parameter	PS_PIPELINE_STAGES	=  8;	// 2, 4, 8
+	parameter	L_PIPELINE_STAGES		=  8;	// 2, 4, 8, 16, 32, 64
+
+
+		//
+		// Ports
+		//
+	input		wire				clk;		// core clock
+	input		wire				ena;		// start transformation flag
+	output	wire				rdy;		// transformation done flag (dout is valid)
+	output	wire				last;		// transformation about to complete (rdy flag will be asserted during the next cycle)
+	input		wire	[511:0]	din;		// input data to transform
+	output	wire	[511:0]	dout;		// output data (result of transformation)
+	
+				
+		/*
+		 * This LPS core has parametrized internal pipeline. P and S transformations are combined into one PS transformation and
+		 * have common pipeline. L transformation has its own separate pipeline. The total latency of this core is thus
+		 * PS_PIPELINE_STAGES*L_PIPELINE_STAGES. The fastest version completes the transformation in 2*2=4 cycles, the slowest
+		 * version requires 8*64=512 cycles. S transformation substitutes bytes according to a lookup table. P transformation does
+		 * permutation of input bytes. L transformation multiplies input data by a special predefined matrix. If you don't understand
+		 * how matrices are multiplied, you should not try to understand how the following code works. This may damage your brain.
+		 * You've been warned. Seriously.
+		 *
+		 */
+
+
+		//
+		// Constants
+		//
+		
+		/*
+		 * PS transformation operates on 64-bit words. Input data contains 512/64=8 such words.
+		 * Depending on PS pipeline stage count we can transform 1, 2 or 4 words at a time.
+		 *
+		 * L transformation operates on 64-bit words. Depending on L pipeline stage count we
+		 * can transform 1, 2, 4, 8, 16 or 32 bits of a word at a time.
+		 *
+		 */
+
+	localparam	PS_WORDS_AT_ONCE	=  8 / PS_PIPELINE_STAGES;
+	localparam	L_BITS_AT_ONCE		= 64 / L_PIPELINE_STAGES;
+	
+		/*
+		 * These functions return number of bits needed to store pipeline stage counters. They will
+		 * also prevent users from specifying illegal pipeline widths. This module will not synthesize
+		 * with invalid pipeline stage count, because counter width will not be explicitly defined.
+		 *
+		 */
+	
+	function	integer	PS_NUM_COUNT_BITS;
+		input	integer	x;
+		begin
+			// Width in bits of the PS stage counter for a stage count of 2, 4 or 8.
+			if      (x == 2)	PS_NUM_COUNT_BITS = 1;
+			else if (x == 4)	PS_NUM_COUNT_BITS = 2;
+			else if (x == 8)	PS_NUM_COUNT_BITS = 3;
+			// No final else: any other stage count leaves the result unassigned on purpose.
+		end
+	endfunction
+	
+	function	integer	L_NUM_COUNT_BITS;
+		input	integer	y;
+		begin
+			// Width in bits of the L stage counter for a stage count of 2, 4, 8, 16, 32 or 64.
+			if      (y ==  2)	L_NUM_COUNT_BITS = 1;
+			else if (y ==  4)	L_NUM_COUNT_BITS = 2;
+			else if (y ==  8)	L_NUM_COUNT_BITS = 3;
+			else if (y == 16)	L_NUM_COUNT_BITS = 4;
+			else if (y == 32)	L_NUM_COUNT_BITS = 5;
+			else if (y == 64)	L_NUM_COUNT_BITS = 6;
+			// No final else: any other stage count leaves the result unassigned on purpose.
+		end
+	endfunction
+	
+	
+		//
+		// Counter Widths
+		//
+	localparam	L_CNT_BITS	= L_NUM_COUNT_BITS(L_PIPELINE_STAGES);		// width of L counter
+	localparam	PS_CNT_BITS	= PS_NUM_COUNT_BITS(PS_PIPELINE_STAGES);	// width of PS counter
+	
+	
+		//
+		// Input Multiplexor
+		//
+	wire	[63: 0]	din_mux[0:7];		// eight 64-bit words
+	
+		/*
+		 * This multiplexor does the P transformation. P transformation is effectively a matrix
+		 * transposition. Input 512-bit word is treated as a 8x8 byte matrix. Multiplexor outputs
+		 * a set of 8 64-bit words. These words are columns of the original matrix (transposition
+		 * turns rows into columns).
+		 *
+		 */
+	
+	genvar i, j;
+	generate for (i=0; i<8; i=i+1)
+		begin: gen_din_mux_i
+			for (j=0; j<8; j=j+1) begin: gen_din_mux_j
+				assign din_mux[i][8*j + 7 : 8*j] = din[64*j + 8*i + 7 : 64*j + 8*i];
+			end
+		end
+	endgenerate
+	
+	
+		//
+		// Output Multiplexor
+		//
+	reg	[63: 0]	dout_mux[0:7];		// eight 64-bit words
+	
+		/*
+		 * Output 64-bit subwords are concatenated to form output 512-bit word.
+		 *
+		 */
+		 
+	genvar k;
+	generate for (k=0; k<8; k=k+1)
+		begin: gen_dout_mux
+			assign dout[64*k+63:64*k] = dout_mux[k];
+		end
+	endgenerate
+	
+	
+		//
+		// PS and L Counters
+		//
+		
+		/*
+		 * These counters control internal data flow of this core. For example, if PS has 2 stages and
+		 * L has 4 stages, then the count will look like this:
+		 *     ____
+		 * ENA     \\\________________________________
+		 *     _____                                 _
+		 * RDY  ^   \_______________________________/
+		 *      |   |   |   |   |   |   |   |   |   |
+		 * +----+---+---+---+---+---+---+---+---+---+-
+		 * | PS | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 |
+		 * +----+---+---+---+---+---+---+---+---+---+-
+		 * |  L | 0 | 1 | 2 | 3 | 0 | 1 | 2 | 3 | 0 |
+		 * +----+---+---+---+---+---+---+---+---+---+-
+		 *        ^               ^               |
+		 *        |               |               +--> both counters will be zero during the last cycle
+		 *        |               |
+		 *        +---------------+------------------> preloading of new word(s) into S lookup table(s)
+		 *
+		 */
+		 
+	reg	[ L_CNT_BITS-1:0]	l_count	= { L_CNT_BITS{1'b0}};
+	reg	[PS_CNT_BITS-1:0]	ps_count	= {PS_CNT_BITS{1'b0}};
+
+
+		//
+		// Handy Flags
+		//
+		
+		/*
+		 * These flags are used instead of lengthy (z_count == {Z_CNT_BITS{1'bZ}}) comparisons.
+		 *
+		 */
+
+	wire	 l_count_done	=  &l_count;		// all bits of the L counter are ones
+	wire	ps_count_done	=  &ps_count;		// all bits of the PS counter are ones
+	
+	wire	 l_count_zero	= ~|l_count;		// all bits of the L counter are zeroes
+	wire	ps_count_zero	= ~|ps_count;		// all bits of the PS counter are zeroes
+	
+	
+		//
+		// Preload Flags
+		//
+		
+		/*
+		 * These flags are used as clock enables for S lookup table.
+		 *
+		 */
+		
+	wire	ps_preload_first	= (rdy && ena);
+	wire	ps_preload_next	= (!rdy && !ps_count_zero && l_count_zero);
+	
+	
+		//
+		// Last Flag
+		//
+		
+		/*
+		 * This flag indicates that core operation is about to complete.
+		 *
+		 */
+	assign last = !rdy && ps_count_zero && l_count_zero;
+
+	
+		//
+		// Counter Logic
+		//
+	always @(posedge clk) begin
+		// PS counter: advances whenever the L counter holds its all-ones value while the core is not ready.
+		if (!rdy && l_count_done)								ps_count	<= ps_count + 1'b1;	// next word(s)
+		// L counter, ready state: an accepted ena (rdy and ena both high) increments it for the first time.
+		if (rdy && ena)				 							 l_count	<=  l_count + 1'b1;	// start of transformation
+		// L counter, not ready: keeps incrementing every clock except when both counters read zero.
+		if (!rdy && !(ps_count_zero && l_count_zero))	 l_count	<=  l_count + 1'b1;	// next part of word(s)
+		// The two l_count conditions cannot be true together (rdy vs. !rdy), so at most one applies per clock.
+	end
+	
+	
+		//
+		// Ready Output Register
+		//
+	reg rdy_reg = 1'b1;
+	assign rdy = rdy_reg;
+	
+	
+		//
+		// Ready Set and Clear Logic
+		//
+	always @(posedge clk) begin
+		// rdy_reg is cleared when ena is seen while the core is ready ...
+		if (rdy && ena)										rdy_reg <= 0;	// start of transformation
+		// ... and set again under the same condition that drives the 'last' output.
+		if (!rdy && l_count_zero && ps_count_zero)	rdy_reg <= 1;	// end of transformation
+		// The two conditions cannot be true together (rdy vs. !rdy).
+	end
+		
+		
+		//
+		// S Table Indices
+		//
+		
+		/*
+		 * To transform several words at once a set of indices is required.
+		 *
+		 */
+		
+	wire	[ 2: 0]	s_in_offset	[0:PS_WORDS_AT_ONCE-1];		// indices of words being transformed
+	wire	[63: 0]	s_out			[0:PS_WORDS_AT_ONCE-1];		// output words of S transformation
+	
+	assign s_in_offset[0] = ps_count * PS_WORDS_AT_ONCE;	// the first index is defined by PS counter,
+																			// following indices are linearly increasing
+	
+	genvar sw, sb;														// word and byte counter
+	generate for (sw=1; sw<PS_WORDS_AT_ONCE; sw=sw+1)
+		begin: gen_s_in_offset
+			assign s_in_offset[sw] = s_in_offset[sw-1] + 1'b1;
+		end
+	endgenerate
+	
+	
+		//
+		// S Lookup Table
+		//
+	generate for (sw=0; sw<PS_WORDS_AT_ONCE; sw=sw+1)
+		begin: gen_s_out_word
+			for (sb=0; sb<8; sb=sb+1) begin: gen_s_out_byte
+				//
+				(* ROM_STYLE="BLOCK" *)
+				//
+				streebog_rom_s_table s_table
+				(
+					.clk		(clk),
+					.ena		(ps_preload_first | ps_preload_next),
+					.din		(din_mux[s_in_offset[sw]][8*sb + 7 : 8*sb]),
+					.dout		(s_out[sw][8*sb + 7 : 8*sb])
+				);
+				//
+			end
+		end
+	endgenerate
+	
+	
+	
+		//
+		// A Matrix Indices
+		//
+		
+		/*
+		 * To transform several bits at once a set of indices is required.
+		 *
+		 */		
+		 
+	wire	[ 5: 0]	l_in_offset	[0:L_BITS_AT_ONCE-1];	// indices of bits being transformed
+	wire	[63: 0]	l_out			[0:L_BITS_AT_ONCE-1];	// output bits of L transformation
+
+	assign l_in_offset[0] = l_count * L_BITS_AT_ONCE;	// the first index is defined by L counter,
+																		// following indices are linearly increasing
+	
+	genvar l;
+	generate for (l=1; l<L_BITS_AT_ONCE; l=l+1)
+		begin: gen_l_in_offset
+			assign l_in_offset[l] = l_in_offset[l-1] + 1'b1;
+		end
+	endgenerate
+	
+	
+		//
+		// A Matrix
+		//
+	generate for (l=0; l<L_BITS_AT_ONCE; l=l+1)
+		begin: gen_l_out		
+			//
+			(* ROM_STYLE="BLOCK" *)
+			//
+			streebog_rom_a_matrix a_matrix
+			(
+				.clk		(clk),
+				.din		(l_in_offset[l]),
+				.dout		(l_out[l])
+			);
+			//
+		end
+	endgenerate
+	
+	
+		//
+		// Multiplication Logic
+		//
+		
+		/*
+		 * Original specification describes multiplication method that effectively adds
+		 * matrix rows based on source vector items. Instead of that multiplication is
+		 * done column-by-column.
+		 *
+		 */
+		 
+	wire	[L_BITS_AT_ONCE-1:0]	l_out_part[0:PS_WORDS_AT_ONCE-1];
+	
+	genvar lw, lb;
+	generate for (lw=0; lw<PS_WORDS_AT_ONCE; lw=lw+1)
+		begin: gen_l_out_part
+			for (lb=0; lb<L_BITS_AT_ONCE; lb=lb+1) begin: gen_l_out_bit
+				//
+				assign l_out_part[lw][lb] = ^(l_out[lb] & s_out[lw]);
+				//
+			end
+		end
+	endgenerate
+	
+	
+		/*
+		 * PS and L transformations have 1-cycle latency, so delayed versions
+		 * of offsets are needed to update output registers accordingly.
+		 *
+		 */
+		 
+	reg	[PS_CNT_BITS-1:0]	ps_count_dly	= 0;	// delayed PS counter
+	reg	[ L_CNT_BITS-1:0]	 l_count_dly	= 0;	// delayed L counter
+	
+	always @(posedge clk) ps_count_dly <= ps_count;
+	always @(posedge clk)  l_count_dly <=  l_count;
+	
+	
+		//
+		// Output Offset Tables
+		//
+	wire	[ 2: 0]	dout_offset_word	[0:PS_WORDS_AT_ONCE-1];
+	wire	[ 5: 0]	dout_offset_bit	[0:L_BITS_AT_ONCE  -1];
+
+	assign dout_offset_word[0] = ps_count_dly * PS_WORDS_AT_ONCE;
+	assign dout_offset_bit[0]  =  l_count_dly * L_BITS_AT_ONCE;
+	
+	genvar z;
+	
+	generate for (z=1; z<PS_WORDS_AT_ONCE; z=z+1)
+		begin: gen_dout_offset_word
+			assign dout_offset_word[z] = dout_offset_word[z-1] + 1'b1;
+		end
+	endgenerate
+	
+	generate for (z=1; z<L_BITS_AT_ONCE; z=z+1)
+		begin: gen_dout_offset_bit
+			assign dout_offset_bit[z] = dout_offset_bit[z-1] + 1'b1;
+		end
+	endgenerate
+	
+	
+	
+		//
+		// Output Logic
+		//
+	integer lps_w, lps_b;
+	
+	always @(posedge clk)
+		// While the core is not ready, result bits are stored at positions derived from the delayed counters.
+		if (! rdy)
+			// Each clock writes PS_WORDS_AT_ONCE x L_BITS_AT_ONCE bits: one per (word, bit) offset pair.
+			for (lps_w=0; lps_w<PS_WORDS_AT_ONCE; lps_w=lps_w+1)
+				for (lps_b=0; lps_b<L_BITS_AT_ONCE; lps_b=lps_b+1)
+					dout_mux[dout_offset_word[lps_w]][dout_offset_bit[lps_b]] <= l_out_part[lps_w][lps_b];
+					//dout_mux[dout_offset_word[lps_w]][L_BITS_AT_ONCE*l_count_dly+lps_b] <= l_out_part[lps_w][lps_b];
+	
+	
+endmodule
diff --git a/streebog_hash/streebog_hash_top.v b/streebog_hash/streebog_hash_top.v
new file mode 100644
index 0000000..1cd1bbe
--- /dev/null
+++ b/streebog_hash/streebog_hash_top.v
@@ -0,0 +1,421 @@
+`timescale 1ns / 1ps
+
+module streebog_hash_top
+	(
+		clock,
+		block, block_length,
+		init, update, final,
+		short_mode,
+		digest, digest_valid,
+		ready
+	);
+	// Top level of the Streebog hash core: an FSM sequences two LPS cores (round keys and
+	// data) plus two 512-bit adders (N and Sigma) and registers the resulting digest.
+		//
+		// Parameters
+		//
+	parameter	PS_PIPELINE_STAGES	=  2;	// 2, 4, 8
+	parameter	L_PIPELINE_STAGES		=  2;	// 2, 4, 8, 16, 32, 64
+	
+	
+		//
+		// Ports
+		//
+	input		wire				clock;			// core clock
+	input		wire	[511:0]	block;			// input message block
+	input		wire	[  9:0]	block_length;	// length of input block in bits (0..512)
+	input		wire				init;				// flag to start calculation of new message hash
+	input		wire				update;			// flag to compress next message block
+	input		wire				final;			// flag to run final transformation after last message block
+	input		wire				short_mode;		// 0 = produce 512-bit hash, 1 = produce 256-bit hash
+	output	wire	[511:0]	digest;			// message digest output
+	output	wire				digest_valid;	// hash is ready (digest output value is valid)
+	output	wire				ready;			// core is ready (init/update/final can be asserted)
+	
+	
+		//
+		// Initialization Vectors and Round Count
+		//
+	localparam	STREEBOG_IV_512		= {512{1'b0}};
+	localparam	STREEBOG_IV_256		= {64{8'h01}};
+	localparam	STREEBOG_NUM_ROUNDS	= 4'd12;
+	
+	
+		//
+		// State Registers
+		//
+	reg	[511:0]	h;			// |
+	reg	[511:0]	Sigma;	// | Internal State Registers
+	reg	[511:0]	N;			// |
+	
+	reg	[511:0]	digest_reg;
+	reg				digest_valid_reg = 1'b0;
+	reg	[  3:0]	round_count = 4'd0;
+	
+	assign digest			= digest_reg;
+	assign digest_valid	= digest_valid_reg;
+		
+		
+		//
+		// Handy Internal Flags
+		//
+	wire	round_count_active	= (round_count > 4'd0)						? 1 : 0;		// transformation has been started
+	wire	round_count_not_done = (round_count < STREEBOG_NUM_ROUNDS)	? 1 : 0;		// transformation has not been finished
+
+
+		/*
+		 * Compression procedure includes 13 rounds. To perform every round we need to know
+		 * round key. This implementation uses two parallel LPS cores. The first LPS core (key core)
+		 * is used to produce round keys, the second LPS core (data core) is used to encrypt message block.
+		 *
+		 * Data core is not activated during the first round, because round key is not yet known during
+		 * the first round. During the second round, key core computes next (second) round key, while data core encrypts
+		 * message block using first round key and so on. The last compression round doesn't include encryption step.
+		 * Instead of it simple XOR operation is used.
+		 *
+		 * Compression procedure requires 13 key calculations and 12 data encryptions. LPS cores operate according to
+		 * the following schedule:
+		 *
+		 *
+		 *             +----------+----------+----------+-   -+----------+
+		 * Round Count |        0 |        1 |        2 | ... |       12 |
+		 *             +----------+----------+----------+-   -+----------+
+		 * Key Core    |  KEY  #0 |  KEY  #1 |  KEY  #2 | ... |  KEY #12 |
+		 *             +----------+----------+----------+-   -+----------+
+		 * Data Core   |     Idle | DATA  #0 | DATA  #1 | ... | DATA #11 |
+		 *             +----------+----------+----------+-   -+----------+
+		 *
+		 */
+
+
+		/*
+		 * Operation of this core is controlled by FSM logic. Ready flag is embedded in state encoding (MSB). FSM leaves
+		 * idle state when update or final is asserted; init is served inside idle state. Init flag has priority over
+		 * update and final flags. Update flag has priority over final flag.
+		 * The encoding and the state register are declared here, ahead of every block that decodes fsm_state.
+		 */
+		 
+	
+		//
+		// FSM States
+		//
+	localparam	FSM_STATE_IDLE									= 4'b1_00_0;	// core is idle
+	//
+	localparam	FSM_STATE_UPDATE_LPS_TRIG					= 4'b0_00_0;	// core is triggering gN(h,m) transformation
+	localparam	FSM_STATE_UPDATE_LPS_WAIT					= 4'b0_00_1;	// core is waiting for transformation to complete
+	//
+	localparam	FSM_STATE_UPDATE_ADDER_TRIG				= 4'b0_11_0;	// core is triggering summation
+	localparam	FSM_STATE_UPDATE_ADDER_WAIT				= 4'b0_11_1;	// core is waiting for summation to complete
+	//
+	localparam	FSM_STATE_FINAL_N_LPS_TRIG					= 4'b0_01_0;	// core is triggering g0(h,N) transformation
+	localparam	FSM_STATE_FINAL_N_LPS_WAIT					= 4'b0_01_1;	// core is waiting for transformation to complete
+	//
+	localparam	FSM_STATE_FINAL_SIGMA_LPS_TRIG			= 4'b0_10_0;	// core is triggering g0(h,Sigma) transformation
+	localparam	FSM_STATE_FINAL_SIGMA_LPS_WAIT			= 4'b0_10_1;	// core is waiting for transformation to complete
+	
+	
+		//
+		// FSM State Register and Core Ready Flag
+		//
+	reg	[ 3: 0]	fsm_state = FSM_STATE_IDLE;
+	assign ready = fsm_state[3];
+
+		//
+		// LPS Core for Round Key Calculation
+		//
+	reg	[511:0]	lps_key_in;		// key core input word
+	wire	[511:0]	lps_key_out;	// key core output word
+	wire				lps_key_ena;	// key core start strobe (high in the *_LPS_TRIG states)
+	wire				lps_key_last;	// key core is about to complete (gates round constant preload)
+	wire				lps_key_rdy;	// key core completion flag polled in the *_LPS_WAIT states
+	
+	wire	lps_key_ena_update		= (fsm_state == FSM_STATE_UPDATE_LPS_TRIG)		? 1 : 0;
+	wire	lps_key_ena_final_n		= (fsm_state == FSM_STATE_FINAL_N_LPS_TRIG)		? 1 : 0;
+	wire	lps_key_ena_final_sigma	= (fsm_state == FSM_STATE_FINAL_SIGMA_LPS_TRIG) ? 1 : 0;
+	
+	assign lps_key_ena = lps_key_ena_update || lps_key_ena_final_n || lps_key_ena_final_sigma;
+	
+	streebog_core_lps #
+	(
+		.PS_PIPELINE_STAGES	(PS_PIPELINE_STAGES),
+		.L_PIPELINE_STAGES	(L_PIPELINE_STAGES)
+	)
+	lps_key
+	(
+		.clk		(clock),
+		.ena		(lps_key_ena),
+		.rdy		(lps_key_rdy),
+		.last		(lps_key_last),
+		.din		(lps_key_in),
+		.dout		(lps_key_out)
+	);
+	
+	
+		//
+		// LPS Core for Block Compression
+		//
+	reg	[511:0]	lps_data_in;
+	wire	[511:0]	lps_data_out;
+	wire				lps_data_ena;
+	wire				lps_data_last;
+	wire				lps_data_rdy;
+
+	assign lps_data_ena = lps_key_ena & round_count_active;
+
+	streebog_core_lps #
+	(
+		.PS_PIPELINE_STAGES	(PS_PIPELINE_STAGES),
+		.L_PIPELINE_STAGES	(L_PIPELINE_STAGES)
+	)	
+	lps_data
+	(
+		.clk		(clock),
+		.ena		(lps_data_ena),
+		.rdy		(lps_data_rdy),
+		.last		(lps_data_last),
+		.din		(lps_data_in),
+		.dout		(lps_data_out)
+	);
+		
+	
+		/*
+		 * According to specification, internal state must be updated after compression, this
+		 * involves addition of two pairs of 512-bit numbers. This operation is done in two
+		 * parallel summation cores. The first core updates N register, the second core updates
+		 * Sigma register. Summation is triggered before LPS cores are activated. Actual update
+		 * of N and Sigma occurs after completion of compression procedure.
+		 *
+		 */
+	
+	
+		//
+		// Summation Trigger Flag
+		//
+	wire	adder_trig = (fsm_state == FSM_STATE_UPDATE_ADDER_TRIG) ? 1 : 0;
+	
+	
+		//
+		// Block Length Adder (N = N + |M|)
+		//
+	wire	[511:0]	adder_n_sum;
+	wire				adder_n_rdy;
+	
+	streebog_core_adder_s6 adder_n
+	(
+		.clk	(clock),
+		.ena	(adder_trig),
+		.rdy	(adder_n_rdy),
+		.x		(N),
+		.y		({{502{1'b0}}, block_length}),
+		.sum	(adder_n_sum)
+	);
+	
+	
+		//
+		// Message Adder (Sigma = Sigma + M)
+		//
+	wire	[511:0]	adder_sigma_sum;
+	wire				adder_sigma_rdy;
+	
+	streebog_core_adder_s6 adder_sigma
+	(
+		.clk	(clock),
+		.ena	(adder_trig),
+		.rdy	(adder_sigma_rdy),
+		.x		(Sigma),
+		.y		(block),
+		.sum	(adder_sigma_sum)
+	);
+	
+	
+		//
+		// Handy Flags
+		//
+	wire	lps_last_both		= lps_key_last & lps_data_last;
+	wire	lps_rdy_both		= lps_key_rdy  & lps_data_rdy;
+	wire	adder_rdy_both		= adder_n_rdy  & adder_sigma_rdy;
+	
+	
+	
+		//
+		// FSM Transition Logic
+		//
+	always @(posedge clock) begin
+		//
+		case (fsm_state)
+			//
+			// idle (init is served without leaving this state)
+			//
+			FSM_STATE_IDLE: begin
+				if (!init && update)					fsm_state	<= FSM_STATE_UPDATE_ADDER_TRIG;
+				if (!init && !update && final)	fsm_state	<= FSM_STATE_FINAL_N_LPS_TRIG;
+			end
+			//
+			// update -> gN(h,m)
+			//
+			FSM_STATE_UPDATE_ADDER_TRIG:			fsm_state	<= FSM_STATE_UPDATE_LPS_TRIG;
+			FSM_STATE_UPDATE_LPS_TRIG:				fsm_state	<= FSM_STATE_UPDATE_LPS_WAIT;
+			FSM_STATE_UPDATE_LPS_WAIT:
+				if (lps_rdy_both)						fsm_state	<= round_count_not_done ? FSM_STATE_UPDATE_LPS_TRIG : FSM_STATE_UPDATE_ADDER_WAIT;
+			FSM_STATE_UPDATE_ADDER_WAIT:
+				if (adder_rdy_both)					fsm_state	<= FSM_STATE_IDLE;
+			//
+			// final -> g0(h,N)
+			//
+			FSM_STATE_FINAL_N_LPS_TRIG:			fsm_state	<= FSM_STATE_FINAL_N_LPS_WAIT;
+			FSM_STATE_FINAL_N_LPS_WAIT:
+				if (lps_rdy_both)						fsm_state	<= round_count_not_done ? FSM_STATE_FINAL_N_LPS_TRIG : FSM_STATE_FINAL_SIGMA_LPS_TRIG;
+			//
+			// final -> g0(h,Sigma)
+			//
+			FSM_STATE_FINAL_SIGMA_LPS_TRIG:		fsm_state	<= FSM_STATE_FINAL_SIGMA_LPS_WAIT;
+			FSM_STATE_FINAL_SIGMA_LPS_WAIT:
+				if (lps_rdy_both)						fsm_state	<= round_count_not_done ? FSM_STATE_FINAL_SIGMA_LPS_TRIG : FSM_STATE_IDLE;
+			//
+			// default
+			//
+			default:										fsm_state	<= FSM_STATE_IDLE;
+			//
+		endcase
+		//
+	end
+	
+	
+		/*
+		 * Key calculation involves 12 round constants. These constants are stored in an array. The first key
+		 * (calculated during the first round) does not require a constant. New constant is preloaded during the last
+		 * cycle of LPS transformation. LPS cores have dedicated output flag indicating that operation is about to complete.
+		 * This flag is used as Clock Enable. Constants are preloaded during rounds 1-12 and are used during rounds 2-13.
+		 *
+		 */
+	
+		//
+		// Round Constants
+		//
+	wire	[511:0]	c_array_out;
+	
+	wire	c_array_ena_update		= (fsm_state == FSM_STATE_UPDATE_LPS_WAIT)		? 1 : 0;
+	wire	c_array_ena_final_n		= (fsm_state == FSM_STATE_FINAL_N_LPS_WAIT)		? 1 : 0;
+	wire	c_array_ena_final_sigma	= (fsm_state == FSM_STATE_FINAL_SIGMA_LPS_WAIT)	? 1 : 0;
+	
+	wire	c_array_ena = lps_key_last && round_count_not_done && (c_array_ena_update || c_array_ena_final_n || c_array_ena_final_sigma);
+	
+	// synthesis attribute below requests block-style ROM for the constant array
+	(* ROM_STYLE="BLOCK" *)
+	//	
+	streebog_rom_c_array c_array
+	(
+		.clk		(clock),
+		.ena		(c_array_ena),
+		.din		(round_count),
+		.dout		(c_array_out)
+	);
+	
+		/*
+		 * The following pieces of code take care of LPS and summation inputs and outputs, they also take care
+		 * of output digest register and corresponding valid flag.
+		 *
+		 */
+
+
+		//
+		// Internal State Control Logic
+		//
+	always @(posedge clock)
+		// h, N and Sigma change only on init, when both adders finish, or after the last round
+		case (fsm_state)
+
+			FSM_STATE_IDLE: if (init) begin
+				h			<= (short_mode == 1'b1) ? STREEBOG_IV_256 : STREEBOG_IV_512;
+				N			<= {512{1'b0}};
+				Sigma		<= {512{1'b0}};
+			end
+			
+			FSM_STATE_UPDATE_ADDER_WAIT: if (adder_rdy_both) begin
+				N			<= adder_n_sum;
+				Sigma		<= adder_sigma_sum;
+			end
+
+			FSM_STATE_UPDATE_LPS_WAIT:
+				if (lps_key_rdy && !round_count_not_done)
+					h			<= lps_key_out ^ lps_data_out ^ h ^ block;
+
+			FSM_STATE_FINAL_N_LPS_WAIT:
+				if (lps_key_rdy && !round_count_not_done)
+					h			<= lps_key_out ^ lps_data_out ^ h ^ N;
+					
+		endcase
+	
+	
+		//
+		// Output Register Control Logic
+		//
+	always @(posedge clock)
+		// digest is invalidated by init and latched after the last round of g0(h,Sigma)
+		case (fsm_state)
+		
+			FSM_STATE_IDLE: if (init) begin
+				digest_reg			<= {512{1'bX}};
+				digest_valid_reg	<= 1'b0;
+			end
+
+			FSM_STATE_FINAL_SIGMA_LPS_WAIT:
+				if (lps_key_rdy && !round_count_not_done) begin
+					digest_reg			<= lps_key_out ^ lps_data_out ^ h ^ Sigma;
+					digest_valid_reg	<= 1'b1;
+				end
+
+		endcase
+		
+		
+		//
+		// Round Count Logic
+		//
+	always @(posedge clock)
+		// cleared when update/final is seen in idle, bumped after every key pass, wraps to 0 after the last round
+		case (fsm_state)
+		
+			FSM_STATE_IDLE:
+				if (update || final) round_count <= 4'd0;
+			
+			FSM_STATE_UPDATE_LPS_WAIT,
+			FSM_STATE_FINAL_N_LPS_WAIT,
+			FSM_STATE_FINAL_SIGMA_LPS_WAIT:
+				if (lps_key_rdy) round_count <= round_count_not_done ? round_count + 1'b1 : 4'd0;
+			
+		endcase	
+		
+		
+		//
+		// Key and Data LPS Cores Logic
+		//
+	always @(posedge clock)
+		// first key input is loaded in idle; afterwards both core inputs are refreshed whenever the key core finishes
+		case (fsm_state)
+		
+			FSM_STATE_IDLE: if (!init) begin
+				if (update)					lps_key_in	<= h ^ N;
+				if (!update && final)	lps_key_in	<= h;
+			end
+
+			FSM_STATE_UPDATE_LPS_WAIT:
+				if (lps_key_rdy && round_count_not_done) begin
+					lps_key_in			<= lps_key_out ^ c_array_out;
+					lps_data_in 		<= lps_key_out ^ (round_count_active ? lps_data_out : block);
+				end
+
+			FSM_STATE_FINAL_N_LPS_WAIT: if (lps_key_rdy) begin
+				lps_key_in			<= lps_key_out ^ (round_count_not_done ? c_array_out : lps_data_out ^ h ^ N);
+				lps_data_in 		<= round_count_not_done ? lps_key_out ^ (round_count_active ? lps_data_out : N) : {512{1'bX}};
+			end
+
+			FSM_STATE_FINAL_SIGMA_LPS_WAIT:
+				if (lps_key_rdy && round_count_not_done) begin
+					lps_key_in			<= lps_key_out ^ c_array_out;
+					lps_data_in 		<= round_count_active ? lps_key_out ^ lps_data_out : lps_key_out ^ Sigma;
+				end 
+
+		endcase
+		
+
+endmodule
diff --git a/streebog_hash/streebog_rom_a_matrix.v b/streebog_hash/streebog_rom_a_matrix.v
new file mode 100644
index 0000000..ba3607b
--- /dev/null
+++ b/streebog_hash/streebog_rom_a_matrix.v
@@ -0,0 +1,152 @@
+`timescale 1ns / 1ps
+
+module streebog_rom_a_matrix
+	(
+		clk,
+		din, dout
+	);
+	// Synchronous 64 x 64-bit lookup ROM: on every rising edge of clk the output register
+	// is loaded with the table word selected by din (no enable input, no reset).
+		//
+		// Ports
+		//
+	input		wire				clk;		// ROM clock
+	input		wire	[ 5: 0]	din;		// word address (0..63)
+	output	wire	[63: 0]	dout;		// word selected by din, registered (valid one clock later)
+	
+	
+		//
+		// Output Register
+		//
+	reg	[63: 0]	dout_reg;
+	assign dout = dout_reg;
+	
+	
+		//
+		// A Transformation Matrix
+		//
+		
+		/*
+		 * Original matrix from the standard was transformed to allow efficient implementation of
+		 * hardware multiplication. The following matrix is effectively the transposed version
+		 * of the original matrix A with reversed row order.
+		 *
+		 * Original 64x64 bit matrix from the standard has the following form:
+		 *
+		 * a[i,j] is 1-bit matrix element
+		 *
+		 * A_row(i) is 64-bit row of matrix
+		 * A_col(j) is 64-bit column of matrix
+		 *
+		 *
+		 *    A_col(0)  A_col(1)       A_col(62) A_col(63)
+		 *       |         |              |         |
+		 *       |         |              |         |
+		 * +----------------------------------------------+
+		 * | a[ 0,63]  a[ 0,62]  ...  a[ 0, 1]  a[ 0, 0]  | --A_row(0)
+		 * | a[ 1,63]  a[ 1,62]  ...  a[ 1, 1]  a[ 1, 0]  | --A_row(1)
+		 * |                      ...                     |
+		 * | a[62,63]  a[62,62]  ...  a[62, 1]  a[62, 0]  | --A_row(62)
+		 * | a[63,63]  a[63,62]  ...  a[63, 1]  a[63, 0]  | --A_row(63)
+		 * +----------------------------------------------+
+		 *
+		 *
+		 * A_row(0)...A_row(63) are given in the original specification. Instead of row vectors we need a set of
+		 * column vectors A_col(0)...A_col(63). A_col() can be obtained by transposing A_row().
+		 *
+		 *
+		 *    A_row(0)  A_row(1)       A_row(62) A_row(63)
+		 *       |         |              |         |
+		 *       |         |              |         |
+		 * +---------------------------------------------+
+		 * | a[ 0,63]  a[ 1,63]  ...  a[62,63]  a[63,63] | --A_col(0)
+		 * | a[ 0,62]  a[ 1,62]  ...  a[62,62]  a[63,62] | --A_col(1)
+		 * |                     ...                     |
+		 * | a[ 0, 1]  a[ 1, 1]  ...  a[62, 1]  a[63, 1] | --A_col(62)
+		 * | a[ 0, 0]  a[ 1, 0]  ...  a[62, 0]  a[63, 0] | --A_col(63)
+		 * +---------------------------------------------+
+		 *
+		 *
+		 * The only problem with A_col() is that original 64-bit A_row() values in the standard are written from MSB to LSB. That implies that
+		 * original matrix columns are numbered from 63 to 0, while matrix rows are numbered from 0 to 63. Because of that we need to reverse
+		 * row order after transposition. Original matrix had element a[0,0] in A_row(0), but after transposition element a[0,0] turns out
+		 * to be in A_col(63), not in A_col(0). Because of that addresses inside of case() below are reversed. This effectively reverses
+		 * the order in which A_col() follow.
+		 *
+		 */
+		
+	always @(posedge clk) begin
+		// all 64 values of the 6-bit address have an entry, so no default branch is needed
+		case (din)
+			// entries are listed from address 0x3F down to 0x00 (see the note on reversed order above)
+			6'h3F: dout_reg <= 64'hB18285C0BA4F9506;
+			6'h3E: dout_reg <= 64'h584142605DA7CA83;
+			6'h3D: dout_reg <= 64'h2CA021302E53E5C1;
+			6'h3C: dout_reg <= 64'h16509098172972E0;
+			6'h3B: dout_reg <= 64'hBA2A4D8C315B2C76;
+			6'h3A: dout_reg <= 64'hEC172386A2E2833D;
+			6'h39: dout_reg <= 64'hC7091403EB3E5418;
+			6'h38: dout_reg <= 64'h63040A81759F2A0C;
+			6'h37: dout_reg <= 64'h025DA344601EA1B8;
+			6'h36: dout_reg <= 64'h012ED1A2308FD05C;
+			6'h35: dout_reg <= 64'h8017685198C7E8AE;
+			6'h34: dout_reg <= 64'h408BB4284C63F457;
+			6'h33: dout_reg <= 64'h2218F9D046AFDB13;
+			6'h32: dout_reg <= 64'h13515FACC3C94CB1;
+			6'h31: dout_reg <= 64'h0B758C12817A87E0;
+			6'h30: dout_reg <= 64'h05BA4689C03D4370;
+			6'h2F: dout_reg <= 64'hA1F0C986411102CC;
+			6'h2E: dout_reg <= 64'hD0F864C3A0080166;
+			6'h2D: dout_reg <= 64'hE87CB2E1508480B3;
+			6'h2C: dout_reg <= 64'hF4BED9F0A8C24059;
+			6'h2B: dout_reg <= 64'hDB2F257E95702260;
+			6'h2A: dout_reg <= 64'h4C67DB398BA913FC;
+			6'h29: dout_reg <= 64'h87C3241A04450B32;
+			6'h28: dout_reg <= 64'h43E1920D82220599;
+			6'h27: dout_reg <= 64'hE0802541868B1232;
+			6'h26: dout_reg <= 64'h704012A0C3458999;
+			6'h25: dout_reg <= 64'hB8208950E12244CC;
+			6'h24: dout_reg <= 64'h5C1044A8F011A266;
+			6'h23: dout_reg <= 64'h4E0887957E834381;
+			6'h22: dout_reg <= 64'hC704668B394AB3F2;
+			6'h21: dout_reg <= 64'h830296041A2E4BCB;
+			6'h20: dout_reg <= 64'hC1014B820D172565;
+			6'h1F: dout_reg <= 64'h7DD80C6D98218914;
+			6'h1E: dout_reg <= 64'h3E6C06B64C90440A;
+			6'h1D: dout_reg <= 64'h9F36835B26C8A285;
+			6'h1C: dout_reg <= 64'h4F1BC1AD93E45142;
+			6'h1B: dout_reg <= 64'hDA55ECBBD1D3A135;
+			6'h1A: dout_reg <= 64'h10727AB0F048598E;
+			6'h19: dout_reg <= 64'hF56131B560852553;
+			6'h18: dout_reg <= 64'hFAB018DA30421229;
+			6'h17: dout_reg <= 64'h82B12139880C7F01;
+			6'h16: dout_reg <= 64'h4158909CC4063F80;
+			6'h15: dout_reg <= 64'hA02CC8CEE2831F40;
+			6'h14: dout_reg <= 64'h5016E46771C10F20;
+			6'h13: dout_reg <= 64'h2ABAD30AB0ECF811;
+			6'h12: dout_reg <= 64'h17EC48BC507A0309;
+			6'h11: dout_reg <= 64'h09C785E72031FE05;
+			6'h10: dout_reg <= 64'h046342731018FF02;
+			6'h0F: dout_reg <= 64'h91E9E113A54E2B57;
+			6'h0E: dout_reg <= 64'h4874F009522715AB;
+			6'h0D: dout_reg <= 64'hA43AF804A9138A55;
+			6'h0C: dout_reg <= 64'hD21D7C825409C5AA;
+			6'h0B: dout_reg <= 64'h78E75F528F4A4982;
+			6'h0A: dout_reg <= 64'hAD9ACEBA62EB0F16;
+			6'h09: dout_reg <= 64'h47A4864E943BAC5C;
+			6'h08: dout_reg <= 64'h23D2C3274A9D56AE;
+			6'h07: dout_reg <= 64'h06016A5C89D498B1;
+			6'h06: dout_reg <= 64'h8380B5AE446A4C58;
+			6'h05: dout_reg <= 64'hC140DA57A2B5262C;
+			6'h04: dout_reg <= 64'hE0206DAB51DA9316;
+			6'h03: dout_reg <= 64'h7611DC09A1B9D1BA;
+			6'h02: dout_reg <= 64'h3D0984585908F0EC;
+			6'h01: dout_reg <= 64'h1805A870255060C7;
+			6'h00: dout_reg <= 64'h0C02D4B812A83063;
+			//
+		endcase // case(din)
+		//
+	end // always @(posedge clk)
+	
+	
+endmodule
diff --git a/streebog_hash/streebog_rom_c_array.v b/streebog_hash/streebog_rom_c_array.v
new file mode 100644
index 0000000..e31b5c0
--- /dev/null
+++ b/streebog_hash/streebog_rom_c_array.v
@@ -0,0 +1,58 @@
+`timescale 1ns / 1ps
+
+module streebog_rom_c_array
+	(
+		clk, ena,
+		din, dout
+	);
+	// Synchronous ROM with clock enable holding twelve 512-bit round constants.
+	// The output register is updated only on rising clk edges where ena is high.
+		//
+		// Ports
+		//
+	input		wire				clk;		// ROM clock
+	input		wire				ena;		// clock enable: dout keeps its value while low
+	input		wire	[  3:0]	din;		// constant index, entries exist for 0x0..0xB
+	output	wire	[511:0]	dout;		// registered constant selected by din
+	
+	
+		//
+		// Output Register
+		//
+	reg	[511:0]	dout_reg;
+	assign dout = dout_reg;
+	
+	
+		//
+		// C Round Constants Array
+		//
+	always @(posedge clk) begin
+		// output register is only touched when the enable is asserted
+		if (ena) begin
+			//
+			case (din)
+				// constants 0x0..0x3
+				4'h0: dout_reg <= 512'hB1085BDA1ECADAE9EBCB2F81C0657C1F2F6A76432E45D016714EB88D7585C4FC4B7CE09192676901A2422A08A460D31505767436CC744D23DD806559F2A64507;
+				4'h1: dout_reg <= 512'h6FA3B58AA99D2F1A4FE39D460F70B5D7F3FEEA720A232B9861D55E0F16B501319AB5176B12D699585CB561C2DB0AA7CA55DDA21BD7CBCD56E679047021B19BB7;
+				4'h2: dout_reg <= 512'hF574DCAC2BCE2FC70A39FC286A3D843506F15E5F529C1F8BF2EA7514B1297B7BD3E20FE490359EB1C1C93A376062DB09C2B6F443867ADB31991E96F50ABA0AB2;
+				4'h3: dout_reg <= 512'hEF1FDFB3E81566D2F948E1A05D71E4DD488E857E335C3C7D9D721CAD685E353FA9D72C82ED03D675D8B71333935203BE3453EAA193E837F1220CBEBC84E3D12E;
+				// constants 0x4..0x7
+				4'h4: dout_reg <= 512'h4BEA6BACAD4747999A3F410C6CA923637F151C1F1686104A359E35D7800FFFBDBFCD1747253AF5A3DFFF00B723271A167A56A27EA9EA63F5601758FD7C6CFE57;
+				4'h5: dout_reg <= 512'hAE4FAEAE1D3AD3D96FA4C33B7A3039C02D66C4F95142A46C187F9AB49AF08EC6CFFAA6B71C9AB7B40AF21F66C2BEC6B6BF71C57236904F35FA68407A46647D6E;
+				4'h6: dout_reg <= 512'hF4C70E16EEAAC5EC51AC86FEBF240954399EC6C7E6BF87C9D3473E33197A93C90992ABC52D822C3706476983284A05043517454CA23C4AF38886564D3A14D493;
+				4'h7: dout_reg <= 512'h9B1F5B424D93C9A703E7AA020C6E41414EB7F8719C36DE1E89B4443B4DDBC49AF4892BCB929B069069D18D2BD1A5C42F36ACC2355951A8D9A47F0DD4BF02E71E;
+				// constants 0x8..0xB
+				4'h8: dout_reg <= 512'h378F5A541631229B944C9AD8EC165FDE3A7D3A1B258942243CD955B7E00D0984800A440BDBB2CEB17B2B8A9AA6079C540E38DC92CB1F2A607261445183235ADB;
+				4'h9: dout_reg <= 512'hABBEDEA680056F52382AE548B2E4F3F38941E71CFF8A78DB1FFFE18A1B3361039FE76702AF69334B7A1E6C303B7652F43698FAD1153BB6C374B4C7FB98459CED;
+				4'hA: dout_reg <= 512'h7BCD9ED0EFC889FB3002C6CD635AFE94D8FA6BBBEBAB076120018021148466798A1D71EFEA48B9CAEFBACD1D7D476E98DEA2594AC06FD85D6BCAA4CD81F32D1B;
+				4'hB: dout_reg <= 512'h378EE767F11631BAD21380B00449B17ACDA43C32BCDF1D77F82012D430219F9B5D80EF9D1891CC86E71DA4AA88E12852FAF417D5D9B21B9948BC924AF11BD720;
+				// indices 0xC..0xF have no constant: output is driven to X
+				default: dout_reg <= {512{1'bX}};
+				//
+			endcase // case (din)
+			//
+		end // if (ena)
+		//
+	end // always @(posedge clk)
+	
+endmodule
diff --git a/streebog_hash/streebog_rom_s_table.v b/streebog_hash/streebog_rom_s_table.v
new file mode 100644
index 0000000..9779b0f
--- /dev/null
+++ b/streebog_hash/streebog_rom_s_table.v
@@ -0,0 +1,299 @@
+`timescale 1ns / 1ps
+
+module streebog_rom_s_table
+	(
+		clk, ena,
+		din, dout
+	);
+	// Synchronous 256 x 8-bit lookup ROM with clock enable (S transformation table).
+	// The output register is updated only on rising clk edges where ena is high.
+		//
+		// Ports
+		//
+	input		wire				clk;		// ROM clock
+	input		wire				ena;		// clock enable: dout keeps its value while low
+	input		wire	[ 7: 0]	din;		// input byte (table address)
+	output	wire	[ 7: 0]	dout;		// registered table entry selected by din
+	
+	
+		//
+		// Output Register
+		//
+	reg	[ 7: 0]	dout_reg;
+	assign dout = dout_reg;
+	
+	
+		//
+		// S Transformation Lookup Table
+		//
+	always @(posedge clk) begin
+		// output register is only touched when the enable is asserted
+		if (ena) begin
+			//
+			case (din)
+				// all 256 values of the 8-bit address have an entry, so no default branch is needed
+				8'h00: dout_reg <= 8'hFC;
+				8'h01: dout_reg <= 8'hEE;
+				8'h02: dout_reg <= 8'hDD;
+				8'h03: dout_reg <= 8'h11;
+				8'h04: dout_reg <= 8'hCF;
+				8'h05: dout_reg <= 8'h6E;
+				8'h06: dout_reg <= 8'h31;
+				8'h07: dout_reg <= 8'h16;
+				8'h08: dout_reg <= 8'hFB;
+				8'h09: dout_reg <= 8'hC4;
+				8'h0A: dout_reg <= 8'hFA;
+				8'h0B: dout_reg <= 8'hDA;
+				8'h0C: dout_reg <= 8'h23;
+				8'h0D: dout_reg <= 8'hC5;
+				8'h0E: dout_reg <= 8'h04;
+				8'h0F: dout_reg <= 8'h4D;
+				8'h10: dout_reg <= 8'hE9;
+				8'h11: dout_reg <= 8'h77;
+				8'h12: dout_reg <= 8'hF0;
+				8'h13: dout_reg <= 8'hDB;
+				8'h14: dout_reg <= 8'h93;
+				8'h15: dout_reg <= 8'h2E;
+				8'h16: dout_reg <= 8'h99;
+				8'h17: dout_reg <= 8'hBA;
+				8'h18: dout_reg <= 8'h17;
+				8'h19: dout_reg <= 8'h36;
+				8'h1A: dout_reg <= 8'hF1;
+				8'h1B: dout_reg <= 8'hBB;
+				8'h1C: dout_reg <= 8'h14;
+				8'h1D: dout_reg <= 8'hCD;
+				8'h1E: dout_reg <= 8'h5F;
+				8'h1F: dout_reg <= 8'hC1;
+				8'h20: dout_reg <= 8'hF9;
+				8'h21: dout_reg <= 8'h18;
+				8'h22: dout_reg <= 8'h65;
+				8'h23: dout_reg <= 8'h5A;
+				8'h24: dout_reg <= 8'hE2;
+				8'h25: dout_reg <= 8'h5C;
+				8'h26: dout_reg <= 8'hEF;
+				8'h27: dout_reg <= 8'h21;
+				8'h28: dout_reg <= 8'h81;
+				8'h29: dout_reg <= 8'h1C;
+				8'h2A: dout_reg <= 8'h3C;
+				8'h2B: dout_reg <= 8'h42;
+				8'h2C: dout_reg <= 8'h8B;
+				8'h2D: dout_reg <= 8'h01;
+				8'h2E: dout_reg <= 8'h8E;
+				8'h2F: dout_reg <= 8'h4F;
+				8'h30: dout_reg <= 8'h05;
+				8'h31: dout_reg <= 8'h84;
+				8'h32: dout_reg <= 8'h02;
+				8'h33: dout_reg <= 8'hAE;
+				8'h34: dout_reg <= 8'hE3;
+				8'h35: dout_reg <= 8'h6A;
+				8'h36: dout_reg <= 8'h8F;
+				8'h37: dout_reg <= 8'hA0;
+				8'h38: dout_reg <= 8'h06;
+				8'h39: dout_reg <= 8'h0B;
+				8'h3A: dout_reg <= 8'hED;
+				8'h3B: dout_reg <= 8'h98;
+				8'h3C: dout_reg <= 8'h7F;
+				8'h3D: dout_reg <= 8'hD4;
+				8'h3E: dout_reg <= 8'hD3;
+				8'h3F: dout_reg <= 8'h1F;
+				8'h40: dout_reg <= 8'hEB;
+				8'h41: dout_reg <= 8'h34;
+				8'h42: dout_reg <= 8'h2C;
+				8'h43: dout_reg <= 8'h51;
+				8'h44: dout_reg <= 8'hEA;
+				8'h45: dout_reg <= 8'hC8;
+				8'h46: dout_reg <= 8'h48;
+				8'h47: dout_reg <= 8'hAB;
+				8'h48: dout_reg <= 8'hF2;
+				8'h49: dout_reg <= 8'h2A;
+				8'h4A: dout_reg <= 8'h68;
+				8'h4B: dout_reg <= 8'hA2;
+				8'h4C: dout_reg <= 8'hFD;
+				8'h4D: dout_reg <= 8'h3A;
+				8'h4E: dout_reg <= 8'hCE;
+				8'h4F: dout_reg <= 8'hCC;
+				8'h50: dout_reg <= 8'hB5;
+				8'h51: dout_reg <= 8'h70;
+				8'h52: dout_reg <= 8'h0E;
+				8'h53: dout_reg <= 8'h56;
+				8'h54: dout_reg <= 8'h08;
+				8'h55: dout_reg <= 8'h0C;
+				8'h56: dout_reg <= 8'h76;
+				8'h57: dout_reg <= 8'h12;
+				8'h58: dout_reg <= 8'hBF;
+				8'h59: dout_reg <= 8'h72;
+				8'h5A: dout_reg <= 8'h13;
+				8'h5B: dout_reg <= 8'h47;
+				8'h5C: dout_reg <= 8'h9C;
+				8'h5D: dout_reg <= 8'hB7;
+				8'h5E: dout_reg <= 8'h5D;
+				8'h5F: dout_reg <= 8'h87;
+				8'h60: dout_reg <= 8'h15;
+				8'h61: dout_reg <= 8'hA1;
+				8'h62: dout_reg <= 8'h96;
+				8'h63: dout_reg <= 8'h29;
+				8'h64: dout_reg <= 8'h10;
+				8'h65: dout_reg <= 8'h7B;
+				8'h66: dout_reg <= 8'h9A;
+				8'h67: dout_reg <= 8'hC7;
+				8'h68: dout_reg <= 8'hF3;
+				8'h69: dout_reg <= 8'h91;
+				8'h6A: dout_reg <= 8'h78;
+				8'h6B: dout_reg <= 8'h6F;
+				8'h6C: dout_reg <= 8'h9D;
+				8'h6D: dout_reg <= 8'h9E;
+				8'h6E: dout_reg <= 8'hB2;
+				8'h6F: dout_reg <= 8'hB1;
+				8'h70: dout_reg <= 8'h32;
+				8'h71: dout_reg <= 8'h75;
+				8'h72: dout_reg <= 8'h19;
+				8'h73: dout_reg <= 8'h3D;
+				8'h74: dout_reg <= 8'hFF;
+				8'h75: dout_reg <= 8'h35;
+				8'h76: dout_reg <= 8'h8A;
+				8'h77: dout_reg <= 8'h7E;
+				8'h78: dout_reg <= 8'h6D;
+				8'h79: dout_reg <= 8'h54;
+				8'h7A: dout_reg <= 8'hC6;
+				8'h7B: dout_reg <= 8'h80;
+				8'h7C: dout_reg <= 8'hC3;
+				8'h7D: dout_reg <= 8'hBD;
+				8'h7E: dout_reg <= 8'h0D;
+				8'h7F: dout_reg <= 8'h57;
+				8'h80: dout_reg <= 8'hDF;
+				8'h81: dout_reg <= 8'hF5;
+				8'h82: dout_reg <= 8'h24;
+				8'h83: dout_reg <= 8'hA9;
+				8'h84: dout_reg <= 8'h3E;
+				8'h85: dout_reg <= 8'hA8;
+				8'h86: dout_reg <= 8'h43;
+				8'h87: dout_reg <= 8'hC9;
+				8'h88: dout_reg <= 8'hD7;
+				8'h89: dout_reg <= 8'h79;
+				8'h8A: dout_reg <= 8'hD6;
+				8'h8B: dout_reg <= 8'hF6;
+				8'h8C: dout_reg <= 8'h7C;
+				8'h8D: dout_reg <= 8'h22;
+				8'h8E: dout_reg <= 8'hB9;
+				8'h8F: dout_reg <= 8'h03;
+				8'h90: dout_reg <= 8'hE0;
+				8'h91: dout_reg <= 8'h0F;
+				8'h92: dout_reg <= 8'hEC;
+				8'h93: dout_reg <= 8'hDE;
+				8'h94: dout_reg <= 8'h7A;
+				8'h95: dout_reg <= 8'h94;
+				8'h96: dout_reg <= 8'hB0;
+				8'h97: dout_reg <= 8'hBC;
+				8'h98: dout_reg <= 8'hDC;
+				8'h99: dout_reg <= 8'hE8;
+				8'h9A: dout_reg <= 8'h28;
+				8'h9B: dout_reg <= 8'h50;
+				8'h9C: dout_reg <= 8'h4E;
+				8'h9D: dout_reg <= 8'h33;
+				8'h9E: dout_reg <= 8'h0A;
+				8'h9F: dout_reg <= 8'h4A;
+				8'hA0: dout_reg <= 8'hA7;
+				8'hA1: dout_reg <= 8'h97;
+				8'hA2: dout_reg <= 8'h60;
+				8'hA3: dout_reg <= 8'h73;
+				8'hA4: dout_reg <= 8'h1E;
+				8'hA5: dout_reg <= 8'h00;
+				8'hA6: dout_reg <= 8'h62;
+				8'hA7: dout_reg <= 8'h44;
+				8'hA8: dout_reg <= 8'h1A;
+				8'hA9: dout_reg <= 8'hB8;
+				8'hAA: dout_reg <= 8'h38;
+				8'hAB: dout_reg <= 8'h82;
+				8'hAC: dout_reg <= 8'h64;
+				8'hAD: dout_reg <= 8'h9F;
+				8'hAE: dout_reg <= 8'h26;
+				8'hAF: dout_reg <= 8'h41;
+				8'hB0: dout_reg <= 8'hAD;
+				8'hB1: dout_reg <= 8'h45;
+				8'hB2: dout_reg <= 8'h46;
+				8'hB3: dout_reg <= 8'h92;
+				8'hB4: dout_reg <= 8'h27;
+				8'hB5: dout_reg <= 8'h5E;
+				8'hB6: dout_reg <= 8'h55;
+				8'hB7: dout_reg <= 8'h2F;
+				8'hB8: dout_reg <= 8'h8C;
+				8'hB9: dout_reg <= 8'hA3;
+				8'hBA: dout_reg <= 8'hA5;
+				8'hBB: dout_reg <= 8'h7D;
+				8'hBC: dout_reg <= 8'h69;
+				8'hBD: dout_reg <= 8'hD5;
+				8'hBE: dout_reg <= 8'h95;
+				8'hBF: dout_reg <= 8'h3B;
+				8'hC0: dout_reg <= 8'h07;
+				8'hC1: dout_reg <= 8'h58;
+				8'hC2: dout_reg <= 8'hB3;
+				8'hC3: dout_reg <= 8'h40;
+				8'hC4: dout_reg <= 8'h86;
+				8'hC5: dout_reg <= 8'hAC;
+				8'hC6: dout_reg <= 8'h1D;
+				8'hC7: dout_reg <= 8'hF7;
+				8'hC8: dout_reg <= 8'h30;
+				8'hC9: dout_reg <= 8'h37;
+				8'hCA: dout_reg <= 8'h6B;
+				8'hCB: dout_reg <= 8'hE4;
+				8'hCC: dout_reg <= 8'h88;
+				8'hCD: dout_reg <= 8'hD9;
+				8'hCE: dout_reg <= 8'hE7;
+				8'hCF: dout_reg <= 8'h89;
+				8'hD0: dout_reg <= 8'hE1;
+				8'hD1: dout_reg <= 8'h1B;
+				8'hD2: dout_reg <= 8'h83;
+				8'hD3: dout_reg <= 8'h49;
+				8'hD4: dout_reg <= 8'h4C;
+				8'hD5: dout_reg <= 8'h3F;
+				8'hD6: dout_reg <= 8'hF8;
+				8'hD7: dout_reg <= 8'hFE;
+				8'hD8: dout_reg <= 8'h8D;
+				8'hD9: dout_reg <= 8'h53;
+				8'hDA: dout_reg <= 8'hAA;
+				8'hDB: dout_reg <= 8'h90;
+				8'hDC: dout_reg <= 8'hCA;
+				8'hDD: dout_reg <= 8'hD8;
+				8'hDE: dout_reg <= 8'h85;
+				8'hDF: dout_reg <= 8'h61;
+				8'hE0: dout_reg <= 8'h20;
+				8'hE1: dout_reg <= 8'h71;
+				8'hE2: dout_reg <= 8'h67;
+				8'hE3: dout_reg <= 8'hA4;
+				8'hE4: dout_reg <= 8'h2D;
+				8'hE5: dout_reg <= 8'h2B;
+				8'hE6: dout_reg <= 8'h09;
+				8'hE7: dout_reg <= 8'h5B;
+				8'hE8: dout_reg <= 8'hCB;
+				8'hE9: dout_reg <= 8'h9B;
+				8'hEA: dout_reg <= 8'h25;
+				8'hEB: dout_reg <= 8'hD0;
+				8'hEC: dout_reg <= 8'hBE;
+				8'hED: dout_reg <= 8'hE5;
+				8'hEE: dout_reg <= 8'h6C;
+				8'hEF: dout_reg <= 8'h52;
+				8'hF0: dout_reg <= 8'h59;
+				8'hF1: dout_reg <= 8'hA6;
+				8'hF2: dout_reg <= 8'h74;
+				8'hF3: dout_reg <= 8'hD2;
+				8'hF4: dout_reg <= 8'hE6;
+				8'hF5: dout_reg <= 8'hF4;
+				8'hF6: dout_reg <= 8'hB4;
+				8'hF7: dout_reg <= 8'hC0;
+				8'hF8: dout_reg <= 8'hD1;
+				8'hF9: dout_reg <= 8'h66;
+				8'hFA: dout_reg <= 8'hAF;
+				8'hFB: dout_reg <= 8'hC2;
+				8'hFC: dout_reg <= 8'h39;
+				8'hFD: dout_reg <= 8'h4B;
+				8'hFE: dout_reg <= 8'h63;
+				8'hFF: dout_reg <= 8'hB6;
+				//
+			endcase // case (din)
+			//
+		end // if (ena)
+		//
+	end // always @(posedge clk)
+	
+	
+endmodule
diff --git a/streebog_hash/tb/streebog_tb.v b/streebog_hash/tb/streebog_tb.v
new file mode 100644
index 0000000..291f11c
--- /dev/null
+++ b/streebog_hash/tb/streebog_tb.v
@@ -0,0 +1,198 @@
+`timescale 1ns / 1ps
+// Self-checking testbench for streebog_hash_top: hashes four reference vectors and prints OK or ERROR for each.
+module streebog_tb;
+		// The clock rises at 5, 15, 25... ns; every stimulus change below happens at a multiple of 10 ns,
+		// i.e. on a falling clock edge.
+	localparam	STREEBOG_MODE_SHORT	= 1;
+	localparam	STREEBOG_MODE_LONG	= 0;
+
+		// short message that fits into one block
+	localparam	[511:0]	MSG_SINGLE						= 512'h01323130393837363534333231303938373635343332313039383736353433323130393837363534333231303938373635343332313039383736353433323130;
+
+		// length of short message in bits
+	localparam	[  9:0]	MSG_SINGLE_LENGTH				= 10'd504;
+
+		// correct 512-bit digest of short message
+	localparam	[511:0]	MSG_SINGLE_DIGEST_LONG		= 512'h486f64c1917879417fef082b3381a4e211c324f074654c38823a7b76f830ad00fa1fbae42b1285c0352f227524bc9ab16254288dd6863dccd5b9f54a1ad0541b;
+
+		// correct 256-bit digest of short message
+	localparam	[255:0]	MSG_SINGLE_DIGEST_SHORT		= 256'h00557be5e584fd52a449b16b0251d05d27f94ab76cbaa6da890b59d8ef1e159d;
+
+
+		// first block of long message
+	localparam	[511:0]	MSG_DOUBLE_FIRST				= 512'hfbeafaebef20fffbf0e1e0f0f520e0ed20e8ece0ebe5f0f2f120fff0eeec20f120faf2fee5e2202ce8f6f3ede220e8e6eee1e8f0f2d1202ce8f0f2e5e220e5d1;
+
+		// second block of long message
+	localparam	[511:0]	MSG_DOUBLE_SECOND				= 512'h0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001fbe2e5f0eee3c820;
+
+		// length of first part of long message in bits
+	localparam	[  9:0]	MSG_DOUBLE_FIRST_LENGTH		= 10'd512;
+
+		// length of second part of long message in bits
+	localparam	[  9:0]	MSG_DOUBLE_SECOND_LENGTH	= 10'd64;
+
+		// correct 512-bit digest of long message
+	localparam	[511:0]	MSG_DOUBLE_DIGEST_LONG		= 512'h28fbc9bada033b1460642bdcddb90c3fb3e56c497ccd0f62b8a2ad4935e85f037613966de4ee00531ae60f3b5a47f8dae06915d5f2f194996fcabf2622e6881e;
+
+		// correct 256-bit digest of long message (declared 256 bits wide to match hash_short)
+	localparam	[255:0]	MSG_DOUBLE_DIGEST_SHORT		= 256'h508f7e553c06501d749a66fc28c6cac0b005746d97537fa85d9e40904efed29d;
+
+
+		//
+		// Inputs (driven by the tasks at the bottom of this module)
+		//
+	reg				clock;
+	reg	[511:0]	block;
+	reg	[  9:0]	block_length;
+	reg				init		= 0;
+	reg				update	= 0;
+	reg				final		= 0;
+	reg				short_mode;
+
+
+		//
+		// Outputs
+		//
+	wire	[511:0]	digest;
+	wire				digest_valid;
+	wire				ready;
+
+
+		//
+		// UUT
+		//
+	streebog_hash_top uut
+	(
+		.clock			(clock),
+
+		.block			(block),
+		.block_length	(block_length),
+		.init				(init),
+		.update			(update),
+		.final			(final),
+		.short_mode		(short_mode),
+		.digest			(digest),
+		.digest_valid	(digest_valid),
+		.ready			(ready)
+	);
+
+		//
+		// Clock (10 ns period)
+		//
+	initial clock = 1'b0;
+	always #5 clock = ~clock;
+
+	reg	[511:0]	hash;									// digest captured by streebog_final
+	wire	[255:0]	hash_short = hash[511:256];	// the 256-bit checks compare the upper half only
+
+	initial begin
+		// idle for 100 ns before the first vector
+		#100;
+		//
+		$display("Checking 512-bit mode on short message...");
+		//
+		streebog_init(STREEBOG_MODE_LONG);
+		streebog_set_block(MSG_SINGLE, MSG_SINGLE_LENGTH);
+		streebog_update;
+		streebog_final;
+		//
+		if (hash == MSG_SINGLE_DIGEST_LONG) $display("OK");
+		else $display("ERROR: hash == %0128h", hash);
+		//
+		#100;
+		//
+		$display("Checking 256-bit mode on short message...");
+		//
+		streebog_init(STREEBOG_MODE_SHORT);
+		streebog_set_block(MSG_SINGLE, MSG_SINGLE_LENGTH);
+		streebog_update;
+		streebog_final;
+		//
+		if (hash_short == MSG_SINGLE_DIGEST_SHORT) $display("OK");
+		else $display("ERROR: hash_short == %064h", hash_short);
+		//
+		#100;
+		//
+		$display("Checking 512-bit mode on long message...");
+		// two-block message: one update per block, then a single final
+		streebog_init(STREEBOG_MODE_LONG);
+		streebog_set_block(MSG_DOUBLE_FIRST, MSG_DOUBLE_FIRST_LENGTH);
+		streebog_update;
+		streebog_set_block(MSG_DOUBLE_SECOND, MSG_DOUBLE_SECOND_LENGTH);
+		streebog_update;
+		streebog_final;
+		//
+		if (hash == MSG_DOUBLE_DIGEST_LONG) $display("OK");
+		else $display("ERROR: hash == %0128h", hash);
+		//
+		#100;
+		//
+		$display("Checking 256-bit mode on long message...");
+		// same two-block sequence as above, in short mode
+		streebog_init(STREEBOG_MODE_SHORT);
+		streebog_set_block(MSG_DOUBLE_FIRST, MSG_DOUBLE_FIRST_LENGTH);
+		streebog_update;
+		streebog_set_block(MSG_DOUBLE_SECOND, MSG_DOUBLE_SECOND_LENGTH);
+		streebog_update;
+		streebog_final;
+		//
+		if (hash_short == MSG_DOUBLE_DIGEST_SHORT) $display("OK");
+		else $display("ERROR: hash_short == %064h", hash_short);
+		//
+		#100;
+		//
+		$finish;
+	end
+
+		// Select the digest mode and hold init high for one clock period (10 ns).
+	task streebog_init;
+		input	use_short_mode;
+		begin
+			short_mode	= use_short_mode;
+			init			= 1;
+			#10;
+			init			= 0;
+			#10;
+		end
+	endtask
+
+		// Present a message block and its length in bits to the uut; consumes no simulation time.
+	task streebog_set_block;
+		input		[511:0]	new_block;
+		input		[  9:0]	new_block_length;
+		begin
+			block				= new_block;
+			block_length	= new_block_length;
+			// no delay here: the block is picked up by the following streebog_update
+		end
+	endtask
+
+		// Hold update high for one clock period, then poll ready every 10 ns until it is high.
+	task streebog_update;
+		begin
+			update	= 1;
+			#10;
+			update	= 0;
+			#10;
+			while (!ready) #10;
+			#10;
+		end
+	endtask
+
+		// Hold final high for one clock period, capture digest once digest_valid is high, then wait for ready.
+	task streebog_final;
+		begin
+			final		= 1;
+			#10;
+			final		= 0;
+			#10;
+			while (!digest_valid) #10;
+			hash = digest;
+			#10;
+			while (!ready) #10;
+			#10;
+		end
+	endtask
+
+endmodule
+
diff --git a/streebog_wrapper.v b/streebog_wrapper.v
new file mode 100644
index 0000000..a2ef47d
--- /dev/null
+++ b/streebog_wrapper.v
@@ -0,0 +1,241 @@
+module streebog_wrapper		// 32-bit register interface in front of streebog_hash_top
+	(
+		input		wire           clk,			// every register in this module updates on the rising edge
+		input		wire           rst,			// synchronous, active-high reset
+
+		input		wire           cs,			// access enable: no register is read or written while low
+		input		wire           we,			// with cs high: 1 = write access, 0 = read access
+
+		input		wire  [ 7: 0]	address,
+		input		wire  [31: 0]	write_data,
+		output	wire	[31: 0]	read_data		// registered: holds the value latched by the last read access
+	);
+
+	  //----------------------------------------------------------------
+	  // Internal constant and parameter definitions.
+	  //----------------------------------------------------------------
+	localparam ADDR_NAME0		= 8'h00;
+	localparam ADDR_NAME1		= 8'h01;
+	localparam ADDR_VERSION		= 8'h02;
+
+	localparam ADDR_CTRL			= 8'h08;		// {final, update, init}
+	localparam ADDR_STATUS		= 8'h09;		// {valid, ready}
+	localparam ADDR_BLOCK_BITS	= 8'h0a;		// block length in bits
+	localparam ADDR_MODE			= 8'h0b;		// 0=long (512-bit) mode, 1=short (256-bit) mode
+		// BLOCK0 maps to the most significant 32-bit word of the block, BLOCK15 to the least significant
+	localparam ADDR_BLOCK0		= 8'h10;
+	localparam ADDR_BLOCK1		= 8'h11;
+	localparam ADDR_BLOCK2		= 8'h12;
+	localparam ADDR_BLOCK3		= 8'h13;
+	localparam ADDR_BLOCK4		= 8'h14;
+	localparam ADDR_BLOCK5		= 8'h15;
+	localparam ADDR_BLOCK6		= 8'h16;
+	localparam ADDR_BLOCK7		= 8'h17;
+	localparam ADDR_BLOCK8		= 8'h18;
+	localparam ADDR_BLOCK9		= 8'h19;
+	localparam ADDR_BLOCK10		= 8'h1a;
+	localparam ADDR_BLOCK11		= 8'h1b;
+	localparam ADDR_BLOCK12		= 8'h1c;
+	localparam ADDR_BLOCK13		= 8'h1d;
+	localparam ADDR_BLOCK14		= 8'h1e;
+	localparam ADDR_BLOCK15		= 8'h1f;
+		// DIGEST0 maps to the most significant 32-bit word of the digest, DIGEST15 to the least significant
+	localparam ADDR_DIGEST0		= 8'h20;
+	localparam ADDR_DIGEST1		= 8'h21;
+	localparam ADDR_DIGEST2		= 8'h22;
+	localparam ADDR_DIGEST3		= 8'h23;
+	localparam ADDR_DIGEST4		= 8'h24;
+	localparam ADDR_DIGEST5		= 8'h25;
+	localparam ADDR_DIGEST6		= 8'h26;
+	localparam ADDR_DIGEST7		= 8'h27;
+	localparam ADDR_DIGEST8		= 8'h28;
+	localparam ADDR_DIGEST9		= 8'h29;
+	localparam ADDR_DIGEST10	= 8'h2a;
+	localparam ADDR_DIGEST11	= 8'h2b;
+	localparam ADDR_DIGEST12	= 8'h2c;
+	localparam ADDR_DIGEST13	= 8'h2d;
+	localparam ADDR_DIGEST14	= 8'h2e;
+	localparam ADDR_DIGEST15	= 8'h2f;
+
+		// Bit positions inside the control and status registers
+	localparam CTRL_INIT_BIT		= 0;
+	localparam CTRL_UPDATE_BIT		= 1;
+	localparam CTRL_FINAL_BIT		= 2;
+
+	localparam STATUS_READY_BIT	= 0;
+	localparam STATUS_VALID_BIT	= 1;
+
+	localparam CORE_NAME0     = 32'h73747265;	// "stre"
+	localparam CORE_NAME1     = 32'h65626F67;	// "ebog"
+	localparam CORE_VERSION   = 32'h302E3130;	// "0.10"
+
+
+		//----------------------------------------------------------------
+		// Control, block-length and mode registers (written over the bus)
+		//----------------------------------------------------------------
+	reg	[2:0]	reg_ctrl;			// core input
+	reg	[9:0]	reg_block_bits;	// input block length in bits
+	reg			reg_mode;			// long/short mode
+
+
+		//----------------------------------------------------------------
+		// Init, Update and Final 1-Cycle Pulses
+		//----------------------------------------------------------------
+	reg	[2:0]	reg_ctrl_dly;		// reg_ctrl delayed by one clock
+	always @(posedge clk) reg_ctrl_dly <= reg_ctrl;
+		// a pulse is high only while a control bit is 1 and its delayed copy is still 0 (0 -> 1 transition)
+	wire core_init_pulse		= (reg_ctrl[CTRL_INIT_BIT]   == 1'b1) && (reg_ctrl_dly[CTRL_INIT_BIT]   == 1'b0);
+	wire core_update_pulse	= (reg_ctrl[CTRL_UPDATE_BIT] == 1'b1) && (reg_ctrl_dly[CTRL_UPDATE_BIT] == 1'b0);
+	wire core_final_pulse	= (reg_ctrl[CTRL_FINAL_BIT]  == 1'b1) && (reg_ctrl_dly[CTRL_FINAL_BIT]  == 1'b0);
+		// consequence: a control bit has to be written back to 0 before it can trigger the same command again
+
+		//----------------------------------------------------------------
+		// Status register (read-only view of the core outputs)
+		//----------------------------------------------------------------
+	wire  core_ready;		// core output
+	wire  digest_valid;	// core output
+
+	wire [1:0] reg_status = {digest_valid, core_ready};
+
+
+		//----------------------------------------------------------------
+		// Block and Digest
+		//----------------------------------------------------------------
+	reg  [511 : 0] core_block;		// core input
+	wire [511 : 0] core_digest;	// core output
+
+
+	//----------------------------------------------------------------
+	// core instantiation.
+	//----------------------------------------------------------------
+	streebog_hash_top streebog
+	(
+		.clock			(clk),
+
+		.block			(core_block),
+		.block_length	(reg_block_bits),
+
+		.init				(core_init_pulse),
+		.update			(core_update_pulse),
+		.final			(core_final_pulse),
+
+		.short_mode		(reg_mode),
+
+		.digest			(core_digest),
+		.digest_valid	(digest_valid),
+
+		.ready			(core_ready)
+	);
+
+		//----------------------------------------------------------------
+		// Read Latch
+		//----------------------------------------------------------------
+	reg [31: 0] tmp_read_data;
+
+	assign read_data = tmp_read_data;
+
+
+	//----------------------------------------------------------------
+	// Read/Write Interface
+	//----------------------------------------------------------------
+	always @(posedge clk)
+		//
+		if (rst) begin
+			// synchronous reset of every bus-visible register
+			reg_ctrl			<= 3'b000;
+			reg_block_bits	<= 10'd0;
+			reg_mode			<= 1'b0;
+			core_block		<= {512{1'b0}};
+			tmp_read_data	<= 32'h00000000;
+			//
+		end else if (cs) begin
+			//
+			if (we) begin
+				//
+				// Write Handler (no default branch: writes to any other address are ignored)
+				//
+				case (address)
+					ADDR_CTRL:			reg_ctrl					<= write_data[2:0];
+					ADDR_BLOCK_BITS:	reg_block_bits			<= write_data[9:0];
+					ADDR_MODE:			reg_mode					<= write_data[0];
+					ADDR_BLOCK0:		core_block[511:480]	<= write_data;
+					ADDR_BLOCK1:		core_block[479:448]	<= write_data;
+					ADDR_BLOCK2:		core_block[447:416]	<= write_data;
+					ADDR_BLOCK3:		core_block[415:384]	<= write_data;
+					ADDR_BLOCK4:		core_block[383:352]	<= write_data;
+					ADDR_BLOCK5:		core_block[351:320]	<= write_data;
+					ADDR_BLOCK6:		core_block[319:288]	<= write_data;
+					ADDR_BLOCK7:		core_block[287:256]	<= write_data;
+					ADDR_BLOCK8:		core_block[255:224]	<= write_data;
+					ADDR_BLOCK9:		core_block[223:192]	<= write_data;
+					ADDR_BLOCK10:		core_block[191:160]	<= write_data;
+					ADDR_BLOCK11:		core_block[159:128]	<= write_data;
+					ADDR_BLOCK12:		core_block[127: 96]	<= write_data;
+					ADDR_BLOCK13:		core_block[ 95: 64]	<= write_data;
+					ADDR_BLOCK14:		core_block[ 63: 32]	<= write_data;
+					ADDR_BLOCK15:		core_block[ 31:  0]	<= write_data;
+				endcase
+				//
+			end else begin
+				//
+				// Read Handler (result appears on read_data after this clock edge)
+				//
+				case (address)
+					// narrow registers are zero-padded to exactly 32 bits
+					ADDR_NAME0:			tmp_read_data <= CORE_NAME0;
+					ADDR_NAME1:			tmp_read_data <= CORE_NAME1;
+					ADDR_VERSION:		tmp_read_data <= CORE_VERSION;
+					ADDR_CTRL:			tmp_read_data <= {{29{1'b0}}, reg_ctrl};
+					ADDR_STATUS:		tmp_read_data <= {{30{1'b0}}, reg_status};
+					ADDR_BLOCK_BITS:	tmp_read_data <= {{22{1'b0}}, reg_block_bits};
+					ADDR_MODE:			tmp_read_data <= {{31{1'b0}}, reg_mode};
+					// read-back of the block register
+					ADDR_BLOCK0:		tmp_read_data <= core_block[511:480];
+					ADDR_BLOCK1:		tmp_read_data <= core_block[479:448];
+					ADDR_BLOCK2:		tmp_read_data <= core_block[447:416];
+					ADDR_BLOCK3:		tmp_read_data <= core_block[415:384];
+					ADDR_BLOCK4:		tmp_read_data <= core_block[383:352];
+					ADDR_BLOCK5:		tmp_read_data <= core_block[351:320];
+					ADDR_BLOCK6:		tmp_read_data <= core_block[319:288];
+					ADDR_BLOCK7:		tmp_read_data <= core_block[287:256];
+					ADDR_BLOCK8:		tmp_read_data <= core_block[255:224];
+					ADDR_BLOCK9:		tmp_read_data <= core_block[223:192];
+					ADDR_BLOCK10:		tmp_read_data <= core_block[191:160];
+					ADDR_BLOCK11:		tmp_read_data <= core_block[159:128];
+					ADDR_BLOCK12:		tmp_read_data <= core_block[127: 96];
+					ADDR_BLOCK13:		tmp_read_data <= core_block[ 95: 64];
+					ADDR_BLOCK14:		tmp_read_data <= core_block[ 63: 32];
+					ADDR_BLOCK15:		tmp_read_data <= core_block[ 31:  0];
+					// digest words come straight from the core output
+					ADDR_DIGEST0:		tmp_read_data <= core_digest[511:480];
+					ADDR_DIGEST1:		tmp_read_data <= core_digest[479:448];
+					ADDR_DIGEST2:		tmp_read_data <= core_digest[447:416];
+					ADDR_DIGEST3:		tmp_read_data <= core_digest[415:384];
+					ADDR_DIGEST4:		tmp_read_data <= core_digest[383:352];
+					ADDR_DIGEST5:		tmp_read_data <= core_digest[351:320];
+					ADDR_DIGEST6:		tmp_read_data <= core_digest[319:288];
+					ADDR_DIGEST7:		tmp_read_data <= core_digest[287:256];
+					ADDR_DIGEST8:		tmp_read_data <= core_digest[255:224];
+					ADDR_DIGEST9:		tmp_read_data <= core_digest[223:192];
+					ADDR_DIGEST10:		tmp_read_data <= core_digest[191:160];
+					ADDR_DIGEST11:		tmp_read_data <= core_digest[159:128];
+					ADDR_DIGEST12:		tmp_read_data <= core_digest[127: 96];
+					ADDR_DIGEST13:		tmp_read_data <= core_digest[ 95: 64];
+					ADDR_DIGEST14:		tmp_read_data <= core_digest[ 63: 32];
+					ADDR_DIGEST15:		tmp_read_data <= core_digest[ 31:  0];
+					// unmapped addresses read as zero
+					default:				tmp_read_data <= 32'h00000000;
+					//
+				endcase
+				//
+			end
+			//
+		end
+
+
+endmodule // streebog_wrapper
+
+
+//======================================================================
+// EOF streebog_wrapper.v
+//======================================================================



More information about the Commits mailing list