[Cryptech-Commits] [core/math/modexpa7] branch systolic updated: Work in progress.

git at cryptech.is git at cryptech.is
Tue Jul 25 20:55:09 UTC 2017


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch systolic
in repository core/math/modexpa7.

The following commit(s) were added to refs/heads/systolic by this push:
     new 76e2279  Work in progress.
76e2279 is described below

commit 76e22798dfa410c655dcec555f73bb3025f521e4
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Tue Jul 25 23:54:11 2017 +0300

    Work in progress.
---
 src/rtl/modexpa7_systolic_multiplier.v       | 152 ++++++---
 src/rtl/modexpa7_systolic_multiplier_array.v | 452 +++++++++++++++++++++++++++
 2 files changed, 565 insertions(+), 39 deletions(-)

diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v
index a1e141e..9d96f98 100644
--- a/src/rtl/modexpa7_systolic_multiplier.v
+++ b/src/rtl/modexpa7_systolic_multiplier.v
@@ -143,6 +143,7 @@ module modexpa7_systolic_multiplier #
 		 * Parameters Latch
 		 */
 	reg	[OPERAND_ADDR_WIDTH-1:0]	n_num_words_latch;
+	reg	[OPERAND_ADDR_WIDTH  :0]	p_num_words_latch;
 
 		// save number of words in n when new operation starts
 	always @(posedge clk)
@@ -200,20 +201,25 @@ module modexpa7_systolic_multiplier #
 	
 	
 		// loader input
-	reg	[SYSTOLIC_CNTR_WIDTH-1:0]	loader_addr[0:SYSTOLIC_ARRAY_LENGTH-1];
-	reg										loader_wren[0:SYSTOLIC_ARRAY_LENGTH-1];
+	reg	[SYSTOLIC_CNTR_WIDTH-1:0]	loader_addr_wr;
+	wire	[SYSTOLIC_CNTR_WIDTH-1:0]	loader_addr_rd;
+	reg										loader_wren;
 	reg	[                 32-1:0]	loader_din [0:SYSTOLIC_ARRAY_LENGTH-1];
 	
 		// loader output
-	wire	[                 32-1:0]	loader_dout[0:SYSTOLIC_ARRAY_LENGTH-1];
+	wire	[                 32-1:0]	loader_dout[0:SYSTOLIC_ARRAY_LENGTH-1];
+	
+		// array_input
+	wire	[32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0]	pe_a_wide;
+	wire	[32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0]	pe_b_wide;
 			
 		// generate parallelized loader		
 	genvar i;
 	generate for (i=0; i<SYSTOLIC_ARRAY_LENGTH; i=i+1)
 		//
-		begin : gen_bram_1rw_readfirst_loader
+		begin : gen_bram_1rw_1ro_readfirst_loader
 			//
-			bram_1rw_readfirst #
+			bram_1rw_1ro_readfirst #
 			(
 				.MEM_WIDTH		(32),
 				.MEM_ADDR_BITS	(SYSTOLIC_CNTR_WIDTH)
@@ -221,11 +227,15 @@ module modexpa7_systolic_multiplier #
 			bram_loader
 			(
 				.clk		(clk),
-				.a_addr	(loader_addr[i]),
-				.a_wr		(loader_wren[i]),
+				.a_addr	(loader_addr_wr),
+				.a_wr		(loader_wren),
 				.a_in		(loader_din[i]),
-				.a_out	(loader_dout[i])
-			);
+				.a_out	(),
+				.b_addr	(loader_addr_rd),
+				.b_out	(loader_dout[i])
+			);
+			//
+			assign pe_b_wide[32 * (i + 1) - 1 -: 32] = loader_dout[i];
 			//
 		end
 		//
@@ -250,22 +260,40 @@ module modexpa7_systolic_multiplier #
 
 		// address registers
 	reg	[OPERAND_ADDR_WIDTH-1:0]	b_addr;
+	wire	[OPERAND_ADDR_WIDTH  :0]	p_addr_ext_wr;
+	reg	[OPERAND_ADDR_WIDTH  :0]	p_addr_ext_rd;
 		
 		// handy increment values
-	wire	[OPERAND_ADDR_WIDTH-1:0]	b_addr_next		= b_addr       + 1'b1;
+	wire	[OPERAND_ADDR_WIDTH-1:0]	b_addr_next				= b_addr       + 1'b1;
+	wire	[OPERAND_ADDR_WIDTH  :0]	p_addr_ext_rd_next	= p_addr_ext_rd + 1'b1;	// successor of the p read address; must be derived from p_addr_ext_rd, not b_addr
+	
+		// write enables
+	wire	p_wren;
+	
+		// data buses
+	wire	[31: 0]	p_data_in;
+	wire	[31: 0]	p_data_out;
 	
 		// handy stop flags
-	wire	b_addr_done			= (b_addr      == bram_addr_last)     ? 1'b1 : 1'b0;
+	wire	b_addr_done        = (b_addr        == bram_addr_last)     ? 1'b1 : 1'b0;
+	wire	p_addr_ext_rd_done = (p_addr_ext_rd == bram_addr_ext_last) ? 1'b1 : 1'b0;
 
 		// delayed addresses
 	reg	[OPERAND_ADDR_WIDTH-1:0]	b_addr_dly;
-		
+	
 	always @(posedge clk) b_addr_dly <= b_addr;
 
 				
 		// map registers to top-level ports
 	assign b_bram_addr = b_addr;
 
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH+1))
+	bram_p
+	(	.clk(clk),
+		.a_addr(p_addr_ext_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(),
+		.b_addr(p_addr_ext_rd), .b_out(p_data_out));
+
 				
 		/*
 		 * Loader Data Input 
@@ -297,17 +325,8 @@ module modexpa7_systolic_multiplier #
 	always @(posedge clk)
 		//
 		case (fsm_next_state)
-		
-			FSM_STATE_LOAD_WRITE:
-				//
-				for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
-					loader_wren[j] <= 1'b1;
-					
-			default:
-				//
-				for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
-					loader_wren[j] <= 1'b0;
-					
+			FSM_STATE_LOAD_WRITE:	loader_wren <= 1'b1;
+			default:						loader_wren <= 1'b0;
 		endcase
 
 
@@ -317,17 +336,15 @@ module modexpa7_systolic_multiplier #
 
 	always @(posedge clk)
 		//
-		case (fsm_state)
+		case (fsm_state)
 		
-			FSM_STATE_LOAD_START:
-				//
-				for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
-					loader_addr[j] <= load_syst_cnt_zero;
-					
-			FSM_STATE_LOAD_WRITE:
-				//
-				for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
-					loader_addr[j] <= !load_syst_cnt_done ? load_syst_cnt_next : load_syst_cnt;
+			FSM_STATE_LOAD_START:
+				//
+				loader_addr_wr <= load_syst_cnt_zero;
+				
+			FSM_STATE_LOAD_WRITE:
+				//
+				loader_addr_wr <= !load_syst_cnt_done ? load_syst_cnt_next : load_syst_cnt;
 					
 		endcase
 	
@@ -344,12 +361,68 @@ module modexpa7_systolic_multiplier #
 		//
 	end
 
+
+		/*
+		 * Multiplier Array
+		 */
+	reg	pe_array_ena;
+	wire	pe_array_rdy;
+
+	always @(posedge clk)
+		//
+		case (fsm_next_state)
+			FSM_STATE_MULT_START:	pe_array_ena <= 1'b1;
+			default:						pe_array_ena <= 1'b0;
+		endcase
+		
+	always @(posedge clk)
+		//
+		case (fsm_next_state)
+			FSM_STATE_MULT_START:	p_num_words_latch <= {n_num_words_latch, 1'b1};
+		endcase
+			
+	
+	modexpa7_systolic_multiplier_array #
+	(
+		.OPERAND_ADDR_WIDTH		(OPERAND_ADDR_WIDTH),
+		.SYSTOLIC_ARRAY_POWER	(SYSTOLIC_ARRAY_POWER)
+	)
+	systolic_pe_array
+	(
+		.clk					(clk),
+		.rst_n				(rst_n),
+
+		.ena					(pe_array_ena),
+		.rdy					(pe_array_rdy),
+
+		.loader_addr_rd	(loader_addr_rd),
+		
+		.pe_a_wide			(),
+		.pe_b_wide			(pe_b_wide),
+		
+		.p_bram_addr		(p_addr_ext_wr),
+		.p_bram_in			(p_data_in),
+		.p_bram_wr			(p_wren),
+
+
+		.n_num_words		(n_num_words_latch),
+		.p_num_words		(p_num_words_latch)
+	);
+	
+
+
+
+
+
+
+
+
 
 		
 			
 		/*
 		 * FSM Process
-		 */
+	-	 */
 	always @(posedge clk or negedge rst_n)
 		//
 		if (rst_n == 1'b0)	fsm_state <= FSM_STATE_IDLE;
@@ -373,13 +446,14 @@ module modexpa7_systolic_multiplier #
 												else								fsm_next_state = FSM_STATE_LOAD_SHIFT;
 			FSM_STATE_LOAD_WRITE:		if (load_syst_cnt_done)		fsm_next_state = FSM_STATE_LOAD_FINAL;
 												else								fsm_next_state = FSM_STATE_LOAD_SHIFT;
-			FSM_STATE_LOAD_FINAL:											fsm_next_state = FSM_STATE_STOP;
+			FSM_STATE_LOAD_FINAL:											fsm_next_state = FSM_STATE_MULT_START;
 			//
-			//FSM_STATE_MULT_START:
-			//FSM_STATE_MULT_CRUNCH:
-			//FSM_STATE_MULT_FINAL:
+			FSM_STATE_MULT_START:											fsm_next_state = FSM_STATE_MULT_CRUNCH;
+			FSM_STATE_MULT_CRUNCH:		if (pe_array_rdy)				fsm_next_state = FSM_STATE_MULT_FINAL;
+												else								fsm_next_state = FSM_STATE_MULT_CRUNCH;
+			FSM_STATE_MULT_FINAL:											fsm_next_state = FSM_STATE_STOP;
 			//
-			FSM_STATE_STOP:												fsm_next_state = FSM_STATE_IDLE;
+			FSM_STATE_STOP:													fsm_next_state = FSM_STATE_IDLE;
 			//
 		endcase
 		//
diff --git a/src/rtl/modexpa7_systolic_multiplier_array.v b/src/rtl/modexpa7_systolic_multiplier_array.v
new file mode 100644
index 0000000..029d9d6
--- /dev/null
+++ b/src/rtl/modexpa7_systolic_multiplier_array.v
@@ -0,0 +1,452 @@
+//======================================================================
+//
+// modexpa7_systolic_multiplier_array.v
+// -----------------------------------------------------------------------------
+// Systolic Montgomery multiplier Processing Element Array
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2017, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+//   be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+module modexpa7_systolic_multiplier_array #
+	(
+		parameter	OPERAND_ADDR_WIDTH		= 4,
+		parameter	SYSTOLIC_ARRAY_POWER		= 2
+	)
+	(
+		input																	clk,
+		input																	rst_n,
+
+		input																	ena,
+		output																rdy,
+
+		output	[OPERAND_ADDR_WIDTH - SYSTOLIC_ARRAY_POWER - 1 : 0]	loader_addr_rd,
+
+		input		[32 * (2 ** SYSTOLIC_ARRAY_POWER) - 1 : 0]	pe_a_wide,
+		input		[32 * (2 ** SYSTOLIC_ARRAY_POWER) - 1 : 0]	pe_b_wide,
+		
+		output	[              OPERAND_ADDR_WIDTH     : 0]	p_bram_addr,
+		output	[                              32 - 1 : 0]	p_bram_in,
+		output																p_bram_wr,
+
+		input		[              OPERAND_ADDR_WIDTH - 1 : 0]	n_num_words,
+		input		[              OPERAND_ADDR_WIDTH     : 0]	p_num_words
+	);
+	
+		
+		/*
+		 * Include Settings
+		 */
+	`include "pe/modexpa7_primitive_switch.v"
+	`include "modexpa7_settings.v"
+		
+
+		/*
+		 * FSM Declaration
+		 */
+	localparam	[ 7: 0]	FSM_STATE_IDLE				= 8'h00;
+	
+	localparam	[ 7: 0]	FSM_STATE_MULT_START		= 8'h11;
+	localparam	[ 7: 0]	FSM_STATE_MULT_CRUNCH	= 8'h12;
+	localparam	[ 7: 0]	FSM_STATE_MULT_RELOAD	= 8'h13;
+	localparam	[ 7: 0]	FSM_STATE_MULT_FINAL		= 8'h14;
+	
+	localparam	[ 7: 0]	FSM_STATE_STOP				= 8'hFF;
+	
+	
+		/*
+		 * FSM State / Next State
+		 */
+	reg	[ 7: 0]	fsm_state = FSM_STATE_IDLE;
+	reg	[ 7: 0]	fsm_next_state;
+
+
+		/*
+		 * Enable Delay and Trigger
+		 */
+   reg ena_dly = 1'b0;
+	
+		// delay enable by one clock cycle
+   always @(posedge clk) ena_dly <= ena;
+
+		// trigger new operation when enable goes high
+   wire ena_trig = ena && !ena_dly;
+	
+	
+		/*
+		 * Ready Flag Logic
+		 */
+	reg rdy_reg = 1'b1;
+	assign rdy = rdy_reg;
+
+   always @(posedge clk or negedge rst_n)
+		
+			// asynchronous active-low reset: report ready
+		if (rst_n == 1'b0) rdy_reg <= 1'b1;
+		else begin
+		
+				// while in IDLE the flag drops on a start edge (ena_trig) and reads ready otherwise
+			if (fsm_state == FSM_STATE_IDLE)	rdy_reg <= ~ena_trig;
+			
+				// set the flag again once the FSM reaches STOP
+			if (fsm_state == FSM_STATE_STOP)	rdy_reg <= 1'b1;			
+			
+		end
+		
+		
+		/*
+		 * Parameters Latch
+		 */
+	reg	[OPERAND_ADDR_WIDTH-1:0]	n_num_words_latch;
+	reg	[OPERAND_ADDR_WIDTH  :0]	p_num_words_latch;
+
+		// save number of words in n when new operation starts
+	always @(posedge clk)
+		// capture both word counts only when a start edge (ena_trig) arrives while in IDLE
+		if ((fsm_state == FSM_STATE_IDLE) && ena_trig) begin
+			n_num_words_latch <= n_num_words;
+			p_num_words_latch <= p_num_words;
+		end
+			
+			
+		/*
+		 * Systolic Array of Processing Elements
+		 */
+	reg	[31: 0]	pe_a        [0:SYSTOLIC_ARRAY_LENGTH-1];
+	reg	[31: 0]	pe_b        [0:SYSTOLIC_ARRAY_LENGTH-1];
+	wire	[31: 0]	pe_t        [0:SYSTOLIC_ARRAY_LENGTH-1];
+	wire	[31: 0]	pe_c_in     [0:SYSTOLIC_ARRAY_LENGTH-1];
+	wire	[31: 0]	pe_p        [0:SYSTOLIC_ARRAY_LENGTH-1];
+	wire	[31: 0]	pe_c_out    [0:SYSTOLIC_ARRAY_LENGTH-1];
+	
+
+		/*
+		 * FIFOs
+		 */
+	reg	fifo_c_rst;
+	reg	fifo_t_rst;
+
+	wire	fifo_c_wren;
+	wire	fifo_c_rden;
+	
+	wire	fifo_t_wren;
+	wire	fifo_t_rden;
+		
+	wire	[32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0]	fifo_c_din;
+	wire	[32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0]	fifo_c_dout;
+	
+	wire	[32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0]	fifo_t_din;
+	wire	[32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0]	fifo_t_dout;
+	
+	modexpa7_simple_fifo #
+	(
+		.BUS_WIDTH	(32 * SYSTOLIC_ARRAY_LENGTH),
+		.DEPTH_BITS	(SYSTOLIC_CNTR_WIDTH)
+	)
+	fifo_c
+	(
+		.clk			(clk),
+		.rst			(fifo_c_rst),
+		.wr_en		(fifo_c_wren),
+		.d_in			(fifo_c_din),
+		.rd_en		(fifo_c_rden),
+		.d_out		(fifo_c_dout)
+	);
+	
+	modexpa7_simple_fifo #
+	(
+		.BUS_WIDTH	(32 * SYSTOLIC_ARRAY_LENGTH),
+		.DEPTH_BITS	(SYSTOLIC_CNTR_WIDTH)
+	)
+	fifo_t
+	(
+		.clk			(clk),
+		.rst			(fifo_t_rst),
+		.wr_en		(fifo_t_wren),
+		.d_in			(fifo_t_din),
+		.rd_en		(fifo_t_rden),
+		.d_out		(fifo_t_dout)
+	);
+
+	genvar i;
+	generate for (i=0; i<SYSTOLIC_ARRAY_LENGTH; i=i+1)
+		// one processing element is instantiated per iteration
+		begin : gen_modexpa7_systolic_pe
+			//
+			modexpa7_systolic_pe systolic_pe_inst
+			(
+				.clk		(clk),
+				.a			(pe_a[i]),
+				.b			(pe_b[i]),
+				.t			(pe_t[i]),
+				.c_in		(pe_c_in[i]),
+				.p			(pe_p[i]),
+				.c_out	(pe_c_out[i])
+			);
+			// c_in and t of element i are the i-th 32-bit slice of the two FIFO outputs
+			assign pe_c_in[i] = fifo_c_dout[32 * (i + 1) - 1 -: 32];
+			assign pe_t[i]    = fifo_t_dout[32 * (i + 1) - 1 -: 32];
+			// NOTE(review): the feedback below is still disabled, so fifo_c_din has no driver in this module
+			//assign fifo_c_din[32 * (i + 1) - 1 -: 32] = pe_c_out_dly[i];
+			//
+			//always @(posedge clk) pe_c_out_dly[i] <= pe_c_out[i];
+			//
+		end
+		//
+	endgenerate
+
+		
+		/*
+		 * Block Memory Interface
+		 */
+
+		// the very first address
+	wire	[OPERAND_ADDR_WIDTH:0]	bram_addr_zero = {OPERAND_ADDR_WIDTH+1{1'b0}};
+	
+		// the very last address
+	wire	[OPERAND_ADDR_WIDTH:0]	bram_addr_last = p_num_words_latch;
+		
+		// registers
+	reg	[OPERAND_ADDR_WIDTH:0]	p_addr;
+	reg	[                31:0]	p_data_in;
+	reg									p_wren;
+
+		// handy values 
+	wire	[OPERAND_ADDR_WIDTH:0]	p_addr_next = p_addr + 1'b1;
+	
+		// handy flags
+	wire	p_addr_done =  (p_addr == bram_addr_last) ? 1'b1 : 1'b0;
+
+	
+		// map top-level ports to internal registers
+	assign p_bram_addr	= p_addr;
+	assign p_bram_in		= p_data_in;
+	assign p_bram_wr		= p_wren;
+
+
+		/*
+		 * Systolic Cycle Counters
+		 */
+		
+		// handy values 
+	wire	[SYSTOLIC_CNTR_WIDTH-1:0]	syst_cnt_zero = {SYSTOLIC_CNTR_WIDTH{1'b0}};
+	wire	[SYSTOLIC_CNTR_WIDTH-1:0]	syst_cnt_last = n_num_words_latch[OPERAND_ADDR_WIDTH-1:SYSTOLIC_ARRAY_POWER];
+	
+		// counters
+	reg	[SYSTOLIC_CNTR_WIDTH-1:0]	syst_cnt_load;
+	reg	[SYSTOLIC_CNTR_WIDTH-1:0]	syst_cnt_unload;
+		
+		// handy increment values
+	wire	[SYSTOLIC_CNTR_WIDTH-1:0]	syst_cnt_load_next		= syst_cnt_load   + 1'b1;
+	wire	[SYSTOLIC_CNTR_WIDTH-1:0]	syst_cnt_unload_next		= syst_cnt_unload + 1'b1;
+
+		// handy stop flags
+	wire										syst_cnt_load_done		= (syst_cnt_load   == syst_cnt_last) ? 1'b1 : 1'b0;
+	wire										syst_cnt_unload_done		= (syst_cnt_unload == syst_cnt_last) ? 1'b1 : 1'b0;
+
+	always @(posedge clk)
+		// load counter, keyed on the state being entered (fsm_next_state)
+		case (fsm_next_state)
+			FSM_STATE_MULT_START,
+			FSM_STATE_MULT_RELOAD:
+				// restart counting from zero
+				syst_cnt_load <= syst_cnt_zero;
+			
+			FSM_STATE_MULT_CRUNCH:
+				// count up, then hold once syst_cnt_last has been reached
+				syst_cnt_load <= !syst_cnt_load_done ? syst_cnt_load_next : syst_cnt_load;
+				
+		endcase
+		
+	always @(posedge clk)
+		// unload counter: only updated while the FSM is in MULT_CRUNCH
+		if (fsm_state == FSM_STATE_MULT_CRUNCH) begin
+			// restart has priority over counting; NOTE(review): both shreg_* flags are declared after this block - confirm the tool accepts the forward reference
+			if (shreg_done_latency)
+				syst_cnt_unload <= syst_cnt_zero;
+			else if (shreg_now_unloading)
+				syst_cnt_unload <= !syst_cnt_unload_done ? syst_cnt_unload_next : syst_cnt_unload;
+			// in every other case the counter keeps its value
+		end
+
+
+
+		/*
+		 * Shift Registers
+		 */
+	reg	[SYSTOLIC_NUM_CYCLES-1:0]	shreg_load;
+	reg	[SYSTOLIC_PE_LATENCY  :0]	shreg_latency;
+	reg	[SYSTOLIC_NUM_CYCLES-1:0]	shreg_unload;
+
+	wire	shreg_done_load		= shreg_load[syst_cnt_last];
+	wire	shreg_done_latency	= shreg_latency[SYSTOLIC_PE_LATENCY];
+	wire	shreg_done_unload		= shreg_unload[syst_cnt_last];
+
+	reg	shreg_now_loading;
+	reg	shreg_now_latency;
+	reg	shreg_now_unloading;
+	
+	always @(posedge clk)
+		// phase tracking for load / latency / unload, keyed on the current state
+		case (fsm_state)
+			//
+			//FSM_STATE_IDLE: begin
+				//shreg_load		<= {{SYSTOLIC_NUM_CYCLES-1{1'b0}}, 1'b0};
+				//shreg_latency	<= {{SYSTOLIC_PE_LATENCY{1'b0}}, 1'b0};
+				//shreg_unload	<= {{SYSTOLIC_NUM_CYCLES-1{1'b0}}, 1'b0};
+			//end
+			//
+			FSM_STATE_MULT_START,
+			FSM_STATE_MULT_RELOAD: begin
+				// new pass: loading and latency phases are active, unloading has not begun
+				shreg_now_loading		<= 1'b1;
+				shreg_now_latency		<= 1'b1;
+				shreg_now_unloading	<= 1'b0;
+				// seed a single 1 in bit 0 of the load and latency registers, clear the unload register
+				shreg_load		<= {{SYSTOLIC_NUM_CYCLES-1{1'b0}}, 1'b1};
+				shreg_latency	<= {{SYSTOLIC_PE_LATENCY  {1'b0}}, 1'b1};
+				shreg_unload	<= {{SYSTOLIC_NUM_CYCLES-1{1'b0}}, 1'b0};
+				//
+			end
+			//
+			FSM_STATE_MULT_CRUNCH: begin
+				// shift left every cycle; the top bit of shreg_latency is fed into bit 0 of shreg_unload
+				shreg_load		<= {shreg_load   [SYSTOLIC_NUM_CYCLES-2:0], 1'b0};
+				shreg_latency	<= {shreg_latency[SYSTOLIC_PE_LATENCY-1:0], 1'b0};
+				shreg_unload	<= {shreg_unload [SYSTOLIC_NUM_CYCLES-2:0], shreg_latency[SYSTOLIC_PE_LATENCY]};
+				// phase flags follow the shreg_done_* taps; latency done starts unloading, unload done ends it
+				if (shreg_done_load)				shreg_now_loading <= 1'b0;
+				if (shreg_done_latency)			shreg_now_latency <= 1'b0;
+				if (shreg_done_latency)			shreg_now_unloading <= 1'b1;
+				else if (shreg_done_unload)	shreg_now_unloading <= 1'b0;
+				
+			end
+			// any other state: all phase flags low, shift registers keep their contents
+			default: begin
+				shreg_now_loading		<= 1'b0;
+				shreg_now_latency		<= 1'b0;
+				shreg_now_unloading	<= 1'b0;
+			end
+			//
+		endcase
+
+
+
+		/*
+		 * P Memory Write Enable (drives p_bram_wr)
+		 */
+	always @(posedge clk)
+		// set on the clock edge that moves the FSM into MULT_RELOAD, cleared on any other transition
+		case (fsm_next_state)
+			FSM_STATE_MULT_RELOAD:	p_wren <= 1'b1;
+			default:						p_wren <= 1'b0;
+		endcase
+
+		/*
+		 * P Memory Address (drives p_bram_addr)
+		 */
+	always @(posedge clk)
+		// keyed on the current state: zeroed in MULT_START, incremented in MULT_RELOAD, held in every other state
+		case (fsm_state)
+			FSM_STATE_MULT_START:	p_addr <= bram_addr_zero;
+			FSM_STATE_MULT_RELOAD:	p_addr <= p_addr_next;
+		endcase
+
+		
+		/*
+		 * Loader Control
+		 */
+	reg	[SYSTOLIC_CNTR_WIDTH-1:0]	loader_addr;
+
+	assign loader_addr_rd = loader_addr;
+
+	// loader_addr is a single shared register, so it is assigned directly (no per-element loop or index needed)
+	always @(posedge clk)
+		// keyed on the state being entered (fsm_next_state)
+		case (fsm_next_state)
+		
+			FSM_STATE_MULT_START,
+			FSM_STATE_MULT_RELOAD:
+				//
+				// rewind the read address to syst_cnt_zero
+				loader_addr <= syst_cnt_zero;
+													
+			FSM_STATE_MULT_CRUNCH:
+				//
+				// step the read address, holding its value once the load counter is done
+				loader_addr <= !syst_cnt_load_done ? syst_cnt_load_next : syst_cnt_load;
+					
+		endcase
+
+
+
+		
+			
+		/*
+		 * FSM Process
+		 */
+	always @(posedge clk or negedge rst_n)
+		// asynchronous active-low reset forces IDLE; otherwise take the computed next state
+		if (rst_n == 1'b0)	fsm_state <= FSM_STATE_IDLE;
+		else						fsm_state <= fsm_next_state;
+	
+	
+		/*
+		 * FSM Transition Logic
+		 */
+	always @* begin
+		// default assignment: any state code without a case item below is steered to STOP
+		fsm_next_state = FSM_STATE_STOP;
+		//
+		case (fsm_state)
+			// stay in IDLE until a rising edge on ena (ena_trig)
+			FSM_STATE_IDLE:			if (ena_trig)				fsm_next_state = FSM_STATE_MULT_START;
+											else							fsm_next_state = FSM_STATE_IDLE;
+			// CRUNCH repeats until shreg_done_unload; RELOAD loops back to CRUNCH until p_addr_done
+			FSM_STATE_MULT_START:									fsm_next_state = FSM_STATE_MULT_CRUNCH;
+			FSM_STATE_MULT_CRUNCH:	if (shreg_done_unload)	fsm_next_state = FSM_STATE_MULT_RELOAD;
+											else							fsm_next_state = FSM_STATE_MULT_CRUNCH;
+			FSM_STATE_MULT_RELOAD:	if (p_addr_done)			fsm_next_state = FSM_STATE_MULT_FINAL;
+											else							fsm_next_state = FSM_STATE_MULT_CRUNCH;
+			FSM_STATE_MULT_FINAL:									fsm_next_state = FSM_STATE_STOP;
+			// STOP lasts a single cycle before returning to IDLE
+			FSM_STATE_STOP:											fsm_next_state = FSM_STATE_IDLE;
+			//
+		endcase
+		//
+	end
+
+
+endmodule
+
+//======================================================================
+// End of file
+//======================================================================

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the Commits mailing list