[Cryptech-Commits] [core/math/modexpa7] 01/02: Converted pe_c_out_mem two-dimensional array into a FIFO.

git at cryptech.is git at cryptech.is
Thu Jul 20 00:41:17 UTC 2017


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch systolic
in repository core/math/modexpa7.

commit ce4b5740615d9097986f5149e53e4e053674b674
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Thu Jul 20 03:36:32 2017 +0300

    Converted pe_c_out_mem two-dimensional array into a FIFO.
---
 src/rtl/modexpa7_simple_fifo.v         | 209 +++++++++++++++++++++++++++++++++
 src/rtl/modexpa7_systolic_multiplier.v |  83 +++++++++++--
 2 files changed, 280 insertions(+), 12 deletions(-)

diff --git a/src/rtl/modexpa7_simple_fifo.v b/src/rtl/modexpa7_simple_fifo.v
new file mode 100644
index 0000000..84c21a9
--- /dev/null
+++ b/src/rtl/modexpa7_simple_fifo.v
@@ -0,0 +1,209 @@
+`timescale 1ns / 1ps
+// Minimal synchronous FIFO: 2**DEPTH_BITS words, registered read data, wrap-around pointers, no full/empty flags.
+module modexpa7_simple_fifo #
+	(
+		parameter	BUS_WIDTH	= 128,	// width of one stored word, in bits
+		parameter	DEPTH_BITS	= 2		// pointer width; storage holds 2**DEPTH_BITS words
+	)
+	(
+		input								clk,
+		input								rst,		// synchronous, active-high: zeroes both pointers and d_out, blocks writes
+		input								wr_en,	// store d_in at ptr_wr, then advance ptr_wr
+		input								rd_en,	// load d_out from ptr_rd, then advance ptr_rd
+		input		[BUS_WIDTH-1:0]	d_in,
+		output	[BUS_WIDTH-1:0]	d_out		// registered; only changes on rst or rd_en
+	);
+	
+		//
+		// Locals (pointer constants are sized exactly like ptr_wr/ptr_rd)
+		//
+	localparam						NUM_WORDS = 2 ** DEPTH_BITS;
+	
+	localparam	[DEPTH_BITS-1:0]	PTR_ZERO = {DEPTH_BITS{1'b0}};
+	localparam	[DEPTH_BITS-1:0]	PTR_LAST = {DEPTH_BITS{1'b1}};	// not referenced in this module
+	
+		//
+		// Memory
+		//
+	reg	[BUS_WIDTH-1:0]	fifo[0:NUM_WORDS-1];
+	
+		//
+		// Pointers
+		//
+	reg	[DEPTH_BITS-1:0]	ptr_wr;
+	reg	[DEPTH_BITS-1:0]	ptr_rd;
+	
+		//
+		// Output
+		//
+	reg	[BUS_WIDTH-1:0]	d_out_reg;
+	assign d_out = d_out_reg;
+	
+		//
+		// Write Pointer
+		//
+	always @(posedge clk)
+		// wraps from all-ones back to zero; nothing guards against overwriting unread words
+		if (rst)				ptr_wr <= PTR_ZERO;
+		else if (wr_en)	ptr_wr <= ptr_wr + 1'b1;
+
+		//
+		// Read Pointer
+		//
+	always @(posedge clk)
+		// wraps like ptr_wr; nothing guards against reading words that were never written
+		if (rst)				ptr_rd <= PTR_ZERO;
+		else if (rd_en)	ptr_rd <= ptr_rd + 1'b1;
+
+		//
+		// Read Logic
+		//
+	always @(posedge clk)
+		// d_out captures the word addressed by ptr_rd before the pointer advances
+		if (rst)				d_out_reg <= {BUS_WIDTH{1'b0}};
+		else if (rd_en)	d_out_reg <= fifo[ptr_rd];
+		
+		//
+		// Write Logic
+		//
+	always @(posedge clk)
+		// writes are ignored while rst is high; the memory contents are never cleared
+		if (!rst && wr_en)	fifo[ptr_wr] <= d_in;
+	
+// NOTE(review): the block comment below is inactive; it uses signals (wp, rp, gb, cnt, ...) this module does not declare.
+/*
+generic_dpram  #(aw,dw) u0(
+	.rclk(		clk		),
+	.rrst(		!rst		),
+	.rce(		1'b1		),
+	.oe(		1'b1		),
+	.raddr(		rp		),
+	.do(		dout		),
+	.wclk(		clk		),
+	.wrst(		!rst		),
+	.wce(		1'b1		),
+	.we(		we		),
+	.waddr(		wp		),
+	.di(		din		)
+	);
+
+////////////////////////////////////////////////////////////////////
+//
+// Misc Logic
+//
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+	if(!rst)	wp <= #1 {aw{1'b0}};
+	else
+	if(clr)		wp <= #1 {aw{1'b0}};
+	else
+	if(we)		wp <= #1 wp_pl1;
+
+assign wp_pl1 = wp + { {aw-1{1'b0}}, 1'b1};
+assign wp_pl2 = wp + { {aw-2{1'b0}}, 2'b10};
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+	if(!rst)	rp <= #1 {aw{1'b0}};
+	else
+	if(clr)		rp <= #1 {aw{1'b0}};
+	else
+	if(re)		rp <= #1 rp_pl1;
+
+assign rp_pl1 = rp + { {aw-1{1'b0}}, 1'b1};
+
+////////////////////////////////////////////////////////////////////
+//
+// Combinatorial Full & Empty Flags
+//
+
+assign empty = ((wp == rp) & !gb);
+assign full  = ((wp == rp) &  gb);
+
+// Guard Bit ...
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+	if(!rst)			gb <= #1 1'b0;
+	else
+	if(clr)				gb <= #1 1'b0;
+	else
+	if((wp_pl1 == rp) & we)		gb <= #1 1'b1;
+	else
+	if(re)				gb <= #1 1'b0;
+
+////////////////////////////////////////////////////////////////////
+//
+// Registered Full & Empty Flags
+//
+
+// Guard Bit ...
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+	if(!rst)			gb2 <= #1 1'b0;
+	else
+	if(clr)				gb2 <= #1 1'b0;
+	else
+	if((wp_pl2 == rp) & we)		gb2 <= #1 1'b1;
+	else
+	if((wp != rp) & re)		gb2 <= #1 1'b0;
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+	if(!rst)				full_r <= #1 1'b0;
+	else
+	if(clr)					full_r <= #1 1'b0;
+	else
+	if(we & ((wp_pl1 == rp) & gb2) & !re)	full_r <= #1 1'b1;
+	else
+	if(re & ((wp_pl1 != rp) | !gb2) & !we)	full_r <= #1 1'b0;
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+	if(!rst)				empty_r <= #1 1'b1;
+	else
+	if(clr)					empty_r <= #1 1'b1;
+	else
+	if(we & ((wp != rp_pl1) | gb2) & !re)	empty_r <= #1 1'b0;
+	else
+	if(re & ((wp == rp_pl1) & !gb2) & !we)	empty_r <= #1 1'b1;
+
+////////////////////////////////////////////////////////////////////
+//
+// Combinatorial Full_n & Empty_n Flags
+//
+
+assign empty_n = cnt < n;
+assign full_n  = !(cnt < (max_size-n+1));
+assign level = {2{cnt[aw]}} | cnt[aw-1:aw-2];
+
+// N entries status
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+	if(!rst)	cnt <= #1 {aw+1{1'b0}};
+	else
+	if(clr)		cnt <= #1 {aw+1{1'b0}};
+	else
+	if( re & !we)	cnt <= #1 cnt + { {aw{1'b1}}, 1'b1};
+	else
+	if(!re &  we)	cnt <= #1 cnt + { {aw{1'b0}}, 1'b1};
+
+////////////////////////////////////////////////////////////////////
+//
+// Registered Full_n & Empty_n Flags
+//
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+	if(!rst)				empty_n_r <= #1 1'b1;
+	else
+	if(clr)					empty_n_r <= #1 1'b1;
+	else
+	if(we & (cnt >= (n-1) ) & !re)		empty_n_r <= #1 1'b0;
+	else
+	if(re & (cnt <= n ) & !we)		empty_n_r <= #1 1'b1;
+
+always @(posedge clk `SC_FIFO_ASYNC_RESET)
+	if(!rst)				full_n_r <= #1 1'b0;
+	else
+	if(clr)					full_n_r <= #1 1'b0;
+	else
+	if(we & (cnt >= (max_size-n) ) & !re)	full_n_r <= #1 1'b1;
+	else
+	if(re & (cnt <= (max_size-n+1)) & !we)	full_n_r <= #1 1'b0;
+*/
+
+
+endmodule
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v
index 8cd28ff..382019c 100644
--- a/src/rtl/modexpa7_systolic_multiplier.v
+++ b/src/rtl/modexpa7_systolic_multiplier.v
@@ -600,7 +600,7 @@ module modexpa7_systolic_multiplier #
 	reg	[31: 0]	pe_a    [0:SYSTOLIC_ARRAY_LENGTH-1];
 	reg	[31: 0]	pe_b    [0:SYSTOLIC_ARRAY_LENGTH-1];
 	reg	[31: 0]	pe_t    [0:SYSTOLIC_ARRAY_LENGTH-1];
-	reg	[31: 0]	pe_c_in [0:SYSTOLIC_ARRAY_LENGTH-1];
+	wire	[31: 0]	pe_c_in [0:SYSTOLIC_ARRAY_LENGTH-1];
 	wire	[31: 0]	pe_p    [0:SYSTOLIC_ARRAY_LENGTH-1];
 	wire	[31: 0]	pe_c_out[0:SYSTOLIC_ARRAY_LENGTH-1];
 	
@@ -608,9 +608,49 @@ module modexpa7_systolic_multiplier #
 		//
 		// These can be turned into a FIFO (maybe later?)...
 		//
-	reg	[31: 0]	pe_c_out_mem[0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
+	//reg	[31: 0]	pe_c_out_mem[0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
 	reg	[31: 0]	pe_t_mem    [0:SYSTOLIC_ARRAY_LENGTH-1][0:SYSTOLIC_NUM_CYCLES-1];
 
+	reg	fifo_c_rst;	// drives the .rst port of fifo_c
+
+	wire	fifo_c_wren;	// drives the .wr_en port of fifo_c
+	wire	fifo_c_rden;	// drives the .rd_en port of fifo_c
+	
+	wire	debug_fifo_full;	// NOTE(review): in the visible hunks only the commented-out ip_fifo_c instance connects this
+	wire	debug_fifo_empty;	// NOTE(review): same as debug_fifo_full
+	
+	wire	[32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0]	fifo_c_din;	// one 32-bit lane per array element: lane i is fed by pe_c_out[i]
+	wire	[32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0]	fifo_c_dout;	// one 32-bit lane per array element: lane i feeds pe_c_in[i]
+	// FIFO between pe_c_out and pe_c_in: each word packs all 32-bit lanes; DEPTH_BITS is set to SYSTOLIC_CNTR_WIDTH.
+	/**/
+	modexpa7_simple_fifo #
+	(
+		.BUS_WIDTH	(32 * SYSTOLIC_ARRAY_LENGTH),
+		.DEPTH_BITS	(SYSTOLIC_CNTR_WIDTH)
+	)
+	fifo_c
+	(
+		.clk			(clk),
+		.rst			(fifo_c_rst),
+		.wr_en		(fifo_c_wren),
+		.d_in			(fifo_c_din),
+		.rd_en		(fifo_c_rden),
+		.d_out		(fifo_c_dout)
+	);
+	/**/
+	/*
+	ip_fifo_c fifo_c
+	(
+		.clk		(clk),
+		.srst		(fifo_c_rst),
+		.wr_en	(fifo_c_wren),
+		.din		(fifo_c_din),
+		.rd_en	(fifo_c_rden),
+		.dout		(fifo_c_dout),
+		.full		(debug_fifo_full),
+		.empty	(debug_fifo_empty)
+	);*/
+
 	generate for (i=0; i<SYSTOLIC_ARRAY_LENGTH; i=i+1)
 		begin : modexpa7_systolic_pe_multiplier		
 			modexpa7_systolic_pe systolic_pe_inst
@@ -623,10 +663,13 @@ module modexpa7_systolic_multiplier #
 				.p			(pe_p[i]),
 				.c_out	(pe_c_out[i])
 			);
+			assign pe_c_in[i] = fifo_c_dout[32 * (i + 1) - 1 -: 32];
+			assign fifo_c_din[32 * (i + 1) - 1 -: 32] = pe_c_out[i];
 		end
 	endgenerate
 
 
+
 		
 			
 			//
@@ -695,7 +738,23 @@ module modexpa7_systolic_multiplier #
 		endcase
 		
 		
-		
+	always @(posedge clk)
+		// fifo_c_rst is set in every *_START state, cleared in a *_CRUNCH state once shreg_done_load is seen, held otherwise.
+		if ((fsm_state == FSM_STATE_MULT_A_B_START) ||
+			 (fsm_state == FSM_STATE_MULT_AB_N_COEFF_START) ||
+			 (fsm_state == FSM_STATE_MULT_Q_N_START))
+			fifo_c_rst <= 1'b1;
+		else if (shreg_done_load &&
+			((fsm_state == FSM_STATE_MULT_A_B_CRUNCH) ||
+			 (fsm_state == FSM_STATE_MULT_AB_N_COEFF_CRUNCH) ||
+			 (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)))
+			fifo_c_rst <= 1'b0;
+	
+	// The FIFO is read while shreg_now_loading is high and written while shreg_now_unloading is high.
+	assign fifo_c_rden = shreg_now_loading;
+	assign fifo_c_wren = shreg_now_unloading;
+	
+	
 		
 		
 	always @(posedge clk) begin
@@ -835,9 +894,9 @@ module modexpa7_systolic_multiplier #
 			FSM_STATE_MULT_AB_N_COEFF_CRUNCH,
 			FSM_STATE_MULT_Q_N_CRUNCH: begin
 		
-				if (shreg_now_unloading)
-					for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
-						pe_c_out_mem[j][syst_cnt_unload] <= pe_c_out[j];
+				//if (shreg_now_unloading)
+					//for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
+						//pe_c_out_mem[j][syst_cnt_unload] <= pe_c_out[j];
 						
 				if (shreg_now_unloading) begin
 				
@@ -867,12 +926,12 @@ module modexpa7_systolic_multiplier #
 					pe_a[j]		<= (ab_addr_ext > {1'b0, a_addr}) ? 32'd0 : a_bram_out;
 					pe_b[j]		<= loader_dout[j];
 					pe_t[j]		<= (a_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
-					pe_c_in[j]	<= (a_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
+					//pe_c_in[j]	<= (a_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
 				end else begin
 					pe_a[j]		<= 32'hXXXXXXXX;				
 					pe_b[j]		<= 32'hXXXXXXXX;
 					pe_t[j]		<= 32'hXXXXXXXX;
-					pe_c_in[j]	<= 32'hXXXXXXXX;
+					//pe_c_in[j]	<= 32'hXXXXXXXX;
 				end
 		//
 		if (fsm_state == FSM_STATE_MULT_AB_N_COEFF_CRUNCH)
@@ -883,12 +942,12 @@ module modexpa7_systolic_multiplier #
 					pe_a[j]		<= ab_data_out;
 					pe_b[j]		<= loader_dout[j];
 					pe_t[j]		<= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
-					pe_c_in[j]	<= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
+					//pe_c_in[j]	<= (ab_addr_ext == bram_addr_ext_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
 				end else begin
 					pe_a[j]		<= 32'hXXXXXXXX;				
 					pe_b[j]		<= 32'hXXXXXXXX;
 					pe_t[j]		<= 32'hXXXXXXXX;
-					pe_c_in[j]	<= 32'hXXXXXXXX;
+					//pe_c_in[j]	<= 32'hXXXXXXXX;
 				end
 		//
 		if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
@@ -899,12 +958,12 @@ module modexpa7_systolic_multiplier #
 					pe_a[j]		<= (qn_addr_ext > {1'b0, q_addr}) ? 32'd0 : q_data_out;
 					pe_b[j]		<= loader_dout[j];
 					pe_t[j]		<= (q_addr == bram_addr_zero) ? 32'd0 : pe_t_mem[j][syst_cnt_load_dly];
-					pe_c_in[j]	<= (q_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
+					//pe_c_in[j]	<= (q_addr == bram_addr_zero) ? 32'd0 : pe_c_out_mem[j][syst_cnt_load_dly];
 				end else begin
 					pe_a[j]		<= 32'hXXXXXXXX;				
 					pe_b[j]		<= 32'hXXXXXXXX;
 					pe_t[j]		<= 32'hXXXXXXXX;
-					pe_c_in[j]	<= 32'hXXXXXXXX;
+					//pe_c_in[j]	<= 32'hXXXXXXXX;
 				end
 		//
 	



More information about the Commits mailing list