[Cryptech-Commits] [core/math/modexpa7] 02/06: Exposed internal buffers for Montgomery factor F and modulus-dependent speed-up coefficient N_COEFF so that they can be retrieved and stored along with the modulus. Split corresponding buffers into "input" and "output" banks: during pre-computation F and N_COEFF are written to read-only "output" banks; during exponentiation F and N_COEFF are read from read-write "input" banks and must be supplied by user.

git at cryptech.is git at cryptech.is
Sun Sep 3 21:36:49 UTC 2017


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch systolic_crt
in repository core/math/modexpa7.

commit 3538f0350c462cef3326b29a180536be0a403390
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Mon Aug 28 15:02:30 2017 +0300

    Exposed internal buffers for Montgomery factor F and modulus-dependent speed-up coefficient N_COEFF
    so that they can be retrieved and stored along with the modulus. Split corresponding buffers into
    "input" and "output" banks: during pre-computation F and N_COEFF are written to read-only "output" banks;
    during exponentiation F and N_COEFF are read from read-write "input" banks and must be supplied by user.
---
 src/rtl/modexpa7_top.v     | 80 +++++++++++++++++++++++++++++++---------------
 src/rtl/modexpa7_wrapper.v | 30 ++++++++---------
 2 files changed, 70 insertions(+), 40 deletions(-)

diff --git a/src/rtl/modexpa7_top.v b/src/rtl/modexpa7_top.v
index ad101dd..7723b88 100644
--- a/src/rtl/modexpa7_top.v
+++ b/src/rtl/modexpa7_top.v
@@ -54,7 +54,7 @@ module modexpa7_top #
 
 		input 										bus_cs,
 		input 										bus_we,
-		input		[OPERAND_ADDR_WIDTH+1:0]	bus_addr,
+		input		[OPERAND_ADDR_WIDTH+2:0]	bus_addr,
 		input		[                32-1:0]	bus_data_wr,
 		output 	[                32-1:0]	bus_data_rd
 	);
@@ -154,17 +154,21 @@ module modexpa7_top #
 		/*
 		 * Split bus address into bank/word parts.
 		 */
-	wire	[                 2 - 1 : 0]	bus_addr_bank = bus_addr[OPERAND_ADDR_WIDTH+1:OPERAND_ADDR_WIDTH];
+	wire	[                 3 - 1 : 0]	bus_addr_bank = bus_addr[OPERAND_ADDR_WIDTH+2:OPERAND_ADDR_WIDTH];
 	wire	[OPERAND_ADDR_WIDTH - 1 : 0]	bus_addr_word = bus_addr[OPERAND_ADDR_WIDTH-1:0];
 	
 	
 		/*
 		 * Define bank offsets.
 		 */
-	localparam	[ 1: 0]	BANK_MODULUS	= 2'b00;	// 0
-	localparam	[ 1: 0]	BANK_MESSAGE	= 2'b01;	// 1
-	localparam	[ 1: 0]	BANK_EXPONENT	= 2'b10;	// 2
-	localparam	[ 1: 0]	BANK_RESULT		= 2'b11;	// 3
+	localparam	[ 2: 0]	BANK_MODULUS					= 3'b000;	// 0
+	localparam	[ 2: 0]	BANK_MESSAGE					= 3'b001;	// 1
+	localparam	[ 2: 0]	BANK_EXPONENT					= 3'b010;	// 2
+	localparam	[ 2: 0]	BANK_RESULT						= 3'b011;	// 3
+	localparam	[ 2: 0]	BANK_MODULUS_COEFF_OUT		= 3'b100;	// 4
+	localparam	[ 2: 0]	BANK_MODULUS_COEFF_IN		= 3'b101;	// 5
+	localparam	[ 2: 0]	BANK_MONTGOMERY_FACTOR_OUT	= 3'b110;	// 6
+	localparam	[ 2: 0]	BANK_MONTGOMERY_FACTOR_IN	= 3'b111;	// 7
 	
 	
 		/*
@@ -176,7 +180,7 @@ module modexpa7_top #
 		 *
 		 * Note, that the core does squaring and multiplication simultaneously, so
 		 * there are two identical systolic multipliers inside. It's better to have two
-		 * copies of modulus to give router some freeding in placing the multipliers,
+		 * copies of modulus to give router some freedom in placing the multipliers,
 		 * that's why there are actually two identical block memories N1 and N2 instead of N.
 		 * User reads from the first one, but writes to both of them. Note that the synthesis
 		 * tool might get too clever and find out that N1 and N2 are identical and decide
@@ -250,14 +254,18 @@ module modexpa7_top #
 
 		
 		/*
-		 * Instantiate internal memories.
+		 * Instantiate more block memories.
+		 *
+		 * Fast modular exponentiation requires two pre-calculated helper quantities: Montgomery
+		 * factor F and modulus-dependent speed-up coefficient N_COEFF. This core has two separate
+		 * buffers for each of those quantities, during pre-computation F and N_COEFF are written to
+		 * the "output" buffers, so that user can retrieve them and store along with the key for
+		 * future use. During exponentiation F and N_COEFF are read from the "input" buffers and
+		 * must be supplied by user along with the modulus.
 		 *
-		 * We have two block memories: F for Montgomery factor and N_COEFF for modulus-dependent
-		 * coefficient, they are written to during pre-calculation and read from during exponentiation.
-		 *
-		 * Note, that there are actually two identical block memories N_COEFF1 and N_COEFF2 instead of
-		 * just one N_COEFF, read the explanation above. F is only used by one of the multipliers, so
-		 * we don't need F1 and F2.
+		 * Note, that there are actually two identical input block memories N_COEFF1 and N_COEFF2
+		 * instead of just one N_COEFF, read the explanation above. F is only used by one of
+		 * the multipliers, so we don't need F1 and F2.
 		 */
 
 	wire	[OPERAND_ADDR_WIDTH-1:0]	core_f_addr_wr;
@@ -274,20 +282,38 @@ module modexpa7_top #
 		
 	wire										core_f_wren;
 	wire										core_n_coeff_wren;
+
+	wire	[                32-1:0]	user_f_out_data;
+	wire	[                32-1:0]	user_f_in_data;
+	wire	[                32-1:0]	user_n_coeff_out_data;
+	wire	[                32-1:0]	user_n_coeff_in_data;
+
+	wire										user_f_in_wren       = bus_cs && bus_we && (bus_addr_bank == BANK_MONTGOMERY_FACTOR_IN);
+	wire										user_n_coeff_in_wren = bus_cs && bus_we && (bus_addr_bank == BANK_MODULUS_COEFF_IN);
 
 	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
-	bram_f (.clk(clk),
+	bram_f_out (.clk(clk),
 		.a_addr(core_f_addr_wr), .a_out(), .a_wr(core_f_wren), .a_in(core_f_data_wr),
+		.b_addr(bus_addr_word), .b_out(user_f_out_data));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_f_in (.clk(clk),
+		.a_addr(bus_addr_word), .a_out(user_f_in_data), .a_wr(user_f_in_wren), .a_in(bus_data_wr),
 		.b_addr(core_f_addr_rd), .b_out(core_f_data_rd));
-
+
 	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
-	bram_n_coeff1 (.clk(clk),
+	bram_n_coeff_out (.clk(clk),
 		.a_addr(core_n_coeff_addr_wr), .a_out(), .a_wr(core_n_coeff_wren), .a_in(core_n_coeff_data_wr),
+		.b_addr(bus_addr_word), .b_out(user_n_coeff_out_data));
+
+	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+	bram_n_coeff_in1 (.clk(clk),
+		.a_addr(bus_addr_word), .a_out(user_n_coeff_in_data), .a_wr(user_n_coeff_in_wren), .a_in(bus_data_wr),
 		.b_addr(core_n_coeff1_addr_rd), .b_out(core_n_coeff1_data_rd));
 
 	bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
-	bram_n_coeff2 (.clk(clk),
-		.a_addr(core_n_coeff_addr_wr), .a_out(), .a_wr(core_n_coeff_wren), .a_in(core_n_coeff_data_wr),
+	bram_n_coeff_in2 (.clk(clk),
+		.a_addr(bus_addr_word), .a_out(), .a_wr(user_n_coeff_in_wren), .a_in(bus_data_wr),
 		.b_addr(core_n_coeff2_addr_rd), .b_out(core_n_coeff2_data_rd));
 		
 
@@ -461,7 +487,7 @@ module modexpa7_top #
 		 */
 		 
 		// delay bus_addr_bank by 1 clock cycle to remember from where we've just been reading
-	reg	[1: 0]	bus_addr_bank_dly;
+	reg	[2: 0]	bus_addr_bank_dly;
 	always @(posedge clk)
 		if (bus_cs) bus_addr_bank_dly <= bus_addr_bank;
 
@@ -474,12 +500,16 @@ module modexpa7_top #
 		//
 		case (bus_addr_bank_dly)
 			//
-			BANK_MODULUS:		bus_data_rd_mux = user_n_data;
-			BANK_MESSAGE:		bus_data_rd_mux = user_m_data;
-			BANK_EXPONENT:		bus_data_rd_mux = user_d_data;
-			BANK_RESULT:		bus_data_rd_mux = user_r_data;
+			BANK_MODULUS:						bus_data_rd_mux = user_n_data;
+			BANK_MESSAGE:						bus_data_rd_mux = user_m_data;
+			BANK_EXPONENT:						bus_data_rd_mux = user_d_data;
+			BANK_RESULT:						bus_data_rd_mux = user_r_data;
+			//
+			BANK_MODULUS_COEFF_OUT:			bus_data_rd_mux = user_n_coeff_out_data;
+			BANK_MODULUS_COEFF_IN:			bus_data_rd_mux = user_n_coeff_in_data;
+			BANK_MONTGOMERY_FACTOR_OUT:	bus_data_rd_mux = user_f_out_data;
+			BANK_MONTGOMERY_FACTOR_IN:		bus_data_rd_mux = user_f_in_data;
 			//
 		endcase
-		
 
 endmodule
diff --git a/src/rtl/modexpa7_wrapper.v b/src/rtl/modexpa7_wrapper.v
index a4e2319..8ebc22a 100644
--- a/src/rtl/modexpa7_wrapper.v
+++ b/src/rtl/modexpa7_wrapper.v
@@ -42,7 +42,7 @@ module modexpa7_wrapper #
 		input											cs,
 		input											we,
 
-		input		[OPERAND_ADDR_WIDTH+2:0]	address,
+		input		[OPERAND_ADDR_WIDTH+3:0]	address,
 		input		[                32-1:0]	write_data,
 		output	[                32-1:0]	read_data
 	);
@@ -54,8 +54,8 @@ module modexpa7_wrapper #
 	localparam	ADDR_MSB_REGS	= 1'b0;
 	localparam	ADDR_MSB_CORE	= 1'b1;
 	
-	wire										address_msb = address[OPERAND_ADDR_WIDTH+2];
-	wire	[OPERAND_ADDR_WIDTH+1:0]	address_lsb = address[OPERAND_ADDR_WIDTH+1:0];
+	wire										address_msb = address[OPERAND_ADDR_WIDTH+3];
+	wire	[OPERAND_ADDR_WIDTH+2:0]	address_lsb = address[OPERAND_ADDR_WIDTH+2:0];
 
 
 		/*
@@ -68,17 +68,17 @@ module modexpa7_wrapper #
 		/*
 		 * Registers
 		 */
-	localparam	[OPERAND_ADDR_WIDTH+1:0]	ADDR_NAME0				= 'h00;	//
-	localparam	[OPERAND_ADDR_WIDTH+1:0]	ADDR_NAME1				= 'h01;	//
-	localparam	[OPERAND_ADDR_WIDTH+1:0]	ADDR_VERSION			= 'h02;	//
-
-	localparam	[OPERAND_ADDR_WIDTH+1:0]	ADDR_CONTROL			= 'h08;	// {next, init}
-	localparam	[OPERAND_ADDR_WIDTH+1:0]	ADDR_STATUS				= 'h09;	// {valid, ready}
-	localparam	[OPERAND_ADDR_WIDTH+1:0]	ADDR_MODE				= 'h10;	// {crt, dummy}
-	localparam	[OPERAND_ADDR_WIDTH+1:0]	ADDR_MODULUS_BITS		= 'h11;	// number of bits in modulus
-	localparam	[OPERAND_ADDR_WIDTH+1:0]	ADDR_EXPONENT_BITS	= 'h12;	// number of bits in exponent
-	localparam	[OPERAND_ADDR_WIDTH+1:0]	ADDR_BUFFER_BITS		= 'h13;	// largest supported number of bits
-	localparam	[OPERAND_ADDR_WIDTH+1:0]	ADDR_ARRAY_BITS		= 'h14;	// number of bits in systolic array
+	localparam	[OPERAND_ADDR_WIDTH+2:0]	ADDR_NAME0				= 'h00;	//
+	localparam	[OPERAND_ADDR_WIDTH+2:0]	ADDR_NAME1				= 'h01;	//
+	localparam	[OPERAND_ADDR_WIDTH+2:0]	ADDR_VERSION			= 'h02;	//
+
+	localparam	[OPERAND_ADDR_WIDTH+2:0]	ADDR_CONTROL			= 'h08;	// {next, init}
+	localparam	[OPERAND_ADDR_WIDTH+2:0]	ADDR_STATUS				= 'h09;	// {valid, ready}
+	localparam	[OPERAND_ADDR_WIDTH+2:0]	ADDR_MODE				= 'h10;	// {crt, dummy}
+	localparam	[OPERAND_ADDR_WIDTH+2:0]	ADDR_MODULUS_BITS		= 'h11;	// number of bits in modulus
+	localparam	[OPERAND_ADDR_WIDTH+2:0]	ADDR_EXPONENT_BITS	= 'h12;	// number of bits in exponent
+	localparam	[OPERAND_ADDR_WIDTH+2:0]	ADDR_BUFFER_BITS		= 'h13;	// largest supported number of bits
+	localparam	[OPERAND_ADDR_WIDTH+2:0]	ADDR_ARRAY_BITS		= 'h14;	// number of bits in systolic array
 
 	localparam	CONTROL_INIT_BIT	= 0;
 	localparam	CONTROL_NEXT_BIT	= 1;
@@ -91,7 +91,7 @@ module modexpa7_wrapper #
 
 	localparam	CORE_NAME0			= 32'h6D6F6465;	// "mode"
 	localparam	CORE_NAME1			= 32'h78706137;	// "xpa7"
-	localparam	CORE_VERSION		= 32'h302E3230;	// "0.20"
+	localparam	CORE_VERSION		= 32'h302E3235;	// "0.25"
 
 
 		/*



More information about the Commits mailing list