[Cryptech-Commits] [user/shatov/modexpng] 05/05: Added more micro-operations, entire Montgomery exponentiation ladder works now.

git at cryptech.is git at cryptech.is
Thu Oct 3 13:51:37 UTC 2019


This is an automated email from the git hooks/post-receive script.

meisterpaul1 at yandex.ru pushed a commit to branch master
in repository user/shatov/modexpng.

commit 02247784f18dc683d5873a52c1650e72f02273b5
Author: Pavel V. Shatov (Meister) <meisterpaul1 at yandex.ru>
AuthorDate: Thu Oct 3 16:50:25 2019 +0300

    Added more micro-operations, entire Montgomery exponentiation ladder works now.
---
 bench/tb_core_full.v          | 511 +++++++++++++++++++++++
 bench/tb_mmm_dual_x8.v        | 940 ++++++++++++++++++++++++++++++++++++++++++
 bench/tb_mmm_x8_dual.v        | 327 ---------------
 rtl/modexpng_core_top.v       | 138 +++++--
 rtl/modexpng_general_worker.v | 402 ++++++++++++++++--
 rtl/modexpng_io_manager.v     | 170 +++++++-
 rtl/modexpng_microcode.vh     |  42 +-
 rtl/modexpng_mmm_dual.v       |  10 +-
 rtl/modexpng_parameters.vh    |   8 +-
 rtl/modexpng_uop_rom.v        |  82 ++--
 10 files changed, 2171 insertions(+), 459 deletions(-)

diff --git a/bench/tb_core_full.v b/bench/tb_core_full.v
new file mode 100644
index 0000000..248634e
--- /dev/null
+++ b/bench/tb_core_full.v
@@ -0,0 +1,511 @@
+`timescale 1ns / 1ps
+
+module tb_core_full;
+
+
+    //
+    // Headers
+    //
+    `include "../rtl/modexpng_parameters.vh"
+
+
+    //
+    // Test Vectors
+    //
+    localparam TB_MODULUS_LENGTH_N  = 1024;                              // length of the modulus N in bits
+    localparam TB_MODULUS_LENGTH_PQ = TB_MODULUS_LENGTH_N  / 2;          // length of the half-size operands (P, Q, ...) in bits
+    localparam TB_NUM_WORDS_PQ      = TB_MODULUS_LENGTH_PQ / BUS_DATA_W; // half-size operand length in bus words; BUS_DATA_W is presumably defined in the included parameters header
+    localparam TB_NUM_WORDS_N       = TB_MODULUS_LENGTH_N  / BUS_DATA_W; // full-size operand length in bus words; sizes the M/N/X/Y/... arrays below
+    localparam CORE_NUM_WORDS_PQ    = TB_MODULUS_LENGTH_PQ / WORD_W;     // half-size operand length in core words; used for word_index_last_pq
+    localparam CORE_NUM_WORDS_N     = TB_MODULUS_LENGTH_N  / WORD_W;     // full-size operand length in core words; used for word_index_last_n
+
+    reg [31:0] M[0:TB_NUM_WORDS_N-1];
+    reg [31:0] N[0:TB_NUM_WORDS_N-1];
+    reg [31:0] N_FACTOR[0:TB_NUM_WORDS_N-1];
+    reg [31:0] N_COEFF[0:TB_NUM_WORDS_N];
+    reg [31:0] X[0:TB_NUM_WORDS_N-1];
+    reg [31:0] Y[0:TB_NUM_WORDS_N-1];
+    reg [31:0] P[0:TB_NUM_WORDS_PQ-1];
+    reg [31:0] Q[0:TB_NUM_WORDS_PQ-1];
+    reg [31:0] P_FACTOR[0:TB_NUM_WORDS_PQ-1];
+    reg [31:0] Q_FACTOR[0:TB_NUM_WORDS_PQ-1];
+    reg [31:0] P_COEFF[0:TB_NUM_WORDS_PQ];
+    reg [31:0] Q_COEFF[0:TB_NUM_WORDS_PQ];
+    reg [31:0] DP[0:TB_NUM_WORDS_PQ-1];
+    reg [31:0] DQ[0:TB_NUM_WORDS_PQ-1];
+    reg [31:0] QINV[0:TB_NUM_WORDS_PQ-1];
+    reg [31:0] XM[0:TB_NUM_WORDS_N-1];
+    reg [31:0] YM[0:TB_NUM_WORDS_N-1];
+    reg [31:0] S[0:TB_NUM_WORDS_N-1];
+    reg [31:0] XM_READBACK[0:TB_NUM_WORDS_N-1];
+    reg [31:0] YM_READBACK[0:TB_NUM_WORDS_N-1];
+    reg [31:0] S_READBACK[0:TB_NUM_WORDS_N-1];
+    
+    initial begin
+        M[  0] = 32'he1b3c6ac; M[  1] = 32'haa2c5d8c; M[  2] = 32'hbecc676a; M[  3] = 32'hda087a3e;
+        M[  4] = 32'hf0816496; M[  5] = 32'hf9e17fd8; M[  6] = 32'h304d4896; M[  7] = 32'h81d4e9ab;
+        M[  8] = 32'h80eff76c; M[  9] = 32'he5b8f9b6; M[ 10] = 32'h4b1ebe55; M[ 11] = 32'ha1feb9dc;
+        M[ 12] = 32'heca4192f; M[ 13] = 32'h6ad6ea8e; M[ 14] = 32'hf34aed05; M[ 15] = 32'had38c275;
+        M[ 16] = 32'h8d3b583b; M[ 17] = 32'hc370f07e; M[ 18] = 32'hb9078738; M[ 19] = 32'haf37f86c;
+        M[ 20] = 32'h02f0e161; M[ 21] = 32'h0506a68a; M[ 22] = 32'h1ae65107; M[ 23] = 32'hcd3a97f1;
+        M[ 24] = 32'hb27244b8; M[ 25] = 32'h9bc3c400; M[ 26] = 32'he4d5636e; M[ 27] = 32'h35187c07;
+        M[ 28] = 32'h78a661c9; M[ 29] = 32'h1e7ec273; M[ 30] = 32'hcdc31041; M[ 31] = 32'h002291d8;
+        N[  0] = 32'h6719997f; N[  1] = 32'hef2df706; N[  2] = 32'h9ba95792; N[  3] = 32'h747e0580;
+        N[  4] = 32'h7507684c; N[  5] = 32'h7a10d0d1; N[  6] = 32'h83a33941; N[  7] = 32'haef9fda5;
+        N[  8] = 32'h17972933; N[  9] = 32'h0a98251a; N[ 10] = 32'h7dce3d13; N[ 11] = 32'hdad49a60;
+        N[ 12] = 32'h9f98006b; N[ 13] = 32'h46fd4a05; N[ 14] = 32'h51966e1d; N[ 15] = 32'hb1c59fab;
+        N[ 16] = 32'h8ab3096e; N[ 17] = 32'hef1f0436; N[ 18] = 32'heeed776f; N[ 19] = 32'h106d9d82;
+        N[ 20] = 32'hdd2a44af; N[ 21] = 32'h17c32585; N[ 22] = 32'hc854e454; N[ 23] = 32'h600fb6df;
+        N[ 24] = 32'h25c2d4bb; N[ 25] = 32'h5f09d790; N[ 26] = 32'he5a2bb93; N[ 27] = 32'h5bc6b044;
+        N[ 28] = 32'h2ecbb15f; N[ 29] = 32'h464817f5; N[ 30] = 32'h05cae32b; N[ 31] = 32'hde97bb85;
+        N_FACTOR[  0] = 32'ha06a1113; N_FACTOR[  1] = 32'hc9974806; N_FACTOR[  2] = 32'h572d7a20; N_FACTOR[  3] = 32'h04000838;
+        N_FACTOR[  4] = 32'hb275c37a; N_FACTOR[  5] = 32'hea78a046; N_FACTOR[  6] = 32'h029e13b8; N_FACTOR[  7] = 32'hae540753;
+        N_FACTOR[  8] = 32'h1e98bc21; N_FACTOR[  9] = 32'h34ede47a; N_FACTOR[ 10] = 32'h0c565ecd; N_FACTOR[ 11] = 32'h027ff3bf;
+        N_FACTOR[ 12] = 32'h08290d30; N_FACTOR[ 13] = 32'hb92857df; N_FACTOR[ 14] = 32'he6c59eb3; N_FACTOR[ 15] = 32'h09e53d6a;
+        N_FACTOR[ 16] = 32'h980d127e; N_FACTOR[ 17] = 32'h4dd6ced0; N_FACTOR[ 18] = 32'h3b9400d0; N_FACTOR[ 19] = 32'h276c6711;
+        N_FACTOR[ 20] = 32'h72eaf2e6; N_FACTOR[ 21] = 32'h749f81eb; N_FACTOR[ 22] = 32'h17b7d05f; N_FACTOR[ 23] = 32'h41a3a2cd;
+        N_FACTOR[ 24] = 32'h1ba098f3; N_FACTOR[ 25] = 32'h9b884af9; N_FACTOR[ 26] = 32'hdafd920c; N_FACTOR[ 27] = 32'h7b1f5cc6;
+        N_FACTOR[ 28] = 32'hb0a0d098; N_FACTOR[ 29] = 32'h4ee55bcf; N_FACTOR[ 30] = 32'haed9b905; N_FACTOR[ 31] = 32'h42d541fb;
+        N_COEFF[  0] = 32'hb383d981; N_COEFF[  1] = 32'h9bf1c20c; N_COEFF[  2] = 32'h268999ff; N_COEFF[  3] = 32'h11a3c01a;
+        N_COEFF[  4] = 32'h12665495; N_COEFF[  5] = 32'h515b0d96; N_COEFF[  6] = 32'hb704fb07; N_COEFF[  7] = 32'h8e1bd1d6;
+        N_COEFF[  8] = 32'h62c5f506; N_COEFF[  9] = 32'hfdcd0163; N_COEFF[ 10] = 32'h8dd55dee; N_COEFF[ 11] = 32'h6d79c8b1;
+        N_COEFF[ 12] = 32'hca16d0b9; N_COEFF[ 13] = 32'h88bead48; N_COEFF[ 14] = 32'hbcdb1e94; N_COEFF[ 15] = 32'h950c171d;
+        N_COEFF[ 16] = 32'h4fa810af; N_COEFF[ 17] = 32'h9b63e6d2; N_COEFF[ 18] = 32'ha2d0c26b; N_COEFF[ 19] = 32'hafa1ef25;
+        N_COEFF[ 20] = 32'h111bd21e; N_COEFF[ 21] = 32'hc2d896f0; N_COEFF[ 22] = 32'h189dc2cf; N_COEFF[ 23] = 32'h6144156a;
+        N_COEFF[ 24] = 32'hd1c67123; N_COEFF[ 25] = 32'ha127e4f3; N_COEFF[ 26] = 32'h40d342ef; N_COEFF[ 27] = 32'hee476d42;
+        N_COEFF[ 28] = 32'hee05f26a; N_COEFF[ 29] = 32'h4fc717bd; N_COEFF[ 30] = 32'h6baa4d60; N_COEFF[ 31] = 32'h1d6b10db;
+        N_COEFF[ 32] = 32'h00006545;
+        X[  0] = 32'ha838f053; X[  1] = 32'h8eb9747c; X[  2] = 32'h5991b9eb; X[  3] = 32'h74e6e776;
+        X[  4] = 32'hcb5aa9e2; X[  5] = 32'h7f8083d4; X[  6] = 32'h3f7d47ec; X[  7] = 32'hbd76a787;
+        X[  8] = 32'hf4c166b7; X[  9] = 32'hdbf67229; X[ 10] = 32'h975a5cfb; X[ 11] = 32'he8c35dca;
+        X[ 12] = 32'h6abc86e8; X[ 13] = 32'hfee472cb; X[ 14] = 32'h83ac8f2e; X[ 15] = 32'h82825cff;
+        X[ 16] = 32'h2d532c22; X[ 17] = 32'h2d3c3b06; X[ 18] = 32'he2862a8f; X[ 19] = 32'he8616ce4;
+        X[ 20] = 32'h5d77ee51; X[ 21] = 32'he609de07; X[ 22] = 32'hef718044; X[ 23] = 32'h82f35f8b;
+        X[ 24] = 32'hcdb9dcfe; X[ 25] = 32'hff6ea364; X[ 26] = 32'h0994ae28; X[ 27] = 32'h409b369b;
+        X[ 28] = 32'hcfabda4e; X[ 29] = 32'h5cd52bbc; X[ 30] = 32'hd90e1715; X[ 31] = 32'h00f4dcf2;
+        Y[  0] = 32'h01b2730a; Y[  1] = 32'h04ff1664; Y[  2] = 32'h6d55dc06; Y[  3] = 32'h1cda0da7;
+        Y[  4] = 32'h98c812b4; Y[  5] = 32'ha8f79f3b; Y[  6] = 32'hb18d9ee1; Y[  7] = 32'ha53e97db;
+        Y[  8] = 32'hfbbfd687; Y[  9] = 32'h6b8a8bf6; Y[ 10] = 32'h59fe5575; Y[ 11] = 32'he6ee62ca;
+        Y[ 12] = 32'h9fe3f32a; Y[ 13] = 32'h6d758eaa; Y[ 14] = 32'h121e3dac; Y[ 15] = 32'h31d77884;
+        Y[ 16] = 32'h8f2701dd; Y[ 17] = 32'hca5e7ac3; Y[ 18] = 32'h731977a3; Y[ 19] = 32'hc3c1af70;
+        Y[ 20] = 32'h5606786a; Y[ 21] = 32'h94b71191; Y[ 22] = 32'hd044c7e2; Y[ 23] = 32'h7d899cd7;
+        Y[ 24] = 32'hb17d4f5d; Y[ 25] = 32'h446e04de; Y[ 26] = 32'h9c40b33d; Y[ 27] = 32'habc2e23e;
+        Y[ 28] = 32'hbb98b1f6; Y[ 29] = 32'hf1f87f7e; Y[ 30] = 32'hf19f3050; Y[ 31] = 32'h91305f4c;
+        P[  0] = 32'h18bb0f97; P[  1] = 32'h08588a44; P[  2] = 32'h042c6647; P[  3] = 32'hc8d3fa09;
+        P[  4] = 32'he2ddbbc7; P[  5] = 32'hef4a17fd; P[  6] = 32'h90c102ef; P[  7] = 32'h28b132db;
+        P[  8] = 32'hebfd5f0a; P[  9] = 32'h958717ca; P[ 10] = 32'h563cd266; P[ 11] = 32'h433f41af;
+        P[ 12] = 32'hbc198e83; P[ 13] = 32'h5b5300b2; P[ 14] = 32'h9bc50e5d; P[ 15] = 32'hefffa822;
+        Q[  0] = 32'h25de0259; Q[  1] = 32'hd81461d0; Q[  2] = 32'h613815b3; Q[  3] = 32'h9bf274e0;
+        Q[  4] = 32'hbfd89a48; Q[  5] = 32'hc53e71ac; Q[  6] = 32'hcce7aed3; Q[  7] = 32'hce1d017c;
+        Q[  8] = 32'h646547e1; Q[  9] = 32'hd6779694; Q[ 10] = 32'h20ae39c0; Q[ 11] = 32'hef0d4b5b;
+        Q[ 12] = 32'h8e5f59d6; Q[ 13] = 32'h7e267974; Q[ 14] = 32'h14c86644; Q[ 15] = 32'hed6efcd0;
+        P_FACTOR[  0] = 32'h614f99ce; P_FACTOR[  1] = 32'hbcee5381; P_FACTOR[  2] = 32'h10b70a9a; P_FACTOR[  3] = 32'h1a697756;
+        P_FACTOR[  4] = 32'h1a972b27; P_FACTOR[  5] = 32'hd7c43f9e; P_FACTOR[  6] = 32'h48cbad9c; P_FACTOR[  7] = 32'hc350e206;
+        P_FACTOR[  8] = 32'h51098b50; P_FACTOR[  9] = 32'h93388ec6; P_FACTOR[ 10] = 32'h548960b5; P_FACTOR[ 11] = 32'h5ecd6b04;
+        P_FACTOR[ 12] = 32'h04d1d543; P_FACTOR[ 13] = 32'ha53994af; P_FACTOR[ 14] = 32'hd390be70; P_FACTOR[ 15] = 32'h0acdced0;
+        Q_FACTOR[  0] = 32'h8a19423f; Q_FACTOR[  1] = 32'h9d729c78; Q_FACTOR[  2] = 32'h26ed5239; Q_FACTOR[  3] = 32'h5a7eba92;
+        Q_FACTOR[  4] = 32'h8465f60f; Q_FACTOR[  5] = 32'hd50817dd; Q_FACTOR[  6] = 32'hba703ab1; Q_FACTOR[  7] = 32'h3d59bd42;
+        Q_FACTOR[  8] = 32'h2c197fcc; Q_FACTOR[  9] = 32'hed14f573; Q_FACTOR[ 10] = 32'hb860c105; Q_FACTOR[ 11] = 32'h89af91e7;
+        Q_FACTOR[ 12] = 32'h13a3742d; Q_FACTOR[ 13] = 32'h96e41677; Q_FACTOR[ 14] = 32'h86506b4d; Q_FACTOR[ 15] = 32'h4a834535;
+        P_COEFF[  0] = 32'hb3679fd9; P_COEFF[  1] = 32'hde24e467; P_COEFF[  2] = 32'hf0e82022; P_COEFF[  3] = 32'h99012919;
+        P_COEFF[  4] = 32'h023bda43; P_COEFF[  5] = 32'hf04eebf8; P_COEFF[  6] = 32'h29e9942f; P_COEFF[  7] = 32'h8c257cb0;
+        P_COEFF[  8] = 32'h5cdc4e60; P_COEFF[  9] = 32'h279bdff7; P_COEFF[ 10] = 32'hf04a610d; P_COEFF[ 11] = 32'h342901dc;
+        P_COEFF[ 12] = 32'hc3e2f78c; P_COEFF[ 13] = 32'h39c00ed8; P_COEFF[ 14] = 32'hf7032ac2; P_COEFF[ 15] = 32'h22d9c54e;
+        P_COEFF[ 16] = 32'h0000f994;
+        Q_COEFF[  0] = 32'h95beda17; Q_COEFF[  1] = 32'ha4b101fa; Q_COEFF[  2] = 32'hd0b854bc; Q_COEFF[  3] = 32'h5c952a67;
+        Q_COEFF[  4] = 32'h56722aa8; Q_COEFF[  5] = 32'h2176cace; Q_COEFF[  6] = 32'h69beef2d; Q_COEFF[  7] = 32'h95bf6eb2;
+        Q_COEFF[  8] = 32'h0cf1175c; Q_COEFF[  9] = 32'h4911b74e; Q_COEFF[ 10] = 32'h331e61cb; Q_COEFF[ 11] = 32'he9527ead;
+        Q_COEFF[ 12] = 32'h8d6a5911; Q_COEFF[ 13] = 32'hae42d654; Q_COEFF[ 14] = 32'he10d29a8; Q_COEFF[ 15] = 32'h50a5dd76;
+        Q_COEFF[ 16] = 32'h0000ed75;
+        DP[  0] = 32'h63d165e5; DP[  1] = 32'h856ac81e; DP[  2] = 32'hc4b8779d; DP[  3] = 32'h8b119544;
+        DP[  4] = 32'had780837; DP[  5] = 32'h3e920266; DP[  6] = 32'he9d10f2e; DP[  7] = 32'h7c1b42b2;
+        DP[  8] = 32'hc7daca3b; DP[  9] = 32'h7883be11; DP[ 10] = 32'ha384548d; DP[ 11] = 32'he0848b23;
+        DP[ 12] = 32'h0b62bdff; DP[ 13] = 32'h11c64350; DP[ 14] = 32'h2aa1e225; DP[ 15] = 32'h9c2bcaa7;
+        DQ[  0] = 32'hd7ffdc71; DQ[  1] = 32'hed01b8aa; DQ[  2] = 32'h2f99d3a6; DQ[  3] = 32'h8ccb4428;
+        DQ[  4] = 32'hb1574616; DQ[  5] = 32'hfc218e36; DQ[  6] = 32'h4fe24f91; DQ[  7] = 32'h9c367c42;
+        DQ[  8] = 32'h69dfa208; DQ[  9] = 32'h3ee3de79; DQ[ 10] = 32'h54ded59b; DQ[ 11] = 32'hcb3b487d;
+        DQ[ 12] = 32'hbcc0db4e; DQ[ 13] = 32'hb3e6678c; DQ[ 14] = 32'h3d13ec03; DQ[ 15] = 32'h99e0f684;        QINV[  0] = 32'h9a2f0db2; QINV[  1] = 32'h4a8075a5; QINV[  2] = 32'hb61201fa; QINV[  3] = 32'h0e876a42;
+        QINV[  4] = 32'h94667476; QINV[  5] = 32'h7538b796; QINV[  6] = 32'h8d8dfa35; QINV[  7] = 32'h689ee4a7;
+        QINV[  8] = 32'h6779dd63; QINV[  9] = 32'he15b6b5e; QINV[ 10] = 32'h8275500c; QINV[ 11] = 32'he4dcd058;
+        QINV[ 12] = 32'haf54b86c; QINV[ 13] = 32'hba76dc50; QINV[ 14] = 32'h473d0d6d; QINV[ 15] = 32'ha023ba44;
+        XM[  0] = 32'h9b067dd2; XM[  1] = 32'hf47b497a; XM[  2] = 32'he8044305; XM[  3] = 32'hf74f1735;
+        XM[  4] = 32'h494825f4; XM[  5] = 32'h077bf4a3; XM[  6] = 32'h637a9f36; XM[  7] = 32'h3c3821a2;
+        XM[  8] = 32'haa1fe167; XM[  9] = 32'h01c7289a; XM[ 10] = 32'hb463d63d; XM[ 11] = 32'hc992252e;
+        XM[ 12] = 32'he43762bf; XM[ 13] = 32'h351d9416; XM[ 14] = 32'h10e7f813; XM[ 15] = 32'h33187c87;
+        XM[ 16] = 32'h9eb98306; XM[ 17] = 32'hb29be7b6; XM[ 18] = 32'h32b237a8; XM[ 19] = 32'h6c1d5e46;
+        XM[ 20] = 32'h1cf10b4a; XM[ 21] = 32'hd874a710; XM[ 22] = 32'h7d2df198; XM[ 23] = 32'h463701cc;
+        XM[ 24] = 32'h9b648da0; XM[ 25] = 32'hdc5d3b10; XM[ 26] = 32'hef88e7fd; XM[ 27] = 32'hcb888210;
+        XM[ 28] = 32'h24397651; XM[ 29] = 32'h9b9bd5a2; XM[ 30] = 32'hbc796763; XM[ 31] = 32'h5be48377;
+        YM[  0] = 32'h78aba2bd; YM[  1] = 32'h6885ed1d; YM[  2] = 32'h0d4983a2; YM[  3] = 32'h3b775d20;
+        YM[  4] = 32'hf83145f4; YM[  5] = 32'h66e52536; YM[  6] = 32'h25c2377e; YM[  7] = 32'h91ef1342;
+        YM[  8] = 32'h73013f57; YM[  9] = 32'h3862aa1a; YM[ 10] = 32'h37846437; YM[ 11] = 32'ha6ddd3c9;
+        YM[ 12] = 32'h3974d1b2; YM[ 13] = 32'h02aea3f6; YM[ 14] = 32'h2e71b229; YM[ 15] = 32'hb898d5b6;
+        YM[ 16] = 32'h71258bb8; YM[ 17] = 32'h654f94e8; YM[ 18] = 32'h5539e56e; YM[ 19] = 32'hd49567f2;
+        YM[ 20] = 32'he73efaa1; YM[ 21] = 32'h3e4e2162; YM[ 22] = 32'h772d786a; YM[ 23] = 32'hc27be96a;
+        YM[ 24] = 32'h9911c92d; YM[ 25] = 32'hddc1b0fd; YM[ 26] = 32'h829186bb; YM[ 27] = 32'h1bab454e;
+        YM[ 28] = 32'h2f9fd9ce; YM[ 29] = 32'ha57103d4; YM[ 30] = 32'h1a93390c; YM[ 31] = 32'hc0376429;
+        S[  0] = 32'h8eb4aa6e; S[  1] = 32'hababa077; S[  2] = 32'h8758f3f6; S[  3] = 32'h8282e4f4;
+        S[  4] = 32'h747947ce; S[  5] = 32'h9ac7dbb0; S[  6] = 32'h9184f0b5; S[  7] = 32'h4b572f47;
+        S[  8] = 32'hf4807458; S[  9] = 32'h6da8dcd4; S[ 10] = 32'h9f331c40; S[ 11] = 32'h65e2b7a2;
+        S[ 12] = 32'hd3704e85; S[ 13] = 32'h3366f4f0; S[ 14] = 32'h035044b1; S[ 15] = 32'h54758bc4;
+        S[ 16] = 32'h2a7e0970; S[ 17] = 32'hbcc7783c; S[ 18] = 32'hf62193e6; S[ 19] = 32'h5d7bb220;
+        S[ 20] = 32'hb0fcabdd; S[ 21] = 32'he6dc5c88; S[ 22] = 32'h8e4d5e53; S[ 23] = 32'haa40acba;
+        S[ 24] = 32'h1dfc9178; S[ 25] = 32'h842821bc; S[ 26] = 32'h318fc8e1; S[ 27] = 32'h0f8161fe;
+        S[ 28] = 32'hbf3d7945; S[ 29] = 32'he33612c7; S[ 30] = 32'h7eec7f9d; S[ 31] = 32'h66da2c5a;
+    end
+    
+
+
+    //
+    // Clocks
+    //
+    `define CLK_FREQUENCY_MHZ   (100.0)
+    `define CLK_PERIOD_NS       (1000.0 / `CLK_FREQUENCY_MHZ)
+    `define CLK_PERIOD_HALF_NS  (0.5 * `CLK_PERIOD_NS)
+
+    `define CLK_BUS_FREQUENCY_MHZ   (50.0)
+    `define CLK_BUS_PERIOD_NS       (1000.0 / `CLK_BUS_FREQUENCY_MHZ)
+    `define CLK_BUS_PERIOD_HALF_NS  (0.5 * `CLK_BUS_PERIOD_NS)
+    
+	reg clk = 1'b1;
+	reg clk_bus = 1'b0;
+
+    always #`CLK_PERIOD_HALF_NS clk = ~clk;
+    
+    always #`CLK_BUS_PERIOD_HALF_NS clk_bus = ~clk_bus;
+    
+    
+    //
+    // Reset
+    //
+    reg rst = 1'b1;
+    
+    
+    //
+    // Control / Status
+    //
+    reg [ 7:0] word_index_last_n;
+    reg [ 7:0] word_index_last_pq;
+    reg [11:0] bit_index_last_n;
+    reg [11:0] bit_index_last_pq;
+    reg        core_next = 1'b0;
+    wire       core_valid;
+    reg        core_crt_mode;
+    
+    
+    //
+    // System Bus
+    //
+    reg         bus_ready;
+    reg         bus_cs = 1'b0;
+    reg         bus_we = 1'b0;
+    reg  [11:0] bus_addr;
+    reg  [31:0] bus_data_wr;
+    wire [31:0] bus_data_rd;
+
+    wire [ 1:0] bus_addr_sel  = bus_addr[11:10];
+    wire [ 2:0] bus_addr_bank = bus_addr[9:7];
+    wire [ 6:0] bus_addr_data = bus_addr[6:0];
+
+
+    //
+    // UUT
+    //
+    modexpng_core_top uut
+    (
+        .clk                (clk),
+        .clk_bus            (clk_bus),
+        
+        .rst                (rst),
+        
+        .next               (core_next),
+        .valid              (core_valid),
+        
+        .crt_mode           (core_crt_mode),
+        
+        .word_index_last_n  (word_index_last_n),
+        .word_index_last_pq (word_index_last_pq),
+        
+        .bit_index_last_n   (bit_index_last_n),
+        .bit_index_last_pq  (bit_index_last_pq),
+        
+        .bus_cs             (bus_cs),
+        .bus_we             (bus_we),
+        .bus_addr           (bus_addr),
+        .bus_data_wr        (bus_data_wr),
+        .bus_data_rd        (bus_data_rd)
+    );
+
+
+    //
+    // Routine (Bus)
+    //
+    initial begin  // bus-side stimulus; runs concurrently with the control/status routine
+        // bus_ready is the handshake flag polled by the control/status routine
+        bus_ready = 1'b0;
+        // poll rst once per bus clock period until the other routine releases it
+        while (rst) wait_clock_bus_tick;
+        wait_clock_bus_ticks(10);
+        $display("Core came out of reset.");
+        // load both groups of input banks (bus select 2'd0, then bus select 2'd1)
+        set_input_1;
+        set_input_2;
+        // leave a short gap before signalling that all inputs are written
+        wait_clock_bus_ticks(10);
+        bus_ready = 1'b1;
+    
+    end
+    
+    
+    //
+    // Routine (Control/Status, Bus)
+    //
+    initial begin  // control-side stimulus: release reset, configure, start the core, read results
+        // clk starts high, so a half-period offset puts all later changes on falling edges of clk
+        _wait_half_clock_tick;
+        wait_clock_ticks(100);
+        rst = 1'b0;
+        // wait for the bus routine to finish writing the input banks
+        while (!bus_ready) wait_clock_tick;
+        wait_clock_ticks(10);
+        $display("Core input banks written.");
+        // operand sizes are passed as "last index" values, i.e. count minus one
+        word_index_last_n  = CORE_NUM_WORDS_N - 1;
+        word_index_last_pq = CORE_NUM_WORDS_PQ - 1;
+        
+        bit_index_last_n  = TB_MODULUS_LENGTH_N - 1;
+        bit_index_last_pq = TB_MODULUS_LENGTH_N / 2 - 1;  // same value as TB_MODULUS_LENGTH_PQ - 1
+        
+        core_crt_mode      = 1'b1;
+        // pulse 'next' high for exactly one clk period
+        core_next = 1'b1;
+        wait_clock_tick;
+        core_next = 1'b0;
+        $display("Pulsed 'next' control signal.");
+        // poll 'valid' once per clk period until it goes high
+        while (!core_valid) wait_clock_tick;
+        wait_clock_ticks(10);
+        
+        $display("Detected high 'valid' status signal.");
+        core_crt_mode      = 1'bX;  // mode input is driven to X once 'valid' has been seen
+
+        wait_clock_ticks(10);
+        get_output;
+        wait_clock_ticks(10);
+        
+        $display("Core output banks read.");
+        // NOTE(review): the self-check below is disabled, so the readback is never compared against XM/YM/S
+        //verify;
+    
+    end
+    
+    
+    //
+    // Variables
+    //    
+    integer _w, _n;
+    
+
+    //
+    // set_input_1;
+    //
+    task set_input_1;  // write M, N, N_FACTOR, N_COEFF, X and Y through bus select 2'd0
+        reg [9:0] _tn; // combined {bank[2:0], word[6:0]} address of the extra N_COEFF word
+        begin
+            _tn = BANK_IN_1_N_COEFF * 2 ** BUS_OP_ADDR_W + TB_NUM_WORDS_N; // trick to write extra trailer word: builds the address of N_COEFF[TB_NUM_WORDS_N], split into _tn[9:7] / _tn[6:0] below (presumably BUS_OP_ADDR_W is 7 - TODO confirm)
+            for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_M,        _w[6:0],  M[_w]);
+            for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N,        _w[6:0],  N[_w]);
+            for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N_FACTOR, _w[6:0],  N_FACTOR[_w]);
+            for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_N_COEFF,  _w[6:0],  N_COEFF[_w]);
+                                                   bus_write(2'd0, _tn[9:7],           _tn[6:0], N_COEFF[TB_NUM_WORDS_N]); // N_COEFF holds one word more than the other operands
+            for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_X,        _w[6:0],  X[_w]);
+            for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_Y,        _w[6:0],  Y[_w]);
+        end
+    endtask
+    
+    
+    //
+    // set_input_2;
+    //
+    task set_input_2;  // write the half-size operands (P, DP, Q, DQ, factors, coefficients, QINV) through bus select 2'd1
+        begin
+//            for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_write(2'd0, BANK_IN_1_M,        _w[6:0], M[_w]);
+            for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P,        {1'b0, _w[5:0]}, P       [_w]); // P goes to the lower half of the bank (address MSB = 0)
+            for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P,        {1'b1, _w[5:0]}, DP      [_w]); // DP shares the bank with P, upper half (address MSB = 1)
+            for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P_FACTOR, {      _w[6:0]}, P_FACTOR[_w]);
+            for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_P_COEFF,  {      _w[6:0]}, P_COEFF [_w]); // "<=": P_COEFF holds one extra trailer word
+            for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q,        {1'b0, _w[5:0]}, Q       [_w]); // Q goes to the lower half of the bank (address MSB = 0)
+            for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q,        {1'b1, _w[5:0]}, DQ      [_w]); // DQ shares the bank with Q, upper half (address MSB = 1)
+            for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q_FACTOR, {      _w[6:0]}, Q_FACTOR[_w]);
+            for (_w=0; _w<=TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_Q_COEFF,  {      _w[6:0]}, Q_COEFF [_w]);            
+            for (_w=0; _w< TB_NUM_WORDS_PQ; _w=_w+1) bus_write(2'd1, BANK_IN_2_QINV,     {      _w[6:0]}, QINV    [_w]);
+        end
+    endtask
+    
+    
+    //
+    // get_output;
+    //
+    task get_output;  // read the three output banks through bus select 2'd2 into the *_READBACK arrays
+        begin
+            for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_XM, _w[6:0], XM_READBACK[_w]);
+            for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_YM, _w[6:0], YM_READBACK[_w]);
+            for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) bus_read(2'd2, BANK_OUT_S,  _w[6:0], S_READBACK[_w]);
+        end
+    endtask
+
+
+    //
+    // verify;
+    //
+    task verify;  // compare XM/YM/S readback against the reference vectors and print a report
+        // per-quantity pass flags
+        reg xm_ok;
+        reg ym_ok;
+        reg s_ok;
+            //
+        begin
+            // assume success until a mismatching word is found
+            xm_ok = 1;
+            ym_ok = 1;
+            s_ok = 1;
+            // "!==" is case inequality, so X/Z bits in the readback also count as mismatches
+            for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) begin
+               if (XM_READBACK[_w] !== XM[_w]) xm_ok = 0; 
+               if (YM_READBACK[_w] !== YM[_w]) ym_ok = 0;
+               if (S_READBACK[_w] !== S[_w]) s_ok = 0;
+            end
+            // on failure dump every word of XM; mismatching words also get the XOR of expected and actual
+            if (!xm_ok)
+                //
+                for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) begin
+                    $write("XM / XM_READBACK [%3d] = 0x%08x / 0x%08x", _w, XM[_w], XM_READBACK[_w]);
+                    if (XM[_w] !== XM_READBACK[_w]) $write(" <???: 0x%08x> ", XM[_w] ^ XM_READBACK[_w]);
+                    $write("\n");
+                end
+            // same dump for YM
+            if (!ym_ok)
+                //
+                for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) begin
+                    $write("YM / YM_READBACK [%3d] = 0x%08x / 0x%08x", _w, YM[_w], YM_READBACK[_w]);
+                    if (YM[_w] !== YM_READBACK[_w]) $write(" <???: 0x%08x> ", YM[_w] ^ YM_READBACK[_w]);
+                    $write("\n");
+                end
+            // same dump for S
+            if (!s_ok)
+                //
+                for (_w=0; _w<TB_NUM_WORDS_N; _w=_w+1) begin
+                    $write("S / S_READBACK [%3d] = 0x%08x / 0x%08x", _w, S[_w], S_READBACK[_w]);
+                    if (S[_w] !== S_READBACK[_w]) $write(" <???: 0x%08x> ", S[_w] ^ S_READBACK[_w]);
+                    $write("\n");
+                end
+            // one-line verdict per quantity
+            $write("XM is ");
+            if (xm_ok) $write("OK.\n");
+            else       $write("WRONG!\n");
+            //
+            $write("YM is ");
+            if (ym_ok) $write("OK.\n");
+            else       $write("WRONG!\n");
+            //
+            $write("S is ");
+            if (s_ok) $write("OK.\n");
+            else       $write("WRONG!\n");                        
+            //
+        end
+        //
+    endtask
+    
+    
+
+    //
+    // _bus_drive()
+    //    
+    task _bus_drive;  // set all four bus stimulus signals in a single assignment
+        input cs;
+        input we;
+        input [11:0] addr;
+        input [31:0] data;
+        {bus_cs, bus_we, bus_addr, bus_data_wr} <= {cs, we, addr, data};  // non-blocking, so the signals update after the current time step's blocking activity
+    endtask
+    
+    
+    //
+    // bus_write()
+    //
+    task bus_write;  // write 'data' to bus address {sel, bank, addr}, holding the strobes for one bus clock period
+        input  [ 1:0] sel;   // becomes bus_addr[11:10]
+        input  [ 2:0] bank;  // becomes bus_addr[9:7]
+        input  [ 6:0] addr;  // becomes bus_addr[6:0]
+        input  [31:0] data;
+        begin
+            _bus_drive(1'b1, 1'b1, {sel, bank, addr}, data);  // cs = 1, we = 1
+            wait_clock_bus_tick;
+            _bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX);  // deselect; address and data are driven to X while idle
+        end
+    endtask
+    
+    
+    //
+    // bus_read()
+    //
+    task bus_read;  // read bus address {sel, bank, addr} into 'data'
+        input  [ 1:0] sel;   // becomes bus_addr[11:10]
+        input  [ 2:0] bank;  // becomes bus_addr[9:7]
+        input  [ 6:0] addr;  // becomes bus_addr[6:0]
+        output [31:0] data;
+        begin
+            _bus_drive(1'b1, 1'b0, {sel, bank, addr}, 32'hXXXXXXXX);  // cs = 1, we = 0; write data is don't-care
+            wait_clock_bus_tick;
+            data = bus_data_rd;  // sampled one bus clock period after the address was driven
+            _bus_drive(1'b0, 1'b0, 12'hXXX, 32'hXXXXXXXX);  // deselect; address and data are driven to X while idle
+        end
+    endtask
+    
+    
+    //
+    // _wait_half_clock_tick()
+    //
+    task _wait_half_clock_tick;  // pure time delay of half a clk period; does not wait on a clock edge
+        #`CLK_PERIOD_HALF_NS;
+    endtask
+    
+    //
+    // wait_clock_tick()
+    //
+    task wait_clock_tick;  // delay for one full clk period, built from two half-period delays
+        begin
+            _wait_half_clock_tick;
+            _wait_half_clock_tick;
+        end
+    endtask
+    
+    
+    //
+    // wait_clock_bus_tick()
+    //
+    task wait_clock_bus_tick;  // pure time delay of one clk_bus period; does not wait on a clock edge
+        #`CLK_BUS_PERIOD_NS;
+    endtask
+
+
+    //
+    // wait_clock_ticks()
+    //    
+    task wait_clock_ticks;  // block the caller for num_ticks full periods of clk
+        input integer num_ticks;  // number of periods to wait; zero or a negative value waits for nothing
+        repeat (num_ticks)  // repeat keeps no loop state in the module-level _n, which tasks called from both initial blocks would otherwise share
+            wait_clock_tick;
+    endtask
+    
+    
+    //
+    // wait_clock_bus_ticks()
+    //    
+    task wait_clock_bus_ticks;  // block the caller for num_ticks full periods of clk_bus
+        input integer num_ticks;  // number of periods to wait; zero or a negative value waits for nothing
+        repeat (num_ticks)  // repeat keeps no loop state in the module-level _n, which tasks called from both initial blocks would otherwise share
+            wait_clock_bus_tick;
+    endtask
+    
+endmodule
diff --git a/bench/tb_mmm_dual_x8.v b/bench/tb_mmm_dual_x8.v
new file mode 100644
index 0000000..7e54d09
--- /dev/null
+++ b/bench/tb_mmm_dual_x8.v
@@ -0,0 +1,940 @@
+`timescale 1ns / 1ps
+
+module tb_mmm_dual_x8;
+
+
+    //
+    // Headers
+    //
+    `include "../rtl/modexpng_parameters.vh"
+    //`include "../rtl_1/modexpng_mmm_fsm_old.vh"
+
+
+    //
+    // Clock: 100 MHz, i.e. 10 ns period, toggled every 5 ns (timescale is 1 ns)
+    //
+    `define CLK_FREQUENCY_MHZ   (100.0)
+    `define CLK_PERIOD_NS       (1000.0 / `CLK_FREQUENCY_MHZ)
+    `define CLK_PERIOD_HALF_NS  (0.5 * `CLK_PERIOD_NS)
+    
+	reg clk = 1'b0;
+
+    always begin
+        #`CLK_PERIOD_HALF_NS clk = 1'b1;    // rising edge
+        #`CLK_PERIOD_HALF_NS clk = 1'b0;    // falling edge
+    end
+    
+    
+    //
+    // Reset: starts at 1; the script sets it to 0 after wait_clock_ticks(10)
+    //
+    reg rst = 1'b1;
+    
+
+    //
+    // Test Vectors (input operands loaded by p_prefill, plus reference arrays)
+    //
+    localparam                  PQ_NUM_WORDS       = 32;                    // number of words in P_T1, P_T2 and P_N
+    localparam [OP_ADDR_W -1:0] PQ_WORD_INDEX_LAST = PQ_NUM_WORDS - 1;      // copied into word_index_last by the script
+    
+    localparam P_LADDER_MODE = 1'b0;    // copied into p_ladder_mode by the script
+    
+    reg [WORD_EXT_W -1:0] P_T1     [0:PQ_NUM_WORDS -1];     // written by p_prefill as the X word of wide bank A and narrow bank A
+    reg [WORD_EXT_W -1:0] P_T2     [0:PQ_NUM_WORDS -1];     // written by p_prefill as the Y word of wide bank A and narrow bank A
+
+    reg [WORD_EXT_W -1:0] P_N      [0:PQ_NUM_WORDS -1];     // written by p_prefill to wide bank N (same value on X and Y)
+    reg [WORD_EXT_W -1:0] P_N_COEFF[0:PQ_NUM_WORDS   ];     // words 0..PQ_NUM_WORDS-1 go to narrow bank COEFF, the last word to narrow bank EXT
+
+    reg [WORD_EXT_W -1:0] P_X_AB   [0:2*PQ_NUM_WORDS -1];   // presumably the expected values checked by p_verify_ab - confirm
+    reg [WORD_EXT_W -1:0] P_Y_AB   [0:2*PQ_NUM_WORDS -1];
+
+    reg [WORD_EXT_W -1:0] P_X_Q    [0:PQ_NUM_WORDS];        // presumably the expected values checked by p_verify_q - confirm
+    reg [WORD_EXT_W -1:0] P_Y_Q    [0:PQ_NUM_WORDS];
+
+    reg [WORD_EXT_W -1:0] P_X_M    [0:2*PQ_NUM_WORDS];      // presumably the expected values checked by p_verify_m - confirm
+    reg [WORD_EXT_W -1:0] P_Y_M    [0:2*PQ_NUM_WORDS];
+
+    reg [WORD_EXT_W -1:0] P_X      [0:PQ_NUM_WORDS -1];     // presumably the expected values checked by p_verify_p - confirm
+    reg [WORD_EXT_W -1:0] P_Y      [0:PQ_NUM_WORDS -1];
+    
+
+    //
+    // Test Vector Components
+    //
+    initial begin
+        //
+        P_T1[ 0] = 18'h191c5; P_T1[ 1] = 18'h1a118; P_T1[ 2] = 18'h16e06; P_T1[ 3] = 18'h0ea68;
+        P_T1[ 4] = 18'h12944; P_T1[ 5] = 18'h0c242; P_T1[ 6] = 18'h2fc64; P_T1[ 7] = 18'h14efc;
+        P_T1[ 8] = 18'h113da; P_T1[ 9] = 18'h16ff7; P_T1[10] = 18'h1ef0c; P_T1[11] = 18'h18580;
+        P_T1[12] = 18'h1a62c; P_T1[13] = 18'h352b7; P_T1[14] = 18'h114f4; P_T1[15] = 18'h1c53e;
+        P_T1[16] = 18'h0c63e; P_T1[17] = 18'h0dd14; P_T1[18] = 18'h2fba8; P_T1[19] = 18'h1b8e4;
+        P_T1[20] = 18'h2d944; P_T1[21] = 18'h10290; P_T1[22] = 18'h1d276; P_T1[23] = 18'h327b0;
+        P_T1[24] = 18'h1c0c4; P_T1[25] = 18'h100a8; P_T1[26] = 18'h2a9ab; P_T1[27] = 18'h0e694;
+        P_T1[28] = 18'h10798; P_T1[29] = 18'h1ae91; P_T1[30] = 18'h38d4c; P_T1[31] = 18'h00808;
+        //
+        P_T2[ 0] = 18'h1193b; P_T2[ 1] = 18'h0de9c; P_T2[ 2] = 18'h0b993; P_T2[ 3] = 18'h0d2cd;
+        P_T2[ 4] = 18'h106ad; P_T2[ 5] = 18'h076da; P_T2[ 6] = 18'h10cab; P_T2[ 7] = 18'h15cd5;
+        P_T2[ 8] = 18'h15425; P_T2[ 9] = 18'h16287; P_T2[10] = 18'h0fd64; P_T2[11] = 18'h06ee0;
+        P_T2[12] = 18'h1b0c9; P_T2[13] = 18'h01a5e; P_T2[14] = 18'h1855c; P_T2[15] = 18'h17bf9;
+        P_T2[16] = 18'h1c83c; P_T2[17] = 18'h158ed; P_T2[18] = 18'h086df; P_T2[19] = 18'h16676;
+        P_T2[20] = 18'h0a0f8; P_T2[21] = 18'h14545; P_T2[22] = 18'h09641; P_T2[23] = 18'h16863;
+        P_T2[24] = 18'h17e20; P_T2[25] = 18'h0d457; P_T2[26] = 18'h05a9b; P_T2[27] = 18'h1a4cf;
+        P_T2[28] = 18'h1582a; P_T2[29] = 18'h1686c; P_T2[30] = 18'h1394e; P_T2[31] = 18'h0bdbc;
+        //
+        P_N[ 0] = 18'h00f97; P_N[ 1] = 18'h018bb; P_N[ 2] = 18'h08a44; P_N[ 3] = 18'h00858;
+        P_N[ 4] = 18'h06647; P_N[ 5] = 18'h0042c; P_N[ 6] = 18'h0fa09; P_N[ 7] = 18'h0c8d3;
+        P_N[ 8] = 18'h0bbc7; P_N[ 9] = 18'h0e2dd; P_N[10] = 18'h017fd; P_N[11] = 18'h0ef4a;
+        P_N[12] = 18'h002ef; P_N[13] = 18'h090c1; P_N[14] = 18'h032db; P_N[15] = 18'h028b1;
+        P_N[16] = 18'h05f0a; P_N[17] = 18'h0ebfd; P_N[18] = 18'h017ca; P_N[19] = 18'h09587;
+        P_N[20] = 18'h0d266; P_N[21] = 18'h0563c; P_N[22] = 18'h041af; P_N[23] = 18'h0433f;
+        P_N[24] = 18'h08e83; P_N[25] = 18'h0bc19; P_N[26] = 18'h000b2; P_N[27] = 18'h05b53;
+        P_N[28] = 18'h00e5d; P_N[29] = 18'h09bc5; P_N[30] = 18'h0a822; P_N[31] = 18'h0efff;
+        //
+        P_N_COEFF[ 0] = 18'h09fd9; P_N_COEFF[ 1] = 18'h0b367; P_N_COEFF[ 2] = 18'h0e467; P_N_COEFF[ 3] = 18'h0de24;
+        P_N_COEFF[ 4] = 18'h02022; P_N_COEFF[ 5] = 18'h0f0e8; P_N_COEFF[ 6] = 18'h02919; P_N_COEFF[ 7] = 18'h09901;
+        P_N_COEFF[ 8] = 18'h0da43; P_N_COEFF[ 9] = 18'h0023b; P_N_COEFF[10] = 18'h0ebf8; P_N_COEFF[11] = 18'h0f04e;
+        P_N_COEFF[12] = 18'h0942f; P_N_COEFF[13] = 18'h029e9; P_N_COEFF[14] = 18'h07cb0; P_N_COEFF[15] = 18'h08c25;
+        P_N_COEFF[16] = 18'h04e60; P_N_COEFF[17] = 18'h05cdc; P_N_COEFF[18] = 18'h0dff7; P_N_COEFF[19] = 18'h0279b;
+        P_N_COEFF[20] = 18'h0610d; P_N_COEFF[21] = 18'h0f04a; P_N_COEFF[22] = 18'h001dc; P_N_COEFF[23] = 18'h03429;
+        P_N_COEFF[24] = 18'h0f78c; P_N_COEFF[25] = 18'h0c3e2; P_N_COEFF[26] = 18'h00ed8; P_N_COEFF[27] = 18'h039c0;
+        P_N_COEFF[28] = 18'h02ac2; P_N_COEFF[29] = 18'h0f703; P_N_COEFF[30] = 18'h0c54e; P_N_COEFF[31] = 18'h022d9;
+        P_N_COEFF[32] = 18'h0f994;
+        //
+        P_X_AB[ 0] = 18'h0c199; P_X_AB[ 1] = 18'h0957a; P_X_AB[ 2] = 18'h070ad; P_X_AB[ 3] = 18'h0e5a6;
+        P_X_AB[ 4] = 18'h0fec9; P_X_AB[ 5] = 18'h00b73; P_X_AB[ 6] = 18'h09c72; P_X_AB[ 7] = 18'h0cdf0;
+        P_X_AB[ 8] = 18'h08755; P_X_AB[ 9] = 18'h07560; P_X_AB[10] = 18'h084b1; P_X_AB[11] = 18'h0ad3f;
+        P_X_AB[12] = 18'h074fe; P_X_AB[13] = 18'h04d74; P_X_AB[14] = 18'h00e16; P_X_AB[15] = 18'h0d3b3;
+        P_X_AB[16] = 18'h0d418; P_X_AB[17] = 18'h02f12; P_X_AB[18] = 18'h0c301; P_X_AB[19] = 18'h0be2b;
+        P_X_AB[20] = 18'h08222; P_X_AB[21] = 18'h0056c; P_X_AB[22] = 18'h01c7c; P_X_AB[23] = 18'h0bc95;
+        P_X_AB[24] = 18'h03427; P_X_AB[25] = 18'h0c65a; P_X_AB[26] = 18'h089ac; P_X_AB[27] = 18'h02117;
+        P_X_AB[28] = 18'h0ff7d; P_X_AB[29] = 18'h01cde; P_X_AB[30] = 18'h02709; P_X_AB[31] = 18'h01c56;
+        P_X_AB[32] = 18'h0f35a; P_X_AB[33] = 18'h08ce6; P_X_AB[34] = 18'h0a8e5; P_X_AB[35] = 18'h0d6d4;
+        P_X_AB[36] = 18'h06868; P_X_AB[37] = 18'h09105; P_X_AB[38] = 18'h0219e; P_X_AB[39] = 18'h0bc40;
+        P_X_AB[40] = 18'h00e0a; P_X_AB[41] = 18'h07783; P_X_AB[42] = 18'h0187a; P_X_AB[43] = 18'h0b922;
+        P_X_AB[44] = 18'h02609; P_X_AB[45] = 18'h0c64b; P_X_AB[46] = 18'h06b4b; P_X_AB[47] = 18'h04b79;
+        P_X_AB[48] = 18'h0fed6; P_X_AB[49] = 18'h03eac; P_X_AB[50] = 18'h04cac; P_X_AB[51] = 18'h0d47d;
+        P_X_AB[52] = 18'h045fd; P_X_AB[53] = 18'h04fa8; P_X_AB[54] = 18'h0597c; P_X_AB[55] = 18'h0a10d;
+        P_X_AB[56] = 18'h0bf44; P_X_AB[57] = 18'h08671; P_X_AB[58] = 18'h0112a; P_X_AB[59] = 18'h08ccf;
+        P_X_AB[60] = 18'h0cae5; P_X_AB[61] = 18'h04d94; P_X_AB[62] = 18'h0b95a; P_X_AB[63] = 18'h00040;
+        //
+        P_X_Q[ 0] = 18'h021b1; P_X_Q[ 1] = 18'h0d2db; P_X_Q[ 2] = 18'h0754b; P_X_Q[ 3] = 18'h01fc1;
+        P_X_Q[ 4] = 18'h063f7; P_X_Q[ 5] = 18'h086e5; P_X_Q[ 6] = 18'h0bcea; P_X_Q[ 7] = 18'h02260;
+        P_X_Q[ 8] = 18'h0c54c; P_X_Q[ 9] = 18'h0e298; P_X_Q[10] = 18'h05d07; P_X_Q[11] = 18'h0f978;
+        P_X_Q[12] = 18'h0e742; P_X_Q[13] = 18'h0a3f0; P_X_Q[14] = 18'h0b31e; P_X_Q[15] = 18'h041b7;
+        P_X_Q[16] = 18'h06ed9; P_X_Q[17] = 18'h03ac5; P_X_Q[18] = 18'h0f8eb; P_X_Q[19] = 18'h0c619;
+        P_X_Q[20] = 18'h067e9; P_X_Q[21] = 18'h00350; P_X_Q[22] = 18'h00376; P_X_Q[23] = 18'h02ebf;
+        P_X_Q[24] = 18'h0b125; P_X_Q[25] = 18'h05f7d; P_X_Q[26] = 18'h0f121; P_X_Q[27] = 18'h07ba4;
+        P_X_Q[28] = 18'h03050; P_X_Q[29] = 18'h0642e; P_X_Q[30] = 18'h0c2fc; P_X_Q[31] = 18'h0dfcf;
+        P_X_Q[32] = 18'h03f9e;
+        //
+        P_X_M[ 0] = 18'h03e67; P_X_M[ 1] = 18'h06a85; P_X_M[ 2] = 18'h08f52; P_X_M[ 3] = 18'h01a59;
+        P_X_M[ 4] = 18'h00136; P_X_M[ 5] = 18'h0f48c; P_X_M[ 6] = 18'h0638d; P_X_M[ 7] = 18'h0320f;
+        P_X_M[ 8] = 18'h078aa; P_X_M[ 9] = 18'h08a9f; P_X_M[10] = 18'h07b4e; P_X_M[11] = 18'h052c0;
+        P_X_M[12] = 18'h08b01; P_X_M[13] = 18'h0b28b; P_X_M[14] = 18'h0f1e9; P_X_M[15] = 18'h02c4c;
+        P_X_M[16] = 18'h02be7; P_X_M[17] = 18'h0d0ed; P_X_M[18] = 18'h03cfe; P_X_M[19] = 18'h041d4;
+        P_X_M[20] = 18'h07ddd; P_X_M[21] = 18'h0fa93; P_X_M[22] = 18'h0e383; P_X_M[23] = 18'h0436a;
+        P_X_M[24] = 18'h0cbd8; P_X_M[25] = 18'h039a5; P_X_M[26] = 18'h07653; P_X_M[27] = 18'h0dee8;
+        P_X_M[28] = 18'h00082; P_X_M[29] = 18'h0e321; P_X_M[30] = 18'h0d8f6; P_X_M[31] = 18'h0e3a9;
+        P_X_M[32] = 18'h00ca5; P_X_M[33] = 18'h035ed; P_X_M[34] = 18'h02b8f; P_X_M[35] = 18'h063bd;
+        P_X_M[36] = 18'h0ec9f; P_X_M[37] = 18'h0b8bb; P_X_M[38] = 18'h00389; P_X_M[39] = 18'h0ca27;
+        P_X_M[40] = 18'h0bea7; P_X_M[41] = 18'h0df1e; P_X_M[42] = 18'h0d685; P_X_M[43] = 18'h0cc1b;
+        P_X_M[44] = 18'h036c4; P_X_M[45] = 18'h01ce9; P_X_M[46] = 18'h0c43b; P_X_M[47] = 18'h05f58;
+        P_X_M[48] = 18'h02c77; P_X_M[49] = 18'h03a12; P_X_M[50] = 18'h0eea8; P_X_M[51] = 18'h0ac31;
+        P_X_M[52] = 18'h05838; P_X_M[53] = 18'h093ac; P_X_M[54] = 18'h0fd54; P_X_M[55] = 18'h06e13;
+        P_X_M[56] = 18'h002e2; P_X_M[57] = 18'h06af4; P_X_M[58] = 18'h0ea18; P_X_M[59] = 18'h083b3;
+        P_X_M[60] = 18'h059f7; P_X_M[61] = 18'h016d3; P_X_M[62] = 18'h0c3ad; P_X_M[63] = 18'h0dbfc;
+        P_X_M[64] = 18'h03ba4;
+        //
+        P_Y_AB[ 0] = 18'h0d567; P_Y_AB[ 1] = 18'h0dbf1; P_Y_AB[ 2] = 18'h024b3; P_Y_AB[ 3] = 18'h0bb34;
+        P_Y_AB[ 4] = 18'h03ad4; P_Y_AB[ 5] = 18'h08997; P_Y_AB[ 6] = 18'h0d369; P_Y_AB[ 7] = 18'h0ebbc;
+        P_Y_AB[ 8] = 18'h09502; P_Y_AB[ 9] = 18'h01b76; P_Y_AB[10] = 18'h0a28f; P_Y_AB[11] = 18'h0c577;
+        P_Y_AB[12] = 18'h05f2f; P_Y_AB[13] = 18'h08c45; P_Y_AB[14] = 18'h0dbb8; P_Y_AB[15] = 18'h036bf;
+        P_Y_AB[16] = 18'h05086; P_Y_AB[17] = 18'h0437e; P_Y_AB[18] = 18'h08e3d; P_Y_AB[19] = 18'h0ec97;
+        P_Y_AB[20] = 18'h0195c; P_Y_AB[21] = 18'h02e75; P_Y_AB[22] = 18'h0d94f; P_Y_AB[23] = 18'h0ce1e;
+        P_Y_AB[24] = 18'h0fd8d; P_Y_AB[25] = 18'h0ec03; P_Y_AB[26] = 18'h058a0; P_Y_AB[27] = 18'h05fc4;
+        P_Y_AB[28] = 18'h0f83f; P_Y_AB[29] = 18'h09a60; P_Y_AB[30] = 18'h0f047; P_Y_AB[31] = 18'h05ee6;
+        P_Y_AB[32] = 18'h02a39; P_Y_AB[33] = 18'h08b08; P_Y_AB[34] = 18'h0f66d; P_Y_AB[35] = 18'h0b2fb;
+        P_Y_AB[36] = 18'h02f3f; P_Y_AB[37] = 18'h092b2; P_Y_AB[38] = 18'h09b4e; P_Y_AB[39] = 18'h0ce4f;
+        P_Y_AB[40] = 18'h04428; P_Y_AB[41] = 18'h00483; P_Y_AB[42] = 18'h0f595; P_Y_AB[43] = 18'h031cb;
+        P_Y_AB[44] = 18'h0d292; P_Y_AB[45] = 18'h0ded9; P_Y_AB[46] = 18'h0ef15; P_Y_AB[47] = 18'h0da51;
+        P_Y_AB[48] = 18'h0ed93; P_Y_AB[49] = 18'h03969; P_Y_AB[50] = 18'h05efc; P_Y_AB[51] = 18'h004e7;
+        P_Y_AB[52] = 18'h09434; P_Y_AB[53] = 18'h02b91; P_Y_AB[54] = 18'h0d3db; P_Y_AB[55] = 18'h0c4cf;
+        P_Y_AB[56] = 18'h09d34; P_Y_AB[57] = 18'h0cea8; P_Y_AB[58] = 18'h0de0d; P_Y_AB[59] = 18'h0f190;
+        P_Y_AB[60] = 18'h0b95a; P_Y_AB[61] = 18'h0bd8a; P_Y_AB[62] = 18'h079a6; P_Y_AB[63] = 18'h005f6;
+        //
+        P_Y_Q[ 0] = 18'h0dd4f; P_Y_Q[ 1] = 18'h084f9; P_Y_Q[ 2] = 18'h00105; P_Y_Q[ 3] = 18'h0cdff;
+        P_Y_Q[ 4] = 18'h0973c; P_Y_Q[ 5] = 18'h0440c; P_Y_Q[ 6] = 18'h0450b; P_Y_Q[ 7] = 18'h09e70;
+        P_Y_Q[ 8] = 18'h0d686; P_Y_Q[ 9] = 18'h0e21a; P_Y_Q[10] = 18'h02d26; P_Y_Q[11] = 18'h0b117;
+        P_Y_Q[12] = 18'h08556; P_Y_Q[13] = 18'h002ee; P_Y_Q[14] = 18'h0083d; P_Y_Q[15] = 18'h079fa;
+        P_Y_Q[16] = 18'h0f25d; P_Y_Q[17] = 18'h0cd26; P_Y_Q[18] = 18'h0bb7e; P_Y_Q[19] = 18'h07676;
+        P_Y_Q[20] = 18'h0f4bb; P_Y_Q[21] = 18'h02b87; P_Y_Q[22] = 18'h02909; P_Y_Q[23] = 18'h05e2d;
+        P_Y_Q[24] = 18'h09c80; P_Y_Q[25] = 18'h098f3; P_Y_Q[26] = 18'h0f08b; P_Y_Q[27] = 18'h0255b;
+        P_Y_Q[28] = 18'h0fbe5; P_Y_Q[29] = 18'h0ae8e; P_Y_Q[30] = 18'h0ba22; P_Y_Q[31] = 18'h0f2ea;
+        P_Y_Q[32] = 18'h0530e;
+        //
+        P_Y_M[ 0] = 18'h02a99; P_Y_M[ 1] = 18'h0240e; P_Y_M[ 2] = 18'h0db4c; P_Y_M[ 3] = 18'h044cb;
+        P_Y_M[ 4] = 18'h0c52b; P_Y_M[ 5] = 18'h07668; P_Y_M[ 6] = 18'h02c96; P_Y_M[ 7] = 18'h01443;
+        P_Y_M[ 8] = 18'h06afd; P_Y_M[ 9] = 18'h0e489; P_Y_M[10] = 18'h05d70; P_Y_M[11] = 18'h03a88;
+        P_Y_M[12] = 18'h0a0d0; P_Y_M[13] = 18'h073ba; P_Y_M[14] = 18'h02447; P_Y_M[15] = 18'h0c940;
+        P_Y_M[16] = 18'h0af79; P_Y_M[17] = 18'h0bc81; P_Y_M[18] = 18'h071c2; P_Y_M[19] = 18'h01368;
+        P_Y_M[20] = 18'h0e6a3; P_Y_M[21] = 18'h0d18a; P_Y_M[22] = 18'h026b0; P_Y_M[23] = 18'h031e1;
+        P_Y_M[24] = 18'h00272; P_Y_M[25] = 18'h013fc; P_Y_M[26] = 18'h0a75f; P_Y_M[27] = 18'h0a03b;
+        P_Y_M[28] = 18'h007c0; P_Y_M[29] = 18'h0659f; P_Y_M[30] = 18'h00fb8; P_Y_M[31] = 18'h0a119;
+        P_Y_M[32] = 18'h0d5c6; P_Y_M[33] = 18'h09926; P_Y_M[34] = 18'h0d69f; P_Y_M[35] = 18'h085cd;
+        P_Y_M[36] = 18'h0591a; P_Y_M[37] = 18'h0e6dd; P_Y_M[38] = 18'h0981f; P_Y_M[39] = 18'h087b4;
+        P_Y_M[40] = 18'h015b3; P_Y_M[41] = 18'h09421; P_Y_M[42] = 18'h0ea9d; P_Y_M[43] = 18'h013af;
+        P_Y_M[44] = 18'h096ac; P_Y_M[45] = 18'h06f86; P_Y_M[46] = 18'h0cab7; P_Y_M[47] = 18'h06ab1;
+        P_Y_M[48] = 18'h0903e; P_Y_M[49] = 18'h06203; P_Y_M[50] = 18'h0751a; P_Y_M[51] = 18'h02fce;
+        P_Y_M[52] = 18'h0d0c9; P_Y_M[53] = 18'h00522; P_Y_M[54] = 18'h096f8; P_Y_M[55] = 18'h03aee;
+        P_Y_M[56] = 18'h0a034; P_Y_M[57] = 18'h0e52e; P_Y_M[58] = 18'h07b7b; P_Y_M[59] = 18'h06bad;
+        P_Y_M[60] = 18'h0016d; P_Y_M[61] = 18'h01315; P_Y_M[62] = 18'h02586; P_Y_M[63] = 18'h0e73a;
+        P_Y_M[64] = 18'h04ddd;
+        //
+        P_X[ 0] = 18'h0c2d4; P_X[ 1] = 18'h0d474; P_X[ 2] = 18'h13a91; P_X[ 3] = 18'h15507;
+        P_X[ 4] = 18'h149c0; P_X[ 5] = 18'h02527; P_X[ 6] = 18'h18667; P_X[ 7] = 18'h0ccb1;
+        P_X[ 8] = 18'h156a1; P_X[ 9] = 18'h0eeff; P_X[10] = 18'h1853d; P_X[11] = 18'h05ccd;
+        P_X[12] = 18'h0e334; P_X[13] = 18'h12f86; P_X[14] = 18'h0aad1; P_X[15] = 18'h12b4d;
+        P_X[16] = 18'h078be; P_X[17] = 18'h13b54; P_X[18] = 18'h180ae; P_X[19] = 18'h09e35;
+        P_X[20] = 18'h0e354; P_X[21] = 18'h156d0; P_X[22] = 18'h10f20; P_X[23] = 18'h0c226;
+        P_X[24] = 18'h0f165; P_X[25] = 18'h0fb42; P_X[26] = 18'h11082; P_X[27] = 18'h124dc;
+        P_X[28] = 18'h06467; P_X[29] = 18'h17d07; P_X[30] = 18'h0dc3c; P_X[31] = 18'h03ba4;
+        //
+        P_Y[ 0] = 18'h1242f; P_Y[ 1] = 18'h1cd0c; P_Y[ 2] = 18'h138c8; P_Y[ 3] = 18'h08859;
+        P_Y[ 4] = 18'h1798f; P_Y[ 5] = 18'h1336d; P_Y[ 6] = 18'h15603; P_Y[ 7] = 18'h059db;
+        P_Y[ 8] = 18'h098a4; P_Y[ 9] = 18'h1e032; P_Y[10] = 18'h0457a; P_Y[11] = 18'h1693e;
+        P_Y[12] = 18'h14e5f; P_Y[13] = 18'h1b9cc; P_Y[14] = 18'h14502; P_Y[15] = 18'h17dd1;
+        P_Y[16] = 18'h09b6c; P_Y[17] = 18'h0d416; P_Y[18] = 18'h034b5; P_Y[19] = 18'h164fd;
+        P_Y[20] = 18'h030b3; P_Y[21] = 18'h16ad3; P_Y[22] = 18'h0ffbd; P_Y[23] = 18'h13d68;
+        P_Y[24] = 18'h1b3d6; P_Y[25] = 18'h15988; P_Y[26] = 18'h15d3d; P_Y[27] = 18'h0bac7;
+        P_Y[28] = 18'h0d09f; P_Y[29] = 18'h09f2c; P_Y[30] = 18'h0ed30; P_Y[31] = 18'h04ddd;
+        //
+    end
+    
+    
+    //
+    // Enable, Ready
+    //
+    reg  ena = 1'b0;    // set to 1 for one clock tick by the script
+    wire rdy;           // connected to uut.rdy; polled by the script after the ena pulse
+    
+    
+    //
+    // Settings
+    //
+    reg                  p_ladder_mode;             // connected to uut.ladder_mode
+    reg [OP_ADDR_W -1:0] word_index_last;           // connected to uut and reductor word_index_last
+    reg [OP_ADDR_W -1:0] word_index_last_minus1;    // connected to uut.word_index_last_minus1
+
+
+    //
+    // Script: release reset, prefill storage, pulse ena, wait for rdy, then run the verify tasks
+    //
+    integer i;      // loop index used by p_prefill
+    initial begin
+
+        wait_clock_ticks(10);
+        rst = 1'b0;                 // rst was initialised to 1
+        wait_clock_ticks(10);
+        
+        word_index_last = PQ_WORD_INDEX_LAST;
+        word_index_last_minus1 = word_index_last - 1'b1;
+        
+        p_prefill;                  // writes the test vectors through the ext_* ports
+        
+        p_ladder_mode = P_LADDER_MODE;
+        
+        wait_clock_ticks(10);
+        
+        ena = 1'b1;                 // start pulse, one clock tick long
+        wait_clock_ticks(1);
+        ena = 1'b0;
+    
+        while (!rdy)                // NOTE(review): no timeout - this loop never exits if rdy stays low
+            wait_clock_ticks(1);
+  
+        wait_clock_ticks(1000);     // additional delay before the checks run
+        p_verify_ab;                // the p_verify_* tasks are defined outside the visible part of the file
+        p_verify_q;
+        p_verify_m;
+        p_verify_p;
+    end
+
+
+    //
+    // Storage Interfaces (wr_* and rd_* are nets between instances, ext_* are driven by this testbench)
+    //
+    wire                    wr_wide_xy_ena;         // wr_wide_*: connected to storage_manager and storage_block
+    wire [BANK_ADDR_W -1:0] wr_wide_xy_bank;
+    wire [  OP_ADDR_W -1:0] wr_wide_xy_addr;
+    wire [ WORD_EXT_W -1:0] wr_wide_x_din;
+    wire [ WORD_EXT_W -1:0] wr_wide_y_din;
+
+    wire                    wr_narrow_xy_ena;       // wr_narrow_*: connected to storage_manager and storage_block
+    wire [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
+    wire [  OP_ADDR_W -1:0] wr_narrow_xy_addr;
+    wire [ WORD_EXT_W -1:0] wr_narrow_x_din;
+    wire [ WORD_EXT_W -1:0] wr_narrow_y_din;
+
+    wire                                     rd_wide_xy_ena;        // rd_wide_*: connected to uut and storage_block
+    wire                                     rd_wide_xy_ena_aux;    // *_aux bank/addr/dout nets are also connected to the reductor
+    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank;
+    wire [                 BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
+    wire [NUM_MULTS_HALF * OP_ADDR_W   -1:0] rd_wide_xy_addr;       // NUM_MULTS_HALF addresses packed side by side
+    wire [                 OP_ADDR_W   -1:0] rd_wide_xy_addr_aux;
+    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_x_dout;        // NUM_MULTS_HALF words packed side by side
+    wire [NUM_MULTS_HALF * WORD_EXT_W  -1:0] rd_wide_y_dout;
+    wire [                 WORD_EXT_W  -1:0] rd_wide_x_dout_aux;
+    wire [                 WORD_EXT_W  -1:0] rd_wide_y_dout_aux;
+    
+    wire                                     rd_narrow_xy_ena;      // rd_narrow_*: connected to uut and storage_block
+    wire [                 BANK_ADDR_W -1:0] rd_narrow_xy_bank;
+    wire [                 OP_ADDR_W   -1:0] rd_narrow_xy_addr;
+    wire [                 WORD_EXT_W  -1:0] rd_narrow_x_dout;
+    wire [                 WORD_EXT_W  -1:0] rd_narrow_y_dout;
+    
+    reg                    ext_wide_xy_ena = 1'b0;      // ext_wide_*: assigned by p_prefill, connected to storage_manager
+    reg [BANK_ADDR_W -1:0] ext_wide_xy_bank;
+    reg [  OP_ADDR_W -1:0] ext_wide_xy_addr;
+    reg [ WORD_EXT_W -1:0] ext_wide_x_din;
+    reg [ WORD_EXT_W -1:0] ext_wide_y_din;
+
+    reg                    ext_narrow_xy_ena = 1'b0;    // ext_narrow_*: assigned by p_prefill, connected to storage_manager
+    reg [BANK_ADDR_W -1:0] ext_narrow_xy_bank;
+    reg [  OP_ADDR_W -1:0] ext_narrow_xy_addr;
+    reg [ WORD_EXT_W -1:0] ext_narrow_x_din;
+    reg [ WORD_EXT_W -1:0] ext_narrow_y_din;
+
+    //
+    // Recombinator Interface (nets attached to the rcmb_* ports of the uut)
+    //
+    wire [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;      // rcmb_wide_*: connected to uut and storage_manager
+    wire [  OP_ADDR_W -1:0] rcmb_wide_xy_addr;
+    wire [ WORD_EXT_W -1:0] rcmb_wide_x_dout;
+    wire [ WORD_EXT_W -1:0] rcmb_wide_y_dout;
+    wire                    rcmb_wide_xy_valid;     // wired to storage_manager.rcmb_wide_xy_ena
+
+    wire [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;    // rcmb_narrow_*: connected to uut and storage_manager
+    wire [  OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
+    wire [ WORD_EXT_W -1:0] rcmb_narrow_x_dout;
+    wire [ WORD_EXT_W -1:0] rcmb_narrow_y_dout;
+    wire                    rcmb_narrow_xy_valid;   // wired to storage_manager.rcmb_narrow_xy_ena
+
+    wire [BANK_ADDR_W -1:0] rcmb_final_xy_bank;     // rcmb_final_*: uut.rcmb_xy_* connected to reductor.rcmb_final_*
+    wire [  OP_ADDR_W -1:0] rcmb_final_xy_addr;
+    wire [ WORD_EXT_W -1:0] rcmb_final_x_dout;
+    wire [ WORD_EXT_W -1:0] rcmb_final_y_dout;
+    wire                    rcmb_final_xy_valid;
+
+    //
+    // Reductor Interface (among the instantiations in this file section, only the reductor connects to these nets)
+    //
+    wire [BANK_ADDR_W -1:0] rdct_wide_xy_bank;
+    wire [  OP_ADDR_W -1:0] rdct_wide_xy_addr;
+    wire [ WORD_EXT_W -1:0] rdct_wide_x_dout;
+    wire [ WORD_EXT_W -1:0] rdct_wide_y_dout;
+    wire                    rdct_wide_xy_valid;
+
+    wire [BANK_ADDR_W -1:0] rdct_narrow_xy_bank;
+    wire [  OP_ADDR_W -1:0] rdct_narrow_xy_addr;
+    wire [ WORD_EXT_W -1:0] rdct_narrow_x_dout;
+    wire [ WORD_EXT_W -1:0] rdct_narrow_y_dout;
+    wire                    rdct_narrow_xy_valid;
+
+    //
+    // Reductor Control/Status
+    //
+    wire rdct_ena;      // uut.rdct_ena connected to reductor.ena
+    wire rdct_rdy;      // uut.rdct_rdy connected to reductor.rdy
+    
+    //
+    // UUT (modexpng_mmm_dual instance under test)
+    //
+    modexpng_mmm_dual uut
+    (
+        .clk                        (clk),
+        .rst                        (rst),
+        
+        .ena                        (ena),
+        .rdy                        (rdy),
+        
+        .ladder_mode                (p_ladder_mode),
+        .word_index_last            (word_index_last),
+        .word_index_last_minus1     (word_index_last_minus1),
+        .force_unity_b              (1'b0),             // tied to 0 in this testbench
+        .only_reduce                (1'b0),             // tied to 0 in this testbench
+        
+        .sel_wide_in                (BANK_WIDE_A),      // bank that p_prefill fills with P_T1/P_T2
+        .sel_narrow_in              (BANK_NARROW_A),    // bank that p_prefill fills with P_T1/P_T2
+        
+        .rd_wide_xy_ena             (rd_wide_xy_ena),
+        .rd_wide_xy_ena_aux         (rd_wide_xy_ena_aux),
+        .rd_wide_xy_bank            (rd_wide_xy_bank),
+        .rd_wide_xy_bank_aux        (rd_wide_xy_bank_aux),
+        .rd_wide_xy_addr            (rd_wide_xy_addr),
+        .rd_wide_xy_addr_aux        (rd_wide_xy_addr_aux),
+        .rd_wide_x_dout             (rd_wide_x_dout),
+        .rd_wide_y_dout             (rd_wide_y_dout),
+        .rd_wide_x_dout_aux         (rd_wide_x_dout_aux),
+        .rd_wide_y_dout_aux         (rd_wide_y_dout_aux),
+
+        .rd_narrow_xy_ena           (rd_narrow_xy_ena),
+        .rd_narrow_xy_bank          (rd_narrow_xy_bank),
+        .rd_narrow_xy_addr          (rd_narrow_xy_addr),
+        .rd_narrow_x_dout           (rd_narrow_x_dout),
+        .rd_narrow_y_dout           (rd_narrow_y_dout),
+        
+        .rcmb_wide_xy_bank          (rcmb_wide_xy_bank),
+        .rcmb_wide_xy_addr          (rcmb_wide_xy_addr),
+        .rcmb_wide_x_dout           (rcmb_wide_x_dout),
+        .rcmb_wide_y_dout           (rcmb_wide_y_dout),
+        .rcmb_wide_xy_valid         (rcmb_wide_xy_valid),
+
+        .rcmb_narrow_xy_bank        (rcmb_narrow_xy_bank),
+        .rcmb_narrow_xy_addr        (rcmb_narrow_xy_addr),
+        .rcmb_narrow_x_dout         (rcmb_narrow_x_dout),
+        .rcmb_narrow_y_dout         (rcmb_narrow_y_dout),
+        .rcmb_narrow_xy_valid       (rcmb_narrow_xy_valid),
+        
+        .rcmb_xy_bank               (rcmb_final_xy_bank),   // the rcmb_final_* nets also connect to the reductor
+        .rcmb_xy_addr               (rcmb_final_xy_addr),
+        .rcmb_x_dout                (rcmb_final_x_dout),
+        .rcmb_y_dout                (rcmb_final_y_dout),
+        .rcmb_xy_valid              (rcmb_final_xy_valid),
+        
+        .rdct_ena                   (rdct_ena),             // connected to reductor.ena
+        .rdct_rdy                   (rdct_rdy)              // connected to reductor.rdy
+    );
+    
+    
+    //
+    // Reductor (modexpng_reductor instance; ena/rdy are wired to the uut rdct_ena/rdct_rdy ports)
+    //    
+    modexpng_reductor reductor
+    (
+        .clk                    (clk),
+        .rst                    (rst),
+        
+        .ena                    (rdct_ena),
+        .rdy                    (rdct_rdy),
+        
+        .word_index_last        (word_index_last),
+        
+        .sel_wide_out           (BANK_WIDE_B),      // constant bank selection in this testbench
+        .sel_narrow_out         (BANK_NARROW_B),    // constant bank selection in this testbench
+
+        .rd_wide_xy_addr_aux    (rd_wide_xy_addr_aux),  // the aux read nets are shared with uut and storage_block
+        .rd_wide_xy_bank_aux    (rd_wide_xy_bank_aux),
+        .rd_wide_x_dout_aux     (rd_wide_x_dout_aux),
+        .rd_wide_y_dout_aux     (rd_wide_y_dout_aux),
+        
+        .rcmb_final_xy_bank     (rcmb_final_xy_bank),   // the rcmb_final_* nets come from the uut rcmb_xy_* ports
+        .rcmb_final_xy_addr     (rcmb_final_xy_addr),
+        .rcmb_final_x_dout      (rcmb_final_x_dout),
+        .rcmb_final_y_dout      (rcmb_final_y_dout),
+        .rcmb_final_xy_valid    (rcmb_final_xy_valid),
+        
+        .rdct_wide_xy_bank      (rdct_wide_xy_bank),    // NOTE(review): rdct_* nets are not connected to storage_manager in this testbench - confirm intended
+        .rdct_wide_xy_addr      (rdct_wide_xy_addr),
+        .rdct_wide_x_dout       (rdct_wide_x_dout),
+        .rdct_wide_y_dout       (rdct_wide_y_dout),
+        .rdct_wide_xy_valid     (rdct_wide_xy_valid),
+        
+        .rdct_narrow_xy_bank    (rdct_narrow_xy_bank),
+        .rdct_narrow_xy_addr    (rdct_narrow_xy_addr),
+        .rdct_narrow_x_dout     (rdct_narrow_x_dout),
+        .rdct_narrow_y_dout     (rdct_narrow_y_dout),
+        .rdct_narrow_xy_valid   (rdct_narrow_xy_valid)
+    );
+
+    //
+    // Storage Block (wr_* nets are shared with storage_manager, rd_* nets with the uut)
+    //
+    modexpng_storage_block storage_block
+    (
+        .clk                    (clk),
+        .rst                    (rst),
+
+        .wr_wide_xy_ena         (wr_wide_xy_ena),
+        .wr_wide_xy_bank        (wr_wide_xy_bank),
+        .wr_wide_xy_addr        (wr_wide_xy_addr),
+        .wr_wide_x_din          (wr_wide_x_din),
+        .wr_wide_y_din          (wr_wide_y_din),
+
+        .wr_narrow_xy_ena       (wr_narrow_xy_ena),
+        .wr_narrow_xy_bank      (wr_narrow_xy_bank),
+        .wr_narrow_xy_addr      (wr_narrow_xy_addr),
+        .wr_narrow_x_din        (wr_narrow_x_din),
+        .wr_narrow_y_din        (wr_narrow_y_din),
+
+        .rd_wide_xy_ena         (rd_wide_xy_ena),
+        .rd_wide_xy_ena_aux     (rd_wide_xy_ena_aux),
+        .rd_wide_xy_bank        (rd_wide_xy_bank),
+        .rd_wide_xy_bank_aux    (rd_wide_xy_bank_aux),      // aux bank/addr/dout nets also reach the reductor
+        .rd_wide_xy_addr        (rd_wide_xy_addr),
+        .rd_wide_xy_addr_aux    (rd_wide_xy_addr_aux),
+        .rd_wide_x_dout         (rd_wide_x_dout),
+        .rd_wide_y_dout         (rd_wide_y_dout),
+        .rd_wide_x_dout_aux     (rd_wide_x_dout_aux),
+        .rd_wide_y_dout_aux     (rd_wide_y_dout_aux),
+
+        .rd_narrow_xy_ena       (rd_narrow_xy_ena),
+        .rd_narrow_xy_bank      (rd_narrow_xy_bank),
+        .rd_narrow_xy_addr      (rd_narrow_xy_addr),
+        .rd_narrow_x_dout       (rd_narrow_x_dout),
+        .rd_narrow_y_dout       (rd_narrow_y_dout)
+    );
+
+    modexpng_storage_manager storage_manager    // connects ext_* (testbench) and rcmb_wide/narrow_* (uut) with the wr_* nets of storage_block
+    (
+        .clk                    (clk),
+        .rst                    (rst),
+        
+        .wr_wide_xy_ena         (wr_wide_xy_ena),
+        .wr_wide_xy_bank        (wr_wide_xy_bank),
+        .wr_wide_xy_addr        (wr_wide_xy_addr),
+        .wr_wide_x_din          (wr_wide_x_din),
+        .wr_wide_y_din          (wr_wide_y_din),
+    
+        .wr_narrow_xy_ena       (wr_narrow_xy_ena),
+        .wr_narrow_xy_bank      (wr_narrow_xy_bank),
+        .wr_narrow_xy_addr      (wr_narrow_xy_addr),
+        .wr_narrow_x_din        (wr_narrow_x_din),
+        .wr_narrow_y_din        (wr_narrow_y_din),
+        
+        .ext_wide_xy_ena        (ext_wide_xy_ena),          // ext_* signals are assigned by p_prefill
+        .ext_wide_xy_bank       (ext_wide_xy_bank),
+        .ext_wide_xy_addr       (ext_wide_xy_addr),
+        .ext_wide_x_din         (ext_wide_x_din),
+        .ext_wide_y_din         (ext_wide_y_din),
+    
+        .ext_narrow_xy_ena      (ext_narrow_xy_ena),
+        .ext_narrow_xy_bank     (ext_narrow_xy_bank),
+        .ext_narrow_xy_addr     (ext_narrow_xy_addr),
+        .ext_narrow_x_din       (ext_narrow_x_din),
+        .ext_narrow_y_din       (ext_narrow_y_din),
+        
+        .rcmb_wide_xy_bank      (rcmb_wide_xy_bank),
+        .rcmb_wide_xy_addr      (rcmb_wide_xy_addr),
+        .rcmb_wide_x_din        (rcmb_wide_x_dout),         // *_din ports here take the uut *_dout nets
+        .rcmb_wide_y_din        (rcmb_wide_y_dout),
+        .rcmb_wide_xy_ena       (rcmb_wide_xy_valid),       // the uut valid net is used as the enable
+
+        .rcmb_narrow_xy_bank    (rcmb_narrow_xy_bank),
+        .rcmb_narrow_xy_addr    (rcmb_narrow_xy_addr),
+        .rcmb_narrow_x_din      (rcmb_narrow_x_dout),
+        .rcmb_narrow_y_din      (rcmb_narrow_y_dout),
+        .rcmb_narrow_xy_ena     (rcmb_narrow_xy_valid)
+    );
+
+
+    //
+    // p_prefill()
+    //
+    task p_prefill;     // preload the storage banks with the test vectors through the ext_* write ports
+        begin
+            // both ext write enables stay high while the first two loops run
+            ext_wide_xy_ena   = 1'b1;
+            ext_narrow_xy_ena = 1'b1;
+            // pass 1: P_T1 -> X word, P_T2 -> Y word of wide bank A and narrow bank A, one address per tick
+            for (i=0; i<PQ_NUM_WORDS; i=i+1) begin
+                ext_wide_xy_bank = BANK_WIDE_A;
+                ext_wide_xy_addr = i[OP_ADDR_W-1:0];
+                ext_wide_x_din   = P_T1[i];
+                ext_wide_y_din   = P_T2[i];
+                
+                ext_narrow_xy_bank = BANK_NARROW_A;
+                ext_narrow_xy_addr = i[OP_ADDR_W-1:0];
+                ext_narrow_x_din   = P_T1[i];
+                ext_narrow_y_din   = P_T2[i];
+                
+                wait_clock_tick;
+            end
+            // pass 2: P_N -> wide bank N, P_N_COEFF[0..PQ_NUM_WORDS-1] -> narrow bank COEFF (same value on X and Y)
+            for (i=0; i<PQ_NUM_WORDS; i=i+1) begin
+                ext_wide_xy_bank = BANK_WIDE_N;
+                ext_wide_xy_addr = i[OP_ADDR_W-1:0];
+                ext_wide_x_din   = P_N[i];
+                ext_wide_y_din   = P_N[i];
+
+                ext_narrow_xy_bank = BANK_NARROW_COEFF;
+                ext_narrow_xy_addr = i[OP_ADDR_W-1:0];
+                ext_narrow_x_din   = P_N_COEFF[i];
+                ext_narrow_y_din   = P_N_COEFF[i];
+                
+                wait_clock_tick;
+            end
+            // wide port is done: disable it and park its inputs on the don't-care constants
+            ext_wide_xy_ena  = 1'b0;
+            ext_wide_xy_bank = BANK_DNC;
+            ext_wide_xy_addr = OP_ADDR_DNC;
+            ext_wide_x_din   = WORD_EXT_DNC;
+            ext_wide_y_din   = WORD_EXT_DNC;
+            // pass 3: the extra coefficient word P_N_COEFF[PQ_NUM_WORDS] -> narrow bank EXT (index follows PQ_NUM_WORDS, was hard-coded 32)
+            for (i=PQ_NUM_WORDS; i<=PQ_NUM_WORDS; i=i+1) begin
+                ext_narrow_xy_bank = BANK_NARROW_EXT;
+                ext_narrow_xy_addr = OP_ADDR_EXT_COEFF;
+                ext_narrow_x_din   = P_N_COEFF[i];
+                ext_narrow_y_din   = P_N_COEFF[i];
+                
+                wait_clock_tick;
+            end
+            // narrow port is done: disable it and park its inputs on the don't-care constants
+            ext_narrow_xy_ena  = 1'b0;
+            ext_narrow_xy_bank = BANK_DNC;
+            ext_narrow_xy_addr = OP_ADDR_DNC;
+            ext_narrow_x_din   = WORD_EXT_DNC;
+            ext_narrow_y_din   = WORD_EXT_DNC;
+                        
+        end
+    endtask
+    
+    
+    //
+    // wait_clock_tick()
+    //
+    task wait_clock_tick;       // delay by `CLK_PERIOD_NS (one clk period)
+        #`CLK_PERIOD_NS;
+    endtask
+
+    
+    //
+    // wait_clock_ticks()
+    //
+    task wait_clock_ticks;      // wait for num_ticks clock periods (none when num_ticks <= 0)
+        input integer num_ticks;
+        integer ticks_left;
+        for (ticks_left=num_ticks; ticks_left>0; ticks_left=ticks_left-1)
+            wait_clock_tick;
+    endtask
+
+    
+    //
+    // Debug Interceptor (arrays that record words seen on the uut output ports)
+    //
+    reg [WORD_EXT_W-1:0] P_X_AB_READ[0:2*PQ_NUM_WORDS-1];      // from rcmb_wide_x_dout: bank L at offset 0, bank H at offset PQ_NUM_WORDS
+    reg [WORD_EXT_W-1:0] P_Y_AB_READ[0:2*PQ_NUM_WORDS-1];      // from rcmb_wide_y_dout, same layout
+    
+    reg [WORD_EXT_W-1:0] P_X_Q_READ[0:PQ_NUM_WORDS];           // from rcmb_narrow_x_dout: bank Q, plus one EXT word at index PQ_NUM_WORDS
+    reg [WORD_EXT_W-1:0] P_Y_Q_READ[0:PQ_NUM_WORDS];           // from rcmb_narrow_y_dout, same layout
+
+    reg [WORD_EXT_W-1:0] P_X_M_READ[0:2*PQ_NUM_WORDS];         // from rcmb_final_x_dout: bank ML at offset 0, bank MH at offset PQ_NUM_WORDS
+    reg [WORD_EXT_W-1:0] P_Y_M_READ[0:2*PQ_NUM_WORDS];         // from rcmb_final_y_dout, same layout
+    
+    reg [WORD_EXT_W-1:0] P_X_WIDE_READ[0:PQ_NUM_WORDS-1];      // writer is outside the visible part of the file (presumably rdct_wide_* - confirm)
+    reg [WORD_EXT_W-1:0] P_Y_WIDE_READ[0:PQ_NUM_WORDS-1];
+    
+    reg [WORD_EXT_W-1:0] P_X_NARROW_READ[0:PQ_NUM_WORDS-1];    // writer is outside the visible part of the file (presumably rdct_narrow_* - confirm)
+    reg [WORD_EXT_W-1:0] P_Y_NARROW_READ[0:PQ_NUM_WORDS-1];
+    
+    integer xy_offset;      // scratch array index computed by the capture always block
+    always @(posedge clk) begin
+        //
+        if (rcmb_wide_xy_valid)
+            //
+            case (rcmb_wide_xy_bank)
+                //
+                BANK_WIDE_L: begin
+                    //
+                    xy_offset = rcmb_wide_xy_addr;
+                    //
+                    if (xy_offset >= PQ_NUM_WORDS) begin
+                        $display("ERROR: Encountered illegal offset (%d) writing to wide bank L!", xy_offset);
+                        $finish;
+                    end
+                    //
+                    P_X_AB_READ[xy_offset] <= rcmb_wide_x_dout;
+                    P_Y_AB_READ[xy_offset] <= rcmb_wide_y_dout;
+                    //
+                end
+                //
+                BANK_WIDE_H: begin
+                    //
+                    xy_offset = PQ_NUM_WORDS + rcmb_wide_xy_addr;
+                    //
+                    if (xy_offset >= 2*PQ_NUM_WORDS) begin
+                        $display("ERROR: Encountered illegal offset (%d) writing to wide bank H!", xy_offset);
+                        $finish;
+                    end
+                    //
+                    P_X_AB_READ[xy_offset] <= rcmb_wide_x_dout;
+                    P_Y_AB_READ[xy_offset] <= rcmb_wide_y_dout;
+                    //
+                end
+                //
+                default: begin
+                    $display("ERROR: Encountered illegal wide bank (%d) while writing!", rcmb_wide_xy_bank);
+                    $finish;
+                end
+                //
+            endcase
+        //
+        if (rcmb_narrow_xy_valid)
+            //
+            case (rcmb_narrow_xy_bank)
+                //
+                BANK_NARROW_Q: begin
+                    //
+                    xy_offset = rcmb_narrow_xy_addr;
+                    //
+                    if (xy_offset >= PQ_NUM_WORDS) begin
+                        $display("ERROR: Encountered illegal offset (%d) writing to narrow bank Q!", xy_offset);
+                        $finish;
+                    end
+                    //
+                    P_X_Q_READ[xy_offset] <= rcmb_narrow_x_dout;
+                    P_Y_Q_READ[xy_offset] <= rcmb_narrow_y_dout;
+                    //
+                end
+                //
+                BANK_NARROW_EXT: begin
+                    //
+                    xy_offset = PQ_NUM_WORDS + rcmb_narrow_xy_addr - 1;
+                    //
+                    if (xy_offset != PQ_NUM_WORDS) begin
+                        $display("ERROR: Encountered illegal offset (%d) writing to narrow bank EXT!", xy_offset);
+                        $finish;
+                    end
+                    //
+                    P_X_Q_READ[xy_offset] <= rcmb_narrow_x_dout;
+                    P_Y_Q_READ[xy_offset] <= rcmb_narrow_y_dout;
+                    //
+                end
+                //
+                default: begin
+                    $display("ERROR: Encountered illegal narrow bank (%d) while writing!", rcmb_narrow_xy_bank);
+                    $finish;
+                end
+                //
+            endcase
+        //
+        if (rcmb_final_xy_valid)
+            //
+            case (rcmb_final_xy_bank)
+                //
+                BANK_RCMB_ML: begin
+                    //
+                    xy_offset = rcmb_final_xy_addr;
+                    //
+                    if (xy_offset >= PQ_NUM_WORDS) begin
+                        $display("ERROR: Encountered illegal offset (%d) writing to narrow bank ML!", xy_offset);
+                        $finish;
+                    end
+                    //
+                    P_X_M_READ[xy_offset] <= rcmb_final_x_dout;
+                    P_Y_M_READ[xy_offset] <= rcmb_final_y_dout;
+                    //
+                end
+                //
+                BANK_RCMB_MH: begin
+                    //
+                    xy_offset = PQ_NUM_WORDS + rcmb_final_xy_addr;
+                    //
+                    if (xy_offset >= 2*PQ_NUM_WORDS) begin
+                        $display("ERROR: Encountered illegal offset (%d) writing to narrow bank MH!", xy_offset);
+                        $finish;
+                    end
+                    //
+                    P_X_M_READ[xy_offset] <= rcmb_final_x_dout;
+                    P_Y_M_READ[xy_offset] <= rcmb_final_y_dout;
+                    //
+                end
+                //
+                BANK_RCMB_EXT: begin
+                    //
+                    xy_offset = 2*PQ_NUM_WORDS + rcmb_final_xy_addr;
+                    //
+                    if (xy_offset != 2*PQ_NUM_WORDS) begin
+                        $display("ERROR: Encountered illegal offset (%d) writing to narrow bank EXT!", xy_offset);
+                        $finish;
+                    end
+                    //
+                    P_X_M_READ[xy_offset] <= rcmb_final_x_dout;
+                    P_Y_M_READ[xy_offset] <= rcmb_final_y_dout;
+                    //
+                end
+                //
+                default: begin
+                    $display("ERROR: Encountered illegal narrow bank (%d) while writing!", rcmb_final_xy_bank);
+                    $finish;
+                end
+                //
+            endcase
+        //
+        if (rdct_narrow_xy_valid) begin
+            //
+            xy_offset = rdct_narrow_xy_addr;
+            //
+            if (xy_offset >= PQ_NUM_WORDS) begin
+                $display("ERROR: Encountered illegal offset (%d) writing to narrow bank T1/T2!", xy_offset);
+                $finish;
+            end
+            //
+            P_X_NARROW_READ[xy_offset] <= rdct_narrow_x_dout;
+            P_Y_NARROW_READ[xy_offset] <= rdct_narrow_y_dout;
+            //
+        end
+        //
+        if (rdct_wide_xy_valid) begin
+            //
+            xy_offset = rdct_wide_xy_addr;
+            //
+            if (xy_offset >= PQ_NUM_WORDS) begin
+                $display("ERROR: Encountered illegal offset (%d) writing to wide bank T1/T2!", xy_offset);
+                $finish;
+            end
+            //
+            P_X_WIDE_READ[xy_offset] <= rdct_wide_x_dout;
+            P_Y_WIDE_READ[xy_offset] <= rdct_wide_y_dout;
+            //
+        end        
+        //        
+    end
+
+    task p_verify_ab;
+        // Compares the captured arrays P_X_AB_READ / P_Y_AB_READ word-by-word against the reference P_X_AB / P_Y_AB.
+        reg verify_x_ab_ok;
+        reg verify_y_ab_ok;
+        // NOTE(review): loop index `i` is not declared inside this task; presumably a module-level integer - confirm.
+        begin
+            // Start optimistic; the first mismatching word clears the flag of the affected array.
+            verify_x_ab_ok = 1;
+            verify_y_ab_ok = 1;
+            // Case inequality (!==) is used, so X/Z bits in a captured word also count as a mismatch.
+            for (i=0; i<2*PQ_NUM_WORDS; i=i+1) begin
+                if (P_X_AB_READ[i] !== P_X_AB[i]) verify_x_ab_ok = 0;
+                if (P_Y_AB_READ[i] !== P_Y_AB[i]) verify_y_ab_ok = 0;
+            end
+            // If X failed, dump all 2*PQ_NUM_WORDS words as "reference / captured"; mismatching words get a <???> tag.
+            if (!verify_x_ab_ok)
+                for (i=0; i<2*PQ_NUM_WORDS; i=i+1)
+                    if (P_X_AB_READ[i] === P_X_AB[i]) $display("P_X_AB / P_X_AB_READ [%02d] = 0x%05x / 0x%05x",       i, P_X_AB[i], P_X_AB_READ[i]);
+                    else                              $display("P_X_AB / P_X_AB_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_X_AB[i], P_X_AB_READ[i]);
+            // Same full dump for the Y arrays.
+            if (!verify_y_ab_ok)
+                for (i=0; i<2*PQ_NUM_WORDS; i=i+1)
+                    if (P_Y_AB_READ[i] === P_Y_AB[i]) $display("P_Y_AB / P_Y_AB_READ [%02d] = 0x%05x / 0x%05x",       i, P_Y_AB[i], P_Y_AB_READ[i]);
+                    else                              $display("P_Y_AB / P_Y_AB_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_Y_AB[i], P_Y_AB_READ[i]);
+            // One-line verdict for X (printed after any dump).
+            if (verify_x_ab_ok) $display("P_X_AB is OK.");
+            else                $display("P_X_AB is WRONG!");
+            // One-line verdict for Y.
+            if (verify_y_ab_ok) $display("P_Y_AB is OK.");
+            else                $display("P_Y_AB is WRONG!");
+            //
+        end
+        //
+    endtask
+    
+    task p_verify_q;
+        // Compares the captured arrays P_X_Q_READ / P_Y_Q_READ word-by-word against the reference P_X_Q / P_Y_Q.
+        reg verify_x_q_ok;
+        reg verify_y_q_ok;
+        // NOTE(review): loop index `i` is not declared inside this task; presumably a module-level integer - confirm.
+        begin
+            // Start optimistic; the first mismatching word clears the flag of the affected array.
+            verify_x_q_ok = 1;
+            verify_y_q_ok = 1;
+            // PQ_NUM_WORDS+1 words are checked: index PQ_NUM_WORDS is the slot the capture logic fills from BANK_NARROW_EXT.
+            for (i=0; i<(PQ_NUM_WORDS+1); i=i+1) begin
+                if (P_X_Q_READ[i] !== P_X_Q[i]) verify_x_q_ok = 0;
+                if (P_Y_Q_READ[i] !== P_Y_Q[i]) verify_y_q_ok = 0;
+            end
+            // If X failed, dump every word as "reference / captured"; mismatching words get a <???> tag.
+            if (!verify_x_q_ok)
+                for (i=0; i<(PQ_NUM_WORDS+1); i=i+1)
+                    if (P_X_Q_READ[i] === P_X_Q[i]) $display("P_X_Q / P_X_Q_READ [%02d] = 0x%05x / 0x%05x",       i, P_X_Q[i], P_X_Q_READ[i]);
+                    else                            $display("P_X_Q / P_X_Q_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_X_Q[i], P_X_Q_READ[i]);
+            // Same full dump for the Y arrays.
+            if (!verify_y_q_ok)
+                for (i=0; i<(PQ_NUM_WORDS+1); i=i+1)
+                    if (P_Y_Q_READ[i] === P_Y_Q[i]) $display("P_Y_Q / P_Y_Q_READ [%02d] = 0x%05x / 0x%05x",       i, P_Y_Q[i], P_Y_Q_READ[i]);
+                    else                            $display("P_Y_Q / P_Y_Q_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_Y_Q[i], P_Y_Q_READ[i]);
+            // One-line verdict for X (printed after any dump).
+            if (verify_x_q_ok) $display("P_X_Q is OK.");
+            else               $display("P_X_Q is WRONG!");
+            // One-line verdict for Y.
+            if (verify_y_q_ok) $display("P_Y_Q is OK.");
+            else               $display("P_Y_Q is WRONG!");
+            //
+        end
+        //
+    endtask
+
+    task p_verify_m;
+        // Compares the captured arrays P_X_M_READ / P_Y_M_READ word-by-word against the reference P_X_M / P_Y_M.
+        reg verify_x_m_ok;
+        reg verify_y_m_ok;
+        // NOTE(review): loop index `i` is not declared inside this task; presumably a module-level integer - confirm.
+        begin
+            // Start optimistic; the first mismatching word clears the flag of the affected array.
+            verify_x_m_ok = 1;
+            verify_y_m_ok = 1;
+            // 2*PQ_NUM_WORDS+1 words are checked: index 2*PQ_NUM_WORDS is the slot the capture logic fills from BANK_RCMB_EXT.
+            for (i=0; i<(2*PQ_NUM_WORDS+1); i=i+1) begin
+                if (P_X_M_READ[i] !== P_X_M[i]) verify_x_m_ok = 0;
+                if (P_Y_M_READ[i] !== P_Y_M[i]) verify_y_m_ok = 0;
+            end
+            // If X failed, dump every word as "reference / captured"; mismatching words get a <???> tag.
+            if (!verify_x_m_ok)
+                for (i=0; i<(2*PQ_NUM_WORDS+1); i=i+1)
+                    if (P_X_M_READ[i] === P_X_M[i]) $display("P_X_M / P_X_M_READ [%02d] = 0x%05x / 0x%05x",       i, P_X_M[i], P_X_M_READ[i]);
+                    else                            $display("P_X_M / P_X_M_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_X_M[i], P_X_M_READ[i]);
+            // Same full dump for the Y arrays.
+            if (!verify_y_m_ok)
+                for (i=0; i<(2*PQ_NUM_WORDS+1); i=i+1)
+                    if (P_Y_M_READ[i] === P_Y_M[i]) $display("P_Y_M / P_Y_M_READ [%02d] = 0x%05x / 0x%05x",       i, P_Y_M[i], P_Y_M_READ[i]);
+                    else                            $display("P_Y_M / P_Y_M_READ [%02d] = 0x%05x / 0x%05x <???>", i, P_Y_M[i], P_Y_M_READ[i]);
+            // One-line verdict for X (printed after any dump).
+            if (verify_x_m_ok) $display("P_X_M is OK.");
+            else               $display("P_X_M is WRONG!");
+            // One-line verdict for Y.
+            if (verify_y_m_ok) $display("P_Y_M is OK.");
+            else               $display("P_Y_M is WRONG!");
+            //
+        end
+        //
+    endtask
+    
+  task p_verify_p;
+        // Checks both captured copies (P_*_WIDE_READ and P_*_NARROW_READ) against the single reference P_X / P_Y.
+        reg verify_x_wide_ok;
+        reg verify_y_wide_ok;
+        reg verify_x_narrow_ok;
+        reg verify_y_narrow_ok;
+        // NOTE(review): loop index `i` is not declared inside this task; presumably a module-level integer - confirm.
+        begin
+            // Start optimistic; the first mismatching word clears the corresponding flag.
+            verify_x_wide_ok = 1;
+            verify_y_wide_ok = 1;
+            verify_x_narrow_ok = 1;
+            verify_y_narrow_ok = 1;
+            // Wide and narrow captures are compared against the same reference word; !== also flags X/Z bits.
+            for (i=0; i<PQ_NUM_WORDS; i=i+1) begin
+                if (P_X_WIDE_READ[i] !== P_X[i]) verify_x_wide_ok = 0;
+                if (P_Y_WIDE_READ[i] !== P_Y[i]) verify_y_wide_ok = 0;
+                if (P_X_NARROW_READ[i] !== P_X[i]) verify_x_narrow_ok = 0;
+                if (P_Y_NARROW_READ[i] !== P_Y[i]) verify_y_narrow_ok = 0;
+            end
+            // If either X copy failed, dump "reference / wide / narrow" for every word; a word is tagged <???> if either copy differs.
+            if (!verify_x_wide_ok || !verify_x_narrow_ok)
+                for (i=0; i<PQ_NUM_WORDS; i=i+1)
+                    if ((P_X_WIDE_READ[i] === P_X[i]) && (P_X_NARROW_READ[i] === P_X[i])) $display("P_X / P_X_WIDE / P_X_NARROW [%02d] = 0x%05x / 0x%05x / 0x%05x",       i, P_X[i], P_X_WIDE_READ[i], P_X_NARROW_READ[i]);
+                    else                                                                  $display("P_X / P_X_WIDE / P_X_NARROW [%02d] = 0x%05x / 0x%05x / 0x%05x <???>", i, P_X[i], P_X_WIDE_READ[i], P_X_NARROW_READ[i]);
+            // Same full dump for the Y copies.
+            if (!verify_y_wide_ok || !verify_y_narrow_ok)
+                for (i=0; i<PQ_NUM_WORDS; i=i+1)
+                    if ((P_Y_WIDE_READ[i] === P_Y[i]) && (P_Y_NARROW_READ[i] === P_Y[i])) $display("P_Y / P_Y_WIDE / P_Y_NARROW [%02d] = 0x%05x / 0x%05x / 0x%05x",       i, P_Y[i], P_Y_WIDE_READ[i], P_Y_NARROW_READ[i]);
+                    else                                                                  $display("P_Y / P_Y_WIDE / P_Y_NARROW [%02d] = 0x%05x / 0x%05x / 0x%05x <???>", i, P_Y[i], P_Y_WIDE_READ[i], P_Y_NARROW_READ[i]);
+            // X verdict: OK only when both the wide and the narrow copy matched.
+            if (verify_x_wide_ok && verify_x_narrow_ok) $display("P_X is OK.");
+            else                                        $display("P_X is WRONG!");
+            // Y verdict: OK only when both the wide and the narrow copy matched.
+            if (verify_y_wide_ok && verify_y_narrow_ok) $display("P_Y is OK.");
+            else                                        $display("P_Y is WRONG!");
+            //
+        end
+        //
+    endtask
+    
+endmodule
+
diff --git a/bench/tb_mmm_x8_dual.v b/bench/tb_mmm_x8_dual.v
deleted file mode 100644
index aa25900..0000000
--- a/bench/tb_mmm_x8_dual.v
+++ /dev/null
@@ -1,327 +0,0 @@
-`timescale 1ns / 1ps
-
-module tb_mmm_x8_dual;
-
-
-    //
-    // Headers
-    //
-    `include "../rtl/modexpng_parameters.vh"
-    `include "../rtl/modexpng_parameters_x8.vh"
-    
-
-    //
-    // Settings
-    //
-    localparam INDEX_WIDTH = 6;
-    
-	wire [INDEX_WIDTH-1:0] index_last = 31;  // 512 bits
-    
-	
-    //
-    // Clock
-    //
-    `define CLK_FREQUENCY_MHZ   100.0
-    `define CLK_PERIOD_NS       (1000.0 / `CLK_FREQUENCY_MHZ)
-    `define CLK_PERIOD_HALF_NS  (0.5 * `CLK_PERIOD_NS)
-    
-	reg clk = 1'b0;
-
-    always begin
-        #`CLK_PERIOD_HALF_NS clk = 1'b1;
-        #`CLK_PERIOD_HALF_NS clk = 1'b0;
-    end    
-
-    
-    //
-    // Reset
-    //
-    reg  rst   = 1'b1;
-	wire rst_n = ~rst;
-    
-    
-    //
-    // Control
-    //
-    reg  ena = 1'b0;
-    wire rdy;
-
-    reg  mode;
-    reg  transfer;
-
-
-    //
-    // Interface
-    //
-
-    
-    //
-    // Interface - Data Buses
-    //
-	wire [NUM_MULTS*WORD_WIDTH-1:0] x_din;
-	wire [NUM_MULTS*WORD_WIDTH-1:0] y_din;
-	wire [NUM_MULTS*WORD_WIDTH-1:0] x_dout;
-	wire [NUM_MULTS*WORD_WIDTH-1:0] y_dout;
-
-    
-    //
-    // Interface - Address Buses
-    //
-	wire [INDEX_WIDTH-4:0] x_din_addr;
-	wire [INDEX_WIDTH-4:0] y_din_addr;
-	wire [INDEX_WIDTH-4:0] x_dout_addr;
-	wire [INDEX_WIDTH-4:0] y_dout_addr;
-
-    
-    //
-    // Interface - Enable Buses
-    //
-	wire [        1-1:0] x_din_ena;
-	wire [        1-1:0] y_din_ena;
-	wire [        1-1:0] x_din_reg_ena;
-	wire [        1-1:0] y_din_reg_ena;
-    wire [NUM_MULTS-1:0] x_dout_ena;
-    wire [NUM_MULTS-1:0] y_dout_ena;
-    
-    
-    //
-    // Interface - Bank Buses
-    //
-    wire [3-1:0] x_din_bank;
-    wire [3-1:0] y_din_bank;
-    wire [3-1:0] x_dout_bank;
-    wire [3-1:0] y_dout_bank;
-
-
-    //
-    // Operands
-    //
-    reg [WORD_WIDTH-1:0] T1[0:2**INDEX_WIDTH-1];
-    reg [WORD_WIDTH-1:0] T2[0:2**INDEX_WIDTH-1];
-    reg [WORD_WIDTH-1:0] N[0:2**INDEX_WIDTH-1];
-    reg [WORD_WIDTH-1:0] N_COEFF[0:2**INDEX_WIDTH];
-
-    
-    //
-    // Memories
-    //
-    genvar z;
-    generate for (z=0; z<NUM_MULTS; z=z+1)
-        //
-        begin : gen_z_mem
-            //
-            modexpng_mem /*bram_1wo_1ro_readfirst_ce*/ #
-            (
-                .MEM_WIDTH(WORD_WIDTH),
-                .MEM_ADDR_BITS(INDEX_WIDTH) // - clog2(NUM_MULTS) + clog2(NUM_BANKS)
-            )
-            gen_z_mem_x
-            (
-                .clk        (clk),
-
-                .a_addr     ({x_dout_bank, x_dout_addr}),
-                .a_en       (x_dout_ena[z]),
-                .a_wr       (x_dout_ena[z]),
-                .a_in       (x_dout[z*WORD_WIDTH+:WORD_WIDTH]),
-                .a_out      (), // unused
-
-                .b_addr     ({x_din_bank, x_din_addr}),
-                .b_en       (x_din_ena),
-                .b_reg_en   (x_din_reg_ena),
-                .b_out      (x_din[z*WORD_WIDTH+:WORD_WIDTH])
-            );
-            //
-            modexpng_mem /*bram_1wo_1ro_readfirst_ce*/ #
-            (
-                .MEM_WIDTH(WORD_WIDTH),
-                .MEM_ADDR_BITS(INDEX_WIDTH) // - clog2(NUM_MULTS) + clog2(NUM_BANKS)
-            )
-            gen_z_mem_y
-            (
-                .clk        (clk),
-
-                .a_addr     ({y_dout_bank, y_dout_addr}),
-                .a_en       (y_dout_ena[z]),
-                .a_wr       (y_dout_ena[z]),
-                .a_in       (y_dout[z*WORD_WIDTH+:WORD_WIDTH]),
-                .a_out      (), // unused
-
-                .b_addr     ({y_din_bank, y_din_addr}),
-                .b_en       (y_din_ena),
-                .b_reg_en   (y_din_reg_ena),
-                .b_out      (y_din[z*WORD_WIDTH+:WORD_WIDTH])
-            );
-            //
-        end
-        //
-    endgenerate
-    
-
-    // T1 / T2
-    // N  / N_COEFF
-    // AB_LSB
-    // AB_MSB
-    // M
-    // Q_LSB
-    // Q_MSB
-    // ?
-
-
-    //
-    // Operands - Values
-    //
-    initial begin
-        //
-        T1[ 0] = 18'h0b27b; T1[ 1] = 18'h0fc7d; T1[ 2] = 18'h0a214; T1[ 3] = 18'h08d2b;
-        T1[ 4] = 18'h1c80c; T1[ 5] = 18'h145f1; T1[ 6] = 18'h00db6; T1[ 7] = 18'h1cf0f;
-        T1[ 8] = 18'h19386; T1[ 9] = 18'h02ad9; T1[10] = 18'h1a8b5; T1[11] = 18'h1479b;
-        T1[12] = 18'h08b5f; T1[13] = 18'h14806; T1[14] = 18'h0e6f7; T1[15] = 18'h0ce9d;
-        T1[16] = 18'h0cbc2; T1[17] = 18'h16ef1; T1[18] = 18'h0e14e; T1[19] = 18'h1796f;
-        T1[20] = 18'h14901; T1[21] = 18'h06666; T1[22] = 18'h0cb9f; T1[23] = 18'h09ab4;
-        T1[24] = 18'h12ffc; T1[25] = 18'h0a86d; T1[26] = 18'h19d35; T1[27] = 18'h0cda9;
-        T1[28] = 18'h16a19; T1[29] = 18'h09a36; T1[30] = 18'h0b176; T1[31] = 18'h0e0dc;
-        //
-        T2[ 0] = 18'h0b21a; T2[ 1] = 18'h13e71; T2[ 2] = 18'h03459; T2[ 3] = 18'h1063f;
-        T2[ 4] = 18'h18cef; T2[ 5] = 18'h1b8a5; T2[ 6] = 18'h082d1; T2[ 7] = 18'h1b1be;
-        T2[ 8] = 18'h18979; T2[ 9] = 18'h1409a; T2[10] = 18'h1713c; T2[11] = 18'h0cda3;
-        T2[12] = 18'h11c7d; T2[13] = 18'h0c943; T2[14] = 18'h12d7c; T2[15] = 18'h1531e;
-        T2[16] = 18'h0a45a; T2[17] = 18'h1c637; T2[18] = 18'h0906a; T2[19] = 18'h1670e;
-        T2[20] = 18'h12f78; T2[21] = 18'h08ce6; T2[22] = 18'h1c5c7; T2[23] = 18'h1292d;
-        T2[24] = 18'h0fc4b; T2[25] = 18'h064fb; T2[26] = 18'h0cc3c; T2[27] = 18'h19b37;
-        T2[28] = 18'h1b721; T2[29] = 18'h0f424; T2[30] = 18'h0f608; T2[31] = 18'h03e9b;
-        //
-        N[ 0] = 18'h00a9d; N[ 1] = 18'h01175; N[ 2] = 18'h0254f; N[ 3] = 18'h0ee38;
-        N[ 4] = 18'h00a6a; N[ 5] = 18'h0c7bd; N[ 6] = 18'h0ddac; N[ 7] = 18'h069fe;
-        N[ 8] = 18'h0e9d6; N[ 9] = 18'h0b6bf; N[10] = 18'h09230; N[11] = 18'h04fc5;
-        N[12] = 18'h05c9f; N[13] = 18'h09502; N[14] = 18'h0cbc5; N[15] = 18'h03109;
-        N[16] = 18'h08029; N[17] = 18'h0b27c; N[18] = 18'h0eeb8; N[19] = 18'h0c191;
-        N[20] = 18'h0ff86; N[21] = 18'h027ab; N[22] = 18'h07d76; N[23] = 18'h0ff1a;
-        N[24] = 18'h02afc; N[25] = 18'h0b25a; N[26] = 18'h0d3c1; N[27] = 18'h05589;
-        N[28] = 18'h09f7c; N[29] = 18'h0ddd6; N[30] = 18'h0b4fc; N[31] = 18'h0e8e7;
-        //
-        N_COEFF[ 0] = 18'h0344b; N_COEFF[ 1] = 18'h0ca66; N_COEFF[ 2] = 18'h0d9e8; N_COEFF[ 3] = 18'h070d5;
-        N_COEFF[ 4] = 18'h0ce4b; N_COEFF[ 5] = 18'h049b2; N_COEFF[ 6] = 18'h0abb3; N_COEFF[ 7] = 18'h0c3b2;
-        N_COEFF[ 8] = 18'h0ad38; N_COEFF[ 9] = 18'h05672; N_COEFF[10] = 18'h0fd47; N_COEFF[11] = 18'h06671;
-        N_COEFF[12] = 18'h00b7f; N_COEFF[13] = 18'h0fa35; N_COEFF[14] = 18'h0d4ac; N_COEFF[15] = 18'h0f1ca;
-        N_COEFF[16] = 18'h08e0a; N_COEFF[17] = 18'h05858; N_COEFF[18] = 18'h02dc6; N_COEFF[19] = 18'h08cfc;
-        N_COEFF[20] = 18'h01941; N_COEFF[21] = 18'h0f855; N_COEFF[22] = 18'h01e43; N_COEFF[23] = 18'h053f0;
-        N_COEFF[24] = 18'h0a479; N_COEFF[25] = 18'h0ae7e; N_COEFF[26] = 18'h05c66; N_COEFF[27] = 18'h02413;
-        N_COEFF[28] = 18'h0b5f8; N_COEFF[29] = 18'h0eb06; N_COEFF[30] = 18'h0de5b; N_COEFF[31] = 18'h0a751;
-        N_COEFF[32] = 18'h0c1ec;
-        //
-    end
-    
-    
-    //
-    // Load Interface
-    //
-    wire                   load_phase;
-    wire [  INDEX_WIDTH:0] load_xy_addr;
-    wire                   load_xy_addr_vld;
-    wire                   load_xy_req;
-    reg  [ WORD_WIDTH-1:0] load_x_din;
-    reg  [ WORD_WIDTH-1:0] load_y_din;
-    reg  [ WORD_WIDTH-1:0] load_x_pipe;
-    reg  [ WORD_WIDTH-1:0] load_y_pipe;
-
-    always @(posedge clk)
-        //
-        if (load_xy_addr_vld) begin
-        
-            if (!load_phase) begin
-                load_x_pipe <= T1[load_xy_addr];
-                load_y_pipe <= T2[load_xy_addr];
-            end else begin
-                load_x_pipe <= !load_xy_addr[INDEX_WIDTH] ? N[load_xy_addr] : {WORD_WIDTH{1'bX}};
-                load_y_pipe <= N_COEFF[load_xy_addr];
-            end 
-        end
-
-    always @(posedge clk)
-        //
-        if (load_xy_req)
-            {load_y_din, load_x_din} <= {load_y_pipe, load_x_pipe};
-        else
-            {load_y_din, load_x_din} <= {2*WORD_WIDTH{1'bX}};
-
-        
-    //
-    // UUT
-    //
-    modexpng_mmm_x8_dual #
-    (
-        .INDEX_WIDTH(INDEX_WIDTH)
-    )
-    uut
-    (
-		.clk                (clk),
-		.rst_n              (rst_n),
-        
-		.ena                (ena),
-		.rdy                (rdy),
-        
-        .mode               (mode),
-        .transfer           (transfer),
-
-		.index_last         (index_last),
-
-        .x_din              (x_din),
-        .y_din              (y_din),
-        .x_dout             (x_dout),
-        .y_dout             (y_dout),
-
-        .x_din_addr         (x_din_addr),
-        .y_din_addr         (y_din_addr),
-        .x_dout_addr        (x_dout_addr),
-        .y_dout_addr        (y_dout_addr),
-
-        .x_din_ena          (x_din_ena),
-        .y_din_ena          (y_din_ena),
-        .x_dout_ena         (x_dout_ena),
-        .y_dout_ena         (y_dout_ena),
-        
-        .x_din_reg_ena      (x_din_reg_ena),
-        .y_din_reg_ena      (y_din_reg_ena),
-    
-        .x_din_bank         (x_din_bank),
-        .y_din_bank         (y_din_bank),
-        .x_dout_bank        (x_dout_bank),
-        .y_dout_bank        (y_dout_bank),
-        
-        .load_phase         (load_phase),
-        .load_xy_addr       (load_xy_addr),
-        .load_xy_addr_vld   (load_xy_addr_vld),
-        .load_xy_req        (load_xy_req),
-        .load_x_din         (load_x_din),
-        .load_y_din         (load_y_din)
-	);
-
-
-    //
-    // Script
-    //
-	initial begin
-        #(100.0*`CLK_PERIOD_NS)     rst      = 1'b0;
-        #(100.0*`CLK_PERIOD_NS)     ena      = 1'b1;
-                                    transfer = 1'b1;
-                                    mode     = 1'b0;
-        #(  1.0*`CLK_PERIOD_NS)     ena      = 1'b0;
-                                    transfer = 1'bX;
-                                    mode     = 1'bX;
-                                    
-        while (!rdy) #`CLK_PERIOD_NS;
-        
-        #(100.0*`CLK_PERIOD_NS)     ena      = 1'b1;
-                                    transfer = 1'b0;
-                                    mode     = 1'b0;
-        #(  1.0*`CLK_PERIOD_NS)     ena      = 1'b0;
-                                    transfer = 1'bX;
-                                    mode     = 1'bX;
-                                    
-        while (!rdy) #`CLK_PERIOD_NS;
-        
-	end
-
-      
-endmodule
-
diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
index 6b194dc..c78a969 100644
--- a/rtl/modexpng_core_top.v
+++ b/rtl/modexpng_core_top.v
@@ -6,6 +6,8 @@ module modexpng_core_top
     crt_mode,
     word_index_last_n,
     word_index_last_pq,
+    bit_index_last_n,
+    bit_index_last_pq,
     bus_cs,
     bus_we,
     bus_addr,
@@ -37,6 +39,9 @@ module modexpng_core_top
     input  [                  OP_ADDR_W     -1:0] word_index_last_n;
     input  [                  OP_ADDR_W     -1:0] word_index_last_pq;
 
+    input  [                  BIT_INDEX_W   -1:0] bit_index_last_n;
+    input  [                  BIT_INDEX_W   -1:0] bit_index_last_pq;
+    
     input                                         bus_cs;
     input                                         bus_we;
     input  [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr;
@@ -71,16 +76,23 @@ module modexpng_core_top
     wire [BANK_ADDR_W  -1:0] uop_data_sel_wide_out   = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ];
     wire [BANK_ADDR_W  -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ];
     
-    wire uop_opcode_is_stop =  uop_data_opcode == UOP_OPCODE_STOP               ;
-    wire uop_opcode_is_in   = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE      ) ||
-                              (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW    ) ;
-    wire uop_opcode_is_out  =  uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ;
-    wire uop_opcode_is_mmm  =  uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY   ;
-    wire uop_opcode_is_wrk  = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES  ) ||
-                              (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X       ) ;
-
+    wire uop_opcode_is_stop   =  uop_data_opcode == UOP_OPCODE_STOP                ;
+    wire uop_opcode_is_in     = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE       ) ||
+                                (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW     ) ;
+    wire uop_opcode_is_out    =  uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW  ;
+    wire uop_opcode_is_ladder = (uop_data_opcode == UOP_OPCODE_LADDER_INIT         ) ||
+                                (uop_data_opcode == UOP_OPCODE_LADDER_STEP         ) ;
+    wire uop_opcode_is_mmm    = (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY    ) ||
+                                (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) ;
+    wire uop_opcode_is_wrk    = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES   ) ||
+                                (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X        ) ||
+                                (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) ||
+                                (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y    ) ;
+                                
+    wire uop_loop_now;
+    
     wire [UOP_ADDR_W -1:0] uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT;
-    wire [UOP_ADDR_W -1:0] uop_addr_next = uop_addr + 1'b1;
+    wire [UOP_ADDR_W -1:0] uop_addr_next = uop_loop_now ? uop_addr - 1'b1: uop_addr + 1'b1;
     
     modexpng_uop_rom uop_rom
     (
@@ -595,6 +607,14 @@ module modexpng_core_top
     reg  [OP_ADDR_W    -1:0] io_mgr_word_index_last;
     reg  [UOP_OPCODE_W -1:0] io_mgr_opcode;
     
+    reg  [BIT_INDEX_W  -1:0] io_mgr_ladder_steps;
+    wire                     io_mgr_ladder_d;
+    wire                     io_mgr_ladder_p;
+    wire                     io_mgr_ladder_q;
+    wire                     io_mgr_ladder_done;
+    
+    assign uop_loop_now = (uop_data_opcode == UOP_OPCODE_LADDER_STEP) && !io_mgr_ladder_done;
+    
     wire [WORD_W -1:0] wrk_rd_narrow_x_data_x_trunc = wrk_rd_narrow_x_data_x[WORD_W-1:0]; 
     wire [WORD_W -1:0] wrk_rd_narrow_x_data_y_trunc = wrk_rd_narrow_x_data_y[WORD_W-1:0];
     
@@ -653,7 +673,13 @@ module modexpng_core_top
         .io_out_dout                (io_out_data),
         
         .wrk_narrow_x_din_x_trunc   (wrk_rd_narrow_x_data_x_trunc),
-        .wrk_narrow_x_din_y_trunc   (wrk_rd_narrow_x_data_y_trunc)
+        .wrk_narrow_x_din_y_trunc   (wrk_rd_narrow_x_data_y_trunc),
+        
+        .ladder_steps               (io_mgr_ladder_steps),
+        .ladder_d                   (io_mgr_ladder_d),
+        .ladder_p                   (io_mgr_ladder_p),
+        .ladder_q                   (io_mgr_ladder_q),
+        .ladder_done                (io_mgr_ladder_done)
     );
 
 
@@ -685,6 +711,9 @@ module modexpng_core_top
     reg                     mmm_force_unity_b_x;
     reg                     mmm_force_unity_b_y;
     
+    reg                     mmm_only_reduce_x;
+    reg                     mmm_only_reduce_y;
+    
     wire                    rdct_ena_x;
     wire                    rdct_ena_y;
     wire                    rdct_rdy_x;
@@ -702,6 +731,7 @@ module modexpng_core_top
         .word_index_last            (mmm_word_index_last_x),
         .word_index_last_minus1     (mmm_word_index_last_minus1_x),
         .force_unity_b              (mmm_force_unity_b_x),
+        .only_reduce                (mmm_only_reduce_x),
         
         .sel_wide_in                (mmm_sel_wide_in_x),
         .sel_narrow_in              (mmm_sel_narrow_in_x),
@@ -757,6 +787,7 @@ module modexpng_core_top
         .word_index_last            (mmm_word_index_last_y),
         .word_index_last_minus1     (mmm_word_index_last_minus1_y),
         .force_unity_b              (mmm_force_unity_b_y),
+        .only_reduce                (mmm_only_reduce_y),
         
         .sel_wide_in                (mmm_sel_wide_in_y),
         .sel_narrow_in              (mmm_sel_narrow_in_y),
@@ -898,6 +929,7 @@ module modexpng_core_top
     reg  [ BANK_ADDR_W -1:0] wrk_sel_narrow_in;
     reg  [ BANK_ADDR_W -1:0] wrk_sel_narrow_out;
     reg  [   OP_ADDR_W -1:0] wrk_word_index_last;
+    reg  [   OP_ADDR_W -1:0] wrk_word_index_last_half;
     reg  [UOP_OPCODE_W -1:0] wrk_opcode;
     
     modexpng_general_worker general_worker
@@ -916,6 +948,7 @@ module modexpng_core_top
         .opcode                     (wrk_opcode),
         
         .word_index_last            (wrk_word_index_last),
+        .word_index_last_half       (wrk_word_index_last_half),
         
         .wrk_rd_wide_xy_ena_x       (wrk_rd_wide_xy_ena_x),
         .wrk_rd_wide_xy_bank_x      (wrk_rd_wide_xy_bank_x),
@@ -976,10 +1009,11 @@ module modexpng_core_top
         //
         uop_exit_from_busy = 0;
         //
-        if (uop_opcode_is_in)   uop_exit_from_busy =  ~io_mgr_ena & io_mgr_rdy;
-        if (uop_opcode_is_out)  uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~mmm_ena & mmm_rdy);
-        if (uop_opcode_is_mmm)  uop_exit_from_busy =  ~mmm_ena    & mmm_rdy   ;
-        if (uop_opcode_is_wrk)  uop_exit_from_busy =  ~wrk_ena    & wrk_rdy   ;
+        if (uop_opcode_is_in    ) uop_exit_from_busy =  ~io_mgr_ena & io_mgr_rdy;
+        if (uop_opcode_is_out   ) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~wrk_ena & wrk_rdy);
+        if (uop_opcode_is_mmm   ) uop_exit_from_busy =  ~mmm_ena    & mmm_rdy;
+        if (uop_opcode_is_wrk   ) uop_exit_from_busy =  ~wrk_ena    & wrk_rdy;
+        if (uop_opcode_is_ladder) uop_exit_from_busy =  ~io_mgr_ena & io_mgr_rdy;
         //
     end
 
@@ -995,17 +1029,22 @@ module modexpng_core_top
             mmm_ena_y  <= 1'b0;
             wrk_ena    <= 1'b0;
         end else begin
-            io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in  || uop_opcode_is_out) : 1'b0;
-            mmm_ena_x  <= uop_fsm_state == UOP_FSM_STATE_DECODE ?  uop_opcode_is_mmm                       : 1'b0;
-            mmm_ena_y  <= uop_fsm_state == UOP_FSM_STATE_DECODE ?  uop_opcode_is_mmm                       : 1'b0;
-            wrk_ena    <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk || uop_opcode_is_out) : 1'b0;
+            io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in    ||
+                                                                   uop_opcode_is_out   ||
+                                                                   uop_opcode_is_ladder): 1'b0;
+            mmm_ena_x  <= uop_fsm_state == UOP_FSM_STATE_DECODE ?  uop_opcode_is_mmm    : 1'b0;
+            mmm_ena_y  <= uop_fsm_state == UOP_FSM_STATE_DECODE ?  uop_opcode_is_mmm    : 1'b0;
+            wrk_ena    <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk   ||
+                                                                   uop_opcode_is_out   ): 1'b0;
         end
 
     //
     // Parameters
     //
     wire uop_aux_is_1 = uop_data_aux == UOP_AUX_1;
-        
+    
+    // TODO: Add reset to default don't care values.
+
     always @(posedge clk)
         //
         if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin
@@ -1044,9 +1083,10 @@ module modexpng_core_top
                         UOP_LADDER_00: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b00; 
                         UOP_LADDER_11: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b11;
                         UOP_LADDER_D:  {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX;
-                        UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX;
+                        UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= {io_mgr_ladder_p, io_mgr_ladder_q};
                     endcase
                     //
+                    {mmm_only_reduce_x,     mmm_only_reduce_y    } <= {2{1'b0}};
                     {mmm_force_unity_b_x,   mmm_force_unity_b_y  } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}};
                     {mmm_sel_wide_in_x,     mmm_sel_wide_in_y    } <= {2{uop_data_sel_wide_in      }};
                     {mmm_sel_narrow_in_x,   mmm_sel_narrow_in_y  } <= {2{uop_data_sel_narrow_in    }};
@@ -1055,24 +1095,42 @@ module modexpng_core_top
                     //
                 end
                 //
+                UOP_OPCODE_MODULAR_REDUCE_PROC: begin
+                    //
+                    {mmm_ladder_mode_x,     mmm_ladder_mode_y   }  <= {2{1'bX                   }};
+                    //
+                    {mmm_only_reduce_x,     mmm_only_reduce_y    } <= {2{1'b1                   }};
+                    {mmm_force_unity_b_x,   mmm_force_unity_b_y  } <= {2{1'b0                   }};
+                    {mmm_sel_wide_in_x,     mmm_sel_wide_in_y    } <= {2{BANK_DNC               }};
+                    {mmm_sel_narrow_in_x,   mmm_sel_narrow_in_y  } <= {2{BANK_DNC               }};
+                    {rdct_sel_wide_out_x,   rdct_sel_wide_out_y  } <= {2{uop_data_sel_wide_out  }}; 
+                    {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out}};
+                    //
+                end
+                //
                 UOP_OPCODE_PROPAGATE_CARRIES: begin
                     wrk_sel_narrow_in   <= uop_data_sel_narrow_in;
                     wrk_sel_narrow_out  <= uop_data_sel_narrow_out;
                 end
                 //
-                UOP_OPCODE_COPY_CRT_Y2X: begin
+                UOP_OPCODE_COPY_CRT_Y2X,
+                UOP_OPCODE_COPY_LADDERS_X2Y: begin
                     wrk_sel_wide_in    <= uop_data_sel_wide_in;
                     wrk_sel_wide_out   <= uop_data_sel_wide_out;
                     wrk_sel_narrow_in  <= uop_data_sel_narrow_in;
                     wrk_sel_narrow_out <= uop_data_sel_narrow_out;                    
                 end
                 //
+                UOP_OPCODE_MODULAR_REDUCE_INIT: begin
+                    wrk_sel_narrow_in <= uop_data_sel_narrow_in;
+                end
+                //
             endcase
             //
         end
     
     //
-    // Length
+    // Lengths
     //    
     wire [OP_ADDR_W -1:0] word_index_last_n_minus1  = word_index_last_n  - 1'b1;
     wire [OP_ADDR_W -1:0] word_index_last_pq_minus1 = word_index_last_pq - 1'b1;
@@ -1086,7 +1144,10 @@ module modexpng_core_top
             case (uop_data_opcode)
                 //
                 UOP_OPCODE_INPUT_TO_WIDE,    
-                UOP_OPCODE_INPUT_TO_NARROW: io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq; 
+                UOP_OPCODE_INPUT_TO_NARROW,
+                UOP_OPCODE_OUTPUT_FROM_NARROW:
+                    //
+                    io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq; 
                 //
                 UOP_OPCODE_MODULAR_MULTIPLY: begin
                     {mmm_word_index_last_x,        mmm_word_index_last_y       } <= {2{uop_npq_is_n ? word_index_last_n        : word_index_last_pq       }};  
@@ -1094,8 +1155,31 @@ module modexpng_core_top
                     {rdct_word_index_last_x,       rdct_word_index_last_y      } <= {2{uop_npq_is_n ? word_index_last_n        : word_index_last_pq       }};
                 end
                 //
-                UOP_OPCODE_PROPAGATE_CARRIES:
-                    wrk_word_index_last = uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+                UOP_OPCODE_PROPAGATE_CARRIES,
+                UOP_OPCODE_COPY_CRT_Y2X,
+                UOP_OPCODE_COPY_LADDERS_X2Y:
+                    wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+                //
+                UOP_OPCODE_MODULAR_REDUCE_INIT: begin
+                    wrk_word_index_last      <= word_index_last_n;
+                    wrk_word_index_last_half <= word_index_last_pq;
+                end
+                //
+                UOP_OPCODE_MODULAR_REDUCE_PROC: begin
+                    {mmm_word_index_last_x,        mmm_word_index_last_y       } <= {2{word_index_last_pq       }};  
+                    {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{word_index_last_pq_minus1}};
+                    {rdct_word_index_last_x,       rdct_word_index_last_y      } <= {2{word_index_last_pq       }};
+                end
+                //
+                UOP_OPCODE_LADDER_INIT: begin
+                    io_mgr_word_index_last <= OP_ADDR_LADDER_LAST;
+                    io_mgr_ladder_steps    <= crt_mode ? bit_index_last_pq : bit_index_last_n;
+                end
+                //
+                UOP_OPCODE_LADDER_STEP: begin
+                    io_mgr_word_index_last <= OP_ADDR_LADDER_LAST;
+                    io_mgr_ladder_steps    <= crt_mode ? bit_index_last_pq : bit_index_last_n;
+                end
                 //
             endcase
             //
@@ -1140,8 +1224,8 @@ module modexpng_core_top
             UOP_FSM_STATE_IDLE:   valid_reg <= ~next;
             UOP_FSM_STATE_DECODE: valid_reg <= uop_opcode_is_stop;
         endcase
-
-
+        
+        
 
     //
     // BEGIN DEBUG
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index c35f0b3..269ef98 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -14,6 +14,7 @@ module modexpng_general_worker
     opcode,
     
     word_index_last,
+    word_index_last_half,
     
     wrk_rd_wide_xy_ena_x,
     wrk_rd_wide_xy_bank_x,
@@ -88,6 +89,7 @@ module modexpng_general_worker
     input  [              UOP_OPCODE_W -1:0] opcode;
     
     input  [              OP_ADDR_W    -1:0] word_index_last;
+    input  [              OP_ADDR_W    -1:0] word_index_last_half;
     
     output                                   wrk_rd_wide_xy_ena_x;
     output [              BANK_ADDR_W  -1:0] wrk_rd_wide_xy_bank_x;
@@ -141,18 +143,35 @@ module modexpng_general_worker
     //
     // FSM Declaration
     //
-    localparam [3:0] WRK_FSM_STATE_IDLE          = 4'h0;
-    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1  = 4'h1;
-    localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2  = 4'h2;
-    localparam [3:0] WRK_FSM_STATE_BUSY          = 4'h3;
-    localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5;    // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
-    localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6;
-    localparam [3:0] WRK_FSM_STATE_STOP          = 4'h7;
+    localparam [4:0] WRK_FSM_STATE_IDLE             = 5'h00;
     
-    reg  [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
-    reg  [3:0] wrk_fsm_state_next_one_pass;     // single address space sweep
+    localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1     = 5'h01;
+    localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2     = 5'h02;
+    localparam [4:0] WRK_FSM_STATE_BUSY             = 5'h03;
+    localparam [4:0] WRK_FSM_STATE_LATENCY_POST1    = 5'h05;    // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
+    localparam [4:0] WRK_FSM_STATE_LATENCY_POST2    = 5'h06;
+    
+    localparam [4:0] WRK_FSM_STATE_STOP             = 5'h07;
+    
+    localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M1  = 5'h10;    // "meander" states: every stage of the pass exists as an _M1 phase followed by an _M2 phase
+    localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M2  = 5'h11;    // (sequenced by wrk_fsm_state_next_one_pass_meander, used for UOP_OPCODE_COPY_LADDERS_X2Y)
+    localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M1  = 5'h12;
+    localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M2  = 5'h13;
+    localparam [4:0] WRK_FSM_STATE_BUSY_M1          = 5'h14;
+    localparam [4:0] WRK_FSM_STATE_BUSY_M2          = 5'h15;
+    localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 5'h16;
+    localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 5'h17;
+    localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 5'h18;
+    localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 5'h19;
+    
+    reg [4:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;  // current FSM state
+    reg [4:0] wrk_fsm_state_next_one_pass;         // next state for a single sweep over the address space
+    reg [4:0] wrk_fsm_state_next_one_pass_meander; // next state for a single sweep that alternates every cycle between the "out" (M1) and "in" (M2) banks (needed by copy_ladders_x2y)
 
 
+    // TODO: Document how the narrow/wide address increments work (narrow is one long sweep, wide is two sweeps of half the length each)
+    
+
     //
     // Control Signals
     //
@@ -244,32 +263,62 @@ module modexpng_general_worker
     //
     // Delays
     //    
-    //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1;
-    //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2;
-    //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1;
-    //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2;
+    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1;
+    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2;
+    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly3;
+    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly4;
+    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1;
+    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2;
+    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly3;
+    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly4;
 
     reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1;
     reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2;
+    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly3;
+    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly4;
     reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1;
     reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2;
+    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly3;
+    reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly4;
+    
+    reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly1;
+    reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly2;
+    reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly3;
+    
+    reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly1;
+    reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly2;
+    reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly3;
+    
+    reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1;
+    reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly2;
+    reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly3;
+    
+    reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly1;
+    reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly2;
+    reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly3;
+
     
     always @(posedge clk) begin
         //
-        //{rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
-        //{rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};        
+        {rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
+        {rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};        
+        //
+        {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2};
+        {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2};                
         //
         {rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x};
         {rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y};
         //
+        {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2};
+        {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2};
+        //
+        {wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x};
+        {wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y};
+        //
+        {wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x};
+        {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y};    
+        //
     end
-
-
-    //
-    // Handy Wires
-    //
-    wire rd_narrow_xy_addr_x_next_is_last;
-    wire rd_narrow_xy_addr_y_next_is_last;
         
 
     //
@@ -310,7 +359,8 @@ module modexpng_general_worker
             case (opcode)
                 //
                 UOP_OPCODE_PROPAGATE_CARRIES,
-                UOP_OPCODE_OUTPUT_FROM_NARROW:
+                UOP_OPCODE_OUTPUT_FROM_NARROW,
+                UOP_OPCODE_MODULAR_REDUCE_INIT:
                     //
                     case (wrk_fsm_state_next_one_pass)
                         //
@@ -333,12 +383,30 @@ module modexpng_general_worker
                         WRK_FSM_STATE_LATENCY_PRE2,
                         WRK_FSM_STATE_BUSY: begin
                             //
-                            enable_narrow_xy_rd_en;
                             enable_wide_xy_rd_en;
+                            enable_narrow_xy_rd_en;
                             //
                         end
                         //
                     endcase
+                    //
+                UOP_OPCODE_COPY_LADDERS_X2Y:
+                    //
+                    case (wrk_fsm_state_next_one_pass_meander)
+                        //
+                        WRK_FSM_STATE_LATENCY_PRE1_M1,
+                        WRK_FSM_STATE_LATENCY_PRE1_M2,
+                        WRK_FSM_STATE_LATENCY_PRE2_M1,
+                        WRK_FSM_STATE_LATENCY_PRE2_M2,
+                        WRK_FSM_STATE_BUSY_M1,
+                        WRK_FSM_STATE_BUSY_M2: begin
+                            //
+                            enable_wide_xy_rd_en;
+                            enable_narrow_xy_rd_en;
+                            //
+                        end
+                        //
+                    endcase                    
                 //
             endcase
             //
@@ -389,8 +457,7 @@ module modexpng_general_worker
                         WRK_FSM_STATE_LATENCY_POST1,
                         WRK_FSM_STATE_LATENCY_POST2:
                             //
-                            enable_narrow_xy_wr_en;                            
-                            //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}};
+                            enable_narrow_xy_wr_en;
                             //
                         //
                     endcase
@@ -405,7 +472,34 @@ module modexpng_general_worker
                             //
                             enable_wide_xy_wr_en;
                             enable_narrow_xy_wr_en;
-                            //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}};
+                            //
+                        end
+                        //
+                    endcase
+                    //
+                UOP_OPCODE_MODULAR_REDUCE_INIT:
+                    //
+                    case (wrk_fsm_state)
+                        //
+                        WRK_FSM_STATE_BUSY,
+                        WRK_FSM_STATE_LATENCY_POST1,
+                        WRK_FSM_STATE_LATENCY_POST2:
+                            //
+                            enable_wide_xy_wr_en;
+                            //
+                        //
+                    endcase
+                    //
+                UOP_OPCODE_COPY_LADDERS_X2Y:
+                    //
+                    case (wrk_fsm_state)
+                        //
+                        WRK_FSM_STATE_BUSY_M2,
+                        WRK_FSM_STATE_LATENCY_POST1_M2,
+                        WRK_FSM_STATE_LATENCY_POST2_M2: begin
+                            //
+                            enable_wide_xy_wr_en;
+                            enable_narrow_xy_wr_en;
                             //
                         end
                         //
@@ -424,7 +518,7 @@ module modexpng_general_worker
     reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r;
     reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r;
     
-    wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};  
+    wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};
     wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r};
     wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
     wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};
@@ -497,6 +591,45 @@ module modexpng_general_worker
                     end
                     //
                 endcase
+                //
+            UOP_OPCODE_COPY_LADDERS_X2Y:
+                //
+                case (wrk_fsm_state)
+                    //
+                    WRK_FSM_STATE_BUSY_M2,
+                    WRK_FSM_STATE_LATENCY_POST1_M2,
+                    WRK_FSM_STATE_LATENCY_POST2_M2: begin
+                        //
+                        wr_wide_x_dout_x <= wrk_rd_wide_x_din_x_dly3;
+                        wr_wide_y_dout_x <= wrk_rd_wide_x_din_x_dly2;
+                        wr_wide_x_dout_y <= wrk_rd_wide_x_din_y_dly3;
+                        wr_wide_y_dout_y <= wrk_rd_wide_x_din_y_dly2;
+                        //
+                        wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_x_dly3;
+                        wr_narrow_y_dout_x <= wrk_rd_narrow_x_din_x_dly2;
+                        wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y_dly3;
+                        wr_narrow_y_dout_y <= wrk_rd_narrow_x_din_y_dly2;
+                        //
+                    end
+                    //
+                endcase
+                //
+            UOP_OPCODE_MODULAR_REDUCE_INIT:
+                //
+                case (wrk_fsm_state)
+                    //
+                    WRK_FSM_STATE_BUSY,
+                    WRK_FSM_STATE_LATENCY_POST1,
+                    WRK_FSM_STATE_LATENCY_POST2: begin
+                        //
+                        wr_wide_x_dout_x   <= wrk_rd_narrow_x_din_x;
+                        wr_wide_y_dout_x   <= wrk_rd_narrow_y_din_x;
+                        wr_wide_x_dout_y   <= wrk_rd_narrow_x_din_y;
+                        wr_wide_y_dout_y   <= wrk_rd_narrow_y_din_y;
+                        //
+                    end
+                    //
+                endcase
             //
         endcase
         //
@@ -506,6 +639,9 @@ module modexpng_general_worker
     //
     // Write Address Logic
     //
+    wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half; // 1 while the delayed narrow read address is <= word_index_last_half; selects BANK_WIDE_L (else BANK_WIDE_H) as the wide write bank during UOP_OPCODE_MODULAR_REDUCE_INIT
+    wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half; // same selection for the Y channel
+    
     always @(posedge clk) begin
         //
         {wr_wide_xy_bank_x,   wr_wide_xy_addr_x }  <= {BANK_DNC, OP_ADDR_DNC};
@@ -534,22 +670,64 @@ module modexpng_general_worker
                     //
                 endcase
                 //
+            UOP_OPCODE_MODULAR_REDUCE_INIT:
+                //
+                case (wrk_fsm_state)
+                    //
+                    WRK_FSM_STATE_BUSY,
+                    WRK_FSM_STATE_LATENCY_POST1,
+                    WRK_FSM_STATE_LATENCY_POST2: begin
+                        //
+                        wr_wide_xy_bank_x <= uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H;
+                        wr_wide_xy_bank_y <= uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H;
+                        //
+                        wr_wide_xy_addr_x <= rd_wide_xy_addr_x_dly2;
+                        wr_wide_xy_addr_y <= rd_wide_xy_addr_y_dly2;
+                        //
+                    end
+                    //
+                endcase
+                //
+            UOP_OPCODE_COPY_LADDERS_X2Y:
+                //
+                case (wrk_fsm_state)
+                    //
+                    WRK_FSM_STATE_BUSY_M2,
+                    WRK_FSM_STATE_LATENCY_POST1_M2,
+                    WRK_FSM_STATE_LATENCY_POST2_M2: begin
+                        //
+                        {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly4};
+                        {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly4};                        
+                        //
+                        {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly4};
+                        {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly4};
+                        //
+                    end
+                    //
+                endcase
+                //
             //
         endcase
         //
     end
- 
-    
+
+
     //
     // Read Address Logic
     //
+    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_next;
+    reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_next;
+
     reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next;
     reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next;
 
-    assign rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
-    assign rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
+    wire rd_wide_xy_addr_x_next_is_last = rd_wide_xy_addr_x_next == word_index_last_half; // wide "next" address is compared against the half-length index: it wraps back to OP_ADDR_ZERO after reaching it
+    wire rd_wide_xy_addr_y_next_is_last = rd_wide_xy_addr_y_next == word_index_last_half; // same wrap condition for the Y channel
+
+    wire rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
+    wire rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
     
-    always @(posedge clk) begin
+    always @(posedge clk) begin // TODO: Maybe split into two blocks (read address / next address)??
         //
         {rd_wide_xy_bank_x,   rd_wide_xy_addr_x  } <= {BANK_DNC, OP_ADDR_DNC}; // TODO: Add same default path for io_manager ??
         {rd_wide_xy_bank_y,   rd_wide_xy_addr_y  } <= {BANK_DNC, OP_ADDR_DNC};
@@ -572,6 +750,9 @@ module modexpng_general_worker
                         {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
                         {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
                         //
+                        rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
+                        rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
+                        //
                         rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
                         rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
                         //
@@ -586,11 +767,113 @@ module modexpng_general_worker
                         {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
                         {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
                         //
+                        rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO; 
+                        rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
+                        //
+                        rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
+                        rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
+                        //
+                    end
+                    //
+                endcase
+                //
+            UOP_OPCODE_MODULAR_REDUCE_INIT:
+                //
+                case (wrk_fsm_state_next_one_pass)
+                    //
+                    WRK_FSM_STATE_LATENCY_PRE1: begin
+                        //
+                        {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO};
+                        {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO};
+                        //
+                        {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
+                        {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
+                        //
+                        rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
+                        rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
+                        //
+                        rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
+                        rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
+                        //
+                    end
+                    //
+                    WRK_FSM_STATE_LATENCY_PRE2,
+                    WRK_FSM_STATE_BUSY: begin
+                        //
+                        {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x_next};
+                        {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y_next};                        
+                        //
+                        {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
+                        {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
+                        //
+                        rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO; 
+                        rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
+                        //
+                        rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
+                        rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
+                        //
+                    end
+                    //
+                endcase
+                //
+            UOP_OPCODE_COPY_LADDERS_X2Y:
+                //
+                case (wrk_fsm_state_next_one_pass_meander)
+                    //
+                    WRK_FSM_STATE_LATENCY_PRE1_M1: begin
+                        //
+                        {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, OP_ADDR_ZERO};
+                        {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, OP_ADDR_ZERO};
+                        //
+                        {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, OP_ADDR_ZERO};
+                        {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, OP_ADDR_ZERO};
+                        //
+                        rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
+                        rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
+                        //
+                        rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
+                        rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
+                        //
+                    end
+                    //
+                    WRK_FSM_STATE_LATENCY_PRE1_M2: begin
+                        //
+                        {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x};
+                        {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y};
+                        //
+                        {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x};
+                        {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y};
+                        //
+                    end
+                    //
+                    WRK_FSM_STATE_LATENCY_PRE2_M1,
+                    WRK_FSM_STATE_BUSY_M1: begin
+                        //
+                        {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_next};
+                        {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_next};                        
+                        //
+                        {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_next};
+                        {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_next};
+                        //
+                        rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO; 
+                        rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
+                        //
                         rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
                         rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
                         //
                     end
                     //
+                    WRK_FSM_STATE_LATENCY_PRE2_M2,
+                    WRK_FSM_STATE_BUSY_M2: begin
+                        //
+                        {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x};
+                        {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y};
+                        //
+                        {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x};
+                        {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y};
+                        //
+                    end
+                    //
                 endcase
                 //
             //
@@ -608,7 +891,9 @@ module modexpng_general_worker
         else case (opcode)
             UOP_OPCODE_PROPAGATE_CARRIES,
             UOP_OPCODE_OUTPUT_FROM_NARROW,
-            UOP_OPCODE_COPY_CRT_Y2X:        wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+            UOP_OPCODE_COPY_CRT_Y2X,
+            UOP_OPCODE_MODULAR_REDUCE_INIT: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+            UOP_OPCODE_COPY_LADDERS_X2Y:    wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
             default:                        wrk_fsm_state <= WRK_FSM_STATE_IDLE;
         endcase
     
@@ -616,17 +901,20 @@ module modexpng_general_worker
     //
     // Busy Exit Logic
     //
-    reg wrk_fsm_done_one_pass = 1'b0; 
+    reg wrk_fsm_done_one_pass         = 1'b0; 
+    reg wrk_fsm_done_one_pass_meander = 1'b0;
     
     always @(posedge clk) begin
         //
-        wrk_fsm_done_one_pass <= 1'b0;
+        wrk_fsm_done_one_pass         <= 1'b0;
+        wrk_fsm_done_one_pass_meander <= 1'b0;
         //
         case (opcode)
             //
             UOP_OPCODE_PROPAGATE_CARRIES,
             UOP_OPCODE_OUTPUT_FROM_NARROW,
-            UOP_OPCODE_COPY_CRT_Y2X: begin
+            UOP_OPCODE_COPY_CRT_Y2X,
+            UOP_OPCODE_MODULAR_REDUCE_INIT: begin
                 //
                 if (wrk_fsm_state == WRK_FSM_STATE_BUSY) begin
                     //
@@ -637,6 +925,20 @@ module modexpng_general_worker
                 //
             end
             //
+            UOP_OPCODE_COPY_LADDERS_X2Y: begin
+                // meander pass: raise the done flag in BUSY_M2 once the next narrow read address equals word_index_last
+                if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M2) begin
+                    //
+                    if (rd_narrow_xy_addr_x_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1; // TODO: Check whether both the X and Y comparisons are necessary.
+                    if (rd_narrow_xy_addr_y_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1;
+                    //
+                end
+                // hold the flag through the BUSY_M1 phase; the default assignment at the top of this block would otherwise clear it
+                if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M1)
+                    wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander;
+                //
+            end
+            //
         endcase
         //
     end
@@ -654,7 +956,31 @@ module modexpng_general_worker
             WRK_FSM_STATE_BUSY:          wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ;
             WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_LATENCY_POST2 ;
             WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_STOP          ;
-            WRK_FSM_STATE_STOP:          wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_IDLE          ; 
+            WRK_FSM_STATE_STOP:          wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_IDLE          ;
+            default:                     wrk_fsm_state_next_one_pass =                         WRK_FSM_STATE_IDLE          ;
+        endcase
+        //
+    end
+    
+    always @* begin
+        // Next-state function for "meander" one-pass opcodes: every latency/busy stage is an _M1 state followed by an _M2 state.
+        case (wrk_fsm_state)
+            WRK_FSM_STATE_IDLE:             wrk_fsm_state_next_one_pass_meander = ena                           ? WRK_FSM_STATE_LATENCY_PRE1_M1  : WRK_FSM_STATE_IDLE    ;           
+            // Two pre-latency stages, then the BUSY_M1 -> BUSY_M2 loop, left only from BUSY_M2 once the done flag is set, then two post-latency stages.
+            WRK_FSM_STATE_LATENCY_PRE1_M1:  wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_LATENCY_PRE1_M2  ;
+            WRK_FSM_STATE_LATENCY_PRE1_M2:  wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_LATENCY_PRE2_M1  ;
+            WRK_FSM_STATE_LATENCY_PRE2_M1:  wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_LATENCY_PRE2_M2  ;
+            WRK_FSM_STATE_LATENCY_PRE2_M2:  wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_BUSY_M1          ;
+            WRK_FSM_STATE_BUSY_M1:          wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_BUSY_M2          ;
+            WRK_FSM_STATE_BUSY_M2:          wrk_fsm_state_next_one_pass_meander = wrk_fsm_done_one_pass_meander ? WRK_FSM_STATE_LATENCY_POST1_M1 : WRK_FSM_STATE_BUSY_M1 ;
+            WRK_FSM_STATE_LATENCY_POST1_M1: wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_LATENCY_POST1_M2 ;
+            WRK_FSM_STATE_LATENCY_POST1_M2: wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_LATENCY_POST2_M1 ;
+            WRK_FSM_STATE_LATENCY_POST2_M1: wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_LATENCY_POST2_M2 ;
+            WRK_FSM_STATE_LATENCY_POST2_M2: wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_STOP             ;
+            // STOP always returns to IDLE.
+            WRK_FSM_STATE_STOP:             wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_IDLE             ;
+            // Any state that is not part of the meander sequence falls back to IDLE.
+            default:                        wrk_fsm_state_next_one_pass_meander =                                 WRK_FSM_STATE_IDLE             ;
         endcase
         //
     end
diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v
index dfbd676..59f4709 100644
--- a/rtl/modexpng_io_manager.v
+++ b/rtl/modexpng_io_manager.v
@@ -53,7 +53,13 @@ module modexpng_io_manager
     io_out_dout,
     
     wrk_narrow_x_din_x_trunc,
-    wrk_narrow_x_din_y_trunc
+    wrk_narrow_x_din_y_trunc,
+    
+    ladder_steps,
+    ladder_d,
+    ladder_p,
+    ladder_q,
+    ladder_done
 );
 
     //
@@ -120,6 +126,12 @@ module modexpng_io_manager
     
     output [              WORD_W       -1:0] wrk_narrow_x_din_x_trunc;
     output [              WORD_W       -1:0] wrk_narrow_x_din_y_trunc;
+    
+    input  [              BIT_INDEX_W  -1:0] ladder_steps;
+    output                                   ladder_d;
+    output                                   ladder_p;
+    output                                   ladder_q;
+    output                                   ladder_done;
 
 
     //
@@ -254,6 +266,10 @@ module modexpng_io_manager
 
     wire opcode_is_output = opcode == UOP_OPCODE_OUTPUT_FROM_NARROW;
 
+    wire opcode_is_ladder_init = opcode == UOP_OPCODE_LADDER_INIT;
+    wire opcode_is_ladder_step = opcode == UOP_OPCODE_LADDER_STEP;
+    wire opcode_is_ladder = opcode_is_ladder_init || opcode_is_ladder_step;  
+
     wire opcode_is_input_wide   = opcode == UOP_OPCODE_INPUT_TO_WIDE;
     wire opcode_is_input_narrow = opcode == UOP_OPCODE_INPUT_TO_NARROW; 
 
@@ -269,8 +285,90 @@ module modexpng_io_manager
 
     wire in_1_addr_op_next_is_last;
     wire in_2_addr_op_next_is_last;
+    wire in_2_addr_op_next_is_one;
     wire dummy_addr_op_next_is_last;
 
+
+    //
+    // Ladder Init/Step Logic
+    //
+    reg ladder_d_r;
+    reg ladder_p_r;
+    reg ladder_q_r;
+    reg ladder_done_r = 1'b0;
+    
+    assign ladder_d    = ladder_d_r;
+    assign ladder_p    = ladder_p_r;
+    assign ladder_q    = ladder_q_r;
+    assign ladder_done = ladder_done_r;
+    
+    reg  [BIT_INDEX_W -1:0] ladder_index;
+    reg  [BIT_INDEX_W -1:0] ladder_index_next;
+    wire [  OP_ADDR_W -1:0] ladder_index_msb = ladder_index[BIT_INDEX_W-1-: OP_ADDR_W];  // upper OP_ADDR_W bits, used as the operand word address
+    wire [ WORD_MUX_W -1:0] ladder_index_lsb = ladder_index[ WORD_MUX_W-1-:WORD_MUX_W];  // lower WORD_MUX_W bits, used as the bit select of the ladder mux
+    wire                    ladder_index_is_zero = ladder_index == BIT_INDEX_ZERO; 
+    
+    always @(posedge clk)
+        // The index registers only change on the clock where the I/O FSM is about to enter LATENCY_PRE1.
+        if (io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE1) begin
+            // LADDER_INIT: load the index from ladder_steps, pre-compute the following index and clear the done flag.
+            if (opcode_is_ladder_init) begin
+                ladder_index      <= ladder_steps;
+                ladder_index_next <= ladder_steps - 1'b1;
+                ladder_done_r     <= 1'b0;
+            end
+            // LADDER_STEP: move to the pre-computed index; done is raised when the index held before this update was already zero.
+            if (opcode_is_ladder_step) begin
+                ladder_index      <= ladder_index_next;
+                ladder_index_next <= ladder_index_next - 1'b1;
+                if (ladder_index_is_zero) ladder_done_r <= 1'b1;
+            end
+            //
+        end
+
+
+    //
+    // Ladder Mux
+    //
+    reg ladder_dpq_mux;
+    
+    always @(io_in_2_din, ladder_index_lsb)
+        // Combinational 16:1 bit select from io_in_2_din. NOTE(review): the 4-bit labels presumably assume WORD_MUX_W == 4 - confirm.
+        case(ladder_index_lsb)
+            4'b0000: ladder_dpq_mux = io_in_2_din[ 0];
+            4'b0001: ladder_dpq_mux = io_in_2_din[ 1];
+            4'b0010: ladder_dpq_mux = io_in_2_din[ 2];
+            4'b0011: ladder_dpq_mux = io_in_2_din[ 3];
+            4'b0100: ladder_dpq_mux = io_in_2_din[ 4];
+            4'b0101: ladder_dpq_mux = io_in_2_din[ 5];
+            4'b0110: ladder_dpq_mux = io_in_2_din[ 6];
+            4'b0111: ladder_dpq_mux = io_in_2_din[ 7];
+            4'b1000: ladder_dpq_mux = io_in_2_din[ 8];
+            4'b1001: ladder_dpq_mux = io_in_2_din[ 9];
+            4'b1010: ladder_dpq_mux = io_in_2_din[10];
+            4'b1011: ladder_dpq_mux = io_in_2_din[11];
+            4'b1100: ladder_dpq_mux = io_in_2_din[12];
+            4'b1101: ladder_dpq_mux = io_in_2_din[13];
+            4'b1110: ladder_dpq_mux = io_in_2_din[14];
+            4'b1111: ladder_dpq_mux = io_in_2_din[15];
+        endcase
+
+    always @(posedge clk)
+        // During ladder opcodes the same mux output is captured into a different register depending on the I/O FSM state.
+        case (io_fsm_state)
+            // BUSY captures the D bit (presumably the D bank word is on io_in_2_din at this point - confirm against the address logic).
+            IO_FSM_STATE_BUSY:
+                if (opcode_is_ladder) ladder_d_r <= ladder_dpq_mux; 
+            // LATENCY_POST1 captures the P bit.
+            IO_FSM_STATE_LATENCY_POST1:
+                if (opcode_is_ladder) ladder_p_r <= ladder_dpq_mux;
+            // LATENCY_POST2 captures the Q bit.
+            IO_FSM_STATE_LATENCY_POST2:
+                if (opcode_is_ladder) ladder_q_r <= ladder_dpq_mux;
+            //
+        endcase
+    
+    
     //
     // Source Enable Logic
     //
@@ -284,8 +382,8 @@ module modexpng_io_manager
             IO_FSM_STATE_LATENCY_PRE1,
             IO_FSM_STATE_LATENCY_PRE2,
             IO_FSM_STATE_BUSY: begin
-                in_1_en <= opcode_is_input && sel_aux_is_1;
-                in_2_en <= opcode_is_input && sel_aux_is_2;
+                in_1_en <=  opcode_is_input && sel_aux_is_1;
+                in_2_en <= (opcode_is_input && sel_aux_is_2) || opcode_is_ladder;
             end
             //
             IO_FSM_STATE_EXTRA: begin
@@ -450,35 +548,59 @@ module modexpng_io_manager
     wire [OP_ADDR_W -1:0] dummy_addr_op_next = dummy_addr_next;  
     
     assign in_1_addr_op_next_is_last  = in_1_addr_op_next  == word_index_last;
-    assign in_2_addr_op_next_is_last  = in_2_addr_op_next  == word_index_last;  
+    assign in_2_addr_op_next_is_last  = in_2_addr_op_next  == word_index_last;
+    assign in_2_addr_op_next_is_one   = in_2_addr_op_next  == OP_ADDR_ONE;
     assign dummy_addr_op_next_is_last = dummy_addr_op_next == word_index_last; 
     
-    always @(posedge clk)
+    always @(posedge clk) begin
+        //
+        {in_1_addr_bank, in_1_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
+        {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
+        {                dummy_addr_op} <= {          OP_ADDR_DNC};
+        //
+        in_1_addr_next  <= {BANK_DNC, OP_ADDR_DNC};
+        in_2_addr_next  <= {BANK_DNC, OP_ADDR_DNC};
+        dummy_addr_next <= {          OP_ADDR_DNC};
         //
         case (io_fsm_state_next)
             //
             IO_FSM_STATE_LATENCY_PRE1: begin
                 //
-                {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO};
-                {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO};
-                {                dummy_addr_op} <= {        OP_ADDR_ZERO};
+                                       {in_1_addr_bank, in_1_addr_op } <= {sel_in,   OP_ADDR_ZERO};
+                if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= {sel_in,   OP_ADDR_ZERO};
+                else                   {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
+                                       {                dummy_addr_op} <= {          OP_ADDR_ZERO};
+                //
+                in_1_addr_next  <= {sel_in, OP_ADDR_ONE};
+                in_2_addr_next  <= {sel_in, OP_ADDR_ONE};
+                dummy_addr_next <= {        OP_ADDR_ONE};
+                //
+            end
+            //
+            IO_FSM_STATE_LATENCY_PRE2: begin
+                //
+                                       {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
+                if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
+                else                   {in_2_addr_bank, in_2_addr_op } <= {BANK_IN_2_D, ladder_index_msb};
+                                       {                dummy_addr_op} <= dummy_addr_next;
                 //
-                in_1_addr_next  <= {sel_in,  OP_ADDR_ONE};
-                in_2_addr_next  <= {sel_in,  OP_ADDR_ONE};
-                dummy_addr_next <= {         OP_ADDR_ONE};
+                                       in_1_addr_next  <= in_1_addr_next  + 1'b1;
+                if (!opcode_is_ladder) in_2_addr_next  <= in_2_addr_next  + 1'b1;
+                else                   in_2_addr_next  <= {BANK_IN_2_P, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]};
+                                       dummy_addr_next <= dummy_addr_next + 1'b1;
                 //
             end
             //
-            IO_FSM_STATE_LATENCY_PRE2, 
             IO_FSM_STATE_BUSY: begin
                 //
                 {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
                 {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
                 {                dummy_addr_op} <= dummy_addr_next;
                 //
-                in_1_addr_next  <= in_1_addr_next  + 1'b1;
-                in_2_addr_next  <= in_2_addr_next  + 1'b1;
-                dummy_addr_next <= dummy_addr_next + 1'b1;
+                                       in_1_addr_next  <= in_1_addr_next  + 1'b1;
+                if (!opcode_is_ladder) in_2_addr_next  <= in_2_addr_next  + 1'b1;
+                else                   in_2_addr_next  <= {BANK_IN_2_Q, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]};
+                                       dummy_addr_next <= dummy_addr_next + 1'b1;
                 //
             end
             //
@@ -499,7 +621,8 @@ module modexpng_io_manager
                 end
             //
         endcase
-    
+        //
+    end
 
 
     //
@@ -525,7 +648,7 @@ module modexpng_io_manager
             if (opcode_is_input) begin
                 if (sel_aux_is_1 && in_1_addr_op_next_is_last) io_fsm_done <= 1'b1;
                 if (sel_aux_is_2 && in_2_addr_op_next_is_last) io_fsm_done <= 1'b1;
-            end else if (opcode_is_output) begin
+            end else if (opcode_is_output || opcode_is_ladder) begin
                 if (dummy_addr_op_next_is_last)                io_fsm_done <= 1'b1;
             end
             //
@@ -571,4 +694,17 @@ module modexpng_io_manager
         endcase
 
 
+    //
+    // BEGIN DEBUG (simulation only, hidden from synthesis by the pragmas below)
+    // synthesis translate_off
+    always @(posedge clk)
+        // Print the ladder bit index and the captured D/P/Q bits whenever a ladder opcode reaches the STOP state.
+        if ((io_fsm_state == IO_FSM_STATE_STOP) && opcode_is_ladder)
+            $display("[%4d] / %d / %d / %d", ladder_index, ladder_d_r, ladder_p_r, ladder_q_r);
+
+    // synthesis translate_on
+    // END DEBUG
+    //
+
+
 endmodule
diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh
index 2e591e7..f68c559 100644
--- a/rtl/modexpng_microcode.vh
+++ b/rtl/modexpng_microcode.vh
@@ -1,8 +1,8 @@
-localparam UOP_OPCODE_W = 4;
+localparam UOP_OPCODE_W = 5;
 localparam UOP_CRT_W    = 1;
 localparam UOP_NPQ_W    = 1;
 localparam UOP_AUX_W    = 1;
-localparam UOP_LADDER_W = 1;
+localparam UOP_LADDER_W = 2;
 localparam UOP_SEL_W    = 4 * BANK_ADDR_W;
 
 localparam UOP_ADDR_W = 6;  // 64 instructions
@@ -11,17 +11,17 @@ localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_USING_CRT = 6'd0;
 localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_WITHOUT_CRT = 6'd31;
 
 
-//                 4              1           1           1           2              4*3=12
+//                 5              1           1           1           2              4*3=12
 localparam UOP_W = UOP_OPCODE_W + UOP_CRT_W + UOP_NPQ_W + UOP_AUX_W + UOP_LADDER_W + UOP_SEL_W;
-//                 [20:17]        [16]        [15]        [14]        [13:12]        [11:9][8:6][5:3][2:0]
+//                 [21:17]        [16]        [15]        [14]        [13:12]        [11:9][8:6][5:3][2:0]
 
 // OPCODE
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP               = 4'd0;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 5'd0;
 /* all fields are don't care
  */
 
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE      = 4'd1;
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW    = 4'd2;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE   = 5'd1;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 5'd2;
 /* CRT tells into which of the dual MMM to write
  * NPQ specifies the width of the operand
  * AUX specifies from which INPUT to read
@@ -31,7 +31,7 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW    = 4'd2;
  * 
 */
 
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 5'd3;
 /* CRT tells from which of the dual MMM to read
  * NPQ specifies the width of the operand
  * AUX is don't care
@@ -39,27 +39,45 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3;
  * source and destination WIDE are don't care
  */
 
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X  = 4'd4;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X     = 5'd4;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_LADDERS_X2Y = 5'd5;
 /* CRT is don't care
  * NPQ specifies the width of the operand
  * AUX is don't care
  * LADDER is don't care
  */
 
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY   = 4'd8;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 5'd8;
 /* CRT is don't care
  * NPQ specifies the width of the operand
  * AUX = AUX_2 forces B input to 1 (AUX_1 reads from source NARROW as usual)
  * LADDER specifies Montgomery ladder mode 
  */
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_INIT = 5'd10;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_PROC = 5'd11;
+/* CRT
+ * NPQ
+ * AUX
+ * LADDER is don't care
+ */
+
 
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 4'd11;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 5'd12;
 /* CRT is don't care
  * NPQ specifies the width of the operand
  * AUX is don't care
  * LADDER is don't care
  * source and destination WIDE are don't care
- */ 
+ */
+
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_INIT = 5'd16;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_STEP = 5'd17;
+/* CRT is don't care
+ * NPQ is don't care
+ * AUX is don't care
+ * LADDER is don't care
+ * WIDE and NARROW are don't care
+ */
 
 // CRT
 localparam [UOP_CRT_W -1:0] UOP_CRT_X   = 1'b0;
diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v
index 14f1b47..6e52a97 100644
--- a/rtl/modexpng_mmm_dual.v
+++ b/rtl/modexpng_mmm_dual.v
@@ -8,6 +8,7 @@ module modexpng_mmm_dual
     word_index_last,
     word_index_last_minus1,
     force_unity_b,
+    only_reduce,
     
     sel_wide_in, sel_narrow_in,
     
@@ -72,6 +73,7 @@ module modexpng_mmm_dual
     input [7:0] word_index_last;
     input [7:0] word_index_last_minus1;
     input       force_unity_b;
+    input only_reduce;
         
     input [BANK_ADDR_W-1:0] sel_wide_in;
     input [BANK_ADDR_W-1:0] sel_narrow_in;
@@ -120,7 +122,8 @@ module modexpng_mmm_dual
     //
     reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
     reg [FSM_STATE_WIDTH-1:0] fsm_state_next;
-    
+
+    wire [FSM_STATE_WIDTH-1:0] fsm_state_after_idle;    
     wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
     wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
     wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;
@@ -905,16 +908,17 @@ module modexpng_mmm_dual
     //
     // FSM Transition Logic
     //
+    assign fsm_state_after_idle = !only_reduce ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_INIT;  // only_reduce leaves IDLE straight into the triangle phase instead of the square phase
     assign fsm_state_after_mult_square    = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF   : FSM_STATE_MULT_SQUARE_COL_N_INIT;
     assign fsm_state_after_mult_triangle  = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
-    assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
+    assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; 
 
     always @* begin
         //
         fsm_state_next = FSM_STATE_IDLE;
         //
         case (fsm_state)
-            FSM_STATE_IDLE:                   fsm_state_next = ena                   ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE;
+            FSM_STATE_IDLE:                   fsm_state_next = ena                   ? fsm_state_after_idle /*FSM_STATE_MULT_SQUARE_COL_0_INIT*/ : FSM_STATE_IDLE;
                         
             FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next =                         FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
             FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next =                         FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index 6e6c3ca..c7566ad 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -23,7 +23,7 @@ localparam MAC_W      = 47;
 localparam BUS_DATA_W = 32;
 localparam BUS_OP_ADDR_W = cryptech_clog2(MAX_OP_W / BUS_DATA_W);
 
-
+localparam BIT_INDEX_W = cryptech_clog2(MAX_OP_W);
 
 localparam BANK_ADDR_W = 3;
 localparam OP_ADDR_W   = cryptech_clog2(MAX_OP_W / WORD_W); 
@@ -33,6 +33,8 @@ localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS);
 
 localparam CARRY_W = WORD_EXT_W - WORD_W;
 
+localparam WORD_MUX_W = cryptech_clog2(WORD_W);
+
 localparam [CARRY_W-1:0] CARRY_ZERO = {CARRY_W{1'b0}};
 
 localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0;
@@ -80,6 +82,10 @@ localparam [BANK_ADDR_W-1:0] BANK_OUT_YM        = 3'd2;
 
 localparam [BANK_ADDR_W-1:0] BANK_DNC = {BANK_ADDR_W{1'bX}};
 
+localparam [OP_ADDR_W-1:0] OP_ADDR_LADDER_LAST = 3; // 0..3, i.e. <dummy>, D, P, Q
+
+localparam [BIT_INDEX_W-1:0] BIT_INDEX_ZERO = {BIT_INDEX_W{1'b0}};
+
 localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0;
 localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q     = 1;
 
diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v
index 016b1b0..04f0c83 100644
--- a/rtl/modexpng_uop_rom.v
+++ b/rtl/modexpng_uop_rom.v
@@ -10,53 +10,67 @@ module modexpng_uop_rom
 
     input  wire 	              clk;
     input  wire [UOP_ADDR_W -1:0] addr;
-    output reg  [UOP_W      -1:0] data;    
+    output reg  [UOP_W      -1:0] data;
 
     always @(posedge clk)
         //
         case (addr)
-            6'd00:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N,        BANK_WIDE_N,    BANK_DNC         }; //
-            6'd01:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N,        BANK_WIDE_N,    BANK_DNC         }; //
-            6'd02:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_X,        BANK_WIDE_A,    BANK_DNC         }; //
-            6'd03:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_Y,        BANK_WIDE_A,    BANK_DNC         }; //
-            6'd04:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_WIDE_E,    BANK_DNC         }; //
-            6'd05:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_WIDE_E,    BANK_DNC         }; //
+            6'd00:   data <= {UOP_OPCODE_INPUT_TO_WIDE,       UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N,        BANK_WIDE_N,    BANK_DNC         }; //
+            6'd01:   data <= {UOP_OPCODE_INPUT_TO_WIDE,       UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N,        BANK_WIDE_N,    BANK_DNC         }; //
+            6'd02:   data <= {UOP_OPCODE_INPUT_TO_WIDE,       UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_X,        BANK_WIDE_A,    BANK_DNC         }; //
+            6'd03:   data <= {UOP_OPCODE_INPUT_TO_WIDE,       UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_Y,        BANK_WIDE_A,    BANK_DNC         }; //
+            6'd04:   data <= {UOP_OPCODE_INPUT_TO_WIDE,       UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_WIDE_E,    BANK_DNC         }; //
+            6'd05:   data <= {UOP_OPCODE_INPUT_TO_WIDE,       UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_WIDE_E,    BANK_DNC         }; //
                                                                                                                                                                                          //
-            6'd06:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_COEFF,  BANK_DNC,       BANK_NARROW_COEFF}; //
-            6'd07:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_COEFF,  BANK_DNC,       BANK_NARROW_COEFF}; //
-            6'd08:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_FACTOR, BANK_DNC,       BANK_NARROW_A    }; //
-            6'd09:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_FACTOR, BANK_DNC,       BANK_NARROW_A    }; //
-            6'd10:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_DNC,       BANK_NARROW_E    }; //
-            6'd11:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_DNC,       BANK_NARROW_E    }; //
+            6'd06:   data <= {UOP_OPCODE_INPUT_TO_NARROW,     UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_COEFF,  BANK_DNC,       BANK_NARROW_COEFF}; //
+            6'd07:   data <= {UOP_OPCODE_INPUT_TO_NARROW,     UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_COEFF,  BANK_DNC,       BANK_NARROW_COEFF}; //
+            6'd08:   data <= {UOP_OPCODE_INPUT_TO_NARROW,     UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_FACTOR, BANK_DNC,       BANK_NARROW_A    }; //
+            6'd09:   data <= {UOP_OPCODE_INPUT_TO_NARROW,     UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_N_FACTOR, BANK_DNC,       BANK_NARROW_A    }; //
+            6'd10:   data <= {UOP_OPCODE_INPUT_TO_NARROW,     UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_DNC,       BANK_NARROW_E    }; //
+            6'd11:   data <= {UOP_OPCODE_INPUT_TO_NARROW,     UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_1_M,        BANK_DNC,       BANK_NARROW_E    }; //
                                                                                                                                                                                          //
-            6'd12:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,   UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_A, BANK_NARROW_A,      BANK_WIDE_B,    BANK_NARROW_B    }; //
-            6'd13:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,   UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_B, BANK_NARROW_B,      BANK_WIDE_C,    BANK_NARROW_C    }; //
-            6'd14:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,   UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_2,   UOP_LADDER_11,  BANK_WIDE_C, BANK_DNC,           BANK_WIDE_D,    BANK_NARROW_D    }; //
+            6'd12:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,    UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_A, BANK_NARROW_A,      BANK_WIDE_B,    BANK_NARROW_B    }; //
+            6'd13:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,    UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_B, BANK_NARROW_B,      BANK_WIDE_C,    BANK_NARROW_C    }; //
+            6'd14:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,    UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_2,   UOP_LADDER_11,  BANK_WIDE_C, BANK_DNC,           BANK_WIDE_D,    BANK_NARROW_D    }; //
                                                                                                                                                                                          //
-            6'd15:   data <= {UOP_OPCODE_PROPAGATE_CARRIES,  UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_D,      BANK_DNC,       BANK_NARROW_D    }; //
+            6'd15:   data <= {UOP_OPCODE_PROPAGATE_CARRIES,   UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_D,      BANK_DNC,       BANK_NARROW_D    }; //
                                                                                                                                                                                          //
-            6'd16:   data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_D,      BANK_DNC,       BANK_OUT_XM      }; //
-            6'd17:   data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_D,      BANK_DNC,       BANK_OUT_YM      }; //
+            6'd16:   data <= {UOP_OPCODE_OUTPUT_FROM_NARROW,  UOP_CRT_X,   UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_D,      BANK_DNC,       BANK_OUT_XM      }; //
+            6'd17:   data <= {UOP_OPCODE_OUTPUT_FROM_NARROW,  UOP_CRT_Y,   UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_D,      BANK_DNC,       BANK_OUT_YM      }; //
                                                                                                                                                                                          //            
-            6'd18:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,   UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_E, BANK_NARROW_B,      BANK_WIDE_C,    BANK_NARROW_C    }; //
+            6'd18:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,    UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_E, BANK_NARROW_B,      BANK_WIDE_C,    BANK_NARROW_C    }; //
                                                                                                                                                                                          //
-            6'd19:   data <= {UOP_OPCODE_PROPAGATE_CARRIES,  UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_C,      BANK_DNC,       BANK_NARROW_C    }; //
+            6'd19:   data <= {UOP_OPCODE_PROPAGATE_CARRIES,   UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_C,      BANK_DNC,       BANK_NARROW_C    }; //
                                                                                                                                                                                          //            
-            6'd20:   data <= {UOP_OPCODE_COPY_CRT_Y2X,       UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C,      BANK_WIDE_C,    BANK_NARROW_C    }; //
+            6'd20:   data <= {UOP_OPCODE_COPY_CRT_Y2X,        UOP_CRT_DNC, UOP_NPQ_N,   UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C,      BANK_WIDE_C,    BANK_NARROW_C    }; //
                                                                                                                                                                                          //
-            6'd21:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_P,        BANK_WIDE_N,    BANK_DNC         }; //
-            6'd22:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_Y,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q,        BANK_WIDE_N,    BANK_DNC         }; //
-            6'd23:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_P_FACTOR, BANK_WIDE_A,    BANK_DNC         }; //
-            6'd24:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_Y,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q_FACTOR, BANK_WIDE_A,    BANK_DNC         }; //
-            6'd25:   data <= {UOP_OPCODE_INPUT_TO_WIDE,      UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_QINV,     BANK_WIDE_E,    BANK_DNC         }; //
+            6'd21:   data <= {UOP_OPCODE_INPUT_TO_WIDE,       UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_P,        BANK_WIDE_N,    BANK_DNC         }; //
+            6'd22:   data <= {UOP_OPCODE_INPUT_TO_WIDE,       UOP_CRT_Y,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q,        BANK_WIDE_N,    BANK_DNC         }; //
+            6'd23:   data <= {UOP_OPCODE_INPUT_TO_WIDE,       UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_P_FACTOR, BANK_WIDE_A,    BANK_DNC         }; //
+            6'd24:   data <= {UOP_OPCODE_INPUT_TO_WIDE,       UOP_CRT_Y,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q_FACTOR, BANK_WIDE_A,    BANK_DNC         }; //
+            6'd25:   data <= {UOP_OPCODE_INPUT_TO_WIDE,       UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_QINV,     BANK_WIDE_E,    BANK_DNC         }; //
                                                                                                                                                                                          //
-            6'd26:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_P_COEFF,  BANK_DNC,       BANK_NARROW_COEFF}; //
-            6'd27:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_Y,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q_COEFF,  BANK_DNC,       BANK_NARROW_COEFF}; //
-            6'd28:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_P_FACTOR, BANK_DNC,       BANK_NARROW_A    }; //
-            6'd29:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_Y,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q_FACTOR, BANK_DNC,       BANK_NARROW_A    }; //
-            6'd30:   data <= {UOP_OPCODE_INPUT_TO_NARROW,    UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_QINV,     BANK_DNC,       BANK_NARROW_E    }; //
-                                                                                                                                                                                         //
-            default: data <= {UOP_OPCODE_STOP,               UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL                                                   }; //                                            
+            6'd26:   data <= {UOP_OPCODE_INPUT_TO_NARROW,     UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_P_COEFF,  BANK_DNC,       BANK_NARROW_COEFF}; //
+            6'd27:   data <= {UOP_OPCODE_INPUT_TO_NARROW,     UOP_CRT_Y,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q_COEFF,  BANK_DNC,       BANK_NARROW_COEFF}; //
+            6'd28:   data <= {UOP_OPCODE_INPUT_TO_NARROW,     UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_P_FACTOR, BANK_DNC,       BANK_NARROW_A    }; //
+            6'd29:   data <= {UOP_OPCODE_INPUT_TO_NARROW,     UOP_CRT_Y,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_Q_FACTOR, BANK_DNC,       BANK_NARROW_A    }; //
+            6'd30:   data <= {UOP_OPCODE_INPUT_TO_NARROW,     UOP_CRT_X,   UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_DNC, BANK_DNC,    BANK_IN_2_QINV,     BANK_DNC,       BANK_NARROW_E    }; //
+                                                                                                                                                                                          //            
+            6'd31:   data <= {UOP_OPCODE_MODULAR_REDUCE_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_NARROW_C,      BANK_DNC,       BANK_DNC         }; //
+                                                                                                                                                                                          //                        
+            6'd32:   data <= {UOP_OPCODE_MODULAR_REDUCE_PROC, UOP_CRT_DNC, UOP_NPQ_PQ,  UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC,    BANK_DNC,           BANK_WIDE_D,    BANK_NARROW_D    }; //
+                                                                                                                                                                                          //            
+            6'd33:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,    UOP_CRT_DNC, UOP_NPQ_PQ,  UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_D, BANK_NARROW_A,      BANK_WIDE_C,    BANK_NARROW_C    }; //
+            6'd34:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,    UOP_CRT_DNC, UOP_NPQ_PQ,  UOP_AUX_1,   UOP_LADDER_11,  BANK_WIDE_C, BANK_NARROW_A,      BANK_WIDE_D,    BANK_NARROW_D    }; //
+            6'd35:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,    UOP_CRT_DNC, UOP_NPQ_PQ,  UOP_AUX_2,   UOP_LADDER_11,  BANK_WIDE_A, BANK_DNC,           BANK_WIDE_C,    BANK_NARROW_C    }; //
+                                                                                                                                                                                          //
+            6'd36:   data <= {UOP_OPCODE_COPY_LADDERS_X2Y,    UOP_CRT_DNC, UOP_NPQ_PQ,  UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D,      BANK_WIDE_C,    BANK_NARROW_C    }; //
+                                                                                                                                                                                          //
+            6'd37:   data <= {UOP_OPCODE_LADDER_INIT,         UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL                                                   }; //
+            6'd38:   data <= {UOP_OPCODE_MODULAR_MULTIPLY,    UOP_CRT_DNC, UOP_NPQ_PQ,  UOP_AUX_1,   UOP_LADDER_PQ,  BANK_WIDE_C, BANK_NARROW_C,      BANK_WIDE_C,    BANK_NARROW_C    }; //
+            6'd39:   data <= {UOP_OPCODE_LADDER_STEP,         UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL                                                   }; //
+                                                                                                                                                                                          //
+            default: data <= {UOP_OPCODE_STOP,                UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL                                                   }; //                                            
         endcase
 
 endmodule



More information about the Commits mailing list