[Cryptech-Commits] [core/novena_eim] 01/02: Initial commit

git at cryptech.is git at cryptech.is
Tue Sep 30 17:24:26 UTC 2014


This is an automated email from the git hooks/post-receive script.

paul at psgd.org pushed a commit to branch master
in repository core/novena_eim.

commit 70be206c9853f867837bac1b99e19774a7dbf015
Author: Paul Selkirk <paul at psgd.org>
Date:   Tue Sep 30 11:01:55 2014 -0400

    Initial commit
---
 README.md                              |   14 +
 src/rtl/common/reg_ro.v                |   24 +
 src/rtl/common/reg_ro_4burst.v         |   79 +++
 src/rtl/common/reg_wo.v                |   32 +
 src/rtl/common/reg_wo_4burst.v         |  106 ++++
 src/rtl/common/rising_edge.v           |   14 +
 src/rtl/common/sync_reset.v            |   61 ++
 src/rtl/ip/bclk_dll/bclk_dll.v         |  148 +++++
 src/rtl/ip/dcm_delay/dcm_delay.v       |  168 ++++++
 src/rtl/ip/ddr3_clkgen/ddr3_clkgen.v   |  158 +++++
 src/rtl/ip/eimram/eimram.v             |  186 ++++++
 src/rtl/ip/nandadr_fifo/nandadr_fifo.v |  491 +++++++++++++++
 src/rtl/ip/uk_fifo/uk_fifo.v           |  491 +++++++++++++++
 src/rtl/nand_log_tb.v                  |  225 +++++++
 src/rtl/novena.ucf                     |  602 +++++++++++++++++++
 src/rtl/novena_fpga.v                  |  659 ++++++++++++++++++++
 src/rtl/romulator_ddr3_tb.v            |  802 +++++++++++++++++++++++++
 src/rtl/romulator_tb.v                 |  370 ++++++++++++
 src/rtl/spi_eeprom.v                   |  255 ++++++++
 src/rtl/spi_eeprom_tb.v                |  317 ++++++++++
 src/sw/Makefile                        |   15 +
 src/sw/eim.c                           |  285 +++++++++
 src/sw/eim.h                           |   46 ++
 src/sw/gpio.c                          |  163 +++++
 src/sw/gpio.h                          |   21 +
 src/sw/novena-fpga.c                   | 1032 ++++++++++++++++++++++++++++++++
 26 files changed, 6764 insertions(+)

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b52e01f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+novena_eim
+==========
+
+The coretest system for the Novena PVT1, over EIM.
+
+## Introduction ##
+
+src/rtl is a fork of https://github.com/bunnie/novena-spi-romulator.git
+
+src/sw is a fork of https://github.com/bunnie/novena-fpga-drivers.git
+
+## Status ##
+***(2014-09-30)***
+Initial commit of unchanged code. Does not build.
diff --git a/src/rtl/common/reg_ro.v b/src/rtl/common/reg_ro.v
new file mode 100755
index 0000000..5eb9186
--- /dev/null
+++ b/src/rtl/common/reg_ro.v
@@ -0,0 +1,24 @@
+module reg_ro(
+	      input wire clk,          // clock that samples reg_d
+	      input wire [18:0] bus_a, // address currently presented on the bus
+	      input wire [18:0] my_a,  // address of this register (bit 0 is ignored)
+	      input wire [15:0] reg_d, // value to expose for readback
+	      input wire re,           // read enable
+	      output reg [15:0] bus_d  // readback data, 16'hZZZZ when not selected
+	      );
+   // Read-only register: a clk-sampled copy of reg_d is driven on bus_d when selected.
+   reg [15:0] 			 state; // reg_d as of the last rising clk edge
+
+   always @(posedge clk) begin
+      state <= reg_d;
+   end
+   // state is in the list so bus_d follows a new sample taken while the read is active
+   always @(bus_a or my_a or re or state) begin
+      if( (bus_a[18:1] == my_a[18:1]) && re ) begin
+	 bus_d = state;
+      end else begin
+	 bus_d = 16'hZZZZ;
+      end
+   end
+
+endmodule // reg_ro
diff --git a/src/rtl/common/reg_ro_4burst.v b/src/rtl/common/reg_ro_4burst.v
new file mode 100755
index 0000000..b046493
--- /dev/null
+++ b/src/rtl/common/reg_ro_4burst.v
@@ -0,0 +1,79 @@
+module reg_ro_4burst(
+		     input wire clk,
+		     input wire [15:0] bus_ad, // raw mux data
+		     input wire [18:0] my_a,   // address of this register, compared with {bus_a, bus_ad}
+		     input wire [2:0] bus_a, // high address bits
+		     input wire adv, // active high, so connect to !EIM_LBA
+		     input wire rw,  // low = write, high = read, so connect to EIM_RW
+		     input wire cs,  // active high, so connect to !EIM_CS[1]
+		     input wire [63:0] reg_d,  // value returned by a read, 16 bits per beat
+		     output reg [15:0] rbk_d, // readback tri-state interface
+		     output wire strobe        // pulses after a matched access has ended
+	      );
+   // Read-only 64-bit register, read back as four 16-bit beats, low word first.
+   reg [2:0] 			 bcount;      // beat counter, cleared when the address matches
+   reg 				 activated;   // high from address match until cs_r drops
+   reg [15:0] 			 bus_ad_r;    // bus inputs, registered on clk
+   reg 				 cs_r;
+   reg [2:0] 			 bus_a_r;
+   reg 				 rw_r;
+   reg 				 adv_r;
+   reg [2:0] 			 activated_d; // shift register of activated
+
+   always @(posedge clk) begin
+      activated_d[2:0] <= {activated_d[1:0],activated};
+   end
+   // delay a couple cycles to avoid premature changing of read data
+   assign strobe = activated_d[2] & !activated_d[1]; // pulse on falling edge of activated
+
+   ////// address decode path
+   always @(posedge clk) begin
+      bus_ad_r <= bus_ad;
+      bus_a_r <= bus_a;
+      cs_r <= cs;
+      rw_r <= rw;
+      adv_r <= adv;
+      // the decode below sees the previous cycle's registered bus values
+      if( cs_r && adv_r && ({bus_a_r, bus_ad_r} == my_a) ) begin
+	 activated <= 1'b1;
+	 bcount <= 3'b000;
+      end else if( !cs_r ) begin
+	 activated <= 1'b0;
+	 bcount <= 3'b000;
+      end else begin
+	 activated <= activated;
+	 // chip select is active, and we're beyond the address latch stage
+	 if( bcount < 3'b111 ) begin // saturate at 7: a long access must not wrap onto slots 1..4
+	    bcount <= bcount + 3'b001;
+	 end else begin
+	    bcount <= bcount;
+	 end
+      end // else: !if( !cs_r )
+   end // always @ (posedge clk)
+   // readback mux: drives rbk_d only on beats 1..4 of a matched read
+   always @(*) begin
+      if( activated && rw_r ) begin
+	 case (bcount) // bcount is delayed by one due to adr-to-oe turnaround provision
+	   3'b001: begin
+	      rbk_d = reg_d[15:0];
+	   end
+	   3'b010: begin
+	      rbk_d = reg_d[31:16];
+	   end
+	   3'b011: begin
+	      rbk_d = reg_d[47:32];
+	   end
+	   3'b100: begin
+	      rbk_d = reg_d[63:48];
+	   end
+	   default: begin
+	      rbk_d = 16'hZZZZ;
+	   end
+	 endcase // case (bcount)
+      end else begin // if ( activated && rw_r )
+	 rbk_d = 16'hZZZZ;
+      end // else: !if( activated && rw_r )
+   end
+
+endmodule // reg_ro_4burst
+
diff --git a/src/rtl/common/reg_wo.v b/src/rtl/common/reg_wo.v
new file mode 100755
index 0000000..a592244
--- /dev/null
+++ b/src/rtl/common/reg_wo.v
@@ -0,0 +1,32 @@
+module reg_wo(
+	      input wire clk,
+	      input wire [18:0] bus_a,
+	      input wire [18:0] my_a,
+	      input wire [15:0] bus_d,
+	      input wire we,
+	      input wire re,
+	      output wire [15:0] reg_d,
+	      output reg [15:0] rbk_d
+	      );
+   // Write-only register with readback; the address decode ignores bit 0.
+   reg [15:0] 			 state;
+   wire 			 selected;
+
+   assign selected = (bus_a[18:1] == my_a[18:1]);
+   assign reg_d = state;
+
+   // Capture the bus data on a selected write; otherwise state holds.
+   always @(posedge clk) begin
+      if( selected && we )
+	 state <= bus_d;
+   end
+
+   // Drive the stored value on a selected read, release the bus otherwise.
+   always @(*) begin
+      rbk_d = 16'hZZZZ;
+      if( selected && re ) begin
+	 rbk_d = state;
+      end
+   end
+
+endmodule // reg_wo
diff --git a/src/rtl/common/reg_wo_4burst.v b/src/rtl/common/reg_wo_4burst.v
new file mode 100755
index 0000000..5e0c9e0
--- /dev/null
+++ b/src/rtl/common/reg_wo_4burst.v
@@ -0,0 +1,106 @@
+module reg_wo_4burst(
+		     input wire clk,
+		     input wire [15:0] bus_ad, // raw mux data
+		     input wire [18:0] my_a,   // address of this register, compared with {bus_a, bus_ad}
+		     input wire [2:0] bus_a, // high address bits
+		     input wire adv, // active high, so connect to !EIM_LBA
+		     input wire rw,  // low = write, high = read, so connect to EIM_RW
+		     input wire cs,  // active high, so connect to !EIM_CS[1]
+		     output wire [63:0] reg_d, // current register contents
+		     output reg [15:0] rbk_d, // readback tri-state interface
+		     output wire strobe // pulses one cycle after access is done
+	      );
+   // 64-bit register written as four 16-bit beats (low word first), with burst readback.
+   reg [63:0] 			 bstate;      // stored register value
+   reg [2:0] 			 bcount;      // beat counter, cleared when the address matches
+   reg 				 activated;   // high from address match until cs_r drops
+   reg [15:0] 			 bus_ad_r;    // bus inputs, registered on clk
+   reg 				 cs_r;
+   reg [2:0] 			 bus_a_r;
+   reg 				 rw_r;
+   reg 				 adv_r;
+   reg 				 activated_d; // activated delayed by one clk
+
+   always @(posedge clk) begin
+      activated_d <= activated;
+   end
+   assign strobe = activated_d & !activated; // pulse on falling edge of activated
+
+   ////// write path
+   always @(posedge clk) begin
+      bus_ad_r <= bus_ad;
+      bus_a_r <= bus_a;
+      cs_r <= cs;
+      rw_r <= rw;
+      adv_r <= adv;
+      // the decode below sees the previous cycle's registered bus values
+      if( cs_r && adv_r && ({bus_a_r, bus_ad_r} == my_a) ) begin
+	 activated <= 1'b1;
+	 bcount <= 3'b000;
+      end else if( !cs_r ) begin
+	 activated <= 1'b0;
+	 bcount <= 3'b000;
+      end else begin
+	 activated <= activated;
+	 // chip select is active, and we're beyond the address latch stage
+	 if( bcount < 3'b111 ) begin // saturate at 7: a long write must not wrap and overwrite bstate
+	    bcount <= bcount + 3'b001;
+	 end else begin
+	    bcount <= bcount;
+	 end
+      end // else: !if( !cs_r )
+   end // always @ (posedge clk)
+   // beats 0..3 of a matched write load the four words of bstate from the registered bus
+   always @(posedge clk) begin
+      if( activated && !rw_r ) begin
+	 case (bcount)
+	   3'b000: begin
+	      bstate[15:0] <= bus_ad_r;
+	   end
+	   3'b001: begin
+	      bstate[31:16] <= bus_ad_r;
+	   end
+	   3'b010: begin
+	      bstate[47:32] <= bus_ad_r;
+	   end
+	   3'b011: begin
+	      bstate[63:48] <= bus_ad_r;
+	   end
+	   default: begin
+	      bstate <= bstate;
+	   end
+	 endcase // case (bcount)
+      end else begin // if ( activated && !rw_r )
+	 bstate <= bstate;
+      end // else: !if( activated && !rw_r )
+
+   end // always @ (posedge clk)
+
+   assign reg_d = bstate;
+   // readback mux: drives rbk_d only on beats 1..4 of a matched read
+   always @(activated or bcount or rw_r or bstate) begin
+      if( activated && rw_r ) begin
+	 case (bcount) // bcount is delayed by one due to adr-to-oe turnaround provision
+	   3'b001: begin
+	      rbk_d = bstate[15:0];
+	   end
+	   3'b010: begin
+	      rbk_d = bstate[31:16];
+	   end
+	   3'b011: begin
+	      rbk_d = bstate[47:32];
+	   end
+	   3'b100: begin
+	      rbk_d = bstate[63:48];
+	   end
+	   default: begin
+	      rbk_d = 16'hZZZZ;
+	   end
+	 endcase // case (bcount)
+      end else begin // if ( activated && rw_r )
+	 rbk_d = 16'hZZZZ;
+      end // else: !if( activated && rw_r )
+   end
+
+endmodule // reg_wo_4burst
+
diff --git a/src/rtl/common/rising_edge.v b/src/rtl/common/rising_edge.v
new file mode 100755
index 0000000..35684f7
--- /dev/null
+++ b/src/rtl/common/rising_edge.v
@@ -0,0 +1,14 @@
+module rising_edge(
+		   input wire clk,
+		   input wire level,
+		   output wire pulse
+		   );
+   // Edge detector: pulse is high while level is 1 and the copy of level
+   // taken at the previous rising clk edge is still 0.
+   reg 			       state;
+
+   assign pulse = level & ~state;
+
+   always @(posedge clk)
+     state <= level;
+endmodule // rising_edge
diff --git a/src/rtl/common/sync_reset.v b/src/rtl/common/sync_reset.v
new file mode 100755
index 0000000..9cdbd6d
--- /dev/null
+++ b/src/rtl/common/sync_reset.v
@@ -0,0 +1,61 @@
+////////////////////////////////////////////////
+// Copyright (c) 2012, Andrew "bunnie" Huang  
+// (bunnie _aht_ bunniestudios "dote" com)
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     Redistributions of source code must retain the above copyright
+//     notice, this list of conditions and the following disclaimer.
+//     Redistributions in binary form must reproduce the above copyright
+//     notice, this list of conditions and the following disclaimer in
+//     the documentation and/or other materials provided with the
+//     distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+////////////////////////////////////////////////
+
+/// according to Xilinx WP272, all flip flops are reset to a "known value"
+/// by GSR. You're supposed to trust that. Of course, this "known value"
+/// isn't very explicitly stated, searching through the xilinx manuals
+/// it seems everything defaults to 0 except for stuff that's presetable.
+
+/// anyways, this module generates a local, synchronized reset based upon
+/// a global reset. The idea is to instantiate one of these near every
+/// terminal reset sink, so as to avoid loading down a global reset network.
+
+/// this should optimize utilization and speed a bit, and also allow the
+/// synthesizer to get more aggressive about using larger primitives
+
+//////////
+// the input is the asynchronous reset of interest
+// and the clock to synchronize it to
+// the output is a synchronized reset that is at least four clock cycles wide
+module sync_reset (
+		   input wire glbl_reset, // async reset
+		   input wire clk, // clock that the output reset is synchronized to
+		   output wire reset // last stage of the four-flop chain below
+	      );
+   // reschain[n] is the Q output of stage n; FDPE is not defined in this file (Xilinx library primitive)
+   wire [3:0]       reschain;
+   
+   FDPE fdres0( .Q(reschain[0]), .C(clk), .CE(1'b1), .D(1'b0), .PRE(glbl_reset) );
+   FDPE fdres1( .Q(reschain[1]), .C(clk), .CE(1'b1), .D(reschain[0]), .PRE(glbl_reset) );
+   FDPE fdres2( .Q(reschain[2]), .C(clk), .CE(1'b1), .D(reschain[1]), .PRE(glbl_reset) );
+   FDPE fdres3( .Q(reschain[3]), .C(clk), .CE(1'b1), .D(reschain[2]), .PRE(glbl_reset) );
+   // every stage takes glbl_reset on PRE; stage 0 shifts in a constant 0, the others the previous stage
+   assign reset = reschain[3];
+
+endmodule // sync_reset
diff --git a/src/rtl/ip/bclk_dll/bclk_dll.v b/src/rtl/ip/bclk_dll/bclk_dll.v
new file mode 100755
index 0000000..cd7aa5b
--- /dev/null
+++ b/src/rtl/ip/bclk_dll/bclk_dll.v
@@ -0,0 +1,148 @@
+// file: bclk_dll.v
+// 
+// (c) Copyright 2008 - 2011 Xilinx, Inc. All rights reserved.
+// 
+// This file contains confidential and proprietary information
+// of Xilinx, Inc. and is protected under U.S. and
+// international copyright and other intellectual property
+// laws.
+// 
+// DISCLAIMER
+// This disclaimer is not a license and does not grant any
+// rights to the materials distributed herewith. Except as
+// otherwise provided in a valid license issued to you by
+// Xilinx, and to the maximum extent permitted by applicable
+// law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND
+// WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES
+// AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING
+// BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-
+// INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and
+// (2) Xilinx shall not be liable (whether in contract or tort,
+// including negligence, or under any other theory of
+// liability) for any loss or damage of any kind or nature
+// related to, arising under or in connection with these
+// materials, including for any direct, or any indirect,
+// special, incidental, or consequential loss or damage
+// (including loss of data, profits, goodwill, or any type of
+// loss or damage suffered as a result of any action brought
+// by a third party) even if such damage or loss was
+// reasonably foreseeable or Xilinx had been advised of the
+// possibility of the same.
+// 
+// CRITICAL APPLICATIONS
+// Xilinx products are not designed or intended to be fail-
+// safe, or for use in any application requiring fail-safe
+// performance, such as life-support or safety devices or
+// systems, Class III medical devices, nuclear facilities,
+// applications related to the deployment of airbags, or any
+// other applications that could lead to death, personal
+// injury, or severe property or environmental damage
+// (individually and collectively, "Critical
+// Applications"). Customer assumes the sole risk and
+// liability of any use of Xilinx products in Critical
+// Applications, subject only to applicable laws and
+// regulations governing limitations on product liability.
+// 
+// THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS
+// PART OF THIS FILE AT ALL TIMES.
+// 
+//----------------------------------------------------------------------------
+// User entered comments
+//----------------------------------------------------------------------------
+// None
+//
+//----------------------------------------------------------------------------
+// "Output    Output      Phase     Duty      Pk-to-Pk        Phase"
+// "Clock    Freq (MHz) (degrees) Cycle (%) Jitter (ps)  Error (ps)"
+//----------------------------------------------------------------------------
+// CLK_OUT1___133.014______0.000______50.0______350.360____150.000
+//
+//----------------------------------------------------------------------------
+// "Input Clock   Freq (MHz)    Input Jitter (UI)"
+//----------------------------------------------------------------------------
+// __primary_________133.014_____________0.01
+
+`timescale 1ps/1ps
+
+(* CORE_GENERATION_INFO = "bclk_dll,clk_wiz_v4_1,{component_name=bclk_dll,use_phase_alignment=false,use_min_o_jitter=false,use_max_i_jitter=false,use_dyn_phase_shift=false,use_inclk_switchover=false,use_dyn_reconfig=false,feedback_source=FDBK_AUTO,primtype_sel=DCM_SP,num_out_clk=1,clkin1_period=7.518,clkin2_period=7.518,use_power_down=false,use_reset=true,use_locked=true,use_inclk_stopped=false,use_status=false,use_freeze=false,use_clk_valid=false,feedback_type=SINGLE,clock_mgr_type=AUTO [...]
+module bclk_dll
+ (// Clock in ports
+  input         clk133in,
+  // Clock out ports
+  output        clk133,
+  // Status and control signals
+  input         RESET,
+  output        LOCKED
+ );
+  // Wrapper around one DCM_SP: clk133 is the BUFG-buffered CLKFX output, LOCKED mirrors the DCM LOCKED pin.
+  // Input buffering
+  //------------------------------------
+  assign clkin1 = clk133in; // plain assign, no buffer primitive; clkin1 has no explicit wire declaration here
+
+
+  // Clocking primitive
+  //------------------------------------
+
+  // Instantiation of the DCM primitive
+  //    * Unused inputs are tied off
+  //    * Unused outputs are labeled unused
+  wire        psdone_unused; // declared but never connected (PSDONE is left open below)
+  wire        locked_int;
+  wire [7:0]  status_int; // connected to STATUS, not read anywhere in this module
+  wire clkfb;
+  wire clk0; // connected to CLK0, not read anywhere in this module
+  wire clkfx;
+
+  DCM_SP
+  #(.CLKDV_DIVIDE          (2.000),
+    .CLKFX_DIVIDE          (2), // CLKFX multiply and divide are both 2; header table lists 133.014 MHz in and out
+    .CLKFX_MULTIPLY        (2),
+    .CLKIN_DIVIDE_BY_2     ("FALSE"),
+    .CLKIN_PERIOD          (7.518),
+    .CLKOUT_PHASE_SHIFT    ("NONE"),
+    .CLK_FEEDBACK          ("NONE"),
+    .DESKEW_ADJUST         ("SYSTEM_SYNCHRONOUS"),
+    .PHASE_SHIFT           (0),
+    .STARTUP_WAIT          ("FALSE"))
+  dcm_sp_inst
+    // Input clock
+   (.CLKIN                 (clkin1),
+    .CLKFB                 (clkfb),
+    // Output clocks
+    .CLK0                  (clk0),
+    .CLK90                 (),
+    .CLK180                (),
+    .CLK270                (),
+    .CLK2X                 (),
+    .CLK2X180              (),
+    .CLKFX                 (clkfx),
+    .CLKFX180              (),
+    .CLKDV                 (),
+    // Ports for dynamic phase shift
+    .PSCLK                 (1'b0),
+    .PSEN                  (1'b0),
+    .PSINCDEC              (1'b0),
+    .PSDONE                (),
+    // Other control and status signals
+    .LOCKED                (locked_int),
+    .STATUS                (status_int),
+ 
+    .RST                   (RESET),
+    // Unused pin- tie low
+    .DSSEN                 (1'b0));
+
+    assign LOCKED = locked_int;
+
+  // Output buffering
+  //-----------------------------------
+  // no phase alignment active, connect to ground
+  assign clkfb = 1'b0;
+
+  BUFG clkout1_buf
+   (.O   (clk133),
+    .I   (clkfx));
+
+
+
+
+endmodule
diff --git a/src/rtl/ip/dcm_delay/dcm_delay.v b/src/rtl/ip/dcm_delay/dcm_delay.v
new file mode 100755
index 0000000..d422d2c
--- /dev/null
+++ b/src/rtl/ip/dcm_delay/dcm_delay.v
@@ -0,0 +1,168 @@
+// file: dcm_delay.v
+// 
+// (c) Copyright 2008 - 2011 Xilinx, Inc. All rights reserved.
+// 
+// This file contains confidential and proprietary information
+// of Xilinx, Inc. and is protected under U.S. and
+// international copyright and other intellectual property
+// laws.
+// 
+// DISCLAIMER
+// This disclaimer is not a license and does not grant any
+// rights to the materials distributed herewith. Except as
+// otherwise provided in a valid license issued to you by
+// Xilinx, and to the maximum extent permitted by applicable
+// law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND
+// WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES
+// AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING
+// BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-
+// INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and
+// (2) Xilinx shall not be liable (whether in contract or tort,
+// including negligence, or under any other theory of
+// liability) for any loss or damage of any kind or nature
+// related to, arising under or in connection with these
+// materials, including for any direct, or any indirect,
+// special, incidental, or consequential loss or damage
+// (including loss of data, profits, goodwill, or any type of
+// loss or damage suffered as a result of any action brought
+// by a third party) even if such damage or loss was
+// reasonably foreseeable or Xilinx had been advised of the
+// possibility of the same.
+// 
+// CRITICAL APPLICATIONS
+// Xilinx products are not designed or intended to be fail-
+// safe, or for use in any application requiring fail-safe
+// performance, such as life-support or safety devices or
+// systems, Class III medical devices, nuclear facilities,
+// applications related to the deployment of airbags, or any
+// other applications that could lead to death, personal
+// injury, or severe property or environmental damage
+// (individually and collectively, "Critical
+// Applications"). Customer assumes the sole risk and
+// liability of any use of Xilinx products in Critical
+// Applications, subject only to applicable laws and
+// regulations governing limitations on product liability.
+// 
+// THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS
+// PART OF THIS FILE AT ALL TIMES.
+// 
+//----------------------------------------------------------------------------
+// User entered comments
+//----------------------------------------------------------------------------
+// None
+//
+//----------------------------------------------------------------------------
+// "Output    Output      Phase     Duty      Pk-to-Pk        Phase"
+// "Clock    Freq (MHz) (degrees) Cycle (%) Jitter (ps)  Error (ps)"
+//----------------------------------------------------------------------------
+// CLK_OUT1___133.014______0.000______50.0______200.000____150.000
+// CLK_OUT2___133.014_____90.000______50.0______300.000____150.000
+// CLK_OUT3___133.014____180.000______50.0______300.000____150.000
+// CLK_OUT4___133.014____270.000______50.0______300.000____150.000
+//
+//----------------------------------------------------------------------------
+// "Input Clock   Freq (MHz)    Input Jitter (UI)"
+//----------------------------------------------------------------------------
+// __primary_________133.014____________0.010
+
+`timescale 1ps/1ps
+
+(* CORE_GENERATION_INFO = "dcm_delay,clk_wiz_v3_6,{component_name=dcm_delay,use_phase_alignment=true,use_min_o_jitter=false,use_max_i_jitter=false,use_dyn_phase_shift=false,use_inclk_switchover=false,use_dyn_reconfig=false,feedback_source=FDBK_ONCHIP,primtype_sel=DCM_SP,num_out_clk=4,clkin1_period=7.518,clkin2_period=7.518,use_power_down=false,use_reset=true,use_locked=true,use_inclk_stopped=false,use_status=false,use_freeze=false,use_clk_valid=false,feedback_type=SINGLE,clock_mgr_type=A [...]
+module dcm_delay
+ (// Clock in ports
+  input         clk133,
+  input         CLKFB_IN,
+  // Clock out ports
+  output        clk133out,
+  output        clk133_p90,
+  output        clk133_p180,
+  output        clk133_p270,
+  output        CLKFB_OUT,
+  // Status and control signals
+  input         RESET,
+  output        LOCKED
+ );
+  // Wrapper around one DCM_SP: CLK0/CLK90/CLK180/CLK270 each go through a BUFG to the four clock outputs.
+  // Input buffering
+  //------------------------------------
+  assign clkin1 = clk133; // plain assign, no buffer primitive; clkin1 has no explicit wire declaration here
+
+
+  // Clocking primitive
+  //------------------------------------
+
+  // Instantiation of the DCM primitive
+  //    * Unused inputs are tied off
+  //    * Unused outputs are labeled unused
+  wire        psdone_unused; // declared but never connected (PSDONE is left open below)
+  wire        locked_int;
+  wire [7:0]  status_int; // connected to STATUS, not read anywhere in this module
+  wire clk0;
+  wire clk90;
+  wire clk180;
+  wire clk270;
+
+  DCM_SP
+  #(.CLKDV_DIVIDE          (2.000),
+    .CLKFX_DIVIDE          (1), // CLKFX settings; both CLKFX outputs are left unconnected below
+    .CLKFX_MULTIPLY        (4),
+    .CLKIN_DIVIDE_BY_2     ("FALSE"),
+    .CLKIN_PERIOD          (7.518),
+    .CLKOUT_PHASE_SHIFT    ("NONE"),
+    .CLK_FEEDBACK          ("1X"),
+    .DESKEW_ADJUST         ("SYSTEM_SYNCHRONOUS"),
+    .PHASE_SHIFT           (0),
+    .STARTUP_WAIT          ("FALSE"))
+  dcm_sp_inst
+    // Input clock
+   (.CLKIN                 (clkin1),
+    .CLKFB                 (CLKFB_IN),
+    // Output clocks
+    .CLK0                  (clk0),
+    .CLK90                 (clk90),
+    .CLK180                (clk180),
+    .CLK270                (clk270),
+    .CLK2X                 (),
+    .CLK2X180              (),
+    .CLKFX                 (),
+    .CLKFX180              (),
+    .CLKDV                 (),
+    // Ports for dynamic phase shift
+    .PSCLK                 (1'b0),
+    .PSEN                  (1'b0),
+    .PSINCDEC              (1'b0),
+    .PSDONE                (),
+    // Other control and status signals
+    .LOCKED                (locked_int),
+    .STATUS                (status_int),
+ 
+    .RST                   (RESET),
+    // Unused pin- tie low
+    .DSSEN                 (1'b0));
+
+    assign LOCKED = locked_int;
+
+  // Output buffering
+  //-----------------------------------
+  assign CLKFB_OUT = clk0; // unbuffered CLK0; presumably looped back to CLKFB_IN by the instantiating module - confirm
+
+  BUFG clkout1_buf
+   (.O   (clk133out),
+    .I   (clk0));
+
+
+  BUFG clkout2_buf
+   (.O   (clk133_p90),
+    .I   (clk90));
+
+  BUFG clkout3_buf
+   (.O   (clk133_p180),
+    .I   (clk180));
+
+  BUFG clkout4_buf
+   (.O   (clk133_p270),
+    .I   (clk270));
+
+
+
+endmodule
diff --git a/src/rtl/ip/ddr3_clkgen/ddr3_clkgen.v b/src/rtl/ip/ddr3_clkgen/ddr3_clkgen.v
new file mode 100755
index 0000000..6c2e504
--- /dev/null
+++ b/src/rtl/ip/ddr3_clkgen/ddr3_clkgen.v
@@ -0,0 +1,158 @@
+// file: ddr3_clkgen.v
+// 
+// (c) Copyright 2008 - 2011 Xilinx, Inc. All rights reserved.
+// 
+// This file contains confidential and proprietary information
+// of Xilinx, Inc. and is protected under U.S. and
+// international copyright and other intellectual property
+// laws.
+// 
+// DISCLAIMER
+// This disclaimer is not a license and does not grant any
+// rights to the materials distributed herewith. Except as
+// otherwise provided in a valid license issued to you by
+// Xilinx, and to the maximum extent permitted by applicable
+// law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND
+// WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES
+// AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING
+// BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-
+// INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and
+// (2) Xilinx shall not be liable (whether in contract or tort,
+// including negligence, or under any other theory of
+// liability) for any loss or damage of any kind or nature
+// related to, arising under or in connection with these
+// materials, including for any direct, or any indirect,
+// special, incidental, or consequential loss or damage
+// (including loss of data, profits, goodwill, or any type of
+// loss or damage suffered as a result of any action brought
+// by a third party) even if such damage or loss was
+// reasonably foreseeable or Xilinx had been advised of the
+// possibility of the same.
+// 
+// CRITICAL APPLICATIONS
+// Xilinx products are not designed or intended to be fail-
+// safe, or for use in any application requiring fail-safe
+// performance, such as life-support or safety devices or
+// systems, Class III medical devices, nuclear facilities,
+// applications related to the deployment of airbags, or any
+// other applications that could lead to death, personal
+// injury, or severe property or environmental damage
+// (individually and collectively, "Critical
+// Applications"). Customer assumes the sole risk and
+// liability of any use of Xilinx products in Critical
+// Applications, subject only to applicable laws and
+// regulations governing limitations on product liability.
+// 
+// THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS
+// PART OF THIS FILE AT ALL TIMES.
+// 
+//----------------------------------------------------------------------------
+// User entered comments
+//----------------------------------------------------------------------------
+// None
+//
+//----------------------------------------------------------------------------
+// "Output    Output      Phase     Duty      Pk-to-Pk        Phase"
+// "Clock    Freq (MHz) (degrees) Cycle (%) Jitter (ps)  Error (ps)"
+//----------------------------------------------------------------------------
+// CLK_OUT1____50.000______0.000______50.0______267.325____251.827
+// CLK_OUT2___400.000______0.000______50.0______180.270____251.827
+// CLK_OUT3___100.000______0.000______50.0______231.478____251.827
+//
+//----------------------------------------------------------------------------
+// "Input Clock   Freq (MHz)    Input Jitter (UI)"
+//----------------------------------------------------------------------------
+// __primary______________50____________0.010
+
+`timescale 1ps/1ps
+
+(* CORE_GENERATION_INFO = "ddr3_clkgen,clk_wiz_v4_1,{component_name=ddr3_clkgen,use_phase_alignment=true,use_min_o_jitter=false,use_max_i_jitter=false,use_dyn_phase_shift=false,use_inclk_switchover=false,use_dyn_reconfig=false,feedback_source=FDBK_AUTO,primtype_sel=PLL_BASE,num_out_clk=3,clkin1_period=20.0,clkin2_period=20.0,use_power_down=false,use_reset=true,use_locked=true,use_inclk_stopped=false,use_status=false,use_freeze=false,use_clk_valid=false,feedback_type=SINGLE,clock_mgr_type [...]
+module ddr3_clkgen
+ (// Clock in ports
+  input         clk50in,
+  // Clock out ports
+  output        clk50,
+  output        clk400,
+  output        clk100,
+  // Status and control signals
+  input         RESET,
+  output        LOCKED
+ );
+  // Wrapper around one PLL_BASE: BUFG-buffered input, BUFG-buffered feedback, three BUFG-buffered outputs.
+  // Input buffering
+  //------------------------------------
+  BUFG clkin1_buf
+   (.O (clkin1),
+    .I (clk50in));
+
+
+  // Clocking primitive
+  //------------------------------------
+  // Instantiation of the PLL primitive
+  //    * Unused inputs are tied off
+  //    * Unused outputs are labeled unused
+  wire [15:0] do_unused; // declared but not referenced anywhere in this module
+  wire        drdy_unused; // declared but not referenced anywhere in this module
+  wire        clkfbout;
+  wire        clkfbout_buf;
+  wire        clkout3_unused;
+  wire        clkout4_unused;
+  wire        clkout5_unused;
+
+  PLL_BASE
+  #(.BANDWIDTH              ("OPTIMIZED"),
+    .CLK_FEEDBACK           ("CLKFBOUT"),
+    .COMPENSATION           ("SYSTEM_SYNCHRONOUS"),
+    .DIVCLK_DIVIDE          (1),
+    .CLKFBOUT_MULT          (8), // 50 MHz input (CLKIN_PERIOD 20.0) x 8 / DIVCLK_DIVIDE 1 = 400 MHz ahead of the output dividers
+    .CLKFBOUT_PHASE         (0.000),
+    .CLKOUT0_DIVIDE         (8), // 400 / 8 = 50 MHz, buffered to clk50
+    .CLKOUT0_PHASE          (0.000),
+    .CLKOUT0_DUTY_CYCLE     (0.500),
+    .CLKOUT1_DIVIDE         (1), // 400 / 1 = 400 MHz, buffered to clk400
+    .CLKOUT1_PHASE          (0.000),
+    .CLKOUT1_DUTY_CYCLE     (0.500),
+    .CLKOUT2_DIVIDE         (4), // 400 / 4 = 100 MHz, buffered to clk100
+    .CLKOUT2_PHASE          (0.000),
+    .CLKOUT2_DUTY_CYCLE     (0.500),
+    .CLKIN_PERIOD           (20.0),
+    .REF_JITTER             (0.010))
+  pll_base_inst
+    // Output clocks
+   (.CLKFBOUT              (clkfbout),
+    .CLKOUT0               (clkout0),
+    .CLKOUT1               (clkout1),
+    .CLKOUT2               (clkout2),
+    .CLKOUT3               (clkout3_unused),
+    .CLKOUT4               (clkout4_unused),
+    .CLKOUT5               (clkout5_unused),
+    // Status and control signals
+    .LOCKED                (LOCKED),
+    .RST                   (RESET),
+     // Input clock control
+    .CLKFBIN               (clkfbout_buf),
+    .CLKIN                 (clkin1));
+
+
+  // Output buffering
+  //-----------------------------------
+  BUFG clkf_buf
+   (.O (clkfbout_buf),
+    .I (clkfbout));
+  // note: the clkoutN_buf instance numbers below are one higher than the PLL CLKOUTn they buffer
+  BUFG clkout1_buf
+   (.O   (clk50),
+    .I   (clkout0));
+
+
+  BUFG clkout2_buf
+   (.O   (clk400),
+    .I   (clkout1));
+
+  BUFG clkout3_buf
+   (.O   (clk100),
+    .I   (clkout2));
+
+
+
+endmodule
diff --git a/src/rtl/ip/eimram/eimram.v b/src/rtl/ip/eimram/eimram.v
new file mode 100755
index 0000000..bc0046c
--- /dev/null
+++ b/src/rtl/ip/eimram/eimram.v
@@ -0,0 +1,186 @@
+/*******************************************************************************
+*     This file is owned and controlled by Xilinx and must be used solely      *
+*     for design, simulation, implementation and creation of design files      *
+*     limited to Xilinx devices or technologies. Use with non-Xilinx           *
+*     devices or technologies is expressly prohibited and immediately          *
+*     terminates your license.                                                 *
+*                                                                              *
+*     XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS" SOLELY     *
+*     FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR XILINX DEVICES.  BY     *
+*     PROVIDING THIS DESIGN, CODE, OR INFORMATION AS ONE POSSIBLE              *
+*     IMPLEMENTATION OF THIS FEATURE, APPLICATION OR STANDARD, XILINX IS       *
+*     MAKING NO REPRESENTATION THAT THIS IMPLEMENTATION IS FREE FROM ANY       *
+*     CLAIMS OF INFRINGEMENT, AND YOU ARE RESPONSIBLE FOR OBTAINING ANY        *
+*     RIGHTS YOU MAY REQUIRE FOR YOUR IMPLEMENTATION.  XILINX EXPRESSLY        *
+*     DISCLAIMS ANY WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE    *
+*     IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR           *
+*     REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF          *
+*     INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A    *
+*     PARTICULAR PURPOSE.                                                      *
+*                                                                              *
+*     Xilinx products are not intended for use in life support appliances,     *
+*     devices, or systems.  Use in such applications are expressly             *
+*     prohibited.                                                              *
+*                                                                              *
+*     (c) Copyright 1995-2014 Xilinx, Inc.                                     *
+*     All rights reserved.                                                     *
+*******************************************************************************/
+// You must compile the wrapper file eimram.v when simulating
+// the core, eimram. When compiling the wrapper file, be sure to
+// reference the XilinxCoreLib Verilog simulation library. For detailed
+// instructions, please refer to the "CORE Generator Help".
+
+// The synthesis directives "translate_off/translate_on" specified below are
+// supported by Xilinx, Mentor Graphics and Synplicity synthesis
+// tools. Ensure they are correct for your synthesis tool(s).
+
+`timescale 1ns/1ps
+
+module eimram(
+  clka,
+  ena,
+  wea,
+  addra,
+  dina,
+  clkb,
+  addrb,
+  doutb
+);
+// CORE Generator wrapper for a two-port memory: port A only writes (wea/dina), port B only reads (doutb).
+input clka;              // port A clock
+input ena;               // port A enable (C_HAS_ENA = 1)
+input [0 : 0] wea;       // port A write enable, one bit (C_WEA_WIDTH = 1)
+input [14 : 0] addra;    // port A address, 15 bits -> 32768 locations (C_WRITE_DEPTH_A)
+input [15 : 0] dina;     // port A write data, 16 bits (C_WRITE_WIDTH_A)
+input clkb;              // port B clock, separate from clka (C_COMMON_CLK = 0)
+input [13 : 0] addrb;    // port B address, 14 bits -> 16384 locations (C_READ_DEPTH_B)
+output [31 : 0] doutb;   // port B read data, 32 bits (C_READ_WIDTH_B)
+// Capacity matches on both sides: 32768 x 16 bits (port A) = 16384 x 32 bits (port B).
+// synthesis translate_off
+// Simulation model only: tools that honor translate_off see this module without the instance below.
+  BLK_MEM_GEN_V7_3 #(
+    .C_ADDRA_WIDTH(15),
+    .C_ADDRB_WIDTH(14),
+    .C_ALGORITHM(1),
+    .C_AXI_ID_WIDTH(4),
+    .C_AXI_SLAVE_TYPE(0),
+    .C_AXI_TYPE(1),
+    .C_BYTE_SIZE(9),
+    .C_COMMON_CLK(0),
+    .C_DEFAULT_DATA("0"),
+    .C_DISABLE_WARN_BHV_COLL(0),
+    .C_DISABLE_WARN_BHV_RANGE(0),
+    .C_ENABLE_32BIT_ADDRESS(0),
+    .C_FAMILY("spartan6"),
+    .C_HAS_AXI_ID(0),
+    .C_HAS_ENA(1),
+    .C_HAS_ENB(0),
+    .C_HAS_INJECTERR(0),
+    .C_HAS_MEM_OUTPUT_REGS_A(0),
+    .C_HAS_MEM_OUTPUT_REGS_B(0),
+    .C_HAS_MUX_OUTPUT_REGS_A(0),
+    .C_HAS_MUX_OUTPUT_REGS_B(0),
+    .C_HAS_REGCEA(0),
+    .C_HAS_REGCEB(0),
+    .C_HAS_RSTA(0),
+    .C_HAS_RSTB(0),
+    .C_HAS_SOFTECC_INPUT_REGS_A(0),
+    .C_HAS_SOFTECC_OUTPUT_REGS_B(0),
+    .C_INIT_FILE("BlankString"),
+    .C_INIT_FILE_NAME("no_coe_file_loaded"),
+    .C_INITA_VAL("0"),
+    .C_INITB_VAL("0"),
+    .C_INTERFACE_TYPE(0),
+    .C_LOAD_INIT_FILE(0),
+    .C_MEM_TYPE(1),
+    .C_MUX_PIPELINE_STAGES(0),
+    .C_PRIM_TYPE(1),
+    .C_READ_DEPTH_A(32768),
+    .C_READ_DEPTH_B(16384),
+    .C_READ_WIDTH_A(16),
+    .C_READ_WIDTH_B(32),
+    .C_RST_PRIORITY_A("CE"),
+    .C_RST_PRIORITY_B("CE"),
+    .C_RST_TYPE("SYNC"),
+    .C_RSTRAM_A(0),
+    .C_RSTRAM_B(0),
+    .C_SIM_COLLISION_CHECK("ALL"),
+    .C_USE_BRAM_BLOCK(0),
+    .C_USE_BYTE_WEA(0),
+    .C_USE_BYTE_WEB(0),
+    .C_USE_DEFAULT_DATA(0),
+    .C_USE_ECC(0),
+    .C_USE_SOFTECC(0),
+    .C_WEA_WIDTH(1),
+    .C_WEB_WIDTH(1),
+    .C_WRITE_DEPTH_A(32768),
+    .C_WRITE_DEPTH_B(16384),
+    .C_WRITE_MODE_A("WRITE_FIRST"),
+    .C_WRITE_MODE_B("WRITE_FIRST"),
+    .C_WRITE_WIDTH_A(16),
+    .C_WRITE_WIDTH_B(32),
+    .C_XDEVICEFAMILY("spartan6")
+  )
+  inst (
+    .CLKA(clka),
+    .ENA(ena),
+    .WEA(wea),
+    .ADDRA(addra),
+    .DINA(dina),
+    .CLKB(clkb),
+    .ADDRB(addrb),
+    .DOUTB(doutb),
+    .RSTA(),  // this port and every port below it are left unconnected by the wrapper
+    .REGCEA(),
+    .DOUTA(),
+    .RSTB(),
+    .ENB(),
+    .REGCEB(),
+    .WEB(),
+    .DINB(),
+    .INJECTSBITERR(),
+    .INJECTDBITERR(),
+    .SBITERR(),
+    .DBITERR(),
+    .RDADDRECC(),
+    .S_ACLK(),
+    .S_ARESETN(),
+    .S_AXI_AWID(),
+    .S_AXI_AWADDR(),
+    .S_AXI_AWLEN(),
+    .S_AXI_AWSIZE(),
+    .S_AXI_AWBURST(),
+    .S_AXI_AWVALID(),
+    .S_AXI_AWREADY(),
+    .S_AXI_WDATA(),
+    .S_AXI_WSTRB(),
+    .S_AXI_WLAST(),
+    .S_AXI_WVALID(),
+    .S_AXI_WREADY(),
+    .S_AXI_BID(),
+    .S_AXI_BRESP(),
+    .S_AXI_BVALID(),
+    .S_AXI_BREADY(),
+    .S_AXI_ARID(),
+    .S_AXI_ARADDR(),
+    .S_AXI_ARLEN(),
+    .S_AXI_ARSIZE(),
+    .S_AXI_ARBURST(),
+    .S_AXI_ARVALID(),
+    .S_AXI_ARREADY(),
+    .S_AXI_RID(),
+    .S_AXI_RDATA(),
+    .S_AXI_RRESP(),
+    .S_AXI_RLAST(),
+    .S_AXI_RVALID(),
+    .S_AXI_RREADY(),
+    .S_AXI_INJECTSBITERR(),
+    .S_AXI_INJECTDBITERR(),
+    .S_AXI_SBITERR(),
+    .S_AXI_DBITERR(),
+    .S_AXI_RDADDRECC()
+  );
+
+// synthesis translate_on
+
+endmodule
diff --git a/src/rtl/ip/nandadr_fifo/nandadr_fifo.v b/src/rtl/ip/nandadr_fifo/nandadr_fifo.v
new file mode 100755
index 0000000..a0edefe
--- /dev/null
+++ b/src/rtl/ip/nandadr_fifo/nandadr_fifo.v
@@ -0,0 +1,491 @@
+/*******************************************************************************
+*     This file is owned and controlled by Xilinx and must be used solely      *
+*     for design, simulation, implementation and creation of design files      *
+*     limited to Xilinx devices or technologies. Use with non-Xilinx           *
+*     devices or technologies is expressly prohibited and immediately          *
+*     terminates your license.                                                 *
+*                                                                              *
+*     XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS" SOLELY     *
+*     FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR XILINX DEVICES.  BY     *
+*     PROVIDING THIS DESIGN, CODE, OR INFORMATION AS ONE POSSIBLE              *
+*     IMPLEMENTATION OF THIS FEATURE, APPLICATION OR STANDARD, XILINX IS       *
+*     MAKING NO REPRESENTATION THAT THIS IMPLEMENTATION IS FREE FROM ANY       *
+*     CLAIMS OF INFRINGEMENT, AND YOU ARE RESPONSIBLE FOR OBTAINING ANY        *
+*     RIGHTS YOU MAY REQUIRE FOR YOUR IMPLEMENTATION.  XILINX EXPRESSLY        *
+*     DISCLAIMS ANY WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE    *
+*     IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR           *
+*     REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF          *
+*     INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A    *
+*     PARTICULAR PURPOSE.                                                      *
+*                                                                              *
+*     Xilinx products are not intended for use in life support appliances,     *
+*     devices, or systems.  Use in such applications are expressly             *
+*     prohibited.                                                              *
+*                                                                              *
+*     (c) Copyright 1995-2013 Xilinx, Inc.                                     *
+*     All rights reserved.                                                     *
+*******************************************************************************/
+// You must compile the wrapper file nandadr_fifo.v when simulating
+// the core, nandadr_fifo. When compiling the wrapper file, be sure to
+// reference the XilinxCoreLib Verilog simulation library. For detailed
+// instructions, please refer to the "CORE Generator Help".
+
+// The synthesis directives "translate_off/translate_on" specified below are
+// supported by Xilinx, Mentor Graphics and Synplicity synthesis
+// tools. Ensure they are correct for your synthesis tool(s).
+
+`timescale 1ns/1ps
+
+module nandadr_fifo(
+  rst,
+  wr_clk,
+  rd_clk,
+  din,
+  wr_en,
+  rd_en,
+  dout,
+  full,
+  overflow,
+  empty,
+  rd_data_count
+);
+// CORE Generator wrapper for a FIFO with separate write and read clocks: 30-bit entries, C_WR_DEPTH = 16384.
+input rst;                      // reset input (C_HAS_RST = 1)
+input wr_clk;                   // write-side clock
+input rd_clk;                   // read-side clock (C_COMMON_CLOCK = 0)
+input [29 : 0] din;             // write data, 30 bits (C_DIN_WIDTH)
+input wr_en;                    // write enable
+input rd_en;                    // read enable
+output [29 : 0] dout;           // read data, 30 bits (C_DOUT_WIDTH)
+output full;                    // full flag
+output overflow;                // overflow flag (C_HAS_OVERFLOW = 1)
+output empty;                   // empty flag
+output [13 : 0] rd_data_count;  // read-side entry count, 14 bits (C_RD_DATA_COUNT_WIDTH)
+// NOTE(review): 14 bits tops out at 16383 while C_RD_DEPTH is 16384 - confirm what a full FIFO reports.
+// synthesis translate_off
+// Simulation model only: tools that honor translate_off see this module without the instance below.
+  FIFO_GENERATOR_V9_3 #(
+    .C_ADD_NGC_CONSTRAINT(0),
+    .C_APPLICATION_TYPE_AXIS(0),
+    .C_APPLICATION_TYPE_RACH(0),
+    .C_APPLICATION_TYPE_RDCH(0),
+    .C_APPLICATION_TYPE_WACH(0),
+    .C_APPLICATION_TYPE_WDCH(0),
+    .C_APPLICATION_TYPE_WRCH(0),
+    .C_AXI_ADDR_WIDTH(32),
+    .C_AXI_ARUSER_WIDTH(1),
+    .C_AXI_AWUSER_WIDTH(1),
+    .C_AXI_BUSER_WIDTH(1),
+    .C_AXI_DATA_WIDTH(64),
+    .C_AXI_ID_WIDTH(4),
+    .C_AXI_RUSER_WIDTH(1),
+    .C_AXI_TYPE(0),
+    .C_AXI_WUSER_WIDTH(1),
+    .C_AXIS_TDATA_WIDTH(64),
+    .C_AXIS_TDEST_WIDTH(4),
+    .C_AXIS_TID_WIDTH(8),
+    .C_AXIS_TKEEP_WIDTH(4),
+    .C_AXIS_TSTRB_WIDTH(4),
+    .C_AXIS_TUSER_WIDTH(4),
+    .C_AXIS_TYPE(0),
+    .C_COMMON_CLOCK(0),
+    .C_COUNT_TYPE(0),
+    .C_DATA_COUNT_WIDTH(14),
+    .C_DEFAULT_VALUE("BlankString"),
+    .C_DIN_WIDTH(30),
+    .C_DIN_WIDTH_AXIS(1),
+    .C_DIN_WIDTH_RACH(32),
+    .C_DIN_WIDTH_RDCH(64),
+    .C_DIN_WIDTH_WACH(32),
+    .C_DIN_WIDTH_WDCH(64),
+    .C_DIN_WIDTH_WRCH(2),
+    .C_DOUT_RST_VAL("0"),
+    .C_DOUT_WIDTH(30),
+    .C_ENABLE_RLOCS(0),
+    .C_ENABLE_RST_SYNC(1),
+    .C_ERROR_INJECTION_TYPE(0),
+    .C_ERROR_INJECTION_TYPE_AXIS(0),
+    .C_ERROR_INJECTION_TYPE_RACH(0),
+    .C_ERROR_INJECTION_TYPE_RDCH(0),
+    .C_ERROR_INJECTION_TYPE_WACH(0),
+    .C_ERROR_INJECTION_TYPE_WDCH(0),
+    .C_ERROR_INJECTION_TYPE_WRCH(0),
+    .C_FAMILY("spartan6"),
+    .C_FULL_FLAGS_RST_VAL(1),
+    .C_HAS_ALMOST_EMPTY(0),
+    .C_HAS_ALMOST_FULL(0),
+    .C_HAS_AXI_ARUSER(0),
+    .C_HAS_AXI_AWUSER(0),
+    .C_HAS_AXI_BUSER(0),
+    .C_HAS_AXI_RD_CHANNEL(0),
+    .C_HAS_AXI_RUSER(0),
+    .C_HAS_AXI_WR_CHANNEL(0),
+    .C_HAS_AXI_WUSER(0),
+    .C_HAS_AXIS_TDATA(0),
+    .C_HAS_AXIS_TDEST(0),
+    .C_HAS_AXIS_TID(0),
+    .C_HAS_AXIS_TKEEP(0),
+    .C_HAS_AXIS_TLAST(0),
+    .C_HAS_AXIS_TREADY(1),
+    .C_HAS_AXIS_TSTRB(0),
+    .C_HAS_AXIS_TUSER(0),
+    .C_HAS_BACKUP(0),
+    .C_HAS_DATA_COUNT(0),
+    .C_HAS_DATA_COUNTS_AXIS(0),
+    .C_HAS_DATA_COUNTS_RACH(0),
+    .C_HAS_DATA_COUNTS_RDCH(0),
+    .C_HAS_DATA_COUNTS_WACH(0),
+    .C_HAS_DATA_COUNTS_WDCH(0),
+    .C_HAS_DATA_COUNTS_WRCH(0),
+    .C_HAS_INT_CLK(0),
+    .C_HAS_MASTER_CE(0),
+    .C_HAS_MEMINIT_FILE(0),
+    .C_HAS_OVERFLOW(1),
+    .C_HAS_PROG_FLAGS_AXIS(0),
+    .C_HAS_PROG_FLAGS_RACH(0),
+    .C_HAS_PROG_FLAGS_RDCH(0),
+    .C_HAS_PROG_FLAGS_WACH(0),
+    .C_HAS_PROG_FLAGS_WDCH(0),
+    .C_HAS_PROG_FLAGS_WRCH(0),
+    .C_HAS_RD_DATA_COUNT(1),
+    .C_HAS_RD_RST(0),
+    .C_HAS_RST(1),
+    .C_HAS_SLAVE_CE(0),
+    .C_HAS_SRST(0),
+    .C_HAS_UNDERFLOW(0),
+    .C_HAS_VALID(0),
+    .C_HAS_WR_ACK(0),
+    .C_HAS_WR_DATA_COUNT(0),
+    .C_HAS_WR_RST(0),
+    .C_IMPLEMENTATION_TYPE(2),
+    .C_IMPLEMENTATION_TYPE_AXIS(1),
+    .C_IMPLEMENTATION_TYPE_RACH(1),
+    .C_IMPLEMENTATION_TYPE_RDCH(1),
+    .C_IMPLEMENTATION_TYPE_WACH(1),
+    .C_IMPLEMENTATION_TYPE_WDCH(1),
+    .C_IMPLEMENTATION_TYPE_WRCH(1),
+    .C_INIT_WR_PNTR_VAL(0),
+    .C_INTERFACE_TYPE(0),
+    .C_MEMORY_TYPE(1),
+    .C_MIF_FILE_NAME("BlankString"),
+    .C_MSGON_VAL(1),
+    .C_OPTIMIZATION_MODE(0),
+    .C_OVERFLOW_LOW(0),
+    .C_PRELOAD_LATENCY(1),
+    .C_PRELOAD_REGS(0),
+    .C_PRIM_FIFO_TYPE("8kx4"),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL(2),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL_AXIS(1022),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL_RACH(1022),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL_RDCH(1022),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL_WACH(1022),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL_WDCH(1022),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL_WRCH(1022),
+    .C_PROG_EMPTY_THRESH_NEGATE_VAL(3),
+    .C_PROG_EMPTY_TYPE(0),
+    .C_PROG_EMPTY_TYPE_AXIS(0),
+    .C_PROG_EMPTY_TYPE_RACH(0),
+    .C_PROG_EMPTY_TYPE_RDCH(0),
+    .C_PROG_EMPTY_TYPE_WACH(0),
+    .C_PROG_EMPTY_TYPE_WDCH(0),
+    .C_PROG_EMPTY_TYPE_WRCH(0),
+    .C_PROG_FULL_THRESH_ASSERT_VAL(16381),
+    .C_PROG_FULL_THRESH_ASSERT_VAL_AXIS(1023),
+    .C_PROG_FULL_THRESH_ASSERT_VAL_RACH(1023),
+    .C_PROG_FULL_THRESH_ASSERT_VAL_RDCH(1023),
+    .C_PROG_FULL_THRESH_ASSERT_VAL_WACH(1023),
+    .C_PROG_FULL_THRESH_ASSERT_VAL_WDCH(1023),
+    .C_PROG_FULL_THRESH_ASSERT_VAL_WRCH(1023),
+    .C_PROG_FULL_THRESH_NEGATE_VAL(16380),
+    .C_PROG_FULL_TYPE(0),
+    .C_PROG_FULL_TYPE_AXIS(0),
+    .C_PROG_FULL_TYPE_RACH(0),
+    .C_PROG_FULL_TYPE_RDCH(0),
+    .C_PROG_FULL_TYPE_WACH(0),
+    .C_PROG_FULL_TYPE_WDCH(0),
+    .C_PROG_FULL_TYPE_WRCH(0),
+    .C_RACH_TYPE(0),
+    .C_RD_DATA_COUNT_WIDTH(14),
+    .C_RD_DEPTH(16384),
+    .C_RD_FREQ(1),
+    .C_RD_PNTR_WIDTH(14),
+    .C_RDCH_TYPE(0),
+    .C_REG_SLICE_MODE_AXIS(0),
+    .C_REG_SLICE_MODE_RACH(0),
+    .C_REG_SLICE_MODE_RDCH(0),
+    .C_REG_SLICE_MODE_WACH(0),
+    .C_REG_SLICE_MODE_WDCH(0),
+    .C_REG_SLICE_MODE_WRCH(0),
+    .C_SYNCHRONIZER_STAGE(2),
+    .C_UNDERFLOW_LOW(0),
+    .C_USE_COMMON_OVERFLOW(0),
+    .C_USE_COMMON_UNDERFLOW(0),
+    .C_USE_DEFAULT_SETTINGS(0),
+    .C_USE_DOUT_RST(1),
+    .C_USE_ECC(0),
+    .C_USE_ECC_AXIS(0),
+    .C_USE_ECC_RACH(0),
+    .C_USE_ECC_RDCH(0),
+    .C_USE_ECC_WACH(0),
+    .C_USE_ECC_WDCH(0),
+    .C_USE_ECC_WRCH(0),
+    .C_USE_EMBEDDED_REG(0),
+    .C_USE_FIFO16_FLAGS(0),
+    .C_USE_FWFT_DATA_COUNT(0),
+    .C_VALID_LOW(0),
+    .C_WACH_TYPE(0),
+    .C_WDCH_TYPE(0),
+    .C_WR_ACK_LOW(0),
+    .C_WR_DATA_COUNT_WIDTH(14),
+    .C_WR_DEPTH(16384),
+    .C_WR_DEPTH_AXIS(1024),
+    .C_WR_DEPTH_RACH(16),
+    .C_WR_DEPTH_RDCH(1024),
+    .C_WR_DEPTH_WACH(16),
+    .C_WR_DEPTH_WDCH(1024),
+    .C_WR_DEPTH_WRCH(16),
+    .C_WR_FREQ(1),
+    .C_WR_PNTR_WIDTH(14),
+    .C_WR_PNTR_WIDTH_AXIS(10),
+    .C_WR_PNTR_WIDTH_RACH(4),
+    .C_WR_PNTR_WIDTH_RDCH(10),
+    .C_WR_PNTR_WIDTH_WACH(4),
+    .C_WR_PNTR_WIDTH_WDCH(10),
+    .C_WR_PNTR_WIDTH_WRCH(4),
+    .C_WR_RESPONSE_LATENCY(1),
+    .C_WRCH_TYPE(0)
+  )
+  inst (
+    .RST(rst),
+    .WR_CLK(wr_clk),
+    .RD_CLK(rd_clk),
+    .DIN(din),
+    .WR_EN(wr_en),
+    .RD_EN(rd_en),
+    .DOUT(dout),
+    .FULL(full),
+    .OVERFLOW(overflow),
+    .EMPTY(empty),
+    .RD_DATA_COUNT(rd_data_count),
+    .BACKUP(),  // this port and every port below it are left unconnected by the wrapper
+    .BACKUP_MARKER(),
+    .CLK(),
+    .SRST(),
+    .WR_RST(),
+    .RD_RST(),
+    .PROG_EMPTY_THRESH(),
+    .PROG_EMPTY_THRESH_ASSERT(),
+    .PROG_EMPTY_THRESH_NEGATE(),
+    .PROG_FULL_THRESH(),
+    .PROG_FULL_THRESH_ASSERT(),
+    .PROG_FULL_THRESH_NEGATE(),
+    .INT_CLK(),
+    .INJECTDBITERR(),
+    .INJECTSBITERR(),
+    .ALMOST_FULL(),
+    .WR_ACK(),
+    .ALMOST_EMPTY(),
+    .VALID(),
+    .UNDERFLOW(),
+    .DATA_COUNT(),
+    .WR_DATA_COUNT(),
+    .PROG_FULL(),
+    .PROG_EMPTY(),
+    .SBITERR(),
+    .DBITERR(),
+    .M_ACLK(),
+    .S_ACLK(),
+    .S_ARESETN(),
+    .M_ACLK_EN(),
+    .S_ACLK_EN(),
+    .S_AXI_AWID(),
+    .S_AXI_AWADDR(),
+    .S_AXI_AWLEN(),
+    .S_AXI_AWSIZE(),
+    .S_AXI_AWBURST(),
+    .S_AXI_AWLOCK(),
+    .S_AXI_AWCACHE(),
+    .S_AXI_AWPROT(),
+    .S_AXI_AWQOS(),
+    .S_AXI_AWREGION(),
+    .S_AXI_AWUSER(),
+    .S_AXI_AWVALID(),
+    .S_AXI_AWREADY(),
+    .S_AXI_WID(),
+    .S_AXI_WDATA(),
+    .S_AXI_WSTRB(),
+    .S_AXI_WLAST(),
+    .S_AXI_WUSER(),
+    .S_AXI_WVALID(),
+    .S_AXI_WREADY(),
+    .S_AXI_BID(),
+    .S_AXI_BRESP(),
+    .S_AXI_BUSER(),
+    .S_AXI_BVALID(),
+    .S_AXI_BREADY(),
+    .M_AXI_AWID(),
+    .M_AXI_AWADDR(),
+    .M_AXI_AWLEN(),
+    .M_AXI_AWSIZE(),
+    .M_AXI_AWBURST(),
+    .M_AXI_AWLOCK(),
+    .M_AXI_AWCACHE(),
+    .M_AXI_AWPROT(),
+    .M_AXI_AWQOS(),
+    .M_AXI_AWREGION(),
+    .M_AXI_AWUSER(),
+    .M_AXI_AWVALID(),
+    .M_AXI_AWREADY(),
+    .M_AXI_WID(),
+    .M_AXI_WDATA(),
+    .M_AXI_WSTRB(),
+    .M_AXI_WLAST(),
+    .M_AXI_WUSER(),
+    .M_AXI_WVALID(),
+    .M_AXI_WREADY(),
+    .M_AXI_BID(),
+    .M_AXI_BRESP(),
+    .M_AXI_BUSER(),
+    .M_AXI_BVALID(),
+    .M_AXI_BREADY(),
+    .S_AXI_ARID(),
+    .S_AXI_ARADDR(),
+    .S_AXI_ARLEN(),
+    .S_AXI_ARSIZE(),
+    .S_AXI_ARBURST(),
+    .S_AXI_ARLOCK(),
+    .S_AXI_ARCACHE(),
+    .S_AXI_ARPROT(),
+    .S_AXI_ARQOS(),
+    .S_AXI_ARREGION(),
+    .S_AXI_ARUSER(),
+    .S_AXI_ARVALID(),
+    .S_AXI_ARREADY(),
+    .S_AXI_RID(),
+    .S_AXI_RDATA(),
+    .S_AXI_RRESP(),
+    .S_AXI_RLAST(),
+    .S_AXI_RUSER(),
+    .S_AXI_RVALID(),
+    .S_AXI_RREADY(),
+    .M_AXI_ARID(),
+    .M_AXI_ARADDR(),
+    .M_AXI_ARLEN(),
+    .M_AXI_ARSIZE(),
+    .M_AXI_ARBURST(),
+    .M_AXI_ARLOCK(),
+    .M_AXI_ARCACHE(),
+    .M_AXI_ARPROT(),
+    .M_AXI_ARQOS(),
+    .M_AXI_ARREGION(),
+    .M_AXI_ARUSER(),
+    .M_AXI_ARVALID(),
+    .M_AXI_ARREADY(),
+    .M_AXI_RID(),
+    .M_AXI_RDATA(),
+    .M_AXI_RRESP(),
+    .M_AXI_RLAST(),
+    .M_AXI_RUSER(),
+    .M_AXI_RVALID(),
+    .M_AXI_RREADY(),
+    .S_AXIS_TVALID(),
+    .S_AXIS_TREADY(),
+    .S_AXIS_TDATA(),
+    .S_AXIS_TSTRB(),
+    .S_AXIS_TKEEP(),
+    .S_AXIS_TLAST(),
+    .S_AXIS_TID(),
+    .S_AXIS_TDEST(),
+    .S_AXIS_TUSER(),
+    .M_AXIS_TVALID(),
+    .M_AXIS_TREADY(),
+    .M_AXIS_TDATA(),
+    .M_AXIS_TSTRB(),
+    .M_AXIS_TKEEP(),
+    .M_AXIS_TLAST(),
+    .M_AXIS_TID(),
+    .M_AXIS_TDEST(),
+    .M_AXIS_TUSER(),
+    .AXI_AW_INJECTSBITERR(),
+    .AXI_AW_INJECTDBITERR(),
+    .AXI_AW_PROG_FULL_THRESH(),
+    .AXI_AW_PROG_EMPTY_THRESH(),
+    .AXI_AW_DATA_COUNT(),
+    .AXI_AW_WR_DATA_COUNT(),
+    .AXI_AW_RD_DATA_COUNT(),
+    .AXI_AW_SBITERR(),
+    .AXI_AW_DBITERR(),
+    .AXI_AW_OVERFLOW(),
+    .AXI_AW_UNDERFLOW(),
+    .AXI_AW_PROG_FULL(),
+    .AXI_AW_PROG_EMPTY(),
+    .AXI_W_INJECTSBITERR(),
+    .AXI_W_INJECTDBITERR(),
+    .AXI_W_PROG_FULL_THRESH(),
+    .AXI_W_PROG_EMPTY_THRESH(),
+    .AXI_W_DATA_COUNT(),
+    .AXI_W_WR_DATA_COUNT(),
+    .AXI_W_RD_DATA_COUNT(),
+    .AXI_W_SBITERR(),
+    .AXI_W_DBITERR(),
+    .AXI_W_OVERFLOW(),
+    .AXI_W_UNDERFLOW(),
+    .AXI_B_INJECTSBITERR(),
+    .AXI_W_PROG_FULL(),
+    .AXI_W_PROG_EMPTY(),
+    .AXI_B_INJECTDBITERR(),
+    .AXI_B_PROG_FULL_THRESH(),
+    .AXI_B_PROG_EMPTY_THRESH(),
+    .AXI_B_DATA_COUNT(),
+    .AXI_B_WR_DATA_COUNT(),
+    .AXI_B_RD_DATA_COUNT(),
+    .AXI_B_SBITERR(),
+    .AXI_B_DBITERR(),
+    .AXI_B_OVERFLOW(),
+    .AXI_B_UNDERFLOW(),
+    .AXI_AR_INJECTSBITERR(),
+    .AXI_B_PROG_FULL(),
+    .AXI_B_PROG_EMPTY(),
+    .AXI_AR_INJECTDBITERR(),
+    .AXI_AR_PROG_FULL_THRESH(),
+    .AXI_AR_PROG_EMPTY_THRESH(),
+    .AXI_AR_DATA_COUNT(),
+    .AXI_AR_WR_DATA_COUNT(),
+    .AXI_AR_RD_DATA_COUNT(),
+    .AXI_AR_SBITERR(),
+    .AXI_AR_DBITERR(),
+    .AXI_AR_OVERFLOW(),
+    .AXI_AR_UNDERFLOW(),
+    .AXI_AR_PROG_FULL(),
+    .AXI_AR_PROG_EMPTY(),
+    .AXI_R_INJECTSBITERR(),
+    .AXI_R_INJECTDBITERR(),
+    .AXI_R_PROG_FULL_THRESH(),
+    .AXI_R_PROG_EMPTY_THRESH(),
+    .AXI_R_DATA_COUNT(),
+    .AXI_R_WR_DATA_COUNT(),
+    .AXI_R_RD_DATA_COUNT(),
+    .AXI_R_SBITERR(),
+    .AXI_R_DBITERR(),
+    .AXI_R_OVERFLOW(),
+    .AXI_R_UNDERFLOW(),
+    .AXIS_INJECTSBITERR(),
+    .AXI_R_PROG_FULL(),
+    .AXI_R_PROG_EMPTY(),
+    .AXIS_INJECTDBITERR(),
+    .AXIS_PROG_FULL_THRESH(),
+    .AXIS_PROG_EMPTY_THRESH(),
+    .AXIS_DATA_COUNT(),
+    .AXIS_WR_DATA_COUNT(),
+    .AXIS_RD_DATA_COUNT(),
+    .AXIS_SBITERR(),
+    .AXIS_DBITERR(),
+    .AXIS_OVERFLOW(),
+    .AXIS_UNDERFLOW(),
+    .AXIS_PROG_FULL(),
+    .AXIS_PROG_EMPTY()
+  );
+
+// synthesis translate_on
+
+endmodule
diff --git a/src/rtl/ip/uk_fifo/uk_fifo.v b/src/rtl/ip/uk_fifo/uk_fifo.v
new file mode 100755
index 0000000..97e4309
--- /dev/null
+++ b/src/rtl/ip/uk_fifo/uk_fifo.v
@@ -0,0 +1,491 @@
+/*******************************************************************************
+*     This file is owned and controlled by Xilinx and must be used solely      *
+*     for design, simulation, implementation and creation of design files      *
+*     limited to Xilinx devices or technologies. Use with non-Xilinx           *
+*     devices or technologies is expressly prohibited and immediately          *
+*     terminates your license.                                                 *
+*                                                                              *
+*     XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS" SOLELY     *
+*     FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR XILINX DEVICES.  BY     *
+*     PROVIDING THIS DESIGN, CODE, OR INFORMATION AS ONE POSSIBLE              *
+*     IMPLEMENTATION OF THIS FEATURE, APPLICATION OR STANDARD, XILINX IS       *
+*     MAKING NO REPRESENTATION THAT THIS IMPLEMENTATION IS FREE FROM ANY       *
+*     CLAIMS OF INFRINGEMENT, AND YOU ARE RESPONSIBLE FOR OBTAINING ANY        *
+*     RIGHTS YOU MAY REQUIRE FOR YOUR IMPLEMENTATION.  XILINX EXPRESSLY        *
+*     DISCLAIMS ANY WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE    *
+*     IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR           *
+*     REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF          *
+*     INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A    *
+*     PARTICULAR PURPOSE.                                                      *
+*                                                                              *
+*     Xilinx products are not intended for use in life support appliances,     *
+*     devices, or systems.  Use in such applications are expressly             *
+*     prohibited.                                                              *
+*                                                                              *
+*     (c) Copyright 1995-2013 Xilinx, Inc.                                     *
+*     All rights reserved.                                                     *
+*******************************************************************************/
+// You must compile the wrapper file uk_fifo.v when simulating
+// the core, uk_fifo. When compiling the wrapper file, be sure to
+// reference the XilinxCoreLib Verilog simulation library. For detailed
+// instructions, please refer to the "CORE Generator Help".
+
+// The synthesis directives "translate_off/translate_on" specified below are
+// supported by Xilinx, Mentor Graphics and Synplicity synthesis
+// tools. Ensure they are correct for your synthesis tool(s).
+
+`timescale 1ns/1ps
+
+module uk_fifo(
+  rst,
+  wr_clk,
+  rd_clk,
+  din,
+  wr_en,
+  rd_en,
+  dout,
+  full,
+  overflow,
+  empty,
+  rd_data_count
+);
+// CORE Generator wrapper for a FIFO with separate write and read clocks: 8-bit entries, C_WR_DEPTH = 4096.
+input rst;                      // reset input (C_HAS_RST = 1)
+input wr_clk;                   // write-side clock
+input rd_clk;                   // read-side clock (C_COMMON_CLOCK = 0)
+input [7 : 0] din;              // write data, 8 bits (C_DIN_WIDTH)
+input wr_en;                    // write enable
+input rd_en;                    // read enable
+output [7 : 0] dout;            // read data, 8 bits (C_DOUT_WIDTH)
+output full;                    // full flag
+output overflow;                // overflow flag (C_HAS_OVERFLOW = 1)
+output empty;                   // empty flag
+output [11 : 0] rd_data_count;  // read-side entry count, 12 bits (C_RD_DATA_COUNT_WIDTH)
+// NOTE(review): 12 bits tops out at 4095 while C_RD_DEPTH is 4096 - confirm what a full FIFO reports.
+// synthesis translate_off
+// Simulation model only: tools that honor translate_off see this module without the instance below.
+  FIFO_GENERATOR_V9_3 #(
+    .C_ADD_NGC_CONSTRAINT(0),
+    .C_APPLICATION_TYPE_AXIS(0),
+    .C_APPLICATION_TYPE_RACH(0),
+    .C_APPLICATION_TYPE_RDCH(0),
+    .C_APPLICATION_TYPE_WACH(0),
+    .C_APPLICATION_TYPE_WDCH(0),
+    .C_APPLICATION_TYPE_WRCH(0),
+    .C_AXI_ADDR_WIDTH(32),
+    .C_AXI_ARUSER_WIDTH(1),
+    .C_AXI_AWUSER_WIDTH(1),
+    .C_AXI_BUSER_WIDTH(1),
+    .C_AXI_DATA_WIDTH(64),
+    .C_AXI_ID_WIDTH(4),
+    .C_AXI_RUSER_WIDTH(1),
+    .C_AXI_TYPE(0),
+    .C_AXI_WUSER_WIDTH(1),
+    .C_AXIS_TDATA_WIDTH(64),
+    .C_AXIS_TDEST_WIDTH(4),
+    .C_AXIS_TID_WIDTH(8),
+    .C_AXIS_TKEEP_WIDTH(4),
+    .C_AXIS_TSTRB_WIDTH(4),
+    .C_AXIS_TUSER_WIDTH(4),
+    .C_AXIS_TYPE(0),
+    .C_COMMON_CLOCK(0),
+    .C_COUNT_TYPE(0),
+    .C_DATA_COUNT_WIDTH(12),
+    .C_DEFAULT_VALUE("BlankString"),
+    .C_DIN_WIDTH(8),
+    .C_DIN_WIDTH_AXIS(1),
+    .C_DIN_WIDTH_RACH(32),
+    .C_DIN_WIDTH_RDCH(64),
+    .C_DIN_WIDTH_WACH(32),
+    .C_DIN_WIDTH_WDCH(64),
+    .C_DIN_WIDTH_WRCH(2),
+    .C_DOUT_RST_VAL("0"),
+    .C_DOUT_WIDTH(8),
+    .C_ENABLE_RLOCS(0),
+    .C_ENABLE_RST_SYNC(1),
+    .C_ERROR_INJECTION_TYPE(0),
+    .C_ERROR_INJECTION_TYPE_AXIS(0),
+    .C_ERROR_INJECTION_TYPE_RACH(0),
+    .C_ERROR_INJECTION_TYPE_RDCH(0),
+    .C_ERROR_INJECTION_TYPE_WACH(0),
+    .C_ERROR_INJECTION_TYPE_WDCH(0),
+    .C_ERROR_INJECTION_TYPE_WRCH(0),
+    .C_FAMILY("spartan6"),
+    .C_FULL_FLAGS_RST_VAL(1),
+    .C_HAS_ALMOST_EMPTY(0),
+    .C_HAS_ALMOST_FULL(0),
+    .C_HAS_AXI_ARUSER(0),
+    .C_HAS_AXI_AWUSER(0),
+    .C_HAS_AXI_BUSER(0),
+    .C_HAS_AXI_RD_CHANNEL(0),
+    .C_HAS_AXI_RUSER(0),
+    .C_HAS_AXI_WR_CHANNEL(0),
+    .C_HAS_AXI_WUSER(0),
+    .C_HAS_AXIS_TDATA(0),
+    .C_HAS_AXIS_TDEST(0),
+    .C_HAS_AXIS_TID(0),
+    .C_HAS_AXIS_TKEEP(0),
+    .C_HAS_AXIS_TLAST(0),
+    .C_HAS_AXIS_TREADY(1),
+    .C_HAS_AXIS_TSTRB(0),
+    .C_HAS_AXIS_TUSER(0),
+    .C_HAS_BACKUP(0),
+    .C_HAS_DATA_COUNT(0),
+    .C_HAS_DATA_COUNTS_AXIS(0),
+    .C_HAS_DATA_COUNTS_RACH(0),
+    .C_HAS_DATA_COUNTS_RDCH(0),
+    .C_HAS_DATA_COUNTS_WACH(0),
+    .C_HAS_DATA_COUNTS_WDCH(0),
+    .C_HAS_DATA_COUNTS_WRCH(0),
+    .C_HAS_INT_CLK(0),
+    .C_HAS_MASTER_CE(0),
+    .C_HAS_MEMINIT_FILE(0),
+    .C_HAS_OVERFLOW(1),
+    .C_HAS_PROG_FLAGS_AXIS(0),
+    .C_HAS_PROG_FLAGS_RACH(0),
+    .C_HAS_PROG_FLAGS_RDCH(0),
+    .C_HAS_PROG_FLAGS_WACH(0),
+    .C_HAS_PROG_FLAGS_WDCH(0),
+    .C_HAS_PROG_FLAGS_WRCH(0),
+    .C_HAS_RD_DATA_COUNT(1),
+    .C_HAS_RD_RST(0),
+    .C_HAS_RST(1),
+    .C_HAS_SLAVE_CE(0),
+    .C_HAS_SRST(0),
+    .C_HAS_UNDERFLOW(0),
+    .C_HAS_VALID(0),
+    .C_HAS_WR_ACK(0),
+    .C_HAS_WR_DATA_COUNT(0),
+    .C_HAS_WR_RST(0),
+    .C_IMPLEMENTATION_TYPE(2),
+    .C_IMPLEMENTATION_TYPE_AXIS(1),
+    .C_IMPLEMENTATION_TYPE_RACH(1),
+    .C_IMPLEMENTATION_TYPE_RDCH(1),
+    .C_IMPLEMENTATION_TYPE_WACH(1),
+    .C_IMPLEMENTATION_TYPE_WDCH(1),
+    .C_IMPLEMENTATION_TYPE_WRCH(1),
+    .C_INIT_WR_PNTR_VAL(0),
+    .C_INTERFACE_TYPE(0),
+    .C_MEMORY_TYPE(1),
+    .C_MIF_FILE_NAME("BlankString"),
+    .C_MSGON_VAL(1),
+    .C_OPTIMIZATION_MODE(0),
+    .C_OVERFLOW_LOW(0),
+    .C_PRELOAD_LATENCY(1),
+    .C_PRELOAD_REGS(0),
+    .C_PRIM_FIFO_TYPE("4kx9"),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL(2),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL_AXIS(1022),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL_RACH(1022),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL_RDCH(1022),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL_WACH(1022),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL_WDCH(1022),
+    .C_PROG_EMPTY_THRESH_ASSERT_VAL_WRCH(1022),
+    .C_PROG_EMPTY_THRESH_NEGATE_VAL(3),
+    .C_PROG_EMPTY_TYPE(0),
+    .C_PROG_EMPTY_TYPE_AXIS(0),
+    .C_PROG_EMPTY_TYPE_RACH(0),
+    .C_PROG_EMPTY_TYPE_RDCH(0),
+    .C_PROG_EMPTY_TYPE_WACH(0),
+    .C_PROG_EMPTY_TYPE_WDCH(0),
+    .C_PROG_EMPTY_TYPE_WRCH(0),
+    .C_PROG_FULL_THRESH_ASSERT_VAL(4093),
+    .C_PROG_FULL_THRESH_ASSERT_VAL_AXIS(1023),
+    .C_PROG_FULL_THRESH_ASSERT_VAL_RACH(1023),
+    .C_PROG_FULL_THRESH_ASSERT_VAL_RDCH(1023),
+    .C_PROG_FULL_THRESH_ASSERT_VAL_WACH(1023),
+    .C_PROG_FULL_THRESH_ASSERT_VAL_WDCH(1023),
+    .C_PROG_FULL_THRESH_ASSERT_VAL_WRCH(1023),
+    .C_PROG_FULL_THRESH_NEGATE_VAL(4092),
+    .C_PROG_FULL_TYPE(0),
+    .C_PROG_FULL_TYPE_AXIS(0),
+    .C_PROG_FULL_TYPE_RACH(0),
+    .C_PROG_FULL_TYPE_RDCH(0),
+    .C_PROG_FULL_TYPE_WACH(0),
+    .C_PROG_FULL_TYPE_WDCH(0),
+    .C_PROG_FULL_TYPE_WRCH(0),
+    .C_RACH_TYPE(0),
+    .C_RD_DATA_COUNT_WIDTH(12),
+    .C_RD_DEPTH(4096),
+    .C_RD_FREQ(1),
+    .C_RD_PNTR_WIDTH(12),
+    .C_RDCH_TYPE(0),
+    .C_REG_SLICE_MODE_AXIS(0),
+    .C_REG_SLICE_MODE_RACH(0),
+    .C_REG_SLICE_MODE_RDCH(0),
+    .C_REG_SLICE_MODE_WACH(0),
+    .C_REG_SLICE_MODE_WDCH(0),
+    .C_REG_SLICE_MODE_WRCH(0),
+    .C_SYNCHRONIZER_STAGE(2),
+    .C_UNDERFLOW_LOW(0),
+    .C_USE_COMMON_OVERFLOW(0),
+    .C_USE_COMMON_UNDERFLOW(0),
+    .C_USE_DEFAULT_SETTINGS(0),
+    .C_USE_DOUT_RST(1),
+    .C_USE_ECC(0),
+    .C_USE_ECC_AXIS(0),
+    .C_USE_ECC_RACH(0),
+    .C_USE_ECC_RDCH(0),
+    .C_USE_ECC_WACH(0),
+    .C_USE_ECC_WDCH(0),
+    .C_USE_ECC_WRCH(0),
+    .C_USE_EMBEDDED_REG(0),
+    .C_USE_FIFO16_FLAGS(0),
+    .C_USE_FWFT_DATA_COUNT(0),
+    .C_VALID_LOW(0),
+    .C_WACH_TYPE(0),
+    .C_WDCH_TYPE(0),
+    .C_WR_ACK_LOW(0),
+    .C_WR_DATA_COUNT_WIDTH(12),
+    .C_WR_DEPTH(4096),
+    .C_WR_DEPTH_AXIS(1024),
+    .C_WR_DEPTH_RACH(16),
+    .C_WR_DEPTH_RDCH(1024),
+    .C_WR_DEPTH_WACH(16),
+    .C_WR_DEPTH_WDCH(1024),
+    .C_WR_DEPTH_WRCH(16),
+    .C_WR_FREQ(1),
+    .C_WR_PNTR_WIDTH(12),
+    .C_WR_PNTR_WIDTH_AXIS(10),
+    .C_WR_PNTR_WIDTH_RACH(4),
+    .C_WR_PNTR_WIDTH_RDCH(10),
+    .C_WR_PNTR_WIDTH_WACH(4),
+    .C_WR_PNTR_WIDTH_WDCH(10),
+    .C_WR_PNTR_WIDTH_WRCH(4),
+    .C_WR_RESPONSE_LATENCY(1),
+    .C_WRCH_TYPE(0)
+  )
+  inst (
+    .RST(rst),
+    .WR_CLK(wr_clk),
+    .RD_CLK(rd_clk),
+    .DIN(din),
+    .WR_EN(wr_en),
+    .RD_EN(rd_en),
+    .DOUT(dout),
+    .FULL(full),
+    .OVERFLOW(overflow),
+    .EMPTY(empty),
+    .RD_DATA_COUNT(rd_data_count),
+    .BACKUP(),  // this port and every port below it are left unconnected by the wrapper
+    .BACKUP_MARKER(),
+    .CLK(),
+    .SRST(),
+    .WR_RST(),
+    .RD_RST(),
+    .PROG_EMPTY_THRESH(),
+    .PROG_EMPTY_THRESH_ASSERT(),
+    .PROG_EMPTY_THRESH_NEGATE(),
+    .PROG_FULL_THRESH(),
+    .PROG_FULL_THRESH_ASSERT(),
+    .PROG_FULL_THRESH_NEGATE(),
+    .INT_CLK(),
+    .INJECTDBITERR(),
+    .INJECTSBITERR(),
+    .ALMOST_FULL(),
+    .WR_ACK(),
+    .ALMOST_EMPTY(),
+    .VALID(),
+    .UNDERFLOW(),
+    .DATA_COUNT(),
+    .WR_DATA_COUNT(),
+    .PROG_FULL(),
+    .PROG_EMPTY(),
+    .SBITERR(),
+    .DBITERR(),
+    .M_ACLK(),
+    .S_ACLK(),
+    .S_ARESETN(),
+    .M_ACLK_EN(),
+    .S_ACLK_EN(),
+    .S_AXI_AWID(),
+    .S_AXI_AWADDR(),
+    .S_AXI_AWLEN(),
+    .S_AXI_AWSIZE(),
+    .S_AXI_AWBURST(),
+    .S_AXI_AWLOCK(),
+    .S_AXI_AWCACHE(),
+    .S_AXI_AWPROT(),
+    .S_AXI_AWQOS(),
+    .S_AXI_AWREGION(),
+    .S_AXI_AWUSER(),
+    .S_AXI_AWVALID(),
+    .S_AXI_AWREADY(),
+    .S_AXI_WID(),
+    .S_AXI_WDATA(),
+    .S_AXI_WSTRB(),
+    .S_AXI_WLAST(),
+    .S_AXI_WUSER(),
+    .S_AXI_WVALID(),
+    .S_AXI_WREADY(),
+    .S_AXI_BID(),
+    .S_AXI_BRESP(),
+    .S_AXI_BUSER(),
+    .S_AXI_BVALID(),
+    .S_AXI_BREADY(),
+    .M_AXI_AWID(),
+    .M_AXI_AWADDR(),
+    .M_AXI_AWLEN(),
+    .M_AXI_AWSIZE(),
+    .M_AXI_AWBURST(),
+    .M_AXI_AWLOCK(),
+    .M_AXI_AWCACHE(),
+    .M_AXI_AWPROT(),
+    .M_AXI_AWQOS(),
+    .M_AXI_AWREGION(),
+    .M_AXI_AWUSER(),
+    .M_AXI_AWVALID(),
+    .M_AXI_AWREADY(),
+    .M_AXI_WID(),
+    .M_AXI_WDATA(),
+    .M_AXI_WSTRB(),
+    .M_AXI_WLAST(),
+    .M_AXI_WUSER(),
+    .M_AXI_WVALID(),
+    .M_AXI_WREADY(),
+    .M_AXI_BID(),
+    .M_AXI_BRESP(),
+    .M_AXI_BUSER(),
+    .M_AXI_BVALID(),
+    .M_AXI_BREADY(),
+    .S_AXI_ARID(),
+    .S_AXI_ARADDR(),
+    .S_AXI_ARLEN(),
+    .S_AXI_ARSIZE(),
+    .S_AXI_ARBURST(),
+    .S_AXI_ARLOCK(),
+    .S_AXI_ARCACHE(),
+    .S_AXI_ARPROT(),
+    .S_AXI_ARQOS(),
+    .S_AXI_ARREGION(),
+    .S_AXI_ARUSER(),
+    .S_AXI_ARVALID(),
+    .S_AXI_ARREADY(),
+    .S_AXI_RID(),
+    .S_AXI_RDATA(),
+    .S_AXI_RRESP(),
+    .S_AXI_RLAST(),
+    .S_AXI_RUSER(),
+    .S_AXI_RVALID(),
+    .S_AXI_RREADY(),
+    .M_AXI_ARID(),
+    .M_AXI_ARADDR(),
+    .M_AXI_ARLEN(),
+    .M_AXI_ARSIZE(),
+    .M_AXI_ARBURST(),
+    .M_AXI_ARLOCK(),
+    .M_AXI_ARCACHE(),
+    .M_AXI_ARPROT(),
+    .M_AXI_ARQOS(),
+    .M_AXI_ARREGION(),
+    .M_AXI_ARUSER(),
+    .M_AXI_ARVALID(),
+    .M_AXI_ARREADY(),
+    .M_AXI_RID(),
+    .M_AXI_RDATA(),
+    .M_AXI_RRESP(),
+    .M_AXI_RLAST(),
+    .M_AXI_RUSER(),
+    .M_AXI_RVALID(),
+    .M_AXI_RREADY(),
+    .S_AXIS_TVALID(),
+    .S_AXIS_TREADY(),
+    .S_AXIS_TDATA(),
+    .S_AXIS_TSTRB(),
+    .S_AXIS_TKEEP(),
+    .S_AXIS_TLAST(),
+    .S_AXIS_TID(),
+    .S_AXIS_TDEST(),
+    .S_AXIS_TUSER(),
+    .M_AXIS_TVALID(),
+    .M_AXIS_TREADY(),
+    .M_AXIS_TDATA(),
+    .M_AXIS_TSTRB(),
+    .M_AXIS_TKEEP(),
+    .M_AXIS_TLAST(),
+    .M_AXIS_TID(),
+    .M_AXIS_TDEST(),
+    .M_AXIS_TUSER(),
+    .AXI_AW_INJECTSBITERR(),
+    .AXI_AW_INJECTDBITERR(),
+    .AXI_AW_PROG_FULL_THRESH(),
+    .AXI_AW_PROG_EMPTY_THRESH(),
+    .AXI_AW_DATA_COUNT(),
+    .AXI_AW_WR_DATA_COUNT(),
+    .AXI_AW_RD_DATA_COUNT(),
+    .AXI_AW_SBITERR(),
+    .AXI_AW_DBITERR(),
+    .AXI_AW_OVERFLOW(),
+    .AXI_AW_UNDERFLOW(),
+    .AXI_AW_PROG_FULL(),
+    .AXI_AW_PROG_EMPTY(),
+    .AXI_W_INJECTSBITERR(),
+    .AXI_W_INJECTDBITERR(),
+    .AXI_W_PROG_FULL_THRESH(),
+    .AXI_W_PROG_EMPTY_THRESH(),
+    .AXI_W_DATA_COUNT(),
+    .AXI_W_WR_DATA_COUNT(),
+    .AXI_W_RD_DATA_COUNT(),
+    .AXI_W_SBITERR(),
+    .AXI_W_DBITERR(),
+    .AXI_W_OVERFLOW(),
+    .AXI_W_UNDERFLOW(),
+    .AXI_B_INJECTSBITERR(),
+    .AXI_W_PROG_FULL(),
+    .AXI_W_PROG_EMPTY(),
+    .AXI_B_INJECTDBITERR(),
+    .AXI_B_PROG_FULL_THRESH(),
+    .AXI_B_PROG_EMPTY_THRESH(),
+    .AXI_B_DATA_COUNT(),
+    .AXI_B_WR_DATA_COUNT(),
+    .AXI_B_RD_DATA_COUNT(),
+    .AXI_B_SBITERR(),
+    .AXI_B_DBITERR(),
+    .AXI_B_OVERFLOW(),
+    .AXI_B_UNDERFLOW(),
+    .AXI_AR_INJECTSBITERR(),
+    .AXI_B_PROG_FULL(),
+    .AXI_B_PROG_EMPTY(),
+    .AXI_AR_INJECTDBITERR(),
+    .AXI_AR_PROG_FULL_THRESH(),
+    .AXI_AR_PROG_EMPTY_THRESH(),
+    .AXI_AR_DATA_COUNT(),
+    .AXI_AR_WR_DATA_COUNT(),
+    .AXI_AR_RD_DATA_COUNT(),
+    .AXI_AR_SBITERR(),
+    .AXI_AR_DBITERR(),
+    .AXI_AR_OVERFLOW(),
+    .AXI_AR_UNDERFLOW(),
+    .AXI_AR_PROG_FULL(),
+    .AXI_AR_PROG_EMPTY(),
+    .AXI_R_INJECTSBITERR(),
+    .AXI_R_INJECTDBITERR(),
+    .AXI_R_PROG_FULL_THRESH(),
+    .AXI_R_PROG_EMPTY_THRESH(),
+    .AXI_R_DATA_COUNT(),
+    .AXI_R_WR_DATA_COUNT(),
+    .AXI_R_RD_DATA_COUNT(),
+    .AXI_R_SBITERR(),
+    .AXI_R_DBITERR(),
+    .AXI_R_OVERFLOW(),
+    .AXI_R_UNDERFLOW(),
+    .AXIS_INJECTSBITERR(),
+    .AXI_R_PROG_FULL(),
+    .AXI_R_PROG_EMPTY(),
+    .AXIS_INJECTDBITERR(),
+    .AXIS_PROG_FULL_THRESH(),
+    .AXIS_PROG_EMPTY_THRESH(),
+    .AXIS_DATA_COUNT(),
+    .AXIS_WR_DATA_COUNT(),
+    .AXIS_RD_DATA_COUNT(),
+    .AXIS_SBITERR(),
+    .AXIS_DBITERR(),
+    .AXIS_OVERFLOW(),
+    .AXIS_UNDERFLOW(),
+    .AXIS_PROG_FULL(),
+    .AXIS_PROG_EMPTY()
+  );
+
+// synthesis translate_on
+
+endmodule
diff --git a/src/rtl/nand_log_tb.v b/src/rtl/nand_log_tb.v
new file mode 100755
index 0000000..51fa59d
--- /dev/null
+++ b/src/rtl/nand_log_tb.v
@@ -0,0 +1,225 @@
+//////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2013, Andrew "bunnie" Huang
+//
+// See the NOTICE file distributed with this work for additional 
+// information regarding copyright ownership.  The copyright holder 
+// licenses this file to you under the Apache License, Version 2.0 
+// (the "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// code distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//////////////////////////////////////////////////////////////////////////////
+`timescale 1ns / 1ps
+
+module nand_log_tb;  // testbench: drives one NAND bus transaction into the nand_log uut
+   reg bclk;  // 133 MHz clock in hardware; simulated below with an 8 ns period
+   reg clk100;  // 100 MHz clock
+   // NAND bus stimulus (idle levels per nand_idle: re/we/cs high, ale/cle low)
+   reg nand_re;
+   reg nand_we;
+   reg nand_ale;
+   reg nand_cle;
+   reg nand_cs;
+   reg nand_rb;
+   reg [7:0] nand_din;
+   reg [9:0] nand_uk;
+
+   reg       log_reset;
+   reg       log_run;
+   wire      log_cmd_error; // stuck high if cmd fifo overflowed during logging
+   wire      log_data_error; // stuck high if data fifo overflowed during logging
+   wire [26:0] log_entries; // number of entries currently in the log
+
+   wire [3:0]  ddr3_wr_mask;
+   wire [31:0] ddr3_wr_data;
+   wire        ddr3_wr_en;
+   reg         ddr3_wr_full;
+   reg [6:0]   ddr3_wr_count;
+   wire        ddr3_cmd_clk;
+   wire [2:0]  ddr3_cmd_instr;
+   wire        ddr3_cmd_en;
+   wire [5:0]  ddr3_cmd_burstlen;
+   wire [29:0] ddr3_cmd_addr;
+   reg         ddr3_cmd_full;
+
+   wire [63:0] time_t_clk100; // note synched to clk100; NOTE(review): never driven in this testbench -- confirm intended
+   reg         reset;
+   // Positional connections: this order must match the nand_log port list exactly.
+nand_log uut(
+		  bclk,  // 133 MHz clock
+		  clk100,  // 100 MHz clock
+
+		  nand_re,
+		  nand_we,
+		  nand_ale,
+		  nand_cle,
+		  nand_cs,
+		  nand_rb,
+		  nand_din,
+		  nand_uk,
+
+		  log_reset,
+		  log_run,
+		  log_cmd_error, // stuck high if cmd fifo overflowed during logging
+		  log_data_error, // stuck high if data fifo overflowed during logging
+		  log_entries, // number of entries currently in the log
+
+		  ddr3_wr_mask,
+		  ddr3_wr_data,
+		  ddr3_wr_en,
+		  ddr3_wr_full,
+		  ddr3_wr_count,
+		  ddr3_cmd_clk,
+		  ddr3_cmd_instr,
+		  ddr3_cmd_en,
+		  ddr3_cmd_burstlen,
+		  ddr3_cmd_addr,
+		  ddr3_cmd_full,
+
+		  time_t_clk100, // note synched to clk100
+		  reset
+		);
+
+   parameter PERIOD_BCLK = 16'd8;   // 125 MHz (close to 133 MHz actual)
+   always begin
+      bclk = 1'b0;
+      #(PERIOD_BCLK/2) bclk = 1'b1;
+      #(PERIOD_BCLK/2);
+   end
+
+   parameter PERIOD_CLK100 = 16'd10;  // 10 ns period = 100 MHz
+   always begin
+      clk100 = 1'b0;
+      #(PERIOD_CLK100/2) clk100 = 1'b1;
+      #(PERIOD_CLK100/2);
+   end
+   // Park the NAND bus idle, hold log_reset for 1000 ns, then raise log_run and wait 1000 ns.
+   task nand_reset;
+      begin
+	 nand_we = 1'b1;
+	 nand_re = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_cle = 1'b0;
+	 nand_cs = 1'b1;
+	 nand_din = 8'hZZ;
+
+	 log_reset = 1'b1;
+	 log_run = 1'b0;
+	 #1000;
+	 log_reset = 1'b0;
+	 log_run = 1'b1;
+	 #1000;
+      end
+   endtask // nand_reset
+   // Return every NAND control line to its idle level and set nand_din to high-Z (no delay).
+   task nand_idle;
+      begin
+	 nand_we = 1'b1;
+	 nand_re = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_cle = 1'b0;
+	 nand_cs = 1'b1;
+	 nand_din = 8'hZZ;
+      end
+   endtask // nand_idle
+   // One nand_cs-low window: WE pulse with CLE high, WE pulse with ALE high, then five RE pulses.
+   task nand_read_id;
+      begin
+	 nand_cs = 1'b0;
+	 // WE pulse (25 ns low) with CLE high, data 0x90
+	 nand_cle = 1'b1;
+	 nand_we = 1'b0;
+	 nand_din = 8'h90;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_cle = 1'b0;
+	 nand_din = 8'h01;  // bus value changed while WE and RE are both high
+	 #20;
+
+	 nand_ale = 1'b1;
+	 #25;
+	 // WE pulse (25 ns low) with ALE high, data 0x00
+	 nand_we = 1'b0;
+	 nand_din = 8'h00;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_din = 8'h23;  // bus value changed again with WE and RE both high
+	 #20;
+
+	 nand_ale = 1'b0;
+	 // five RE pulses (25 ns low, 25 ns high) with data 0x45, 0x67, 0x89, 0xAB, 0xCD
+	 #10;
+	 nand_re = 1'b0;
+	 nand_din = 8'h45;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 nand_din = 8'h67;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 nand_din = 8'h89;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 nand_din = 8'hAB;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 nand_din = 8'hCD;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+
+	 nand_cs = 1'b1;
+      end
+   endtask // nand_read_id  (no ';' after endtask: a bare ';' is not a legal Verilog-2001 module item)
+   // Main stimulus: initialise inputs, pulse reset, start the logger, run one transaction.
+   initial begin
+      nand_re = 1;
+      nand_we = 1;
+      nand_ale = 0;
+      nand_cle = 0;
+      nand_rb = 1;
+      nand_din = 8'h00;
+      nand_uk[9:0] = 10'h0;
+      nand_cs = 1;
+
+      log_reset = 1'b0;
+      log_run = 1'b0;
+      ddr3_wr_full = 1'b0;
+      ddr3_wr_count = 7'b0;
+      ddr3_cmd_full = 1'b0;
+
+      reset = 1;
+      #1000;
+      reset = 0;
+      nand_reset;  // task enables are written without "()" -- empty parentheses are SystemVerilog-only
+      #1000;
+
+      nand_idle;
+      #200;
+      nand_read_id;
+
+      $stop;  // first breakpoint, immediately after the transaction
+
+      #1000;
+      $stop;  // second breakpoint, 1000 ns later if the simulation is resumed
+   end
+
+endmodule // nand_log_tb
+
+
diff --git a/src/rtl/novena.ucf b/src/rtl/novena.ucf
new file mode 100755
index 0000000..8d67f0e
--- /dev/null
+++ b/src/rtl/novena.ucf
@@ -0,0 +1,602 @@
+###  Autogenerated on 2013-May-10 01:38 by edifToUcf.py
+###  Extracting designator U800 from EDIF netlist novena-dvt.EDF
+
+### extended performance annotation
+CONFIG VCCAUX  = 3.3;
+# Valid values are 2.5 and 3.3
+CONFIG MCB_PERFORMANCE  = EXTENDED;
+
+### define setup/hold constraints for EIM
+OFFSET = IN 4125 ps VALID 4750 ps BEFORE "bclk";
+# The EIM target is 4125 ps setup / 625 ps hold, but writing VALID 625 ps made the
+# tools report -3.5 ns hold slack, so VALID is 4750 ps here (= 4125 ps + 625 ps).
+# 5200 ps is the true limit
+OFFSET = OUT 5100 ps AFTER "bclk";
+
+## SPI budget: 37 ns / 2 = 18.5 ns; 18.5 ns - 7 ns clock-to-Q = 11.5 ns; allow 8 ns tpd for the buffers -> 3.5 ns
+OFFSET = IN 18500 ps VALID 3500ps BEFORE "spiclk";
+OFFSET = OUT 3500 ps after "spiclk";
+NET "spiclk" TNM_NET = "spiclk_tnm";
+TIMESPEC TS_spiclk = PERIOD "spiclk_tnm" 37 ns;
+
+##################################################################################
+# Timing Ignore constraints for paths crossing the clock domain 
+##################################################################################
+#NET "u_ddr3_if/memc?_wrapper_inst/mcb_ui_top_inst/mcb_raw_wrapper_inst/selfrefresh_mcb_mode" TIG;
+#NET "u_ddr3_if/c?_pll_lock" TIG;
+#INST "u_ddr3_if/memc?_wrapper_inst/mcb_ui_top_inst/mcb_raw_wrapper_inst/gen_term_calib.mcb_soft_calibration_top_inst/mcb_soft_calibration_inst/DONE_SOFTANDHARD_CAL*" TIG;
+# NET "ddr3_reset_local" TIG;  ## removed
+
+NET "eim_d_t*" TIG;
+INST "oddr2_eim0" IOB =FORCE;
+INST "oddr2_eim1" IOB =FORCE;
+INST "oddr2_eim2" IOB =FORCE;
+INST "oddr2_eim3" IOB =FORCE;
+INST "oddr2_eim4" IOB =FORCE;
+INST "oddr2_eim5" IOB =FORCE;
+INST "oddr2_eim6" IOB =FORCE;
+INST "oddr2_eim7" IOB =FORCE;
+INST "oddr2_eim8" IOB =FORCE;
+INST "oddr2_eim9" IOB =FORCE;
+INST "oddr2_eimA" IOB =FORCE;
+INST "oddr2_eimB" IOB =FORCE;
+INST "oddr2_eimC" IOB =FORCE;
+INST "oddr2_eimD" IOB =FORCE;
+INST "oddr2_eimE" IOB =FORCE;
+INST "oddr2_eimF" IOB =FORCE;
+
+# eliminate false timing path of bclk to nand data output. Actual path is
+# from nand_re and has at a minimum 15ns available, typically much much more
+#NET "romulator_ddr3/nand_dout[*]" TIG;
+
+NET "reg_wo_40102/state[*]" TIG;
+NET "reg_wo_40100/state[*]" TIG;
+NET "reg_ro_41010/state[*]" TIG;
+# don't sweat the R/B signal, we have tons of margin on it
+
+#Please uncomment the below TIG if used in a design which enables self-refresh mode
+#NET "u_ddr3_if/memc?_wrapper_inst/mcb_ui_top_inst/mcb_raw_wrapper_inst/gen_term_calib.mcb_soft_calibration_top_inst/mcb_soft_calibration_inst/SELFREFRESH_MCB_REQ" TIG;
+     
+
+############################################################################
+## Memory Controller 1                               
+## Memory Device: DDR3_SDRAM->MT41J128M16XX-125 
+## Frequency: 400 MHz
+## Time Period: 2500 ps
+## Supported Part Numbers: MT41J128M16HA-125
+############################################################################
+
+############################################################################
+## Clock constraints                                                        
+############################################################################
+NET "u_ddr3_if/memc1_infrastructure_inst/sys_clk_ibufg" TNM_NET = "SYS_CLK1";
+############################################################################
+
+NET "clk" TNM_NET = "clk_tnm";
+NET "bclk" TNM_NET = "bclk_tnm";
+TIMESPEC TS_clk = PERIOD "clk_tnm" 50 MHz;
+TIMESPEC TS_bclk = PERIOD "bclk_tnm" 133 MHz;
+
+### a failed experiment to make timing close using DCM explicit phase shifts
+#NET "bclk_early" TNM_NET = "bclkp90_tnm";
+#TIMESPEC TS_bclkp90 = PERIOD "bclkp90_tnm" TS_bclk PHASE + 1870 ps; # spec in the phase shift manually
+#OFFSET = OUT 6870 ps AFTER "bclk_early"; # add in the phase shift manually for this clock
+
+NET "APOPTOSIS" LOC = K1;
+NET "APOPTOSIS" IOSTANDARD = LVCMOS33;
+
+NET "AUD6_TFS" LOC = A4;
+NET "AUD6_TFS" IOSTANDARD = LVCMOS33;
+NET "AUD6_TXC" LOC = B4;
+NET "AUD6_TXC" IOSTANDARD = LVCMOS33;
+NET "AUD6_TXD" LOC = A6;
+NET "AUD6_TXD" IOSTANDARD = LVCMOS33;
+NET "AUD_MCLK" LOC = H6;
+NET "AUD_MCLK" IOSTANDARD = LVCMOS33;
+NET "AUD_MIC_CLK" LOC = G3;
+NET "AUD_MIC_CLK" IOSTANDARD = LVCMOS33;
+NET "AUD_MIC_DAT" LOC = C5;
+NET "AUD_MIC_DAT" IOSTANDARD = LVCMOS33;
+
+NET "BATT_NRST" LOC = N1;
+NET "BATT_NRST" IOSTANDARD = LVCMOS33;
+NET "BATT_REFLASH_ALRT" LOC = N2;
+NET "BATT_REFLASH_ALRT" IOSTANDARD = LVCMOS33;
+
+NET "CLK2_N" LOC = H1;
+NET "CLK2_N" IOSTANDARD = LVDS_33;
+NET "CLK2_N" DIFF_TERM = "TRUE";
+NET "CLK2_P" LOC = H2;
+NET "CLK2_P" IOSTANDARD = LVDS_33;
+NET "CLK2_P" DIFF_TERM = "TRUE";
+
+NET "DDC_SCL" LOC = J6;
+NET "DDC_SCL" IOSTANDARD = LVCMOS33;
+NET "DDC_SDA" LOC = F2;
+NET "DDC_SDA" IOSTANDARD = LVCMOS33;
+
+NET "ECSPI3_MISO" LOC = A3;
+NET "ECSPI3_MISO" IOSTANDARD = LVCMOS33;
+# R13 
+NET "ECSPI3_MOSI" LOC = A2;
+NET "ECSPI3_MOSI" IOSTANDARD = LVCMOS33;
+NET "ECSPI3_RDY" LOC = A5;
+NET "ECSPI3_RDY" IOSTANDARD = LVCMOS33;
+# R15 
+NET "ECSPI3_SCLK" LOC = D9;
+NET "ECSPI3_SCLK" IOSTANDARD = LVCMOS33;
+NET "ECSPI3_SS2" LOC = B3;
+NET "ECSPI3_SS2" IOSTANDARD = LVCMOS33;
+
+NET "EIM_BCLK" LOC = C9;
+NET "EIM_BCLK" IOSTANDARD = LVCMOS33;
+NET "EIM_CS[0]" LOC = B11;
+NET "EIM_CS[0]" IOSTANDARD = LVCMOS33;
+NET "EIM_CS[1]" LOC = A15;
+NET "EIM_CS[1]" IOSTANDARD = LVCMOS33;
+
+NET "EIM_DA[0]" LOC = G9;
+NET "EIM_DA[0]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[0]" SLEW = FAST;
+NET "EIM_DA[1]" LOC = A10;
+NET "EIM_DA[1]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[1]" SLEW = FAST;
+NET "EIM_DA[2]" LOC = F9;
+NET "EIM_DA[2]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[2]" SLEW = FAST;
+NET "EIM_DA[3]" LOC = B9;
+NET "EIM_DA[3]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[3]" SLEW = FAST;
+NET "EIM_DA[4]" LOC = E13;
+NET "EIM_DA[4]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[4]" SLEW = FAST;
+NET "EIM_DA[5]" LOC = F13;
+NET "EIM_DA[5]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[5]" SLEW = FAST;
+NET "EIM_DA[6]" LOC = A9;
+NET "EIM_DA[6]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[6]" SLEW = FAST;
+NET "EIM_DA[7]" LOC = A8;
+NET "EIM_DA[7]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[7]" SLEW = FAST;
+NET "EIM_DA[8]" LOC = B8;
+NET "EIM_DA[8]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[8]" SLEW = FAST;
+NET "EIM_DA[9]" LOC = D8;
+NET "EIM_DA[9]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[9]" SLEW = FAST;
+NET "EIM_DA[10]" LOC = D11;
+NET "EIM_DA[10]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[10]" SLEW = FAST;
+NET "EIM_DA[11]" LOC = C8;
+NET "EIM_DA[11]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[11]" SLEW = FAST;
+NET "EIM_DA[12]" LOC = C7;
+NET "EIM_DA[12]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[12]" SLEW = FAST;
+NET "EIM_DA[13]" LOC = C11;
+NET "EIM_DA[13]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[13]" SLEW = FAST;
+NET "EIM_DA[14]" LOC = C4;
+NET "EIM_DA[14]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[14]" SLEW = FAST;
+NET "EIM_DA[15]" LOC = B6;
+NET "EIM_DA[15]" IOSTANDARD = LVCMOS33;
+NET "EIM_DA[15]" SLEW = FAST;
+
+NET "EIM_A[16]" LOC = A11;
+NET "EIM_A[16]" IOSTANDARD = LVCMOS33;
+NET "EIM_A[17]" LOC = B12;
+NET "EIM_A[17]" IOSTANDARD = LVCMOS33;
+NET "EIM_A[18]" LOC = D14;
+NET "EIM_A[18]" IOSTANDARD = LVCMOS33;
+
+NET "EIM_LBA" LOC = B14;
+NET "EIM_LBA" IOSTANDARD = LVCMOS33;
+NET "EIM_OE" LOC = C10;
+NET "EIM_OE" IOSTANDARD = LVCMOS33;
+NET "EIM_RW" LOC = C14;
+NET "EIM_RW" IOSTANDARD = LVCMOS33;
+NET "EIM_WAIT" LOC = A7;
+NET "EIM_WAIT" IOSTANDARD = LVCMOS33;
+
+#NET "FPGA_DONE" LOC = V17;
+#NET "FPGA_DONE" IOSTANDARD = LVCMOS33;
+#NET "FPGA_HSWAPEN" LOC = D4;
+#NET "FPGA_HSWAPEN" IOSTANDARD = LVCMOS33;
+#NET "FPGA_INIT_N" LOC = U3;
+#NET "FPGA_INIT_N" IOSTANDARD = TMDS_33;
+
+NET "FPGA_LED2" LOC = A16;
+NET "FPGA_LED2" IOSTANDARD = LVCMOS33;
+
+NET "FPGA_LSPI_CLK" LOC = D3;
+NET "FPGA_LSPI_CLK" IOSTANDARD = LVCMOS33;
+NET "FPGA_LSPI_CS" LOC = D1;
+NET "FPGA_LSPI_CS" IOSTANDARD = LVCMOS33;
+NET "FPGA_LSPI_HOLD" LOC = E3;
+NET "FPGA_LSPI_HOLD" IOSTANDARD = LVCMOS33;
+NET "FPGA_LSPI_MISO" LOC = D2;
+NET "FPGA_LSPI_MISO" IOSTANDARD = LVCMOS33;
+NET "FPGA_LSPI_MOSI" LOC = C2;
+NET "FPGA_LSPI_MOSI" IOSTANDARD = LVCMOS33;
+NET "FPGA_LSPI_WP" LOC = C1;
+NET "FPGA_LSPI_WP" IOSTANDARD = LVCMOS33;
+
+#NET "FPGA_M0" LOC = T15;
+#NET "FPGA_M0" IOSTANDARD = LVCMOS33;
+#NET "FPGA_M1" LOC = N12;
+#NET "FPGA_M1" IOSTANDARD = LVCMOS33;
+
+#NET "FPGA_RESET_N" LOC = V2;
+#NET "FPGA_RESET_N" IOSTANDARD = TMDS_33;
+#NET "FPGA_SUSPEND" LOC = R16;
+#NET "FPGA_SUSPEND" IOSTANDARD = LVCMOS33;
+#NET "FPGA_TCK" LOC = A17;
+#NET "FPGA_TCK" IOSTANDARD = LVCMOS33;
+#NET "FPGA_TDI" LOC = D15;
+#NET "FPGA_TDI" IOSTANDARD = LVCMOS33;
+#NET "FPGA_TDO" LOC = D16;
+#NET "FPGA_TDO" IOSTANDARD = LVCMOS33;
+#NET "FPGA_TMS" LOC = B18;
+#NET "FPGA_TMS" IOSTANDARD = LVCMOS33;
+
+# NET "GND" LOC = A1;# A18 B7 B13 C3 C16 D5 D10 E15 G2 G5 G12 G17 H8 H10 J4 J9 J11 J15 K8 K10 L9 L11 M2 M6 M17 N13 R1 R4 R9 R14 R18 T16 U6 U12 V1 V18 
+# NET "GND" IOSTANDARD = LVCMOS33;
+
+NET "I2C3_SCL" LOC = P4;
+NET "I2C3_SCL" IOSTANDARD = LVCMOS33;
+NET "I2C3_SDA" LOC = P3;
+NET "I2C3_SDA" IOSTANDARD = LVCMOS33;
+
+# NET "P1_2V" LOC = G7;# H9 H11 J8 J10 K9 K11 L8 L10 M7 M12 
+# NET "P1_2V" IOSTANDARD = LVCMOS33;
+# NET "P3_3V_DELAYED" LOC = B1;# B5 B10 B15 B17 D7 D13 E2 E5 E9 E10 E14 E17 G4 G10 G15 J2 J5 J12 J14 J17 K7 M4 M9 M15 P5 P9 P10 P14 R2 R6 R12 R17 U4 U9 U14 
+# NET "P3_3V_DELAYED" IOSTANDARD = LVCMOS33;
+
+NET "RESETBMCU" LOC = F1;
+NET "RESETBMCU" IOSTANDARD = LVCMOS33;
+
+NET "SMB_SCL" LOC = N3;
+NET "SMB_SCL" IOSTANDARD = LVCMOS33;
+NET "SMB_SDA" LOC = N4;
+NET "SMB_SDA" IOSTANDARD = LVCMOS33;
+
+NET "UART4_CTS" LOC = U1;
+NET "UART4_CTS" IOSTANDARD = LVCMOS33;
+NET "UART4_RTS" LOC = U2;
+NET "UART4_RTS" IOSTANDARD = LVCMOS33;
+NET "UART4_RXD" LOC = T1;
+NET "UART4_RXD" IOSTANDARD = LVCMOS33;
+NET "UART4_TXD" LOC = P1;
+NET "UART4_TXD" IOSTANDARD = LVCMOS33;
+
+#NET "UIM_CLK" LOC = B16;
+#NET "UIM_CLK" IOSTANDARD = LVCMOS33;
+#NET "UIM_DATA" LOC = A12;
+#NET "UIM_DATA" IOSTANDARD = LVCMOS33;
+#NET "UIM_PWR" LOC = C18;
+#NET "UIM_PWR" IOSTANDARD = SSTL15_II;
+#NET "UIM_PWRON" LOC = A14;
+#NET "UIM_PWRON" IOSTANDARD = LVCMOS33;
+#NET "UIM_RESET" LOC = C15;
+#NET "UIM_RESET" IOSTANDARD = LVCMOS33;
+
+##############
+# DDR3
+##############
+
+# NET "F_BA[2]" IOSTANDARD = SSTL15_II;
+# NET "F_BA[1]" IOSTANDARD = SSTL15_II;
+# NET "F_BA[0]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[13]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[12]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[11]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[10]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[9]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[8]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[7]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[6]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[5]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[4]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[3]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[2]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[1]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_A[0]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[15]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[14]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[13]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[12]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[11]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[10]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[9]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[8]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[7]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[6]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[5]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[4]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[3]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[2]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[1]" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_D[0]" IOSTANDARD = SSTL15_II;
+# NET "F_CAS_N" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_CKE" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_ODT" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_RST_N" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_RZQ" IOSTANDARD = SSTL15_II;
+# NET "F_DDR3_ZIO" IOSTANDARD = SSTL15_II;
+# NET "F_LDM" IOSTANDARD = SSTL15_II;
+# NET "F_RAS_N" IOSTANDARD = SSTL15_II;
+# NET "F_UDM" IOSTANDARD = SSTL15_II;
+# NET "F_WE_N" IOSTANDARD = SSTL15_II;
+
+
+# NET "F_BA[0]" LOC = H13;
+# NET "F_BA[1]" LOC = H14;
+# NET "F_BA[2]" LOC = K13;
+# NET "F_BA[0]" OUT_TERM = UNTUNED_50;
+# NET "F_BA[1]" OUT_TERM = UNTUNED_50;
+# NET "F_BA[2]" OUT_TERM = UNTUNED_50;
+
+# NET "F_CAS_N" LOC = K16;
+# NET "F_CAS_N" OUT_TERM = UNTUNED_50;
+
+# NET "F_DDR3_A[0]" LOC = H15;
+# NET "F_DDR3_A[1]" LOC = H16;
+# NET "F_DDR3_A[10]" LOC = E16;
+# NET "F_DDR3_A[11]" LOC = G14;
+# NET "F_DDR3_A[12]" LOC = D18;
+# NET "F_DDR3_A[13]" LOC = C17;
+# NET "F_DDR3_A[2]" LOC = F18;
+# NET "F_DDR3_A[3]" LOC = J13;
+# NET "F_DDR3_A[4]" LOC = E18;
+# NET "F_DDR3_A[5]" LOC = L12;
+# NET "F_DDR3_A[6]" LOC = L13;
+# NET "F_DDR3_A[7]" LOC = F17;
+# NET "F_DDR3_A[8]" LOC = H12;
+# NET "F_DDR3_A[9]" LOC = G13;
+# NET "F_DDR3_A[0]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[10]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[11]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[12]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[13]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[1]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[2]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[3]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[4]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[5]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[6]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[7]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[8]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_A[9]" OUT_TERM = UNTUNED_50;
+
+# NET "F_DDR3_CKE" LOC = D17;
+# NET "F_DDR3_CKE" OUT_TERM = UNTUNED_50;
+
+# NET "F_DDR3_CK_N" LOC = G18;
+# NET "F_DDR3_CK_N" IOSTANDARD = DIFF_SSTL15_II;
+# NET "F_DDR3_CK_N" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_CK_P" LOC = G16;
+# NET "F_DDR3_CK_P" IOSTANDARD = DIFF_SSTL15_II;
+# NET "F_DDR3_CK_P" OUT_TERM = UNTUNED_50;
+
+# NET "F_DDR3_D[0]" LOC = M16;
+# NET "F_DDR3_D[1]" LOC = M18;
+# NET "F_DDR3_D[10]" LOC = P17;
+# NET "F_DDR3_D[11]" LOC = P18;
+# NET "F_DDR3_D[12]" LOC = T17;
+# NET "F_DDR3_D[13]" LOC = T18;
+# NET "F_DDR3_D[14]" LOC = U17;
+# NET "F_DDR3_D[15]" LOC = U18;
+# NET "F_DDR3_D[2]" LOC = L17;
+# NET "F_DDR3_D[3]" LOC = L18;
+# NET "F_DDR3_D[4]" LOC = H17;
+# NET "F_DDR3_D[5]" LOC = H18;
+# NET "F_DDR3_D[6]" LOC = J16;
+# NET "F_DDR3_D[7]" LOC = J18;
+# NET "F_DDR3_D[8]" LOC = N17;
+# NET "F_DDR3_D[9]" LOC = N18;
+# NET "F_DDR3_D[0]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[10]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[11]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[12]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[13]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[14]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[15]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[1]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[2]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[3]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[4]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[5]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[6]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[7]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[8]" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_D[9]" OUT_TERM = UNTUNED_50;
+
+# NET "F_DDR3_RST_N" LOC = F14;
+# NET "F_DDR3_RST_N" OUT_TERM = UNTUNED_50;
+
+# NET "F_DDR3_ODT" LOC = K14;
+# NET "F_DDR3_ODT" OUT_TERM = UNTUNED_50;
+
+# NET "F_RAS_N" LOC = K15;
+# NET "F_RAS_N" OUT_TERM = UNTUNED_50;
+# NET "F_UDM" LOC = L15;
+# NET "F_UDM" OUT_TERM = UNTUNED_50;
+# NET "F_UDQS_N" LOC = N16;
+# NET "F_UDQS_N" IOSTANDARD = DIFF_SSTL15_II;
+# NET "F_UDQS_N" OUT_TERM = UNTUNED_50;
+# NET "F_UDQS_P" LOC = N15;
+# NET "F_UDQS_P" IOSTANDARD = DIFF_SSTL15_II;
+# NET "F_UDQS_P" OUT_TERM = UNTUNED_50;
+# NET "F_LDM" LOC = L16;
+# NET "F_LDM" OUT_TERM = UNTUNED_50;
+# NET "F_LDQS_N" LOC = K18;
+# NET "F_LDQS_N" IOSTANDARD = DIFF_SSTL15_II;
+# NET "F_LDQS_N" OUT_TERM = UNTUNED_50;
+# NET "F_LDQS_P" LOC = K17;
+# NET "F_LDQS_P" IOSTANDARD = DIFF_SSTL15_II;
+# NET "F_LDQS_P" OUT_TERM = UNTUNED_50;
+# NET "F_WE_N" LOC = K12;
+# NET "F_WE_N" OUT_TERM = UNTUNED_50;
+
+# NET "F_DDR3_RZQ" LOC = F15;
+# NET "F_DDR3_RZQ" OUT_TERM = UNTUNED_50;
+# NET "F_DDR3_ZIO" LOC = M14;
+# NET "F_DDR3_ZIO" OUT_TERM = UNTUNED_50;
+
+#NET "F_BA[*]" IOSTANDARD = SSTL15_II  | OUT_TERM = UNTUNED_50;
+
+#NET "F_CAS_N" IOSTANDARD = SSTL15_II  | OUT_TERM = UNTUNED_50;
+
+#NET "F_DDR3_A[*]" IOSTANDARD = SSTL15_II  | OUT_TERM = UNTUNED_50;
+
+#NET "F_DDR3_CKE" IOSTANDARD = SSTL15_II  | OUT_TERM = UNTUNED_50;
+
+
+#NET "F_DDR3_D[*]" IOSTANDARD = SSTL15_II  | OUT_TERM = UNTUNED_50;
+
+#NET "F_DDR3_RST_N" IOSTANDARD = SSTL15_II | OUT_TERM = UNTUNED_50;
+
+#NET "F_DDR3_ODT" IOSTANDARD = SSTL15_II  | OUT_TERM = UNTUNED_50;
+
+#NET "F_RAS_N" IOSTANDARD = SSTL15_II  | OUT_TERM = UNTUNED_50;
+#NET "F_UDM" IOSTANDARD = SSTL15_II  | OUT_TERM = UNTUNED_50;
+#NET "F_LDM" IOSTANDARD = SSTL15_II  | OUT_TERM = UNTUNED_50;
+#NET "F_WE_N" IOSTANDARD = SSTL15_II  | OUT_TERM = UNTUNED_50;
+
+#NET "F_DDR3_RZQ" IOSTANDARD = SSTL15_II  | OUT_TERM = UNTUNED_50;
+#NET "F_DDR3_ZIO" IOSTANDARD = SSTL15_II  | OUT_TERM = UNTUNED_50;
+
+
+##############
+# I/O connector
+##############
+# NET "F_DX0" LOC = K6;
+# NET "F_DX0" IOSTANDARD = LVCMOS33;
+# NET "F_DX0" SLEW = SLOW;
+#NET "F_DX1" LOC = L7;
+#NET "F_DX1" IOSTANDARD = LVCMOS33;
+#NET "F_DX1" SLEW = FAST;
+# NET "F_DX2" LOC = H3;
+# NET "F_DX2" IOSTANDARD = LVCMOS33;
+# NET "F_DX2" SLEW = SLOW;
+# NET "F_DX3" LOC = H4;
+# NET "F_DX3" IOSTANDARD = LVCMOS33;
+# NET "F_DX3" SLEW = SLOW;
+# NET "F_DX[4]" LOC = J1;
+# NET "F_DX[5]" LOC = J3;
+# NET "F_DX[6]" LOC = L3;
+# NET "F_DX[7]" LOC = L4;
+# NET "F_DX[8]" LOC = K2;
+# NET "F_DX11" LOC = M1;
+# NET "F_DX11" IOSTANDARD = LVCMOS33;
+# NET "F_DX11" SLEW = SLOW;
+# NET "F_DX[12]" LOC = M3;
+# NET "F_DX[13]" LOC = P2;
+# NET "F_DX14" LOC = T2;
+# NET "F_DX14" IOSTANDARD = LVCMOS33;
+# NET "F_DX14" SLEW = SLOW;
+# NET "F_DX[15]" LOC = M5;
+# NET "F_DX[16]" LOC = L6;
+# NET "F_DX17" LOC = G1;
+# NET "F_DX17" IOSTANDARD = LVCMOS33;
+# NET "F_DX17" SLEW = SLOW;
+NET "F_DX18" LOC = H7;
+NET "F_DX18" IOSTANDARD = LVCMOS33;
+NET "F_DX18" SLEW = SLOW;
+
+# NET "F_LVDS_N0" LOC = P6;
+# NET "F_LVDS_N0" IOSTANDARD = LVCMOS33;
+# NET "F_LVDS_N0" SLEW = SLOW;
+# NET "F_LVDS_P0" LOC = N5;
+# NET "F_LVDS_P0" IOSTANDARD = LVCMOS33;
+# NET "F_LVDS_P0" SLEW = SLOW;
+# NET "F_LVDS_N1" LOC = V4;
+# NET "F_LVDS_N1" IOSTANDARD = LVCMOS33;
+# NET "F_LVDS_N1" SLEW = SLOW;
+# NET "F_LVDS_P1" LOC = T4;
+# NET "F_LVDS_P1" IOSTANDARD = LVCMOS33;
+# NET "F_LVDS_P1" SLEW = SLOW;
+# NET "F_LVDS_N2" LOC = T3;
+# NET "F_LVDS_N2" IOSTANDARD = LVCMOS33;
+# NET "F_LVDS_N2" SLEW = SLOW;
+# NET "F_LVDS_P[2]" LOC = R3;
+# NET "F_LVDS_N[3]" LOC = V5;
+# NET "F_LVDS_P[3]" LOC = U5;
+# NET "F_LVDS_N4" LOC = T5;
+# NET "F_LVDS_N4" IOSTANDARD = LVCMOS33;
+# NET "F_LVDS_N4" SLEW = SLOW;
+# NET "F_LVDS_P4" LOC = R5;
+# NET "F_LVDS_P4" IOSTANDARD = LVCMOS33;
+# NET "F_LVDS_P4" SLEW = SLOW;
+# NET "F_LVDS_N[5]" LOC = T7;
+# NET "F_LVDS_P[5]" LOC = R7;
+# NET "F_LVDS_N[6]" LOC = V6;
+# NET "F_LVDS_P[6]" LOC = T6;
+NET "F_LVDS_N7" LOC = V7;
+NET "F_LVDS_N7" IOSTANDARD = LVCMOS33;
+NET "F_LVDS_N7" SLEW = SLOW;
+NET "F_LVDS_P7" LOC = U7;
+NET "F_LVDS_P7" IOSTANDARD = LVCMOS33;
+NET "F_LVDS_P7" SLEW = SLOW;
+# NET "F_LVDS_N[8]" LOC = V8;
+# NET "F_LVDS_P[8]" LOC = U8;
+# NET "F_LVDS_N[9]" LOC = V9;
+# NET "F_LVDS_P[9]" LOC = T9;
+# NET "F_LVDS_N[10]" LOC = V11;
+# NET "F_LVDS_P[10]" LOC = U11;
+# NET "F_LVDS_N11" LOC = T11;
+# NET "F_LVDS_N11" IOSTANDARD = LVCMOS33;
+# NET "F_LVDS_N11" SLEW = SLOW;
+NET "F_LVDS_P11" LOC = R11;
+NET "F_LVDS_P11" IOSTANDARD = LVCMOS33;
+NET "F_LVDS_P11" SLEW = FAST;
+# NET "F_LVDS_N[12]" LOC = V13;
+# NET "F_LVDS_P[12]" LOC = U13;
+# NET "F_LVDS_N[13]" LOC = V14;
+# NET "F_LVDS_P[13]" LOC = T14;
+# NET "F_LVDS_N[14]" LOC = V16;
+# NET "F_LVDS_P[14]" LOC = U16;
+#NET "F_LVDS_N15" LOC = V10;
+#NET "F_LVDS_N15" IOSTANDARD = LVCMOS33;
+#NET "F_LVDS_N15" SLEW = SLOW;
+NET "F_LVDS_P15" LOC = U10;
+NET "F_LVDS_P15" IOSTANDARD = LVCMOS33;
+NET "F_LVDS_P15" SLEW = SLOW;
+
+# NET "F_LVDS_P[*]" IOSTANDARD = LVDS_33 | DIFF_TERM = TRUE;
+# NET "F_LVDS_N[*]" IOSTANDARD = LVDS_33 | DIFF_TERM = TRUE;
+
+# NET "F_LVDS_NA" LOC = K3;
+# NET "F_LVDS_NA" IOSTANDARD = LVDS_33 | DIFF_TERM = TRUE;
+# NET "F_LVDS_PA" LOC = K4;
+# NET "F_LVDS_PA" IOSTANDARD = LVDS_33 | DIFF_TERM = TRUE;
+# NET "F_LVDS_NB" LOC = K5;
+# NET "F_LVDS_NB" IOSTANDARD = LVCMOS33;
+# NET "F_LVDS_NB" SLEW = SLOW;
+# NET "F_LVDS_PB" LOC = L5;
+# NET "F_LVDS_PB" IOSTANDARD = LVCMOS33;
+# NET "F_LVDS_PB" SLEW = SLOW;
+# NET "F_LVDS_NC" LOC = L1;
+# NET "F_LVDS_NC" IOSTANDARD = LVCMOS33;
+# NET "F_LVDS_NC" SLEW = SLOW;
+# NET "F_LVDS_PC" LOC = L2;
+# NET "F_LVDS_PC" IOSTANDARD = LVCMOS33;
+# NET "F_LVDS_PC" SLEW = SLOW;
+
+NET "F_LVDS_CK_N0" LOC = T8;
+NET "F_LVDS_CK_N0" IOSTANDARD = LVCMOS33;
+
+NET "F_LVDS_CK_P0" LOC = R8;
+NET "F_LVDS_CK_P0" IOSTANDARD = LVCMOS33;
+
+#NET "F_LVDS_CK_N1" LOC = T10;
+#NET "F_LVDS_CK_N1" IOSTANDARD = LVCMOS33;
+#NET "F_LVDS_CK_N1" SLEW = SLOW;
+#NET "F_LVDS_CK_P1" LOC = R10;
+#NET "F_LVDS_CK_P1" IOSTANDARD = LVCMOS33;
+#NET "F_LVDS_CK_P1" SLEW = SLOW;
+
+# NET "F_LVDS_CK_N[*]" IOSTANDARD = LVDS_33 | DIFF_TERM = TRUE;
+# NET "F_LVDS_CK_P[*]" IOSTANDARD = LVDS_33 | DIFF_TERM = TRUE;
diff --git a/src/rtl/novena_fpga.v b/src/rtl/novena_fpga.v
new file mode 100755
index 0000000..442bbf4
--- /dev/null
+++ b/src/rtl/novena_fpga.v
@@ -0,0 +1,659 @@
+//////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2013, Andrew "bunnie" Huang
+//
+// See the NOTICE file distributed with this work for additional 
+// information regarding copyright ownership.  The copyright holder 
+// licenses this file to you under the Apache License, Version 2.0 
+// (the "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// code distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//////////////////////////////////////////////////////////////////////////////
+`timescale 1ns / 1ps
+
+//`define USE_ROMULATOR 1
+
+module novena_fpga(
+		   output wire       APOPTOSIS,
+		   
+		   input wire AUD6_TFS,
+		   input wire AUD6_TXC,
+		   input wire AUD6_TXD,
+		   input wire AUD_MCLK,
+		   input wire AUD_MIC_CLK,
+		   input wire AUD_MIC_DAT,
+		   
+		   input wire BATT_NRST,
+		   input wire BATT_REFLASH_ALRT,
+		   
+		   input wire CLK2_N,
+		   input wire CLK2_P,
+		   
+		   input wire DDC_SCL,
+		   input wire DDC_SDA,
+		   
+		   output wire ECSPI3_MISO,
+		   input wire ECSPI3_MOSI,
+		   input wire ECSPI3_RDY,
+		   input wire ECSPI3_SCLK,
+		   input wire ECSPI3_SS2,
+		   
+		   input wire EIM_BCLK,
+		   input wire [1:0] EIM_CS,
+		   inout wire [15:0] EIM_DA,
+		   input wire [18:16] EIM_A,
+		   input wire EIM_LBA,
+		   input wire EIM_OE,
+		   input wire EIM_RW,
+		   input wire EIM_WAIT,
+		   
+		   output wire FPGA_LED2,
+		   input wire FPGA_LSPI_CLK,
+		   input wire FPGA_LSPI_CS,
+		   input wire FPGA_LSPI_HOLD,
+		   input wire FPGA_LSPI_MISO,
+		   input wire FPGA_LSPI_MOSI,
+		   input wire FPGA_LSPI_WP,
+		   
+		   input wire I2C3_SCL,
+		   input wire I2C3_SDA,
+		   
+		   input wire SMB_SCL,
+		   input wire SMB_SDA,
+		   
+		   input wire UART4_CTS,
+		   input wire UART4_RTS,
+		   input wire UART4_RXD,
+		   input wire UART4_TXD,
+		   
+		   // input wire UIM_CLK,
+		   // input wire UIM_DATA,
+		   // input wire UIM_PWR,
+		   // input wire UIM_PWRON,
+		   // input wire UIM_RESET,
+
+		   input wire  F_LVDS_CK_P0, // sck
+		   input wire  F_LVDS_CK_N0, // si
+		   input wire  F_DX18, // cs input
+		   output wire F_LVDS_P11,  // so
+		   output wire F_LVDS_P15,  // cs output
+
+		   output wire F_LVDS_N7, // drivers
+		   output wire F_LVDS_P7,
+		   
+		   input wire RESETBMCU
+	 );
+
+   wire [15:0] 		      eim_dout;
+   wire [15:0] 		      eim_din;
+   wire 		      clk;   // free-runs at 50 MHz, unbuffered
+   wire 		      clk50; // zero-delay, DLL version of above. Use this.
+   wire 		      clk100; // doubled-up version of the above. For time base applications.
+   wire 		      bclk;  // NOTE: doesn't run until first CPU access to EIM; then free-runs at 133 MHz
+   reg [23:0] 		      counter;
+   
+   wire 		      ddr3_dll_locked;
+   wire 		      ddr3clk;
+   
+   
+   wire 		      reset;
+   reg 			      emulate_r;
+
+   // Register the "emulate" control bit on the SPI clock; the registered copy
+   // (emulate_r) steers the F_LVDS_N7 and F_LVDS_P15 outputs assigned below.
+   // NOTE(review): spiclk and emulate are declared further down in this module;
+   // strict Verilog tools may require the declarations to precede this use.
+   // NOTE(review): emulate is bit 0 of romulator_ctl, which comes from a reg_wo
+   // instance clocked by bclk_dll, so this is a single-flop clock-domain
+   // crossing -- presumably acceptable because the bit is quasi-static; confirm.
+   always @(posedge spiclk) begin
+      emulate_r <= emulate;
+   end
+
+   assign F_LVDS_N7 = !emulate_r; // drives so when low -- set to 1 for bypass mode
+   assign F_LVDS_P7 = 1'b0; // drives cs when low
+
+   // P15 is cs output to SPINOR
+   assign F_LVDS_P15 = emulate_r ? 1'b1 : F_DX18;
+//   assign F_LVDS_P15 = F_DX18; // set this for bypass mode
+//   assign F_LVDS_P15 = 1'b1; // set to 1 to disable SPINOR during emulation
+//   assign F_LVDS_P11 = 1'bz;
+
+   
+   ////////////////////////////////////
+   ///// MASTER RESET
+   ////////////////////////////////////
+   
+   sync_reset master_res_sync( .glbl_reset(!RESETBMCU), .clk(clk), .reset(reset) );
+     
+   wire 	      bclk_dll, bclk_div2_dll, bclk_div4_dll, bclk_locked;
+   wire 	      bclk_early;
+   
+   ////////////////////////////////////
+   ///// BCLK DLL -- generate zero-delay clock plus slower versions for internal use
+   ////////////////////////////////////
+   wire 	      bclk_int_in, bclk_io_in;
+   IBUFG   clkibufg (.I(EIM_BCLK), .O(bclk) );
+   BUFG    bclk_dll_bufg(.I(bclk), .O(bclk_int_in) );
+   
+   bclk_dll bclk_dll_mod( .clk133in(bclk_int_in), .clk133(bclk_dll),
+			  .RESET(reset), .LOCKED(bclk_locked));
+
+   wire 	      i_reset, i_locked;
+   wire 	      o_reset, o_locked;
+   wire 	      bclk_i, bclk_o;
+   wire 	      i_fbk_out, i_fbk_in;
+   wire 	      o_fbk_out, o_fbk_in;
+   
+   dcm_delay bclk_i_dll( .clk133(bclk_int_in), .clk133out(bclk_i),
+			  .CLKFB_IN(i_fbk_in), .CLKFB_OUT(i_fbk_out),
+			  .RESET(i_reset), .LOCKED(i_locked));
+
+   dcm_delay bclk_o_dll( .clk133(bclk_int_in), .clk133out(bclk_o),
+			  .CLKFB_IN(o_fbk_in), .CLKFB_OUT(o_fbk_out),
+			  .RESET(o_reset), .LOCKED(o_locked));
+   
+   // lock it to the input path
+   BUFIO2FB bclk_o_fbk(.I(bclk_o), .O(o_fbk_in));
+   // assign o_fbk_in = bclk_o;
+//   BUFG bclk_io_fbk(.I(bclk_io), .O(io_fbk_in));
+   
+   assign i_fbk_in = bclk_i;
+
+   ////////////////////////////////////
+   ///// Register set -- area-inefficient, high fan-out/in registers for controlling/monitoring internal signals
+   ///// All registers split into write-only or read-only banks
+   ///// 0x40000 - 0x40FFF is reserved for w/o
+   ///// 0x41000 - 0x41FFF is reserved for r/o
+   /////   -> if you want to check a w/o value, loop it back to an r/o register
+   ////////////////////////////////////
+   
+   reg 		      cs0_r, rw_r;
+   reg [15:0] 	      din_r;
+   reg [18:0] 	      bus_addr_r;
+   reg 		      adv_r;
+
+   reg 		      cs0_in, rw_in, adv_in;
+   reg [15:0] 	      din_in;
+   reg [2:0] 	      a_in;
+   
+   // Two-rank input pipeline clocked by bclk_i.
+   // The first rank (*_in) samples the EIM control pins, the high address bits
+   // and the data returned by the I/O buffers (eim_din); the second rank (*_r)
+   // re-registers those values for the CS0 register bank logic.
+   always @(posedge bclk_i) begin
+      // first rank: sample the bus
+      cs0_in <= EIM_CS[0];
+      rw_in <= EIM_RW;
+      din_in <= eim_din;
+      adv_in <= !EIM_LBA; // latch address on LBA low
+      a_in <= EIM_A[18:16];
+
+      // second rank: one more cycle of delay on control and data
+      cs0_r <= cs0_in;
+      rw_r <= rw_in;
+      din_r <= din_in;
+      adv_r <= adv_in;
+   end
+   
+   // Address latch: while the registered address-valid strobe (adv_in) is high,
+   // capture the 19-bit bus address formed from the registered high address
+   // bits and the registered multiplexed address/data word. With adv_in low the
+   // register simply holds its previous value.
+   always @(posedge bclk_i) begin
+      if( adv_in )
+        bus_addr_r <= {a_in, din_in};
+   end
+
+   wire [15:0] r40000wo;
+   wire [15:0] r40002wo;
+
+   wire [15:0] ro_d;
+
+   //////// write-only registers
+   reg_wo reg_wo_40000 ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h40000),
+			 .bus_d(din_r), .we(!cs0_r && !rw_r), .re(!cs0_r && rw_r), .rbk_d(ro_d), 
+			 .reg_d( r40000wo[15:0] ) );
+   
+   reg_wo reg_wo_40002 ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h40002),
+			 .bus_d(din_r), .we(!cs0_r && !rw_r), .re(1'b0), .rbk_d(ro_d), // unreadable
+			 .reg_d( r40002wo[15:0] ) );
+
+   wire [15:0]        romulator_ctl;
+   wire 	      emulate;
+   wire 	      spi_uk_rd_en;
+   wire 	      spi_uk_rst;
+   wire 	      spi_out_rd_en;
+   wire 	      spi_out_rst;
+   wire 	      spi_adr_rd_en;
+   wire 	      spi_adr_rst;
+   assign emulate = romulator_ctl[0];       // 1
+   assign spi_uk_rd_en = romulator_ctl[1];  // 2
+   assign spi_uk_rst = romulator_ctl[2];    // 4
+   assign spi_adr_rd_en = romulator_ctl[3]; // 8
+   assign spi_adr_rst = romulator_ctl[4];   // 10
+   assign spi_out_rd_en = romulator_ctl[5];  // 20
+   assign spi_out_rst = romulator_ctl[6];    // 40
+   
+   reg_wo reg_wo_40010 ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h40010),
+			 .bus_d(din_r), .we(!cs0_r && !rw_r), .re(!cs0_r && rw_r), 
+			 .rbk_d(ro_d), .reg_d( romulator_ctl[15:0] ) );
+
+
+   //////// read-only registers
+   // loopback readback
+   reg_ro reg_ro_41000 ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h41000),
+			 .bus_d(ro_d), .re(!cs0_r && rw_r),
+			 .reg_d( r40000wo[15:0] ) );
+
+   reg_ro reg_ro_41002 ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h41002),
+			 .bus_d(ro_d), .re(!cs0_r && rw_r),
+			 .reg_d( r40002wo[15:0] ) );
+
+
+   wire [15:0] 	      romulator_stat;
+   reg_ro reg_ro_41100 ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h41100),
+			 .bus_d(ro_d), .re(!cs0_r && rw_r),
+			 .reg_d( romulator_stat[15:0] ) );
+
+   wire [15:0] 	      romulator_count;
+   reg_ro reg_ro_41102 ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h41102),
+			 .bus_d(ro_d), .re(!cs0_r && rw_r),
+			 .reg_d( romulator_count[15:0] ) );
+
+   wire [15:0] 	      romulator_adr_stat;
+   reg_ro reg_ro_41104 ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h41104),
+			 .bus_d(ro_d), .re(!cs0_r && rw_r),
+			 .reg_d( romulator_adr_stat[15:0] ) );
+
+   wire [15:0] 	      romulator_adr_count;
+   reg_ro reg_ro_41106 ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h41106),
+			 .bus_d(ro_d), .re(!cs0_r && rw_r),
+			 .reg_d( romulator_adr_count[15:0] ) );
+
+   wire [23:0] 	      romulator_adr_dout;
+   reg_ro reg_ro_41108 ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h41108),
+			 .bus_d(ro_d), .re(!cs0_r && rw_r),
+			 .reg_d( romulator_adr_dout[15:0] ) );
+
+   reg_ro reg_ro_4110A ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h4110A),
+			 .bus_d(ro_d), .re(!cs0_r && rw_r),
+			 .reg_d( {8'b0,romulator_adr_dout[23:16]} ) );
+
+   wire [15:0] 	      romulator_out_stat;
+   reg_ro reg_ro_4110C ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h4110C),
+			 .bus_d(ro_d), .re(!cs0_r && rw_r),
+			 .reg_d( romulator_out_stat[15:0] ) );
+
+   wire [15:0] 	      romulator_out_count;
+   reg_ro reg_ro_4110E ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h4110E),
+			 .bus_d(ro_d), .re(!cs0_r && rw_r),
+			 .reg_d( romulator_out_count[15:0] ) );
+   
+   ///////////////////////
+   ///////////////////////
+   // CS1 bank registers: minimum size here is 64-bit, tuned for synchronous burst access only
+   ///////////////////////
+
+   wire [63:0] 	     rC04_0000wo;
+   wire [63:0] 	     rC04_0008wo;
+   wire [15:0] 	     ro_d_b;
+   
+   ///////// write registers
+   // loopback test
+   reg_wo_4burst reg_wo_4b_C04_0000( .clk(bclk_i), .bus_ad(eim_din), .my_a(19'h4_0000), 
+				     .bus_a(EIM_A[18:16]), .adv(!EIM_LBA), .rw(EIM_RW), .cs(!EIM_CS[1]), 
+				     .reg_d( rC04_0000wo[63:0] ), .rbk_d(ro_d_b) );
+
+   reg_wo_4burst reg_wo_4b_C04_0008( .clk(bclk_i), .bus_ad(eim_din), .my_a(19'h4_0008),
+				     .bus_a(EIM_A[18:16]), .adv(!EIM_LBA), .rw(EIM_RW), .cs(!EIM_CS[1]),
+				     .reg_d( rC04_0008wo[63:0] ), .rbk_d(ro_d_b) );
+
+   ///////// read registers
+   // loopback test
+   reg_ro_4burst reg_ro_4b_C04_1000( .clk(bclk_i), .bus_ad(eim_din), .my_a(19'h4_1000),
+				     .bus_a(EIM_A[18:16]), .adv(!EIM_LBA), .rw(EIM_RW), .cs(!EIM_CS[1]),
+				     .reg_d( rC04_0000wo[63:0] ), .rbk_d(ro_d_b) );
+
+   reg_ro_4burst reg_ro_4b_C04_1008( .clk(bclk_i), .bus_ad(eim_din), .my_a(19'h4_1008),
+				     .bus_a(EIM_A[18:16]), .adv(!EIM_LBA), .rw(EIM_RW), .cs(!EIM_CS[1]),
+				     .reg_d( rC04_0008wo[63:0] ), .rbk_d(ro_d_b) );
+
+   // FPGA minor version code
+   reg_ro reg_ro_41FFC ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h41FFC),
+			 .bus_d(ro_d), .re(!cs0_r && rw_r),
+			 .reg_d( 16'h0001 ) ); // minor version
+
+   // FPGA major version code
+   reg_ro reg_ro_41FFE ( .clk(bclk_dll), .bus_a(bus_addr_r), .my_a(19'h41FFE),
+			 .bus_d(ro_d), .re(!cs0_r && rw_r),
+			 .reg_d( 16'h000A ) ); // 000A is for the SPI romulator
+
+   ////////// VERSION LOG (major version 000A) /////////////
+   //////
+   // Minor version 0001, May 13 2014
+   //   Initial cull to the SPI ROM feature set
+   //
+   
+   // mux between block memory and register set based on high bits
+   //   assign eim_dout = (bus_addr[18:16] != 3'b000) ? ro_d : bram_dout;
+   // pipeline to improve timing
+   reg [15:0]		     ro_d_r;
+   reg [15:0] 		     ro_d_b_r;
+   reg [1:0] 		     eim_rdcs;
+   reg [15:0] 		     eim_dout_pipe;
+   reg [15:0] 		     eim_dout_pipe2;
+
+   always @(posedge bclk_i) begin
+      ro_d_b_r <= ro_d_b;
+   end
+   
+   // Read-data mux stage, clocked by bclk_dll.
+   // Registers the CS0 read bus (ro_d) and both chip selects, then selects the
+   // registered CS0 data when the registered selects equal 2'b10 (EIM_CS[1]
+   // high, EIM_CS[0] low) and the registered CS1 burst data (ro_d_b_r) in every
+   // other case.
+   always @(posedge bclk_dll) begin
+      ro_d_r <= ro_d;
+      eim_rdcs[1:0] <= EIM_CS[1:0];
+      eim_dout_pipe <= (eim_rdcs[1:0] == 2'b10) ? ro_d_r : ro_d_b_r;
+   end
+
+   // Final retiming stage on the output-phase clock (bclk_o). The result feeds
+   // the explicitly instantiated output flip-flops further down.
+   // (No semicolon after "end": a bare ';' is not a legal module item in
+   // IEEE 1364 Verilog, even though some tools tolerate it.)
+   always @(posedge bclk_o) begin
+      eim_dout_pipe2 <= eim_dout_pipe; // retime near the source to allow max time for wire delay
+   end
+
+   wire [15:0] 	      spi_eeprom_rbk;
+   wire [7:0] 	      spi_uk_cmd;
+   wire 	      spi_uk_cmd_updated;
+
+   wire 	      spiclk_i;
+   wire 	      spiclk;
+   
+   IBUFG spi_clkibufg( .I(F_LVDS_CK_P0), .O(spiclk_i));
+   BUFG spi_clkbufg( .I(spiclk_i), .O(spiclk) );
+
+   wire [23:0] 	      spi_adr;
+   wire 	      spi_adr_updated;
+
+   wire [7:0] 	      spi_byte;
+   wire 	      spi_byte_updated;
+   
+   wire [7:0] 	      spi_obyte;
+   wire 	      spi_obyte_updated;
+   
+   spi_eeprom spi_eeprom(
+			 .sdout(F_LVDS_P11),
+			 .sdin(F_LVDS_CK_N0),
+			 .scs(F_DX18),
+			 .sclk(spiclk),
+			 .swp(1'b1),
+			 .shold(1'b1),
+
+			 .eim_clk(bclk_i),
+			 .bus_a(bus_addr_r),
+			 .bus_d(din_r),
+			 .we(!cs0_r && !rw_r),
+			 .re(!cs0_r && rw_r),
+			 .reg_d(spi_eeprom_rbk),
+			 .rbk_d(ro_d),
+		  
+			 .spi_uk_cmd(spi_uk_cmd),  // bitbucket for unknown commands
+			 .spi_uk_cmd_updated(spi_uk_cmd_updated),
+
+			 .spi_byte(spi_byte),
+			 .spi_byte_updated(spi_byte_updated),
+
+			 .spi_obyte(spi_obyte),
+			 .spi_obyte_updated(spi_obyte_updated),
+			 
+			 .spi_adr(spi_adr),
+			 .spi_adr_updated(spi_adr_updated),
+		  
+			 .reset(reset)
+			 );
+
+   wire [11:0] 	      spi_uk_rd_data_count;
+   wire 	      spi_uk_empty;
+   wire 	      spi_uk_overflow;
+   wire 	      spi_uk_full;
+   wire [7:0] 	      spi_uk_dout;
+   wire 	      spi_uk_rd_en_pulse;
+
+   rising_edge spi_re( .clk(bclk_dll),
+		       .level(spi_uk_rd_en),
+		       .pulse(spi_uk_rd_en_pulse)
+		       );
+
+   assign romulator_stat[7:0] = spi_uk_dout[7:0];
+   assign romulator_stat[8] = spi_uk_empty;
+   assign romulator_stat[9] = spi_uk_full;
+   assign romulator_stat[10] = spi_uk_overflow;
+   assign romulator_count[11:0] = spi_uk_rd_data_count[11:0];
+   
+   uk_fifo spi_uk (
+		   .rst(spi_uk_rst), // input rst
+//		   .wr_clk(spiclk), // input wr_clk
+		   .wr_clk(!spiclk), // input wr_clk  // invert for monitoring
+
+//		   .din(spi_uk_cmd), // input [7 : 0] din
+//		   .wr_en(spi_uk_cmd_updated), // input wr_en
+		   .din(spi_byte),
+		   .wr_en(spi_byte_updated),
+
+		   .rd_clk(bclk_dll), // input rd_clk
+		   .rd_en(spi_uk_rd_en_pulse), // input rd_en
+		   .dout(spi_uk_dout), // output [7 : 0] dout
+		   .full(spi_uk_full), // output full
+		   .overflow(spi_uk_overflow), // output overflow
+		   .empty(spi_uk_empty), // output empty
+		   .rd_data_count(spi_uk_rd_data_count) // output [11 : 0] rd_data_count
+		   );
+   
+   wire [11:0] 	      spi_out_rd_data_count;
+   wire 	      spi_out_empty;
+   wire 	      spi_out_overflow;
+   wire 	      spi_out_full;
+   wire [7:0] 	      spi_out_dout;
+   wire 	      spi_out_rd_en_pulse;
+
+   rising_edge spi_out_re( .clk(bclk_dll),
+			   .level(spi_out_rd_en),
+			   .pulse(spi_out_rd_en_pulse)
+			   );
+
+   assign romulator_out_stat[7:0] = spi_out_dout[7:0];
+   assign romulator_out_stat[8] = spi_out_empty;
+   assign romulator_out_stat[9] = spi_out_full;
+   assign romulator_out_stat[10] = spi_out_overflow;
+   assign romulator_out_count[11:0] = spi_out_rd_data_count[11:0];
+   
+   uk_fifo spi_out (
+		   .rst(spi_out_rst), // input rst
+		   .wr_clk(!spiclk), // input wr_clk
+		    // invert from negedge sampling of sdout
+
+		   .din(spi_obyte),
+		   .wr_en(spi_obyte_updated),
+
+		   .rd_clk(bclk_dll), // input rd_clk
+		   .rd_en(spi_out_rd_en_pulse), // input rd_en
+		   .dout(spi_out_dout), // output [7 : 0] dout
+		   .full(spi_out_full), // output full
+		   .overflow(spi_out_overflow), // output overflow
+		   .empty(spi_out_empty), // output empty
+		   .rd_data_count(spi_out_rd_data_count) // output [11 : 0] rd_data_count
+		   );
+   
+   wire 	      spi_adr_rd_en_pulse;
+   wire [23:0] 	      spi_adr_dout;
+   wire 	      spi_adr_full;
+   wire 	      spi_adr_overflow;
+   wire 	      spi_adr_empty;
+   wire [13:0] 	      spi_adr_rd_data_count;
+   assign romulator_adr_count[13:0] = spi_adr_rd_data_count[13:0];
+   assign romulator_adr_stat[8] = spi_adr_empty;
+   assign romulator_adr_stat[9] = spi_adr_full;
+   assign romulator_adr_stat[10] = spi_adr_overflow;
+
+   assign romulator_adr_dout[23:0] = spi_adr_dout[23:0];
+   
+   rising_edge spi_adr_re( .clk(bclk_dll),
+		       .level(spi_adr_rd_en),
+		       .pulse(spi_adr_rd_en_pulse)
+		       );
+   nandadr_fifo spi_adr_fifo (
+			 .rst(spi_adr_rst), // input rst
+			 .wr_clk(spiclk), // input wr_clk
+			 .rd_clk(bclk_dll), // input rd_clk
+			 .din({6'b0,spi_adr[23:0]}), // input [29 : 0] din
+			 .wr_en(spi_adr_updated), // input wr_en
+			 .rd_en(spi_adr_rd_en_pulse), // input rd_en
+			 .dout(spi_adr_dout[23:0]), // output [29 : 0] dout
+			 .full(spi_adr_full), // output full
+			 .overflow(spi_adr_overflow), // output overflow
+			 .empty(spi_adr_empty), // output empty
+			 .rd_data_count(spi_adr_rd_data_count) // output [13 : 0] rd_data_count
+			 );
+   
+   //////////////
+   // Output pipeline registers -- explicit instantiation as their LOCs are controlled in the UCF.
+   //////////////
+   FDSE oddr2_eim0( .D( eim_dout_pipe2[0] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[0] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   FDSE oddr2_eim1( .D( eim_dout_pipe2[1] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[1] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   FDSE oddr2_eim2( .D( eim_dout_pipe2[2] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[2] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   FDSE oddr2_eim3( .D( eim_dout_pipe2[3] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[3] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+
+   FDSE oddr2_eim4( .D( eim_dout_pipe2[4] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[4] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   FDSE oddr2_eim5( .D( eim_dout_pipe2[5] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[5] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   FDSE oddr2_eim6( .D( eim_dout_pipe2[6] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[6] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   FDSE oddr2_eim7( .D( eim_dout_pipe2[7] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[7] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+
+   FDSE oddr2_eim8( .D( eim_dout_pipe2[8] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[8] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   FDSE oddr2_eim9( .D( eim_dout_pipe2[9] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[9] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   FDSE oddr2_eimA( .D( eim_dout_pipe2[10] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[10] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   FDSE oddr2_eimB( .D( eim_dout_pipe2[11] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[11] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+
+   FDSE oddr2_eimC( .D( eim_dout_pipe2[12] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[12] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   FDSE oddr2_eimD( .D( eim_dout_pipe2[13] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[13] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   FDSE oddr2_eimE( .D( eim_dout_pipe2[14] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[14] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   FDSE oddr2_eimF( .D( eim_dout_pipe2[15] ),
+		     .C( bclk_o ),
+		     .Q( eim_dout[15] ),
+		     .CE( 1'b1 ), .S(1'b0) );
+   
+
+   //////////////
+   /// "heartbeat" counter
+   //////////////
+   always @(posedge clk50) begin
+      counter <= counter + 1;
+   end
+
+   assign FPGA_LED2 = counter[23];
+
+   //////////////
+   // IOBUFs as required by design
+   //////////////
+   IBUFGDS clkibufgds( .I(CLK2_P), .IB(CLK2_N), .O(clk) );
+
+   reg [15:0]	      eim_d_t;
+   reg 		      eim_lba_reg;
+   reg 		      eim_oe_reg;
+
+   always @(posedge bclk_i) begin
+      eim_lba_reg <= EIM_LBA;
+      eim_oe_reg <= EIM_OE;
+   end
+   
+   // Tristate control for the EIM data pins, one bit per pin, registered on the
+   // output-phase clock. Every bit carries the same value: high whenever the
+   // registered OE is high or the registered LBA is low, i.e. the bus is only
+   // driven while OE is low and no address phase is in progress.
+   // Non-blocking assignment is used because this is clocked logic; a blocking
+   // assignment here can race with any reader of eim_d_t in simulation.
+   always @(posedge bclk_o) begin
+      eim_d_t <= {16{eim_oe_reg | !eim_lba_reg}};
+   end
+   
+   // Bidirectional buffers for the 16 multiplexed EIM address/data pins.
+   // Each buffer takes its own bit of eim_d_t as the tristate control: T is a
+   // 1-bit port, so connecting the whole 16-bit vector would be truncated to
+   // bit 0 and leave bits 15:1 unused.
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim0 (.IO(EIM_DA[ 0]), .I(eim_dout[ 0]), .T(eim_d_t[ 0]), .O(eim_din[ 0]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim1 (.IO(EIM_DA[ 1]), .I(eim_dout[ 1]), .T(eim_d_t[ 1]), .O(eim_din[ 1]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim2 (.IO(EIM_DA[ 2]), .I(eim_dout[ 2]), .T(eim_d_t[ 2]), .O(eim_din[ 2]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim3 (.IO(EIM_DA[ 3]), .I(eim_dout[ 3]), .T(eim_d_t[ 3]), .O(eim_din[ 3]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim4 (.IO(EIM_DA[ 4]), .I(eim_dout[ 4]), .T(eim_d_t[ 4]), .O(eim_din[ 4]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim5 (.IO(EIM_DA[ 5]), .I(eim_dout[ 5]), .T(eim_d_t[ 5]), .O(eim_din[ 5]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim6 (.IO(EIM_DA[ 6]), .I(eim_dout[ 6]), .T(eim_d_t[ 6]), .O(eim_din[ 6]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim7 (.IO(EIM_DA[ 7]), .I(eim_dout[ 7]), .T(eim_d_t[ 7]), .O(eim_din[ 7]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim8 (.IO(EIM_DA[ 8]), .I(eim_dout[ 8]), .T(eim_d_t[ 8]), .O(eim_din[ 8]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim9 (.IO(EIM_DA[ 9]), .I(eim_dout[ 9]), .T(eim_d_t[ 9]), .O(eim_din[ 9]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim10 (.IO(EIM_DA[10]), .I(eim_dout[10]), .T(eim_d_t[10]), .O(eim_din[10]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim11 (.IO(EIM_DA[11]), .I(eim_dout[11]), .T(eim_d_t[11]), .O(eim_din[11]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim12 (.IO(EIM_DA[12]), .I(eim_dout[12]), .T(eim_d_t[12]), .O(eim_din[12]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim13 (.IO(EIM_DA[13]), .I(eim_dout[13]), .T(eim_d_t[13]), .O(eim_din[13]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim14 (.IO(EIM_DA[14]), .I(eim_dout[14]), .T(eim_d_t[14]), .O(eim_din[14]));
+   IOBUF #(.DRIVE(12), .SLEW("FAST")) IOBUF_eim15 (.IO(EIM_DA[15]), .I(eim_dout[15]), .T(eim_d_t[15]), .O(eim_din[15]));
+
+   //////////////
+   // DDR3 interface macro
+   //////////////
+
+   wire c1_clk0, c1_rst0;
+   
+   ddr3_clkgen ddr3_clkgen (
+			    .clk50in(clk),
+			    .clk50(clk50),
+			    .clk400(ddr3clk),
+			    .clk100(clk100),
+			    .RESET(reset),
+			    .LOCKED(ddr3_dll_locked)
+			    );
+
+   //////////////
+   // tie downs (unused signals as of this rev of design)
+   //////////////
+   assign APOPTOSIS = 1'b0; // make apoptosis inactive; tie high to force reboot on config
+   assign ECSPI3_MISO = 1'b0; // unused in this revision, held low
+   
+endmodule
diff --git a/src/rtl/romulator_ddr3_tb.v b/src/rtl/romulator_ddr3_tb.v
new file mode 100755
index 0000000..2ac7ecc
--- /dev/null
+++ b/src/rtl/romulator_ddr3_tb.v
@@ -0,0 +1,802 @@
+//////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2013, Andrew "bunnie" Huang
+//
+// See the NOTICE file distributed with this work for additional 
+// information regarding copyright ownership.  The copyright holder 
+// licenses this file to you under the Apache License, Version 2.0 
+// (the "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// code distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//////////////////////////////////////////////////////////////////////////////
+
+`timescale 1ns / 1ps
+
+module romulator_ddr3_tb;
+   reg clk;
+   
+   reg nand_we;
+   reg nand_re;
+   reg nand_ale;
+   reg nand_cle;
+   wire nand_rb;
+   reg 	nand_wp;
+   reg 	nand_cs;
+   
+   reg [7:0]  nand_din;
+   wire [7:0] nand_dout;
+   wire       nand_drive_out;
+
+   wire [15:0] ram_adr;
+   wire [7:0]  ram_d_to_ram;
+   wire [7:0]   ram_d_from_ram;
+   wire        ram_we;
+   wire        ram_clk_to_ram;
+
+   wire [7:0]  nand_uk_cmd;    // pipe to a FIFO to store unknown commands
+   wire        nand_uk_cmd_updated;
+
+   reg 	       reset;
+
+   reg 	       rom_ddr3_reset; // reset just the ddr3-romulator interface
+		      
+   wire        ddr3_wr_clk;
+   wire        ddr3_wr_cmd_en;
+   wire [2:0]  ddr3_wr_cmd_instr;
+   wire [5:0]  ddr3_wr_cmd_bl;
+   wire [29:0] ddr3_wr_adr;
+   reg 	       ddr3_wr_cmd_full;
+   wire        ddr3_wr_cmd_empty;
+
+   wire        ddr3_wr_dat_en;
+   wire [31:0] ddr3_wr_dat;
+   reg 	       ddr3_wr_full;
+   reg 	       ddr3_wr_empty;
+	      
+   wire        ddr3_rd_clk;
+   wire        ddr3_rd_cmd_en;
+   wire [2:0]  ddr3_rd_cmd_instr;
+   wire [5:0]  ddr3_rd_cmd_bl;
+   wire [29:0] ddr3_rd_adr;
+   reg 	       ddr3_rd_cmd_full;
+   wire        ddr3_rd_dat_en;
+   reg [31:0]  ddr3_rd_dat;
+   reg 	       ddr3_rd_dat_empty;
+   reg 	       ddr3_rd_dat_full;
+   reg 	       ddr3_rd_dat_overflow; // need to monitor this
+   
+   wire        page_addra_over;
+   wire        outstanding_under;
+
+   glbl glbl(); // add global reset capabiilities
+   
+   romulator_ddr3 dut (
+		       .clk(clk),
+		       .nand_we(  nand_we),
+		       .nand_re(  nand_re),
+		       .nand_ale( nand_ale),
+		       .nand_cle( nand_cle),
+		       .nand_rb(  nand_rb),
+		       .nand_wp(  nand_wp),
+		       .nand_cs(  nand_cs),
+
+		       .nand_din(nand_din),
+		       .nand_dout(nand_dout),
+		       .nand_drive_out(nand_drive_out),
+
+		       .rom_ddr3_reset(rom_ddr3_reset),
+		      
+		       .ddr3_wr_clk(ddr3_wr_clk),
+		       .ddr3_wr_cmd_en(ddr3_wr_cmd_en),
+		       .ddr3_wr_cmd_instr(ddr3_wr_cmd_instr[2:0]),
+		       .ddr3_wr_cmd_bl(ddr3_wr_cmd_bl[5:0]),
+		       .ddr3_wr_adr(ddr3_wr_adr[29:0]),
+		       .ddr3_wr_cmd_full(ddr3_wr_cmd_full),
+		       .ddr3_wr_cmd_empty(ddr3_wr_cmd_empty),
+		       .ddr3_wr_dat_en(ddr3_wr_dat_en),
+		       .ddr3_wr_dat(ddr3_wr_dat[31:0]),
+		       .ddr3_wr_full(ddr3_wr_full),
+		       .ddr3_wr_empty(ddr3_wr_empty),
+		      
+		       .ddr3_rd_clk(ddr3_rd_clk),
+		       .ddr3_rd_cmd_en(ddr3_rd_cmd_en),
+		       .ddr3_rd_cmd_instr(ddr3_rd_cmd_instr[2:0]),
+		       .ddr3_rd_cmd_bl(ddr3_rd_cmd_bl[5:0]),
+		       .ddr3_rd_adr(ddr3_rd_adr[29:0]),
+		       .ddr3_rd_cmd_full(ddr3_rd_cmd_full),
+		       .ddr3_rd_dat_en(ddr3_rd_dat_en),
+		       .ddr3_rd_dat(ddr3_rd_dat[31:0]),
+		       .ddr3_rd_dat_empty(ddr3_rd_dat_empty),
+		       .ddr3_rd_dat_count(data_count[6:0]),
+		       .ddr3_rd_dat_full(ddr3_rd_dat_full),
+		       .ddr3_rd_dat_overflow(ddr3_rd_dat_overflow),
+		       
+		       .page_addra_over(page_addra_over),
+		       .outstanding_under(outstanding_under),
+
+		       .nand_uk_cmd(nand_uk_cmd),
+		       .nand_uk_cmd_updated(nand_uk_cmd_updated),
+
+		       .reset(reset)
+		  );
+
+   // emulate a very simple DDR3-MIG moderated memory interface here
+   reg [1:0]   cmd_count;
+   reg [7:0]   data_count;
+   reg [7:0]   wdata_count;
+   reg [31:0]  latency_timer;
+   reg [31:0]  wlatency_timer;
+   reg 	       add_data;
+   reg 	       minus_data;
+   reg 	       ddr3_go;
+   reg 	       ddr3_wgo;
+   reg [1:0]   wcmd_count;
+
+   parameter DDR3_TURNAROUND_TIME = 32'h20;
+
+   assign ddr3_wr_cmd_empty = (wcmd_count == 2'b00);
+   
+   // we're going to cheat and use blocking assignments.
+   always @(posedge clk) begin
+      if( ddr3_rd_cmd_en ) begin
+	 if( cmd_count == 2'b11 ) begin
+	    cmd_count = cmd_count;
+	 end else begin
+	    cmd_count = cmd_count + 1;
+	 end
+      end else begin
+	 cmd_count = cmd_count;
+      end
+
+      if( ddr3_wr_cmd_en ) begin
+	 if( wcmd_count == 2'b11 ) begin
+	    wcmd_count = wcmd_count;
+	 end else begin
+	    wcmd_count = wcmd_count + 1;
+	 end
+      end else begin
+	 wcmd_count = wcmd_count;
+      end
+      
+      if( cmd_count == 2'b11 ) begin
+	 ddr3_rd_cmd_full = 1'b1;
+      end else begin
+	 ddr3_rd_cmd_full = 1'b0;
+      end
+
+      if( wcmd_count == 2'b11  || (wcmd_count == 2'b10 && ddr3_wr_cmd_en)) begin
+	 ddr3_wr_cmd_full = 1'b1;
+      end else begin
+	 ddr3_wr_cmd_full = 1'b0;
+      end
+      
+      if( cmd_count > 2'b00 ) begin
+	 ddr3_go = 1;
+      end else begin
+	 ddr3_go = 0;
+      end
+
+      if( wcmd_count > 2'b00 ) begin
+	 ddr3_wgo = 1;
+      end else begin
+	 ddr3_wgo = 0;
+      end
+
+      if( ddr3_wr_dat_en ) begin
+	 wdata_count <= wdata_count + 1;
+      end
+
+      if( wdata_count > 0 ) begin
+	 ddr3_wr_empty <= 1'b0;
+      end else begin
+	 ddr3_wr_empty <= 1'b1;
+      end
+
+      if( ddr3_wgo ) begin
+	 wlatency_timer = wlatency_timer + 1;
+	 if( wdata_count != 8'b0 ) begin
+	    wdata_count <= wdata_count - 1;
+	 end
+      end
+
+      if( wlatency_timer >= 32'd16 ) begin
+	 wlatency_timer <= 32'd0;
+	 wcmd_count <= wcmd_count - 2'b1;
+      end
+
+      if( wdata_count >= 8'd64 ) begin
+	 ddr3_wr_full <= 1'b1;
+      end else begin
+	 ddr3_wr_full <= 1'b0;
+      end
+      
+      if( ddr3_go && !add_data ) begin
+	 latency_timer = latency_timer + 1;
+      end else if (add_data || !ddr3_go) begin
+	 latency_timer = 0;
+      end else begin
+	 latency_timer = latency_timer;
+      end
+	 
+      if( latency_timer > DDR3_TURNAROUND_TIME ) begin
+	 add_data = 1;
+	 cmd_count = cmd_count - 1;
+	 latency_timer = 0;
+      end else begin
+	 add_data = 0;
+      end
+
+      if( add_data ) begin
+	 if( data_count < 8'd64 ) begin
+	    data_count = data_count + 8'd16;  // we get 16 words at a time
+	    ddr3_rd_dat_full = 1'b0;
+	 end else if (data_count == 8'd64) begin
+	    ddr3_rd_dat_overflow = 1'b0;
+	    ddr3_rd_dat_full = 1'b1;
+	    data_count = data_count;
+	 end else begin
+	    ddr3_rd_dat_overflow = 1'b1;
+	    ddr3_rd_dat_full = 1'b1;
+	    data_count = data_count;
+	 end
+      end else begin
+	 // need a delete data entry here too
+	 data_count = data_count;
+      end // else: !if( add_data )
+
+
+      if( ddr3_rd_dat_en ) begin
+	 if( data_count > 8'd0 ) begin
+	    ddr3_rd_dat = ddr3_rd_dat + 32'h01010101;
+	    data_count = data_count - 1;
+	 end else begin
+	    data_count = 0;
+	 end
+      end
+
+      if( data_count[6:0] > 7'd0 ) begin
+	 ddr3_rd_dat_empty = 1'b0;
+      end else begin
+	 ddr3_rd_dat_empty = 1'b1;
+      end
+      
+   end
+   
+   parameter PERIOD = 16'd8;   // 125 MHz (close to 133 MHz actual)
+   always begin
+      clk = 1'b0;
+      #(PERIOD/2) clk = 1'b1;
+      #(PERIOD/2);
+   end
+
+   // Put the NAND stimulus signals into their idle state: write and read
+   // strobes high, ALE and CLE low, chip select high, data bus released (Z).
+   task nand_idle;
+      begin
+	 nand_we = 1'b1;
+	 nand_re = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_cle = 1'b0;
+	 nand_cs = 1'b1;
+	 nand_din = 8'hZZ;
+      end
+   endtask // nand_idle
+
+   // Drive a read-ID style sequence onto the NAND stimulus signals:
+   //   1. command byte 8'h90 strobed with nand_we while CLE is high,
+   //   2. address byte 8'h00 strobed with nand_we while ALE is high,
+   //   3. five read strobes on nand_re (25 time units low, 25 high each).
+   // Chip select is held low for the whole sequence and released at the end.
+   task nand_read_id;
+      begin
+	 nand_cs = 1'b0;
+
+	 // command cycle
+	 nand_cle = 1'b1;
+	 nand_we = 1'b0;
+	 nand_din = 8'h90;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_cle = 1'b0;
+	 nand_din = 8'hZZ;
+	 #20;
+
+	 // address cycle
+	 nand_ale = 1'b1;
+	 #25;
+
+	 nand_we = 1'b0;
+	 nand_din = 8'h00;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_din = 8'hZZ;
+	 #20;
+
+	 nand_ale = 1'b0;
+
+	 // five read cycles
+	 #10;
+	 repeat (5) begin
+	    nand_re = 1'b0;
+	    #25;
+	    nand_re = 1'b1;
+	    #25;
+	 end
+
+	 nand_cs = 1'b1;
+      end
+   endtask // nand_read_id
+
+   // Issue a single command cycle carrying opcode 8'h44, used here as an
+   // arbitrary opcode that the device under test is not expected to recognise.
+   // Chip select is asserted for the cycle and released afterwards.
+   task unknown_op;
+      begin
+	 nand_cs = 1'b0;
+
+	 nand_cle = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_we = 1'b0;
+	 nand_din = 8'h44; // some random unknown opcode
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_cle = 1'b0;
+
+	 nand_cs = 1'b1;
+      end
+   endtask // unknown_op
+   
+   task reset_op;  // drive a single 0xFF command cycle, then wait 6000 time units
+      begin
+	 nand_cs = 1'b0;
+	 // command-latch cycle: CLE high, ALE low, opcode on the bus while WE is low
+	 nand_cle = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_we = 1'b0;
+	 nand_din = 8'hFF; // opcode sent by this task (RESET in the ONFI command set -- confirm against the DUT decoder)
+	 #25;
+	 nand_we = 1'b1;   // WE returns high, ending the write strobe
+	 #5;
+	 nand_cle = 1'b0;
+	 // release chip select, then stay idle before the next operation
+	 nand_cs = 1'b1;
+
+	 #6000;
+      end
+   endtask // reset_op
+   
+   task status_op;  // issue opcode 0x70, then pulse RE low once
+      begin
+	 nand_cs = 1'b0;
+	 // command-latch cycle: CLE high, ALE low, opcode on the bus while WE is low
+	 nand_cle = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_we = 1'b0;
+	 nand_din = 8'h70; // opcode sent by this task (READ STATUS in the ONFI command set -- confirm against the DUT decoder)
+	 #25;
+	 nand_we = 1'b1;   // WE returns high, ending the write strobe
+	 #5;
+	 nand_cle = 1'b0;
+	 // single read strobe; note nand_din keeps driving 0x70 (it is not released to Z) during it
+	 #100;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #20;
+
+	 nand_cs = 1'b1;
+      end
+   endtask // status_op
+   
+
+   task nand_read_op;
+      input [29:0] adr;
+      begin
+	 nand_cs = 1'b0;
+
+	 nand_cle = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_we = 1'b0;
+	 nand_din = 8'h00;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_cle = 1'b0;
+	 nand_ale = 1'b1;
+	 
+	 nand_din = adr[7:0];
+	 #20;
+	 
+	 nand_we = 1'b0; 
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = {4'b0,adr[11:8]};
+	 #20;
+
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = adr[19:12];
+	 #20;
+
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = adr[27:20];
+	 #20;
+	 
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = {6'b0,adr[29:28]};
+	 #20;
+	 
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+
+	 nand_cle = 1'b1;
+	 nand_din = 8'h30;
+	 nand_ale = 1'b0;
+	 #20;
+	 
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+
+	 nand_cle = 1'b0;
+
+	 while( nand_rb == 1'b0 ) begin
+	    #50;
+	 end
+	 
+//	 #7000;
+	 #500;
+
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 
+	 nand_cs = 1'b1;
+	 
+      end
+   endtask; // nand_read_op
+
+   reg [7:0] testdat;
+   task nand_write_op;
+      input [29:0] adr;
+      begin
+	 testdat  = 8'b0;
+	 
+	 nand_cs = 1'b0;
+
+	 nand_cle = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_we = 1'b0;
+	 nand_din = 8'h80;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_cle = 1'b0;
+	 nand_ale = 1'b1;
+	 
+	 nand_din = adr[7:0];
+	 #20;
+	 
+	 nand_we = 1'b0; 
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = {4'b0,adr[11:8]};
+	 #20;
+
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = adr[19:12];
+	 #20;
+
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = adr[27:20];
+	 #20;
+	 
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = {6'b0,adr[29:28]};
+	 #20;
+	 
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_ale = 1'b0;
+	 #25;
+
+	 nand_we = 1'b0;
+	 nand_din = testdat;
+	 #25;
+	 nand_we = 1'b1;
+	 #25;
+	 testdat = testdat + 8'b10;
+
+	 nand_we = 1'b0;
+	 nand_din = testdat;
+	 #25;
+	 nand_we = 1'b1;
+	 #25;
+	 testdat = testdat + 8'b10;
+
+	 nand_we = 1'b0;
+	 nand_din = testdat;
+	 #25;
+	 nand_we = 1'b1;
+	 #25;
+	 testdat = testdat + 8'b10;
+
+	 nand_we = 1'b0;
+	 nand_din = testdat;
+	 #25;
+	 nand_we = 1'b1;
+	 #25;
+	 testdat = testdat + 8'b10;
+
+	 nand_we = 1'b0;
+	 nand_din = testdat;
+	 #25;
+	 nand_we = 1'b1;
+	 #25;
+	 testdat = testdat + 8'b10;
+
+	 nand_we = 1'b0;
+	 nand_din = testdat;
+	 #25;
+	 nand_we = 1'b1;
+	 #25;
+	 testdat = testdat + 8'b10;
+
+	 nand_we = 1'b0;
+	 nand_din = testdat;
+	 #25;
+	 nand_we = 1'b1;
+	 #25;
+	 testdat = testdat + 8'b10;
+
+	 nand_we = 1'b0;
+	 nand_din = testdat;
+	 #25;
+	 nand_we = 1'b1;
+	 #25;
+	 testdat = testdat + 8'b10;
+
+	 nand_cle = 1'b1;
+	 nand_din = 8'h10;
+	 nand_ale = 1'b0;
+	 #20;
+	 
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+
+	 nand_cle = 1'b0;
+
+	 while( nand_rb == 1'b0 ) begin
+	    #50;
+	 end
+	 
+	 nand_cs = 1'b1;
+	 
+      end
+   endtask; // nand_write_op
+
+
+   task nand_erase_op;
+      input [29:0] adr;
+      begin
+	 testdat  = 8'b0;
+	 
+	 nand_cs = 1'b0;
+
+	 nand_cle = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_we = 1'b0;
+	 nand_din = 8'h60;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_cle = 1'b0;
+	 nand_ale = 1'b1;
+	 
+	 nand_din = adr[19:12];
+	 #20;
+
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = adr[27:20];
+	 #20;
+	 
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = {6'b0,adr[29:28]};
+	 #20;
+	 
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_ale = 1'b0;
+	 #25;
+
+	 nand_cle = 1'b1;
+	 nand_din = 8'hD0;
+	 nand_ale = 1'b0;
+	 #20;
+	 
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+
+	 nand_cle = 1'b0;
+
+	 while( nand_rb == 1'b0 ) begin
+	    #50;
+	 end
+	 
+	 nand_cs = 1'b1;
+	 
+      end
+   endtask; // nand_erase_op
+   
+   
+   initial begin
+      // emu params
+      cmd_count = 0;
+      data_count = 0;
+      latency_timer = 0;
+      add_data = 0;
+      ddr3_go = 0;
+      
+      wcmd_count = 0;
+      wdata_count = 0;
+      wlatency_timer = 0;
+      ddr3_wgo = 0;
+      
+      // normal params
+      nand_wp = 0;
+      reset = 0;
+      ddr3_wr_cmd_full = 0;
+      ddr3_wr_full = 0;
+      ddr3_rd_cmd_full = 0;
+      ddr3_rd_dat = 32'h04030201;
+      ddr3_rd_dat_full = 0;
+      ddr3_rd_dat_overflow = 0;
+      rom_ddr3_reset = 0;
+
+      // reset
+      nand_idle();
+      #(PERIOD*4);
+      reset = 1;
+      #(PERIOD*4);
+      reset = 0;
+      #(PERIOD*4);
+      #(PERIOD*4);
+      reset = 1;
+      #(PERIOD*4);
+      reset = 0;
+      #(PERIOD*4);
+      #100;
+      
+
+      // now test
+      nand_read_id();
+      #100;
+      nand_read_id();
+
+      // postamble
+      #50;
+      nand_idle();
+      #100;
+
+      unknown_op();
+      #100;
+
+      status_op();
+      #100;
+
+      nand_read_op(29'h0);
+      #100;
+      // address 20100 -> row/col 40100
+      nand_read_op(29'h40100); // maps to 100
+      #100;
+      // address 80210 -> 100210
+      nand_read_op(29'h100210); // maps to 8210
+      #100;
+      // address 81000 -> 120000
+      nand_read_op(29'h120000); // maps to invalid
+      #100;
+      // address 100000 -> 200000
+      nand_read_op(29'h200000); // maps to 9000
+      #100;
+      // address 120000 -> 240000
+      nand_read_op(29'h240000); // maps to invalid
+
+      #1000;
+
+      nand_write_op(29'h0);
+      
+      #1000;
+      
+      nand_write_op(29'h100210);
+      #100;
+      
+      #1000;
+
+      nand_read_op(29'h0);
+      #100;
+      // address 20100 -> row/col 40100
+      
+      reset_op();
+      #100;
+
+      nand_erase_op(29'h100210);
+      #1000;
+      
+      nand_read_id();
+      #500;
+      
+      $stop;
+   end // initial begin
+
+endmodule // romulator_ddr3_tb
+
diff --git a/src/rtl/romulator_tb.v b/src/rtl/romulator_tb.v
new file mode 100755
index 0000000..5954580
--- /dev/null
+++ b/src/rtl/romulator_tb.v
@@ -0,0 +1,370 @@
+//////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2013, Andrew "bunnie" Huang
+//
+// See the NOTICE file distributed with this work for additional 
+// information regarding copyright ownership.  The copyright holder 
+// licenses this file to you under the Apache License, Version 2.0 
+// (the "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// code distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//////////////////////////////////////////////////////////////////////////////
+
+`timescale 1ns / 1ps
+
+module romulator_tb;
+   reg clk;
+   
+   reg nand_we;
+   reg nand_re;
+   reg nand_ale;
+   reg nand_cle;
+   wire nand_rb;
+   reg 	nand_wp;
+   reg 	nand_cs;
+   
+   reg [7:0]  nand_din;
+   wire [7:0] nand_dout;
+   wire       nand_drive_out;
+
+   wire [15:0] ram_adr;
+   wire [7:0]  ram_d_to_ram;
+   wire [7:0]   ram_d_from_ram;
+   wire        ram_we;
+   wire        ram_clk_to_ram;
+
+   wire [7:0]  nand_uk_cmd;    // pipe to a FIFO to store unknown commands
+   wire        nand_uk_cmd_updated;
+
+   reg 	       reset;
+
+   // a RAM for sim sake: 64K x 8, sized to the 16-bit ram_adr index and the 8-bit ram_d_from_ram bus
+   reg [7:0]   extram [0:65535];
+
+   assign  ram_d_from_ram[7:0] = extram[ram_adr];
+   
+   romulator dut (
+		  .clk(clk),
+		 .nand_we(  nand_we),
+		 .nand_re(  nand_re),
+		 .nand_ale( nand_ale),
+		 .nand_cle( nand_cle),
+		 .nand_rb(  nand_rb),
+		 .nand_wp(  nand_wp),
+		 .nand_cs(  nand_cs),
+
+		  .nand_din(nand_din),
+		  .nand_dout(nand_dout),
+		  .nand_drive_out(nand_drive_out),
+
+		  .ram_adr(ram_adr),
+		  .ram_d_to_ram(ram_d_to_ram),
+		  .ram_d_from_ram(ram_d_from_ram),
+		  .ram_we(ram_we),
+		  .ram_clk_to_ram(ram_clk_to_ram),
+
+		  .nand_uk_cmd(nand_uk_cmd),
+		  .nand_uk_cmd_updated(nand_uk_cmd_updated),
+
+		  .reset(reset)
+		  );
+   
+   
+   parameter PERIOD = 16'd8;   // 125 MHz (close to 133 MHz actual)
+   always begin
+      clk = 1'b0;
+      #(PERIOD/2) clk = 1'b1;
+      #(PERIOD/2);
+   end
+
+   task nand_idle;
+      begin
+	 nand_we = 1'b1;
+	 nand_re = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_cle = 1'b0;
+	 nand_cs = 1'b1;
+	 nand_din = 8'hZZ;
+      end
+   endtask // nand_idle
+
+   task nand_read_id;
+      begin
+	 nand_cs = 1'b0;
+	 
+	 nand_cle = 1'b1;
+	 nand_we = 1'b0;
+	 nand_din = 8'h90;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_cle = 1'b0;
+	 nand_din = 8'hZZ;
+	 #20;
+
+	 nand_ale = 1'b1;
+	 #25;
+
+	 nand_we = 1'b0;
+	 nand_din = 8'h00;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_din = 8'hZZ;
+	 #20;
+
+	 nand_ale = 1'b0;
+
+	 #10;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+
+	 nand_cs = 1'b1;
+      end
+   endtask; // nand_read_id
+
+   task unknown_op;
+      begin
+	 nand_cs = 1'b0;
+
+	 nand_cle = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_we = 1'b0;
+	 nand_din = 8'h44; // some random unknown opcode
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_cle = 1'b0;
+
+	 nand_cs = 1'b1;
+      end
+   endtask; // unknown_op
+   
+   task reset_op;  // drive a single 0xFF command cycle, then wait 6000 time units
+      begin
+	 nand_cs = 1'b0;
+	 // command-latch cycle: CLE high, ALE low, opcode on the bus while WE is low
+	 nand_cle = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_we = 1'b0;
+	 nand_din = 8'hFF; // opcode sent by this task (RESET in the ONFI command set -- confirm against the DUT decoder)
+	 #25;
+	 nand_we = 1'b1;   // WE returns high, ending the write strobe
+	 #5;
+	 nand_cle = 1'b0;
+	 // release chip select, then stay idle before the next operation
+	 nand_cs = 1'b1;
+
+	 #6000;
+      end
+   endtask // reset_op
+   
+   task status_op;  // issue opcode 0x70, then pulse RE low once
+      begin
+	 nand_cs = 1'b0;
+	 // command-latch cycle: CLE high, ALE low, opcode on the bus while WE is low
+	 nand_cle = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_we = 1'b0;
+	 nand_din = 8'h70; // opcode sent by this task (READ STATUS in the ONFI command set -- confirm against the DUT decoder)
+	 #25;
+	 nand_we = 1'b1;   // WE returns high, ending the write strobe
+	 #5;
+	 nand_cle = 1'b0;
+	 // single read strobe; note nand_din keeps driving 0x70 (it is not released to Z) during it
+	 #100;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #20;
+
+	 nand_cs = 1'b1;
+      end
+   endtask // status_op
+   
+
+   task nand_read_op;
+      input [29:0] adr;
+      begin
+	 nand_cs = 1'b0;
+
+	 nand_cle = 1'b1;
+	 nand_ale = 1'b0;
+	 nand_we = 1'b0;
+	 nand_din = 8'h00;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 nand_cle = 1'b0;
+	 nand_ale = 1'b1;
+	 
+	 nand_din = adr[7:0];
+	 #20;
+	 
+	 nand_we = 1'b0; 
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = {4'b0,adr[11:8]};
+	 #20;
+
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = adr[19:12];
+	 #20;
+
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = adr[27:20];
+	 #20;
+	 
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+	 
+	 nand_din = {6'b0,adr[29:28]};
+	 #20;
+	 
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+
+	 nand_cle = 1'b1;
+	 nand_din = 8'h30;
+	 nand_ale = 1'b0;
+	 #20;
+	 
+	 nand_we = 1'b0;
+	 #25;
+	 nand_we = 1'b1;
+	 #5;
+
+	 nand_cle = 1'b0;
+
+//	 #7000;
+	 #500;
+
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 nand_re = 1'b0;
+	 #25;
+	 nand_re = 1'b1;
+	 #25;
+	 
+	 nand_cs = 1'b1;
+	 
+      end
+   endtask; // nand_read_op
+   
+   
+   initial begin // main stimulus: load the RAM image, pulse reset twice, then run the NAND operations
+      nand_wp = 0;
+      reset = 0;
+      $readmemb( "f:\\largework\\fpga\\novena-sd-fpga\\extram.bin", extram, 0, 65535 ); // backslashes doubled: a lone \ starts an escape (\n = newline)
+
+      $stop; // pauses right after the memory load; everything below runs only once the simulation is resumed
+
+      // reset
+      nand_idle();
+      #(PERIOD*4);
+      reset = 1;
+      #(PERIOD*4);
+      reset = 0;
+      #(PERIOD*4);
+      #(PERIOD*4);
+      reset = 1;
+      #(PERIOD*4);
+      reset = 0;
+      #(PERIOD*4);
+      #100;
+
+
+      // now test
+      nand_read_id();
+      #100;
+      nand_read_id();
+
+      // postamble
+      #50;
+      nand_idle();
+      #100;
+
+      unknown_op();
+      #100;
+
+      status_op();
+      #100;
+
+      nand_read_op(29'h0);
+      #100;
+      // address 20100 -> row/col 40100
+      nand_read_op(29'h40100); // maps to 100
+      #100;
+      // address 80210 -> 100210
+      nand_read_op(29'h100210); // maps to 8210
+      #100;
+      // address 81000 -> 120000
+      nand_read_op(29'h120000); // maps to invalid
+      #100;
+      // address 100000 -> 200000
+      nand_read_op(29'h200000); // maps to 9000
+      #100;
+      // address 120000 -> 240000
+      nand_read_op(29'h240000); // maps to invalid
+
+      #100;
+
+      reset_op();
+      #100;
+
+      nand_read_id();
+      #500;
+
+      $stop;
+   end // initial begin
+
+endmodule // romulator_tb
diff --git a/src/rtl/spi_eeprom.v b/src/rtl/spi_eeprom.v
new file mode 100755
index 0000000..5e9f6c4
--- /dev/null
+++ b/src/rtl/spi_eeprom.v
@@ -0,0 +1,255 @@
+module spi_eeprom(
+		  output  wire sdout,
+		  input   wire sdin,
+		  input   wire scs,
+		  input   wire sclk,
+		  input   wire swp,
+		  input   wire shold,
+
+		  input wire eim_clk,
+		  input wire [18:0] bus_a,
+		  input wire [15:0] bus_d,
+		  input wire we,
+		  input wire re,
+		  output wire [15:0] reg_d,
+		  output reg [15:0] rbk_d,
+		  
+		  output reg [7:0] spi_uk_cmd,  // bitbucket for unknown commands
+		  output reg spi_uk_cmd_updated,
+		  
+		  output reg spi_byte_updated,  // updates every byte
+		  output reg [7:0] spi_byte,
+		  output reg spi_obyte_updated,
+		  output reg [7:0] spi_obyte,
+
+		  output wire [23:0] spi_adr,
+		  output reg spi_adr_updated,
+		  
+		  input   reset
+		  );
+
+   reg [7:0] 		  inreg;
+   reg [31:0] 		  addreg; // top 8 bits are dummy (replica of command)
+   reg [23:0] 		  address; // address to read
+   reg [15:0] 		  cycle; // cycle counter
+   reg [7:0] 		  command; 
+
+   reg [23:0] 		  shiftout; // ID shift out register
+   reg 			  readmode; // 1 if in read mode
+   reg [7:0] 		  uk_cmd;  // capture unknown commands
+   reg 			  uk_cmd_stb; // pulse high once sync to sclk for fifo
+
+   wire [31:0] 		  rom_dout;
+   reg [31:0] 		  dataout; // shift register for data going out
+
+   assign spi_adr = address;
+   
+   always @(posedge sclk or posedge scs) begin
+      if( scs ) begin
+	 inreg[7:0] <= 8'b0;
+      end else begin
+	 inreg[7:0] <= {inreg[6:0],sdin};
+      end
+   end
+   
+   always @(posedge sclk or posedge scs) begin
+      if( scs ) begin
+	 addreg[31:0] <= 32'b0;
+      end else begin
+	 addreg[31:0] <= {addreg[30:0],sdin};
+      end
+   end
+
+   always @(posedge sclk or posedge scs) begin
+      if( scs ) begin
+	 cycle <= 16'b0;
+      end else begin
+	 cycle <= cycle + 16'b1;
+      end
+   end
+
+   // Snoop of the outgoing bit stream: sdout is shifted into spi_obyte on every rising sclk edge.
+   always @(posedge sclk) begin
+      // Flag defaults low; the later nonblocking assignment overrides it when
+      // the low three bits of cycle, as sampled at this edge, are all ones.
+      spi_obyte_updated <= 1'b0;
+      if( cycle[2:0] == 3'b111 )
+	spi_obyte_updated <= 1'b1;
+      spi_obyte <= {spi_obyte[6:0], sdout};
+   end
+
+   // Snoop of the incoming bit stream: sdin is shifted into spi_byte on every rising sclk edge.
+   always @(posedge sclk) begin
+      // Flag defaults low; the later nonblocking assignment overrides it when
+      // the low three bits of cycle, as sampled at this edge, are all ones.
+      spi_byte_updated <= 1'b0;
+      if( cycle[2:0] == 3'b111 )
+	spi_byte_updated <= 1'b1;
+      spi_byte <= {spi_byte[6:0], sdin};
+   end
+	 
+   always @(negedge sclk) begin // falling-edge bookkeeping: command capture and read-address tracking
+      if( cycle == 16'd9 ) begin // NOTE(review): inreg has shifted in 9 bits by this edge, and command is never read in this module -- confirm intent
+	 command <= inreg;
+      end else begin
+	 command <= command;
+      end
+      // address: loaded once when cycle == 32, then advanced by 4 on later cycles where cycle > 40 and cycle[4:0] == 31
+      if( cycle == 16'h20 ) begin
+	 address[23:0] <= {addreg[23:2],2'b00}; // we're just going to make this bad
+	 spi_adr_updated <= 1'b1;
+	 // assumption that all reads are word-aligned
+      end else if( (cycle > 16'd40) && (cycle[4:0] == 5'b1_1111) ) begin
+	 address[23:0] <= address[23:0] + 24'd4;
+	 spi_adr_updated <= 1'b0;
+      end else begin
+	 address <= address;
+	 spi_adr_updated <= 1'b0;
+      end
+   end
+
+ `ifdef ORIG_CODE
+   always @(negedge sclk) begin
+      if( cycle < 16'd8 ) begin // command not valid until 9 but need to know sooner
+	 shiftout <= shiftout;
+	 readmode <= 1'b0;
+	 spi_uk_cmd_updated <= 1'b0;
+	 spi_uk_cmd <= spi_uk_cmd;
+      end else if( cycle == 16'd8 ) begin // do command dispatch
+	 if( inreg[7:0] == 8'h9f ) begin
+	    shiftout <= 24'hc86017;
+	    readmode <= 1'b0;
+	    spi_uk_cmd_updated <= 1'b1; // grab known for now
+	    spi_uk_cmd <= inreg[7:0];
+	 end else if( inreg[7:0] == 8'h0b ) begin
+	    readmode <= 1'b1;
+	    shiftout <= shiftout;
+	    spi_uk_cmd_updated <= 1'b1; // grab known for now
+	    spi_uk_cmd <= inreg[7:0];
+	 end else begin
+	    spi_uk_cmd <= inreg[7:0]; // record the unknown command
+	    spi_uk_cmd_updated <= 1'b1;
+	    readmode <= 1'b0;
+	    shiftout <= shiftout;
+	 end
+      end else begin  // cycle > 16'd7
+	 shiftout[23:0] <= {shiftout[22:0],shiftout[23]};
+	 readmode <= readmode;
+	 spi_uk_cmd_updated <= 1'b0;
+	 spi_uk_cmd <= spi_uk_cmd;
+      end
+   end // always @ (negedge sclk)
+
+   assign sdout = readmode ? dataout[31] : shiftout[23];
+
+   always @(negedge sclk) begin
+      // 8 cycles for command
+      // 24 cycles for address
+      // 8 dummy cycles because it's command 0B (only command we support)
+      // 40th cycle start shifting the data
+      if ( (cycle >= 16'd33) && (cycle[4:0] == 5'b0_1000) ) begin
+	 dataout[31:0] <= {rom_dout[7:0],rom_dout[15:8],rom_dout[23:16],rom_dout[31:24]};
+      end else if( cycle >= 16'd41 ) begin
+	 dataout[31:0] <= {dataout[30:0],dataout[31]};
+      end else begin
+	 dataout <= dataout;
+      end
+   end // always @ (negedge sclk)
+ `else // !`ifdef ORIG_CODE
+   always @(posedge sclk) begin // rising-edge variant of the command dispatcher
+      if( cycle < 16'd7 ) begin // command not valid until 9 but need to know sooner; hold state, force non-read mode
+	 shiftout <= shiftout;
+	 readmode <= 1'b0;
+	 spi_uk_cmd_updated <= 1'b0;
+	 spi_uk_cmd <= spi_uk_cmd;
+      end else if( cycle == 16'd7 ) begin // do command dispatch: opcode = 7 bits already in inreg + the bit on sdin now
+	 if( {inreg[6:0],sdin} == 8'h9f ) begin
+	    shiftout <= 24'hc86017; // 24-bit response, rotated out MSB first on sdout while readmode is 0
+	    readmode <= 1'b0;
+	    spi_uk_cmd_updated <= 1'b1; // grab known for now
+	    spi_uk_cmd <= {inreg[6:0],sdin}; // inreg alone still lacks the final opcode bit at this edge
+	 end else if( {inreg[6:0],sdin} == 8'h0b ) begin
+	    readmode <= 1'b1;
+	    shiftout <= shiftout;
+	    spi_uk_cmd_updated <= 1'b1; // grab known for now
+	    spi_uk_cmd <= {inreg[6:0],sdin}; // log the same 8 bits that were just decoded
+	 end else begin
+	    spi_uk_cmd <= {inreg[6:0],sdin}; // record the unknown command (full opcode, not the 7-bit partial)
+	    spi_uk_cmd_updated <= 1'b1;
+	    readmode <= 1'b0;
+	    shiftout <= shiftout;
+	 end
+      end else begin  // cycle > 16'd7: rotate the response register, keep the decoded mode
+	 shiftout[23:0] <= {shiftout[22:0],shiftout[23]};
+	 readmode <= readmode;
+	 spi_uk_cmd_updated <= 1'b0;
+	 spi_uk_cmd <= spi_uk_cmd;
+      end
+   end // always @ (posedge sclk)
+
+   assign sdout = readmode ? dataout[31] : shiftout[23];
+
+   always @(posedge sclk) begin
+      // 8 cycles for command
+      // 24 cycles for address
+      // 8 dummy cycles because it's command 0B (only command we support)
+      // 40th cycle start shifting the data
+      if ( (cycle >= 16'd32) && (cycle[4:0] == 5'b0_0111) ) begin // cycle = 39, 71, 103, ...: load the next 32-bit word
+	 dataout[31:0] <= {rom_dout[7:0],rom_dout[15:8],rom_dout[23:16],rom_dout[31:24]}; // byte-swapped so rom_dout[7:0] is shifted out first
+      end else if( cycle >= 16'd40 ) begin // otherwise rotate left one bit per clock; sdout taps dataout[31] in read mode
+	 dataout[31:0] <= {dataout[30:0],dataout[31]};
+      end else begin
+	 dataout <= dataout;
+      end
+   end // always @ (posedge sclk)
+   
+`endif   
+
+   // if cycle == 8'd34 && readmode, issue the read request to the dram interface
+   // time to respond is thus 6 cycles * 37ns = 222 ns. Max DDR3 latency is 85ns. We should be ok.
+   // this gives 130ns for data readback, which is enough time to transfer 208 bytes -- not 
+   // quite a whole page. We need 160ns to get the full page.
+   // see if we can optimize to 7 cycles allowed response time to first data out
+
+   wire my_a;
+
+   // 19'h1_0000 >> 1 due to short-align
+   assign my_a = (bus_a[18:16] == 3'b001); 
+   
+   eimram spi_romulator_64k (
+     .clka(eim_clk), // input clka
+     .ena(my_a), // input ena
+     .wea(we), // input [0 : 0] wea
+     .addra(bus_a[15:1]), // input [14 : 0] addra
+     .dina(bus_d[15:0]), // input [15 : 0] dina
+     .clkb(sclk), // input clkb
+     .addrb(address[15:2]), // input [13 : 0] addrb
+     .doutb(rom_dout) // output [31 : 0] doutb  // 4 bytes at a time
+     );
+
+   // shadow copy for readback (a cheesy way to get three-ported-ness)
+   wire [31:0] state;
+   eimram spi_romulator_64k_shadow (
+     .clka(eim_clk), // input clka
+     .ena(my_a), // input ena
+     .wea(we), // input [0 : 0] wea
+     .addra(bus_a[15:1]), // input [14 : 0] addra
+     .dina(bus_d[15:0]), // input [15 : 0] dina
+     .clkb(eim_clk), // input clkb
+     .addrb(bus_a[15:2]), // input [13 : 0] addrb
+     .doutb(state) // output [31 : 0] doutb
+     );
+   
+   always @(bus_a or my_a or re or state) begin // combinational readback mux for the EIM side
+      if( my_a && re ) begin // this block is addressed (bus_a[18:16] == 3'b001) and re is asserted
+	 if( bus_a[1] == 1'b0 ) begin
+	    rbk_d = state[15:0]; // low half of the 32-bit shadow word
+	 end else begin
+	    rbk_d = state[31:16]; // high half of the 32-bit shadow word
+	 end
+      end else begin
+	 rbk_d = 16'hZZZZ; // not selected: release rbk_d to high-impedance
+      end
+   end
+   
+endmodule // spi_eeprom
diff --git a/src/rtl/spi_eeprom_tb.v b/src/rtl/spi_eeprom_tb.v
new file mode 100755
index 0000000..431c214
--- /dev/null
+++ b/src/rtl/spi_eeprom_tb.v
@@ -0,0 +1,317 @@
+//////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2013, Andrew "bunnie" Huang
+//
+// See the NOTICE file distributed with this work for additional 
+// information regarding copyright ownership.  The copyright holder 
+// licenses this file to you under the Apache License, Version 2.0 
+// (the "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// code distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//////////////////////////////////////////////////////////////////////////////
+
+`timescale 1ns / 1ps
+
+module spi_eeprom_tb;
+
+   wire sdout;
+   reg 	sdin;
+   reg 	scs;
+   reg 	sclk;
+   reg 	swp;
+   reg 	shold;
+
+   reg 	eim_clk;
+   reg [18:0] bus_a;
+   reg [15:0] bus_d;
+   reg 	      we;
+   reg 	      re;
+   wire [15:0] reg_d;
+   wire [15:0] rbk_d;
+   
+   wire [7:0]  spi_uk_cmd;  // bitbucket for unknown commands
+   wire        spi_uk_cmd_updated;
+   
+   reg 	       reset;
+   reg 	       sclk_on;
+
+   // a RAM for sim sake
+//   reg [15:0]  extram [7:0];
+
+   spi_eeprom dut (
+		   .sdout(sdout),
+		   .sdin(sdin),
+		   .scs(scs),
+		   .sclk(sclk),
+		   .swp(swp),
+		   .shold(shold),
+
+		   .eim_clk(eim_clk),
+		   .bus_a(bus_a),
+		   .bus_d(bus_d),
+		   .we(we),
+		   .re(re),
+		   .reg_d(reg_d),
+		   .rbk_d(rbk_d),
+
+		   .spi_uk_cmd(spi_uk_cmd),
+		   .spi_uk_cmd_updated(spi_uk_cmd_updated),
+
+		   .reset(reset)
+		   );
+
+   parameter PERIOD = 16'd26;   // 37 MHz
+   always begin
+      if( sclk_on ) begin
+	 sclk = 1'b0;
+	 #(PERIOD/2) sclk = 1'b1;
+	 #(PERIOD/2);
+      end else begin
+	 sclk = 1'b0;
+	 #PERIOD;
+      end
+   end
+
+   parameter PERIOD_EIM = 16'd26; // should be 125 MHz but adjust to this due to sim artifacts
+   always begin
+      eim_clk = 1'b0;
+      #(PERIOD_EIM/2) eim_clk = 1'b1;
+      #(PERIOD_EIM/2);
+   end
+
+   task seeprom_idle; // park the SPI-side inputs: sclk gated off, scs / swp / shold all driven high
+      begin
+	 sclk_on = 0;
+	 scs = 1;
+	 swp = 1;
+	 shold = 1;
+      end
+   endtask // seeprom_idle
+
+   task eim_idle; // park the EIM bus: no read/write strobe, data bus floating
+      begin
+	 we = 1'b0;
+	 re = 1'b0;
+	 bus_a = 18'h4E73; // 18-bit literal zero-extended into the 19-bit bus_a; bus_a[18:16] is 3'b000 here
+	 bus_d = 16'hzzzz;
+      end
+   endtask // eim_idle
+
+   task eim_write;
+      input [18:0] t_a;
+      input [15:0] t_d;
+      begin   // a poor approximation of what happens but good enough
+	 bus_a = t_a;
+	 #PERIOD_EIM;
+	 bus_d = t_d;
+	 #PERIOD_EIM;
+	 we = 1;
+	 #PERIOD_EIM;
+	 we = 0;
+	 #PERIOD_EIM;
+      end
+   endtask // eim_write
+
+   task seeprom_id;
+      begin
+	 scs = 1;
+	 sdin = 0;
+	 #PERIOD;
+
+	 scs = 0;
+	 sdin = 0;
+	 #PERIOD;
+
+	 // 9f
+	 sclk_on = 1;
+	 sdin = 1'b1;
+	 #PERIOD;
+	 sdin = 1'b0;
+	 #PERIOD;
+	 sdin = 1'b0;
+	 #PERIOD;
+	 sdin = 1'b1;
+	 #PERIOD;
+
+	 sdin = 1'b1;
+	 #PERIOD;
+	 sdin = 1'b1;
+	 #PERIOD;
+	 sdin = 1'b1;
+	 #PERIOD;
+	 sdin = 1'b1;
+	 #PERIOD;
+
+	 #(PERIOD*24);
+	 sclk_on = 0;
+
+	 #PERIOD;
+	 scs = 1;
+      end
+   endtask // seeprom_id
+
+   task seeprom_invalid;
+      begin
+	 scs = 1;
+	 sdin = 0;
+	 #PERIOD;
+
+	 scs = 0;
+	 sdin = 0;
+	 #PERIOD;
+
+	 // 03
+	 sclk_on = 1;
+	 sdin = 1'b0;
+	 #PERIOD;
+	 sdin = 1'b0;
+	 #PERIOD;
+	 sdin = 1'b0;
+	 #PERIOD;
+	 sdin = 1'b0;
+	 #PERIOD;
+
+	 sdin = 1'b0;
+	 #PERIOD;
+	 sdin = 1'b0;
+	 #PERIOD;
+	 sdin = 1'b1;
+	 #PERIOD;
+	 sdin = 1'b1;
+	 #PERIOD;
+
+	 sdin = 1'b0;
+	 #(PERIOD*31);
+	 sclk_on = 0;
+
+	 #PERIOD;
+	 scs = 1;
+      end
+   endtask // seeprom_invalid
+
+   integer index;
+   task seeprom_read;
+      input [15:0] t_a;
+      begin
+	 scs = 1;
+	 sdin = 0;
+	 #PERIOD;
+
+	 scs = 0;
+	 sdin = 0;
+	 #PERIOD;
+
+	 // 0b
+	 sclk_on = 1;
+	 sdin = 1'b0;
+	 #PERIOD;
+	 sdin = 1'b0;
+	 #PERIOD;
+	 sdin = 1'b0;
+	 #PERIOD;
+	 sdin = 1'b0;
+	 #PERIOD;
+
+	 sdin = 1'b1;
+	 #PERIOD;
+	 sdin = 1'b0;
+	 #PERIOD;
+	 sdin = 1'b1;
+	 #PERIOD;
+	 sdin = 1'b1;
+	 #PERIOD;
+
+	 // top 8 bits always 0
+	 sdin = 1'b0;
+	 #(PERIOD*8);
+
+	 for( index = 15; index >=0; index = index - 1 ) begin
+	    sdin = t_a[index];
+	    #PERIOD;
+	 end
+
+	 // dummy cycle
+	 sdin = 1'b0;
+	 #(PERIOD*8);
+
+	 // and eight bytes of data
+	 sdin = 1'b0;
+	 #(PERIOD*64);
+	 
+	 sclk_on = 0;
+
+	 #PERIOD;
+	 scs = 1;
+	 #PERIOD;
+	 #PERIOD;
+      end
+   endtask // seeprom_read
+   
+   initial begin
+      reset = 0;
+
+      sclk_on = 0;
+      sdin = 0;
+      scs = 1;
+      swp = 1;
+      shold = 1;
+
+      bus_a = 0;
+      bus_d = 0;
+      we = 0;
+      re = 0;
+
+      #(PERIOD);
+      scs = 0;
+      #(PERIOD*2);
+      scs = 1;
+      //      $readmemb( "f:\largework\fpga\novena-sd-fpga\extram.bin", extram, 0, 65535 );
+
+      #(PERIOD*4);
+      $stop;
+
+      // reset
+      eim_idle();
+      seeprom_idle();
+      #(PERIOD*4);
+      reset = 1;
+      #(PERIOD*4);
+      reset = 0;
+      #(PERIOD*4);
+      #(PERIOD*4);
+
+      #(PERIOD*4);
+
+      // now test
+      eim_write(18'h1_0000, 16'hbeef);
+      eim_write(18'h1_0004, 16'hdead);
+      eim_write(18'h1_0006, 16'h8181);
+      eim_write(18'h1_0008, 16'ha581);
+      eim_write(18'h1_000A, 16'h6009);
+
+      eim_write(18'h0_0000, 16'h3333);  // this should do nothing
+      #(PERIOD*4);
+
+      seeprom_id();
+      
+      #(PERIOD*4);
+      
+      seeprom_read(16'h0004);
+
+      #(PERIOD*4);
+      
+      seeprom_invalid();
+      
+      #(PERIOD*4);
+      
+      $stop;
+   end // initial begin
+
+endmodule // romulator_tb
diff --git a/src/sw/Makefile b/src/sw/Makefile
new file mode 100644
index 0000000..caf7e0f
--- /dev/null
+++ b/src/sw/Makefile
@@ -0,0 +1,15 @@
+SOURCES=novena-fpga.c gpio.c eim.c 
+OBJECTS=$(SOURCES:.c=.o)
+EXEC=novena-fpga
+MY_CFLAGS += -Wall -O0 -g
+MY_LIBS +=
+
+all: $(OBJECTS)  # link step: libraries are listed after the objects that reference them
+	$(CC) $(LDFLAGS) $(OBJECTS) $(LIBS) $(MY_LIBS) -o $(EXEC)
+
+clean:
+	rm -f $(EXEC) $(OBJECTS)
+
+.c.o:
+	$(CC) -c $(CFLAGS) $(MY_CFLAGS) $< -o $@
+
diff --git a/src/sw/eim.c b/src/sw/eim.c
new file mode 100644
index 0000000..e5c7a61
--- /dev/null
+++ b/src/sw/eim.c
@@ -0,0 +1,285 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <stdint.h>
+
+#include "gpio.h"
+#include "eim.h"
+
+#define EIM_BASE (0x08040000)
+#define EIM_DOUT (0x0010)
+#define EIM_DIR (0x0012)
+#define EIM_DIN (0x1010)
+
+static int   *mem_32 = 0;
+static short *mem_16 = 0;
+static char  *mem_8  = 0;
+static int   *prev_mem_range = 0;
+
+uint8_t cached_dout = 0;
+uint8_t cached_dir = 0;
+
+static int read_kernel_memory(long offset, int virtualized, int size) {	// returns the value read (size 1, 2, else 4 bytes), or -1 if open/mmap fails
+	int result;
+	static int mem_fd;
+
+	int *mem_range = (int *)(offset & ~0xFFFF);
+	if( !mem_32 || mem_range != prev_mem_range ) {	// (re)map when the 64KB window changes or no valid mapping is cached
+		prev_mem_range = mem_range;
+
+		if(mem_32)
+			munmap(mem_32, 0xFFFF);
+		if(mem_fd)
+			close(mem_fd);
+		mem_32 = 0;	// no valid mapping until the mmap() below succeeds
+		if(virtualized) {
+			mem_fd = open("/dev/kmem", O_RDWR);
+			if( mem_fd < 0 ) {
+				perror("Unable to open /dev/kmem");
+				mem_fd = 0;
+				return -1;
+			}
+		}
+		else {
+			mem_fd = open("/dev/mem", O_RDWR);
+			if( mem_fd < 0 ) {
+				perror("Unable to open /dev/mem");
+				mem_fd = 0;
+				return -1;
+			}
+		}
+
+		mem_32 = mmap(0, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, offset&~0xFFFF);
+		if( MAP_FAILED == (void *)mem_32 ) {	// compare as a pointer; an (int) cast truncates on 64-bit hosts
+			perror("Unable to mmap file");
+			mem_32 = 0;	// never leave MAP_FAILED cached, or the next call would dereference it
+			if( -1 == close(mem_fd) )
+				perror("Also couldn't close file");
+
+			mem_fd=0;
+			return -1;
+		}
+		mem_16 = (short *)mem_32;
+		mem_8  = (char  *)mem_32;
+	}
+
+	int scaled_offset = (offset-(offset&~0xFFFF));
+	if(size==1)
+		result = mem_8[scaled_offset/sizeof(char)];
+	else if(size==2)
+		result = mem_16[scaled_offset/sizeof(short)];
+	else
+		result = mem_32[scaled_offset/sizeof(int)];	// mem_32 is int*, so index in int units (long is 8 bytes on LP64)
+
+	return result;
+}
+
+static int write_kernel_memory(long offset, long value, int virtualized, int size) {	// returns the previous value, or -1 if the window could not be mapped
+	int old_value = read_kernel_memory(offset, virtualized, size);	// also maps the 64KB window containing offset
+	int scaled_offset = (offset-(offset&~0xFFFF));
+	if(!mem_32 || (void *)mem_32 == MAP_FAILED) return -1;	// mapping failed: nothing safe to write through
+	switch(size) {
+	case 1:  mem_8[scaled_offset/sizeof(char)]   = value; break;
+	case 2:  mem_16[scaled_offset/sizeof(short)] = value; break;
+	default: mem_32[scaled_offset/sizeof(int)]   = value; break;	// index in int units: mem_32 is int*, long is 8 bytes on LP64
+	}
+	return old_value;
+}
+
+
+static int prep_eim(void) {
+	int i;
+	// set up pads to be mapped to EIM
+	for( i = 0; i < 16; i++ ) {
+		write_kernel_memory( 0x20e0114 + i*4, 0x0, 0, 4 );  // mux mapping
+		write_kernel_memory( 0x20e0428 + i*4, 0xb0b1, 0, 4 ); // pad strength config'd for a 100MHz rate 
+	}
+
+	// mux mapping
+	write_kernel_memory( 0x20e046c - 0x314, 0x0, 0, 4 ); // BCLK
+	write_kernel_memory( 0x20e040c - 0x314, 0x0, 0, 4 ); // CS0
+	write_kernel_memory( 0x20e0410 - 0x314, 0x0, 0, 4 ); // CS1
+	write_kernel_memory( 0x20e0414 - 0x314, 0x0, 0, 4 ); // OE
+	write_kernel_memory( 0x20e0418 - 0x314, 0x0, 0, 4 ); // RW
+	write_kernel_memory( 0x20e041c - 0x314, 0x0, 0, 4 ); // LBA
+	write_kernel_memory( 0x20e0468 - 0x314, 0x0, 0, 4 ); // WAIT
+	write_kernel_memory( 0x20e0408 - 0x314, 0x0, 0, 4 ); // A16
+	write_kernel_memory( 0x20e0404 - 0x314, 0x0, 0, 4 ); // A17
+	write_kernel_memory( 0x20e0400 - 0x314, 0x0, 0, 4 ); // A18
+
+	// pad strength
+	write_kernel_memory( 0x20e046c, 0xb0b1, 0, 4 ); // BCLK
+	write_kernel_memory( 0x20e040c, 0xb0b1, 0, 4 ); // CS0
+	write_kernel_memory( 0x20e0410, 0xb0b1, 0, 4 ); // CS1
+	write_kernel_memory( 0x20e0414, 0xb0b1, 0, 4 ); // OE
+	write_kernel_memory( 0x20e0418, 0xb0b1, 0, 4 ); // RW
+	write_kernel_memory( 0x20e041c, 0xb0b1, 0, 4 ); // LBA
+	write_kernel_memory( 0x20e0468, 0xb0b1, 0, 4 ); // WAIT
+	write_kernel_memory( 0x20e0408, 0xb0b1, 0, 4 ); // A16
+	write_kernel_memory( 0x20e0404, 0xb0b1, 0, 4 ); // A17
+	write_kernel_memory( 0x20e0400, 0xb0b1, 0, 4 ); // A18
+
+	write_kernel_memory( 0x020c4080, 0xcf3, 0, 4 ); // ungate eim slow clocks
+
+	// EIM_CS0GCR1   
+	// 0101 0  001 1   001    0   001 11  00  0  000  1    0   1   1   1   0   0   1
+	// PSZ  WP GBC AUS CSREC  SP  DSZ BCS BCD WC BL   CREP CRE RFL WFL MUM SRD SWR CSEN
+	//
+	// PSZ = 0101  256 words page size
+	// WP = 0      (not protected)
+	// GBC = 001   min 1 cycles between chip select changes
+	// AUS = 0     address shifted according to port size
+	// CSREC = 001 min 1 cycles between CS, OE, WE signals
+	// SP = 0      no supervisor protect (user mode access allowed)
+	// DSZ = 001   16-bit port resides on DATA[15:0]
+	// BCS = 11    3 clock delay for burst generation
+	// BCD = 00    divide EIM clock by 1 for burst clock
+	// WC = 0      specify write burst according to BL
+	// BL = 000    4 words wrap burst length
+	// CREP = 1    non-PSRAM, set to 1
+	// CRE = 0     CRE is disabled
+	// RFL = 1     fixed latency reads (don't monitor WAIT)
+	// WFL = 1     fixed latency writes (don't monitor WAIT)
+	// MUM = 1     multiplexed mode enabled
+	// SRD = 0     no synch reads
+	// SWR = 0     no synch writes
+	// CSEN = 1    chip select is enabled
+
+	// 0101 0111 1111    0001 1100  0000  1011   1   0   0   1
+	// 0x5  7    F        1   C     0     B    9
+
+	// 0101 0001 1001    0001 1100  0000  1011   1001
+	// 5     1    9       1    c     0     B      9
+
+	write_kernel_memory( 0x21b8000, 0x5191C0B9, 0, 4 );
+
+	// EIM_CS0GCR2   
+	//  MUX16_BYP_GRANT = 1
+	//  ADH = 1 (1 cycles)
+	//  0x1001
+	write_kernel_memory( 0x21b8004, 0x1001, 0, 4 );
+
+
+	// EIM_CS0RCR1   
+	// 00 000101 0 000   0   000   0 000 0 000 0 000 0 000
+	//    RWSC     RADVA RAL RADVN   OEA   OEN   RCSA  RCSN
+	// RWSC 000101    5 cycles for reads to happen
+	//
+	// 0000 0111 0000   0011   0000 0000 0000 0000
+	//  0    7     0     3      0  0    0    0
+	// 0000 0101 0000   0000   0 000 0 000 0 000 0 000
+	//  write_kernel_memory( 0x21b8008, 0x05000000, 0, 4 );
+	write_kernel_memory( 0x21b8008, 0x0A024000, 0, 4 );
+	// EIM_CS0RCR2  
+	// 0000 0000 0   000 00 00 0 010  0 001 
+	//           APR PAT    RL   RBEA   RBEN
+	// APR = 0   mandatory because MUM = 1
+	// PAT = XXX because APR = 0
+	// RL = 00   because async mode
+	// RBEA = 000  these match RCSA/RCSN from previous field
+	// RBEN = 000
+	// 0000 0000 0000 0000 0000  0000
+	write_kernel_memory( 0x21b800c, 0x00000000, 0, 4 );
+
+	// EIM_CS0WCR1
+	// 0   0    000100 000   000   000  000  010 000 000  000
+	// WAL WBED WWSC   WADVA WADVN WBEA WBEN WEA WEN WCSA WCSN
+	// WAL = 0       use WADVN
+	// WBED = 0      allow BE during write
+	// WWSC = 000100 4 write wait states
+	// WADVA = 000   same as RADVA
+	// WADVN = 000   this sets WE length to 1 (this value +1)
+	// WBEA = 000    same as RBEA
+	// WBEN = 000    same as RBEN
+	// WEA = 010     2 cycles between beginning of access and WE assertion
+	// WEN = 000     1 cycles to end of WE assertion
+	// WCSA = 000    cycles to CS assertion
+	// WCSN = 000    cycles to CS negation
+	// 1000 0111 1110 0001 0001  0100 0101 0001
+	// 8     7    E    1    1     4    5    1
+	// 0000 0111 0000 0100 0000  1000 0000 0000
+	// 0      7    0   4    0     8    0     0
+	// 0000 0100 0000 0000 0000  0100 0000 0000
+	//  0    4    0    0     0    4     0    0
+
+	write_kernel_memory( 0x21b8010, 0x09080800, 0, 4 );
+
+	// EIM_WCR
+	// BCM = 1   free-run BCLK
+	// GBCD = 0  don't divide the burst clock
+	write_kernel_memory( 0x21b8090, 0x1, 0, 4 );
+
+	// EIM_WIAR 
+	// ACLK_EN = 1
+	write_kernel_memory( 0x21b8094, 0x10, 0, 4 );
+
+	return 0;
+}
+
+uint16_t *eim_get(enum eim_type type) {	// returns a pointer to FPGA register 'type' inside the EIM window, or NULL on failure
+	static uint16_t *mem = NULL;	// cached mapping; only assigned after mmap() succeeds
+	static int fd;
+	void *map;
+	if (mem)
+		return ((uint16_t *) (((uint8_t *)mem)+type));
+
+	prep_eim();
+
+	fd = open("/dev/mem", O_RDWR);
+	if (fd == -1) {
+		perror("Couldn't open /dev/mem");
+		return NULL;
+	}
+	map = mmap(NULL, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, fd, EIM_BASE);
+	if (map == MAP_FAILED) {	// keep 'mem' NULL so a later call retries instead of returning MAP_FAILED+type
+		perror("Couldn't mmap EIM region");
+		close(fd);
+		return NULL;
+	}
+	mem = map;
+	return eim_get(type);
+}
+
+int eim_set_direction(int gpio, int is_output) {
+	uint16_t *mem = eim_get(fpga_w_gpioa_dir);
+	if (!mem)
+		return -1;
+	gpio &= ~GPIO_IS_EIM;
+	if (is_output)
+		cached_dir |=  (1<<gpio);
+	else
+		// Clear direction
+		cached_dir &= ~(1<<gpio);
+
+	*mem = cached_dir;
+	return 0;
+}
+
+
+int eim_set_value(int gpio, int value) {
+	uint16_t *mem = eim_get(fpga_w_gpioa_dout);
+	if (!mem)
+		return -1;
+	gpio &= ~GPIO_IS_EIM;
+	if (value)
+		cached_dout |= (1<<gpio);
+	else
+		cached_dout &= ~(1<<gpio);
+	*mem = cached_dout;
+	return 0;
+}
+
+int eim_get_value(int gpio) {
+	uint16_t *mem = eim_get(fpga_r_gpioa_din);
+	if (!mem)
+		return -1;
+	gpio &= ~GPIO_IS_EIM;
+	return (*mem >> gpio)&1;
+}
+
diff --git a/src/sw/eim.h b/src/sw/eim.h
new file mode 100644
index 0000000..ee7def3
--- /dev/null
+++ b/src/sw/eim.h
@@ -0,0 +1,46 @@
+#ifndef __eim_h__
+#define __eim_h__
+
+enum eim_type {
+	fpga_w_test0        = 0x0000,
+	fpga_w_test1        = 0x0002,
+	fpga_w_gpioa_dout   = 0x0010,
+	fpga_w_gpioa_dir    = 0x0012,
+
+	fpga_w_ddr3_p2_cmd  = 0x0020,
+	fpga_w_ddr3_p2_ladr = 0x0022,
+	fpga_w_ddr3_p2_hadr = 0x0024,
+	fpga_w_ddr3_p2_wen  = 0x0026,
+	fpga_w_ddr3_p2_ldat = 0x0028,
+	fpga_w_ddr3_p2_hdat = 0x002A,
+
+	fpga_w_ddr3_p3_cmd  = 0x0030,
+	fpga_w_ddr3_p3_ladr = 0x0032,
+	fpga_w_ddr3_p3_hadr = 0x0034,
+	fpga_w_ddr3_p3_ren  = 0x0036,
+
+	fpga_w_nand_uk_ctl  = 0x0100,
+	fpga_w_nand_power   = 0x0102,
+
+	fpga_r_test0        = 0x1000,
+	fpga_r_test1        = 0x1002,
+	fpga_r_ddr3_cal     = 0x1004,
+	fpga_r_gpioa_din    = 0x1010,
+
+	fpga_r_ddr3_p2_stat = 0x1020,
+	fpga_r_ddr3_p3_stat = 0x1030,
+	fpga_r_ddr3_p3_ldat = 0x1032,
+	fpga_r_ddr3_p3_hdat = 0x1034,
+
+	// every time I read it auto-advances the queue
+	fpga_r_nand_uk_data = 0x1100,
+	fpga_r_nand_uk_stat = 0x1102,
+
+	fpga_r_ddr3_v_minor = 0x1FFC,
+	fpga_r_ddr3_v_major = 0x1FFE,
+
+};
+
+uint16_t *eim_get(enum eim_type type);
+
+#endif // __eim_h__
diff --git a/src/sw/gpio.c b/src/sw/gpio.c
new file mode 100644
index 0000000..43a67f4
--- /dev/null
+++ b/src/sw/gpio.c
@@ -0,0 +1,163 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <stdint.h>
+
+#include "gpio.h"
+
+#define GPIO_PATH "/sys/class/gpio"
+#define EXPORT_PATH GPIO_PATH "/export"
+#define UNEXPORT_PATH GPIO_PATH "/unexport"
+
+static int gpio_is_exported(int gpio) {
+	char gpio_path[256];
+	struct stat buf;
+	int ret;
+	snprintf(gpio_path, sizeof(gpio_path)-1, GPIO_PATH "/gpio%d/direction", gpio);
+	ret = stat(gpio_path, &buf);
+	if (ret == -1)
+		return 0;
+	return 1;
+}
+
+
+static int gpio_export_unexport(char *path, int gpio) {	// writes the GPIO number to the sysfs file 'path'; returns 0 or -errno
+	int fd;
+	char str[16];
+	int bytes;
+	int err;	// errno snapshot: perror()/fprintf()/close() may overwrite errno
+	fd = open(path, O_WRONLY);
+	if (fd == -1) {
+		err = errno;
+		perror("Unable to find GPIO files -- /sys/class/gpio enabled?");
+		return -err;
+	}
+	bytes = snprintf(str, sizeof(str)-1, "%d", gpio) + 1;	// +1: the terminating NUL is written as well
+
+	if (-1 == write(fd, str, bytes)) {
+		err = errno;
+		fprintf(stderr, "Unable to modify gpio%d: %s\n", gpio, strerror(err));
+		close(fd);
+		return -err;
+	}
+
+	close(fd);
+	return 0;
+}
+
+int gpio_export(int gpio) {
+	if (gpio&GPIO_IS_EIM)
+		return 0;
+	if (gpio_is_exported(gpio))
+		return 0;
+	return gpio_export_unexport(EXPORT_PATH, gpio);
+}
+
+int gpio_unexport(int gpio) {
+	if (gpio&GPIO_IS_EIM)
+		return 0;
+	if (!gpio_is_exported(gpio))
+		return 0;
+	return gpio_export_unexport(UNEXPORT_PATH, gpio);
+}
+
+int gpio_set_direction(int gpio, int is_output) {	// returns 0 on success, negative on failure (-errno for sysfs pins)
+	char gpio_path[256];
+	int fd;
+	int ret;
+
+	if (gpio&GPIO_IS_EIM)
+		return eim_set_direction(gpio&(~GPIO_IS_EIM), is_output);
+
+	snprintf(gpio_path, sizeof(gpio_path)-1, GPIO_PATH "/gpio%d/direction", gpio);
+	fd = open(gpio_path, O_WRONLY);
+	if (fd == -1) {
+		int err = errno;	// snapshot before fprintf() can overwrite it
+		fprintf(stderr, "Direction file: [%s]\n", gpio_path);
+		fprintf(stderr, "Couldn't open direction file for gpio: %s\n", strerror(err));
+		return -err;
+	}
+
+	if (is_output)
+		ret = write(fd, "out", 4);
+	else
+		ret = write(fd, "in", 3);
+	if (ret == -1) {
+		int err = errno;	// snapshot before perror()/close() can overwrite it
+		perror("Couldn't set output direction");
+		close(fd);
+		return -err;
+	}
+
+	close(fd);
+	return 0;
+}
+
+
+int gpio_set_value(int gpio, int value) {	// returns 0 on success, negative on failure (-errno for sysfs pins)
+	char gpio_path[256];
+	int fd;
+	int ret;
+
+	if (gpio&GPIO_IS_EIM)
+		return eim_set_value(gpio&(~GPIO_IS_EIM), value);
+
+	snprintf(gpio_path, sizeof(gpio_path)-1, GPIO_PATH "/gpio%d/value", gpio);
+	fd = open(gpio_path, O_WRONLY);
+	if (fd == -1) {
+		int err = errno;	// snapshot before fprintf() can overwrite it
+		fprintf(stderr, "Value file: [%s]\n", gpio_path);
+		fprintf(stderr, "Couldn't open value file for gpio: %s\n", strerror(err));
+		return -err;
+	}
+
+	if (value)
+		ret = write(fd, "1", 2);
+	else
+		ret = write(fd, "0", 2);
+
+	if (ret == -1) {
+		int err = errno;	// snapshot before fprintf()/close() can overwrite it
+		fprintf(stderr, "Couldn't set GPIO %d output value: %s\n", gpio, strerror(err));
+		close(fd);
+		return -err;
+	}
+
+	close(fd);
+	return 0;
+}
+
+
+int gpio_get_value(int gpio) {	// returns the pin level (0 or 1), or a negative value on failure
+	char gpio_path[256];
+	int fd;
+	ssize_t n;
+	if (gpio&GPIO_IS_EIM)
+		return eim_get_value(gpio&(~GPIO_IS_EIM));
+
+	snprintf(gpio_path, sizeof(gpio_path)-1, GPIO_PATH "/gpio%d/value", gpio);
+	fd = open(gpio_path, O_RDONLY);
+	if (fd == -1) {
+		int err = errno;	// snapshot before perror() can overwrite it
+		perror("Couldn't open value file for gpio");
+		return -err;
+	}
+	n = read(fd, gpio_path, sizeof(gpio_path));
+	if (n <= 0) {
+		int err = (n < 0) ? errno : EIO;	// a 0-byte read sets no errno; report EIO rather than a stale (possibly 0) value
+		fprintf(stderr, "Couldn't get input value: %s\n", strerror(err));
+		close(fd);
+		return -err;
+	}
+
+	close(fd);
+	return gpio_path[0] != '0';
+}
+
+
diff --git a/src/sw/gpio.h b/src/sw/gpio.h
new file mode 100644
index 0000000..59716c2
--- /dev/null
+++ b/src/sw/gpio.h
@@ -0,0 +1,21 @@
+#ifndef __GPIO_H__
+#define __GPIO_H__
+
+#define GPIO_IS_EIM (0x80000000)
+
+enum gpio_dir {
+	GPIO_IN = 0,
+	GPIO_OUT = 1,
+};
+
+int gpio_export(int gpio);
+int gpio_unexport(int gpio);
+int gpio_set_direction(int gpio, int is_output);
+int gpio_set_value(int gpio, int value);
+int gpio_get_value(int gpio);
+
+
+int eim_set_direction(int gpio, int is_output);
+int eim_set_value(int gpio, int value);
+int eim_get_value(int gpio);
+#endif /* __GPIO_H__ */
diff --git a/src/sw/novena-fpga.c b/src/sw/novena-fpga.c
new file mode 100644
index 0000000..3a7735c
--- /dev/null
+++ b/src/sw/novena-fpga.c
@@ -0,0 +1,1032 @@
+//#define _GNU_SOURCE // for O_DIRECT
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+//#include <sys/types.h>
+//#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include "gpio.h"
+#include "sd.h"
+
+/** Definitions for Novena EIM interface */
+#define CS_PIN    GPIO_IS_EIM | 3
+#define MISO_PIN  GPIO_IS_EIM | 0
+#define CLK_PIN   GPIO_IS_EIM | 4
+#define MOSI_PIN  GPIO_IS_EIM | 5
+#define POWER_PIN 17 //GPIO1_IO17
+
+static int fd = 0;
+static int   *mem_32 = 0;
+static short *mem_16 = 0;
+static char  *mem_8  = 0;
+static int *prev_mem_range = 0;
+
+#define FPGA_REG_OFFSET    0x08040000
+#define FPGA_CS1_REG_OFFSET    0x0C040000
+
+#define FPGA_MAP(x)         ( (x - FPGA_REG_OFFSET) >> 1 )
+#define F(x)                ( (x - FPGA_REG_OFFSET) >> 1 )
+#define F1(x)                ( (x - FPGA_CS1_REG_OFFSET) >> 3 )
+
+#define FPGA_W_TEST0       0x08040000
+#define FPGA_W_TEST1       0x08040002
+
+#define FPGA_W_ROMULATE_CTL 0x08040010
+
+#define FPGA_R_TEST0        0x08041000
+#define FPGA_R_TEST1        0x08041002
+
+#define FPGA_R_ROMULATE_STAT 0x08041100
+#define FPGA_R_ROMULATE_CNT  0x08041102
+
+#define FPGA_R_ROMADR_STAT 0x08041104
+#define FPGA_R_ROMADR_CNT  0x08041106
+#define FPGA_R_ROMADR_DL   0x08041108
+#define FPGA_R_ROMADR_DH   0x0804110A
+
+#define FPGA_R_ROMOUT_STAT 0x0804110C
+#define FPGA_R_ROMOUT_CNT  0x0804110E
+
+#define FPGA_R_DDR3_V_MINOR 0x08041FFC
+#define FPGA_R_DDR3_V_MAJOR 0x08041FFE
+
+// burst access registers (in CS1 bank -- only 64-bit access allowed)
+#define FPGA_WB_LOOP0       0x0C040000
+#define FPGA_WB_LOOP1       0x0C040008
+
+#define FPGA_RB_LOOP0       0x0C041000
+#define FPGA_RB_LOOP1       0x0C041008
+
+#define FPGA_RB_DDR3_RD_DATA 0x0C041100
+#define FPGA_RB_DDR3_RD_STAT 0x0C041108
+
+
+int read_kernel_memory(long offset, int virtualized, int size) {  // returns the value read (size 1, 2, else 4 bytes), or -1 if open/mmap fails
+  int result;
+
+  int *mem_range = (int *)(offset & ~0xFFFF);
+  if( !mem_32 || mem_range != prev_mem_range ) {  // (re)map when the 64KB window changes or no valid mapping is cached
+    //        fprintf(stderr, "New range detected.  Reopening at memory range %p\n", mem_range);
+    prev_mem_range = mem_range;
+
+    if(mem_32)
+      munmap(mem_32, 0xFFFF);
+    if(fd)
+      close(fd);
+    mem_32 = 0;  // no valid mapping until the mmap() below succeeds
+    if(virtualized) {
+      fd = open("/dev/kmem", O_RDWR);
+      if( fd < 0 ) {
+	perror("Unable to open /dev/kmem");
+	fd = 0;
+	return -1;
+      }
+    }
+    else {
+      fd = open("/dev/mem", O_RDWR);
+      if( fd < 0 ) {
+	perror("Unable to open /dev/mem");
+	fd = 0;
+	return -1;
+      }
+    }
+
+    mem_32 = mmap(0, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset&~0xFFFF);
+    if( MAP_FAILED == (void *)mem_32 ) {  // compare as a pointer; an (int) cast truncates on 64-bit hosts
+      perror("Unable to mmap file");
+      mem_32 = 0;  // never leave MAP_FAILED cached, or the next call would dereference it
+      if( -1 == close(fd) )
+	perror("Also couldn't close file");
+
+      fd=0;
+      return -1;
+    }
+    mem_16 = (short *)mem_32;
+    mem_8  = (char  *)mem_32;
+  }
+
+  int scaled_offset = (offset-(offset&~0xFFFF));
+  //    fprintf(stderr, "Returning offset 0x%08x\n", scaled_offset);
+  if(size==1)
+    result = mem_8[scaled_offset/sizeof(char)];
+  else if(size==2)
+    result = mem_16[scaled_offset/sizeof(short)];
+  else
+    result = mem_32[scaled_offset/sizeof(int)];  // mem_32 is int*, so index in int units (long is 8 bytes on LP64)
+
+  return result;
+}
+
+#define TEST_LEN 32768
+
+void test_fpga(void) {
+  volatile unsigned short *cs0;
+  int i;
+  unsigned short test[TEST_LEN];
+  unsigned short tval;
+  unsigned int iters = 0, errs = 0;
+
+  if(mem_32)
+    munmap(mem_32, 0xFFFF);
+  if(fd)
+    close(fd);
+
+  fd = open("/dev/mem", O_RDWR);
+  if( fd < 0 ) {
+    perror("Unable to open /dev/mem");
+    fd = 0;
+    return;
+  }
+
+  mem_32 = mmap(0, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0x08000000);
+  cs0 = (volatile unsigned short *)mem_32;
+
+  i = 0;
+  while(1) {
+    for( i = 0; i < TEST_LEN; i ++ ) {
+      test[i] = (unsigned short) rand();
+#if 0
+      if( !(i % 16) )
+	printf( "\n" );
+      printf( "%04x ", test[i] );
+#endif
+    }
+
+    for( i = 0; i < TEST_LEN; i ++ ) {
+      cs0[i] = test[i];
+    }
+#if 0
+    printf( "\n. . . . . . . . .\n" );
+#endif
+    
+    for( i = 0; i < TEST_LEN; i ++ ) {
+      iters++;
+#if 0
+      if( !(i % 16) )
+	printf( "\n" );
+      printf( "%04x ", cs0[i] );
+#else
+      tval = cs0[i];
+      if( test[i] != tval ) {
+	printf( "\nFail at %d: wrote %04x, got %04x/%04x\n", i, test[i], tval, cs0[i] );
+	errs++;
+      } else {
+#if 0
+	if( !(i % 16) )
+	  printf( "\n" );
+	printf( "%04x ", test[i] );
+#endif
+      }
+#endif
+    }
+#if 0
+    printf( "\n---------------\n" );
+#endif
+    if( !(iters % 0x100000) ) {
+      printf( "%d iters, %d errs\n", iters, errs );
+      fflush(stdout);
+    }
+  }
+    
+}
+
+void romulate(int onoff) {
+  volatile unsigned short *cs0;
+
+  if(mem_16)
+    munmap(mem_16, 0xFFFF);
+  if(fd)
+    close(fd);
+
+  fd = open("/dev/mem", O_RDWR);
+  if( fd < 0 ) {
+    perror("Unable to open /dev/mem");
+    fd = 0;
+    return;
+  }
+
+  mem_16 = mmap(0, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0x08040000);
+  cs0 = (volatile unsigned short *)mem_16;
+
+  if( onoff )
+    cs0[F(FPGA_W_ROMULATE_CTL)] |= 0x1;
+  else
+    cs0[F(FPGA_W_ROMULATE_CTL)] &= 0xFFFE;
+
+}
+
+void rom_uk(int mode) {  // mode 0: print UK FIFO status, 1: set then clear control bit 0x4 (reset), 2: read out and print entries
+  volatile unsigned short *cs0;
+  int i, tot;
+
+  if(mem_16)
+    munmap(mem_16, 0xFFFF);
+  if(fd)
+    close(fd);
+  fd = open("/dev/mem", O_RDWR);
+  if( fd < 0 ) {
+    perror("Unable to open /dev/mem");
+    fd = 0;
+    return;
+  }
+
+  mem_16 = mmap(0, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0x08040000);
+  if( mem_16 == MAP_FAILED ) {  // do not dereference MAP_FAILED through cs0 below
+    perror("Unable to mmap /dev/mem");
+    mem_16 = 0;
+    return;
+  }
+  cs0 = (volatile unsigned short *)mem_16;
+
+  if( mode == 0 ) {  // status update
+    printf( "ROMulator UK FIFO reports %d entries, full: %d, over: %d, empty: %d\n",
+	    cs0[F(FPGA_R_ROMULATE_CNT)] & 0x3FF,
+	    cs0[F(FPGA_R_ROMULATE_STAT)] & 0x200 ? 1 : 0,
+	    cs0[F(FPGA_R_ROMULATE_STAT)] & 0x400 ? 1 : 0,
+	    cs0[F(FPGA_R_ROMULATE_STAT)] & 0x100 ? 1 : 0 );
+    printf( "ROMulator control register: %04x\n", cs0[F(FPGA_W_ROMULATE_CTL)] );
+  } else if( mode == 1 ) { // reset count
+    printf( "resetting ROMulator UK FIFO\n" );
+    cs0[F(FPGA_W_ROMULATE_CTL)] |= 0x4;
+    printf( "ROMulator control register: %04x\n", cs0[F(FPGA_W_ROMULATE_CTL)] );
+    cs0[F(FPGA_W_ROMULATE_CTL)] &= ~0x4;
+    printf( "ROMulator control register: %04x\n", cs0[F(FPGA_W_ROMULATE_CTL)] );
+  } else if( mode == 2 ) { // read out
+    tot = cs0[F(FPGA_R_ROMULATE_CNT)] & 0x3FF;
+    printf( "Reading out %d entries\n", tot );
+    for( i = 0; tot > 0; i++, tot-- ) {  // one FIFO entry per iteration
+      cs0[F(FPGA_W_ROMULATE_CTL)] |= 0x2; // pulse read
+      cs0[F(FPGA_W_ROMULATE_CTL)] &= ~0x2;
+      printf( "%d: %02x\n", i, cs0[F(FPGA_R_ROMULATE_STAT)] & 0xFF );
+    }
+  }
+}
+
+void rom_adr(int mode) {  // mode 0: print ADR FIFO status, 1: set then clear control bit 0x10 (reset), 2: read out and print entries
+  volatile unsigned short *cs0;
+  int i, tot;
+
+  if(mem_16)
+    munmap(mem_16, 0xFFFF);
+  if(fd)
+    close(fd);
+  fd = open("/dev/mem", O_RDWR);
+  if( fd < 0 ) {
+    perror("Unable to open /dev/mem");
+    fd = 0;
+    return;
+  }
+
+  mem_16 = mmap(0, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0x08040000);
+  if( mem_16 == MAP_FAILED ) {  // do not dereference MAP_FAILED through cs0 below
+    perror("Unable to mmap /dev/mem");
+    mem_16 = 0;
+    return;
+  }
+  cs0 = (volatile unsigned short *)mem_16;
+
+  if( mode == 0 ) {  // status update
+    printf( "ROMulator ADR FIFO reports %d entries, full: %d, over: %d, empty: %d\n",
+	    cs0[F(FPGA_R_ROMADR_CNT)] & 0x3FF,
+	    cs0[F(FPGA_R_ROMADR_STAT)] & 0x200 ? 1 : 0,
+	    cs0[F(FPGA_R_ROMADR_STAT)] & 0x400 ? 1 : 0,
+	    cs0[F(FPGA_R_ROMADR_STAT)] & 0x100 ? 1 : 0 );
+    printf( "ROMulator control register: %04x\n", cs0[F(FPGA_W_ROMULATE_CTL)] );
+  } else if( mode == 1 ) { // reset count
+    printf( "resetting ROMulator ADR FIFO\n" );
+    cs0[F(FPGA_W_ROMULATE_CTL)] |= 0x10;
+    printf( "ROMulator control register: %04x\n", cs0[F(FPGA_W_ROMULATE_CTL)] );
+    cs0[F(FPGA_W_ROMULATE_CTL)] &= ~0x10;
+    printf( "ROMulator control register: %04x\n", cs0[F(FPGA_W_ROMULATE_CTL)] );
+  } else if( mode == 2 ) { // read out
+    tot = cs0[F(FPGA_R_ROMADR_CNT)] & 0x3FF;
+    printf( "Reading out %d entries\n", tot );  // report the FIFO count, not the loop index
+    for( i = 1; tot > 0; i++, tot-- ) {  // entries are labelled starting from 1, as before
+      cs0[F(FPGA_W_ROMULATE_CTL)] |= 0x8; // pulse read
+      cs0[F(FPGA_W_ROMULATE_CTL)] &= ~0x8;
+      printf( "%d: %06x\n", i, ((cs0[F(FPGA_R_ROMADR_DH)] & 0xFF) << 16) |
+	      (cs0[F(FPGA_R_ROMADR_DL)] & 0xFFFF));
+    }
+  }
+}
+
+
+void rom_out(int mode) {  // mode 0: print out FIFO status, 1: set then clear control bit 0x40 (reset), 2: read out and print entries
+  volatile unsigned short *cs0;
+  int i, tot;
+
+  if(mem_16)
+    munmap(mem_16, 0xFFFF);
+  if(fd)
+    close(fd);
+  fd = open("/dev/mem", O_RDWR);
+  if( fd < 0 ) {
+    perror("Unable to open /dev/mem");
+    fd = 0;
+    return;
+  }
+
+  mem_16 = mmap(0, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0x08040000);
+  if( mem_16 == MAP_FAILED ) {  // do not dereference MAP_FAILED through cs0 below
+    perror("Unable to mmap /dev/mem");
+    mem_16 = 0;
+    return;
+  }
+  cs0 = (volatile unsigned short *)mem_16;
+
+  if( mode == 0 ) {  // status update
+    printf( "ROMulator out FIFO reports %d entries, full: %d, over: %d, empty: %d\n",
+	    cs0[F(FPGA_R_ROMOUT_CNT)] & 0x3FF,
+	    cs0[F(FPGA_R_ROMOUT_STAT)] & 0x200 ? 1 : 0,
+	    cs0[F(FPGA_R_ROMOUT_STAT)] & 0x400 ? 1 : 0,
+	    cs0[F(FPGA_R_ROMOUT_STAT)] & 0x100 ? 1 : 0 );
+    printf( "ROMulator control register: %04x\n", cs0[F(FPGA_W_ROMULATE_CTL)] );
+  } else if( mode == 1 ) { // reset count
+    printf( "resetting ROMulator out FIFO\n" );
+    cs0[F(FPGA_W_ROMULATE_CTL)] |= 0x40;
+    printf( "ROMulator control register: %04x\n", cs0[F(FPGA_W_ROMULATE_CTL)] );
+    cs0[F(FPGA_W_ROMULATE_CTL)] &= ~0x40;
+    printf( "ROMulator control register: %04x\n", cs0[F(FPGA_W_ROMULATE_CTL)] );
+  } else if( mode == 2 ) { // read out
+    tot = cs0[F(FPGA_R_ROMOUT_CNT)] & 0x3FF;
+    printf( "Reading out %d entries\n", tot );
+    for( i = 0; tot > 0; i++, tot-- ) {  // one FIFO entry per iteration
+      cs0[F(FPGA_W_ROMULATE_CTL)] |= 0x20; // pulse read
+      cs0[F(FPGA_W_ROMULATE_CTL)] &= ~0x20;
+      printf( "%d: %02x\n", i, cs0[F(FPGA_R_ROMOUT_STAT)] & 0xFF );
+    }
+  }
+}
+
+int write_kernel_memory(long offset, long value, int virtualized, int size) {  // returns the previous value, or -1 if the window could not be mapped
+  int old_value = read_kernel_memory(offset, virtualized, size);  // also maps the 64KB window containing offset
+  int scaled_offset = (offset-(offset&~0xFFFF));
+  if(!mem_32 || (void *)mem_32 == MAP_FAILED) return -1;  // mapping failed: nothing safe to write through
+  switch(size) {
+  case 1:  mem_8[scaled_offset/sizeof(char)]   = value; break;
+  case 2:  mem_16[scaled_offset/sizeof(short)] = value; break;
+  default: mem_32[scaled_offset/sizeof(int)]   = value; break;  // index in int units: mem_32 is int*, long is 8 bytes on LP64
+  }
+  return old_value;
+}
+
+void print_usage(char *progname) {
+  printf("Usage:\n"
+        "%s [-h]\n"
+        "\t-h  This help message\n"
+        "\t-s  Set up FPGA comms parameter\n"
+	"\t-t  Test FPGA memory interface\n"
+	 "", progname);
+}
+
+
+//static inline int swab(int arg) {
+//  return ((arg&0xff)<<24) | ((arg&0xff00)<<8) | ((arg&0xff0000)>>8) | ((arg&0xff000000)>>24);
+//}
+
+void setup_fpga() {
+  int i;
+  printf( "setting up EIM CS0 (register interface) pads and configuring timing\n" );
+  // set up pads to be mapped to EIM
+  for( i = 0; i < 16; i++ ) {
+    write_kernel_memory( 0x20e0114 + i*4, 0x0, 0, 4 );  // mux mapping
+    write_kernel_memory( 0x20e0428 + i*4, 0xb0b1, 0, 4 ); // pad strength config'd for a 100MHz rate 
+  }
+
+  // mux mapping
+  write_kernel_memory( 0x20e046c - 0x314, 0x0, 0, 4 ); // BCLK
+  write_kernel_memory( 0x20e040c - 0x314, 0x0, 0, 4 ); // CS0
+  write_kernel_memory( 0x20e0410 - 0x314, 0x0, 0, 4 ); // CS1
+  write_kernel_memory( 0x20e0414 - 0x314, 0x0, 0, 4 ); // OE
+  write_kernel_memory( 0x20e0418 - 0x314, 0x0, 0, 4 ); // RW
+  write_kernel_memory( 0x20e041c - 0x314, 0x0, 0, 4 ); // LBA
+  write_kernel_memory( 0x20e0468 - 0x314, 0x0, 0, 4 ); // WAIT
+  write_kernel_memory( 0x20e0408 - 0x314, 0x0, 0, 4 ); // A16
+  write_kernel_memory( 0x20e0404 - 0x314, 0x0, 0, 4 ); // A17
+  write_kernel_memory( 0x20e0400 - 0x314, 0x0, 0, 4 ); // A18
+
+  // pad strength
+  write_kernel_memory( 0x20e046c, 0xb0b1, 0, 4 ); // BCLK
+  write_kernel_memory( 0x20e040c, 0xb0b1, 0, 4 ); // CS0
+  write_kernel_memory( 0x20e0410, 0xb0b1, 0, 4 ); // CS1
+  write_kernel_memory( 0x20e0414, 0xb0b1, 0, 4 ); // OE
+  write_kernel_memory( 0x20e0418, 0xb0b1, 0, 4 ); // RW
+  write_kernel_memory( 0x20e041c, 0xb0b1, 0, 4 ); // LBA
+  write_kernel_memory( 0x20e0468, 0xb0b1, 0, 4 ); // WAIT
+  write_kernel_memory( 0x20e0408, 0xb0b1, 0, 4 ); // A16
+  write_kernel_memory( 0x20e0404, 0xb0b1, 0, 4 ); // A17
+  write_kernel_memory( 0x20e0400, 0xb0b1, 0, 4 ); // A18
+
+  write_kernel_memory( 0x020c4080, 0xcf3, 0, 4 ); // ungate eim slow clocks
+
+  // rework timing for sync use
+  // 0011 0  001 1   001    0   001 00  00  1  011  1    0   1   1   1   1   1   1
+  // PSZ  WP GBC AUS CSREC  SP  DSZ BCS BCD WC BL   CREP CRE RFL WFL MUM SRD SWR CSEN
+  //
+  // PSZ = 0011  64 words page size
+  // WP = 0      (not protected)
+  // GBC = 001   min 1 cycles between chip select changes
+  // AUS = 0     address shifted according to port size
+  // CSREC = 001 min 1 cycles between CS, OE, WE signals
+  // SP = 0      no supervisor protect (user mode access allowed)
+  // DSZ = 001   16-bit port resides on DATA[15:0]
+  // BCS = 00    0 clock delay for burst generation
+  // BCD = 00    divide EIM clock by 0 for burst clock
+  // WC = 1      write accesses are continuous burst length
+  // BL = 011    32 word memory wrap length
+  // CREP = 1    non-PSRAM, set to 1
+  // CRE = 0     CRE is disabled
+  // RFL = 1     fixed latency reads
+  // WFL = 1     fixed latency writes
+  // MUM = 1     multiplexed mode enabled
+  // SRD = 1     synch reads
+  // SWR = 1     synch writes
+  // CSEN = 1    chip select is enabled
+
+  //  write_kernel_memory( 0x21b8000, 0x5191C0B9, 0, 4 );
+  write_kernel_memory( 0x21b8000, 0x31910BBF, 0, 4 );
+
+  // EIM_CS0GCR2   
+  //  MUX16_BYP_GRANT = 1
+  //  ADH = 0 (0 cycles)
+  //  0x1000
+  write_kernel_memory( 0x21b8004, 0x1000, 0, 4 );
+
+
+  // EIM_CS0RCR1   
+  // 00 000101 0 000   0   000   0 000 0 000 0 000 0 000
+  //    RWSC     RADVA RAL RADVN   OEA   OEN   RCSA  RCSN
+  // RWSC 000101    5 cycles for reads to happen
+  //
+  // 0000 0111 0000   0011   0000 0000 0000 0000
+  //  0    7     0     3      0  0    0    0
+  // 0000 0101 0000   0000   0 000 0 000 0 000 0 000
+//  write_kernel_memory( 0x21b8008, 0x05000000, 0, 4 );
+//  write_kernel_memory( 0x21b8008, 0x0A024000, 0, 4 );
+  write_kernel_memory( 0x21b8008, 0x09014000, 0, 4 );
+  // EIM_CS0RCR2  
+  // 0000 0000 0   000 00 00 0 010  0 001 
+  //           APR PAT    RL   RBEA   RBEN
+  // APR = 0   mandatory because MUM = 1
+  // PAT = XXX because APR = 0
+  // RL = 00   because async mode
+  // RBEA = 000  these match RCSA/RCSN from previous field
+  // RBEN = 000
+  // 0000 0000 0000 0000 0000  0000
+  write_kernel_memory( 0x21b800c, 0x00000000, 0, 4 );
+
+  // EIM_CS0WCR1
+  // 0   0    000100 000   000   000  000  010 000 000  000
+  // WAL WBED WWSC   WADVA WADVN WBEA WBEN WEA WEN WCSA WCSN
+  // WAL = 0       use WADVN
+  // WBED = 0      allow BE during write
+  // WWSC = 000100 4 write wait states
+  // WADVA = 000   same as RADVA
+  // WADVN = 000   this sets WE length to 1 (this value +1)
+  // WBEA = 000    same as RBEA
+  // WBEN = 000    same as RBEN
+  // WEA = 010     2 cycles between beginning of access and WE assertion
+  // WEN = 000     1 cycles to end of WE assertion
+  // WCSA = 000    cycles to CS assertion
+  // WCSN = 000    cycles to CS negation
+  // 1000 0111 1110 0001 0001  0100 0101 0001
+  // 8     7    E    1    1     4    5    1
+  // 0000 0111 0000 0100 0000  1000 0000 0000
+  // 0      7    0   4    0     8    0     0
+  // 0000 0100 0000 0000 0000  0100 0000 0000
+  //  0    4    0    0     0    4     0    0
+
+  write_kernel_memory( 0x21b8010, 0x09080800, 0, 4 );
+  //  write_kernel_memory( 0x21b8010, 0x02040400, 0, 4 );
+
+  // EIM_WCR
+  // BCM = 1   free-run BCLK
+  // GBCD = 0  don't divide the burst clock
+  write_kernel_memory( 0x21b8090, 0x701, 0, 4 );
+
+  // EIM_WIAR 
+  // ACLK_EN = 1
+  write_kernel_memory( 0x21b8094, 0x10, 0, 4 );
+
+  printf( "done.\n" );
+}
+
+void setup_fpga_cs1() { 
+  int i;
+  printf( "setting up EIM CS1 (burst interface) pads and configuring timing\n" );
+  // ASSUME: setup_fpga() is already called to configure gpio mux setting.
+  // this just gets the pads set to high-speed mode
+
+  // set up pads to be mapped to EIM
+  for( i = 0; i < 16; i++ ) {
+    write_kernel_memory( 0x20e0428 + i*4, 0xb0f1, 0, 4 ); // pad strength config'd for a 200MHz rate 
+  }
+
+  // pad strength
+  write_kernel_memory( 0x20e046c, 0xb0f1, 0, 4 ); // BCLK
+  //  write_kernel_memory( 0x20e040c, 0xb0b1, 0, 4 ); // CS0
+  write_kernel_memory( 0x20e0410, 0xb0f1, 0, 4 ); // CS1
+  write_kernel_memory( 0x20e0414, 0xb0f1, 0, 4 ); // OE
+  write_kernel_memory( 0x20e0418, 0xb0f1, 0, 4 ); // RW
+  write_kernel_memory( 0x20e041c, 0xb0f1, 0, 4 ); // LBA
+  write_kernel_memory( 0x20e0468, 0xb0f1, 0, 4 ); // WAIT
+  write_kernel_memory( 0x20e0408, 0xb0f1, 0, 4 ); // A16
+  write_kernel_memory( 0x20e0404, 0xb0f1, 0, 4 ); // A17
+  write_kernel_memory( 0x20e0400, 0xb0f1, 0, 4 ); // A18
+
+  // EIM_CS1GCR1   
+  // 0011 0  001 1   001    0   001 00  00  1  011  1    0   1   1   1   1   1   1
+  // PSZ  WP GBC AUS CSREC  SP  DSZ BCS BCD WC BL   CREP CRE RFL WFL MUM SRD SWR CSEN
+  //
+  // PSZ = 0011  64 words page size
+  // WP = 0      (not protected)
+  // GBC = 001   min 1 cycles between chip select changes
+  // AUS = 0     address shifted according to port size
+  // CSREC = 001 min 1 cycles between CS, OE, WE signals
+  // SP = 0      no supervisor protect (user mode access allowed)
+  // DSZ = 001   16-bit port resides on DATA[15:0]
+  // BCS = 00    0 clock delay for burst generation
+  // BCD = 00    divide EIM clock by 0 for burst clock
+  // WC = 1      write accesses are continuous burst length
+  // BL = 011    32 word memory wrap length
+  // CREP = 1    non-PSRAM, set to 1
+  // CRE = 0     CRE is disabled
+  // RFL = 1     fixed latency reads
+  // WFL = 1     fixed latency writes
+  // MUM = 1     multiplexed mode enabled
+  // SRD = 1     synch reads
+  // SWR = 1     synch writes
+  // CSEN = 1    chip select is enabled
+
+  // 0101 0111 1111    0001 1100  0000  1011   1   0   0   1
+  // 0x5  7    F        1   C     0     B    9
+
+  // 0101 0001 1001    0001 1100  0000  1011   1001
+  // 5     1    9       1    c     0     B      9
+
+  // 0011 0001 1001    0001 0000  1011  1011   1111
+
+  write_kernel_memory( 0x21b8000 + 0x18, 0x31910BBF, 0, 4 );
+
+  // EIM_CS1GCR2   
+  //  MUX16_BYP_GRANT = 1
+  //  ADH = 0 (0 cycles)
+  //  0x1000
+  write_kernel_memory( 0x21b8004 + 0x18, 0x1000, 0, 4 );
+
+
+  // 9 cycles is total length of read
+  // 2 cycles for address
+  // +4 more cycles for first data to show up
+
+  // EIM_CS1RCR1   
+  // 00 000100 0 000   0   001   0 010 0 000 0 000 0 000
+  //    RWSC     RADVA RAL RADVN   OEA   OEN   RCSA  RCSN
+  //
+  // 00 001001 0 000   0   001   0 110 0 000 0 000 0 000
+  //    RWSC     RADVA RAL RADVN   OEA   OEN   RCSA  RCSN
+  //
+  // 0000 0111 0000   0011   0000 0000 0000 0000
+  //  0    7     0     3      0  0    0    0
+  // 0000 0101 0000   0000   0 000 0 000 0 000 0 000
+//  write_kernel_memory( 0x21b8008, 0x05000000, 0, 4 );
+  // 0000 0011 0000   0001   0001 0000 0000 0000
+
+  // 0000 1001 0000   0001   0110 0000 0000 0000
+  // 
+  write_kernel_memory( 0x21b8008 + 0x18, 0x09014000, 0, 4 );
+
+  // EIM_CS1RCR2  
+  // 0000 0000 0   000 00 00 0 010  0 001 
+  //           APR PAT    RL   RBEA   RBEN
+  // APR = 0   mandatory because MUM = 1
+  // PAT = XXX because APR = 0
+  // RL = 00   because async mode
+  // RBEA = 000  these match RCSA/RCSN from previous field
+  // RBEN = 000
+  // 0000 0000 0000 0000 0000  0000
+  write_kernel_memory( 0x21b800c + 0x18, 0x00000200, 0, 4 );
+
+  // EIM_CS1WCR1
+  // 0   0    000010 000   001   000  000  010 000 000  000
+  // WAL WBED WWSC   WADVA WADVN WBEA WBEN WEA WEN WCSA WCSN
+  // WAL = 0       use WADVN
+  // WBED = 0      allow BE during write
+  // WWSC = 000100 4 write wait states
+  // WADVA = 000   same as RADVA
+  // WADVN = 000   this sets WE length to 1 (this value +1)
+  // WBEA = 000    same as RBEA
+  // WBEN = 000    same as RBEN
+  // WEA = 010     2 cycles between beginning of access and WE assertion
+  // WEN = 000     1 cycles to end of WE assertion
+  // WCSA = 000    cycles to CS assertion
+  // WCSN = 000    cycles to CS negation
+  // 1000 0111 1110 0001 0001  0100 0101 0001
+  // 8     7    E    1    1     4    5    1
+  // 0000 0111 0000 0100 0000  1000 0000 0000
+  // 0      7    0   4    0     8    0     0
+  // 0000 0100 0000 0000 0000  0100 0000 0000
+  //  0    4    0    0     0    4     0    0
+
+  // 0000 0010 0000 0000 0000  0010 0000 0000
+  // 0000 0010 0000 0100 0000  0100 0000 0000
+
+  write_kernel_memory( 0x21b8010 + 0x18, 0x02040400, 0, 4 );
+
+  // EIM_WCR
+  // BCM = 1   free-run BCLK
+  // GBCD = 0  divide the burst clock by 1
+  // add timeout watchdog after 1024 bclk cycles
+  write_kernel_memory( 0x21b8090, 0x701, 0, 4 );
+
+  // EIM_WIAR 
+  // ACLK_EN = 1
+  write_kernel_memory( 0x21b8094, 0x10, 0, 4 );
+
+  printf( "resetting CS0 space to 64M and enabling 64M CS1 space.\n" );
+  write_kernel_memory( 0x20e0004, 
+		       (read_kernel_memory(0x20e0004, 0, 4) & 0xFFFFFFC0) |
+		       0x1B, 0, 4);
+
+  printf( "done.\n" );
+}
+
+
+// Number of 16-bit words handled by the rom_* routines below: it sizes their
+// staging buffers and bounds their dump/upload/verify loops
+// (32768 words * 2 bytes = 65536 bytes).
+#define ROM_SIZE  32768    // size in 16-bit words
+
+// Dump ROM_SIZE 16-bit words from the /dev/mem window at physical address
+// 0x08010000 to stdout in hex, 16 words per line, each line prefixed with
+// the byte offset of its first word.
+//
+// Takes over the file-level 'fd' / 'mem_32' mapping state: any previous
+// mapping and descriptor are released first.  If /dev/mem cannot be opened
+// or mapped, the error is reported with perror() and nothing is dumped.
+void rom_dump() {
+  int i;
+  void *map;
+  volatile unsigned short *cs0;
+
+  // release the previous window and clear the globals, so that a failure
+  // below never leaves a stale pointer or descriptor behind
+  if(mem_32) {
+    munmap(mem_32, 0xFFFF);
+    mem_32 = 0;
+  }
+  if(fd) {
+    close(fd);
+    fd = 0;
+  }
+
+  fd = open("/dev/mem", O_RDWR);
+  if( fd < 0 ) {
+    perror("Unable to open /dev/mem");
+    fd = 0;
+    return;
+  }
+
+  // mmap() reports failure with MAP_FAILED, not NULL; check it before the
+  // mapping is dereferenced in the loop below
+  map = mmap(0, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0x08010000);
+  if( map == MAP_FAILED ) {
+    perror("Unable to mmap /dev/mem");
+    close(fd);
+    fd = 0;
+    return;
+  }
+  mem_32 = map;
+  cs0 = (volatile unsigned short *)map;
+
+  for( i = 0; i < ROM_SIZE; i++ ) {
+    if( (i % 16) == 0 ) {
+      printf( "\n%04x: ", i*2 );  // i counts words, the label is a byte offset
+    }
+    printf( "%04hx ", cs0[i] );
+  }
+  printf( "\n" );
+}
+
+
+// Upload a ROM image and verify it.
+//
+// Reads up to ROM_SIZE 16-bit words from the open descriptor 'infile',
+// writes them to the /dev/mem window at physical address 0x08010000, then
+// reads the window back and prints every word that differs
+// (word index, value read back, value expected).
+//
+// A short file is not an error: the rest of the image is zero-filled and a
+// note is printed.  A read error, or a failure to open/map /dev/mem, is
+// reported with perror() and nothing is written.
+//
+// Takes over the file-level 'fd' / 'mem_16' mapping state.
+void rom_upload(int infile) {
+  int bytes, i;
+  unsigned short data[ROM_SIZE];
+  unsigned short readback;
+  void *map;
+  volatile unsigned short *cs0;
+  int errors = 0;
+
+  for( i = 0; i < ROM_SIZE; i++ )
+    data[i] = 0;
+
+  bytes = read(infile, data, ROM_SIZE * sizeof(unsigned short));
+  if( bytes < 0 ) {
+    // do not upload an all-zero image just because the read failed
+    perror("Unable to read input file");
+    return;
+  }
+  if( bytes != (int)(ROM_SIZE * sizeof(unsigned short)) ) {
+    // read() counts bytes; the message reports 16-bit words
+    printf( "Note: read only %d shorts from input file\n",
+            bytes / (int)sizeof(unsigned short) );
+  }
+
+  // release the previous window and clear the globals, so that a failure
+  // below never leaves a stale pointer or descriptor behind
+  if(mem_16) {
+    munmap(mem_16, 0xFFFF);
+    mem_16 = 0;
+  }
+  if(fd) {
+    close(fd);
+    fd = 0;
+  }
+
+  fd = open("/dev/mem", O_RDWR);
+  if( fd < 0 ) {
+    perror("Unable to open /dev/mem");
+    fd = 0;
+    return;
+  }
+
+  // mmap() reports failure with MAP_FAILED, not NULL
+  map = mmap(0, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0x08010000);
+  if( map == MAP_FAILED ) {
+    perror("Unable to mmap /dev/mem");
+    close(fd);
+    fd = 0;
+    return;
+  }
+  mem_16 = map;
+  cs0 = (volatile unsigned short *)map;
+
+  // write it in
+  for( i = 0; i < ROM_SIZE; i++ ) {
+    cs0[i] = data[i];
+  }
+  // check it
+  for( i = 0, errors = 0; i < ROM_SIZE; i++ ) {
+    readback = cs0[i];  // read once, so the value printed is the value compared
+    if( readback != data[i] ) {
+      errors++;
+      printf( "%04x: %04x , %04x\n", i, readback, data[i] );
+    }
+  }
+  if( errors )
+    printf( "Found %d errors on readback check\n", errors );
+  else
+    printf( "ROM uploaded successfully\n" );
+}
+
+// Upload a ROM image, leaving holes for ECC.
+//
+// Reads up to ROM_SIZE 16-bit words from the open descriptor 'infile' and
+// writes them to the /dev/mem window at physical address 0x08000000.  The
+// whole window is first filled with 0xFFFF; the image is then copied in
+// 1024-word runs, each run followed by a 32-word gap that keeps the 0xFFFF
+// fill.  Copying stops once the destination index reaches ROM_SIZE, so the
+// tail of the image that no longer fits is not written.  There is no
+// readback check.
+//
+// A short file is not an error: the rest of the image is zero-filled and a
+// note is printed.  A read error, or a failure to open/map /dev/mem, is
+// reported with perror() and nothing is written.
+//
+// Takes over the file-level 'fd' / 'mem_32' mapping state.
+void rom_upload2(int infile) {  // upload with ECC holes
+  int bytes, i;
+  unsigned short data[ROM_SIZE];
+  void *map;
+  volatile unsigned short *cs0;
+
+  printf( "uploading a ROM with holes for ECC\n" );
+  for( i = 0; i < ROM_SIZE; i++ )
+    data[i] = 0;
+
+  bytes = read(infile, data, ROM_SIZE * sizeof(unsigned short));
+  if( bytes < 0 ) {
+    // do not upload an all-zero image just because the read failed
+    perror("Unable to read input file");
+    return;
+  }
+  if( bytes != (int)(ROM_SIZE * sizeof(unsigned short)) ) {
+    // read() counts bytes; the message reports 16-bit words
+    printf( "Note: read only %d shorts from input file\n",
+            bytes / (int)sizeof(unsigned short) );
+  }
+
+  // release the previous window and clear the globals, so that a failure
+  // below never leaves a stale pointer or descriptor behind
+  if(mem_32) {
+    munmap(mem_32, 0xFFFF);
+    mem_32 = 0;
+  }
+  if(fd) {
+    close(fd);
+    fd = 0;
+  }
+
+  fd = open("/dev/mem", O_RDWR);
+  if( fd < 0 ) {
+    perror("Unable to open /dev/mem");
+    fd = 0;
+    return;
+  }
+
+  // mmap() reports failure with MAP_FAILED, not NULL
+  map = mmap(0, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0x08000000);
+  if( map == MAP_FAILED ) {
+    perror("Unable to mmap /dev/mem");
+    close(fd);
+    fd = 0;
+    return;
+  }
+  mem_32 = map;
+  cs0 = (volatile unsigned short *)map;
+
+  // clear memory
+  for( i = 0; i < ROM_SIZE; i++ ) {
+    cs0[i] = 0xFFFF;
+  }
+
+  // source word i lands at i + 32 * (i / 1024): every completed 1024-word
+  // run pushes the destination 32 words further along
+  for( i = 0; i + ((i / 1024) * 32) < ROM_SIZE; i++ ) {
+    cs0[i + ((i / 1024) * 32)] = data[i];
+  }
+
+}
+
+// Compare a ROM image against memory.
+//
+// Reads up to ROM_SIZE 16-bit words from the open descriptor 'infile' and
+// compares them word by word with the /dev/mem window at physical address
+// 0x08000000, then prints either the number of mismatching words or a
+// success message.  Nothing is written to the window.
+//
+// A short file is not an error: the missing words are compared as zero and
+// a note is printed.  A read error, or a failure to open/map /dev/mem, is
+// reported with perror() and no comparison is made.
+//
+// Takes over the file-level 'fd' / 'mem_32' mapping state.
+void rom_verify(int infile) {
+  int bytes, i;
+  unsigned short data[ROM_SIZE];
+  void *map;
+  volatile unsigned short *cs0;
+  int errors = 0;
+
+  for( i = 0; i < ROM_SIZE; i++ )
+    data[i] = 0;
+
+  bytes = read(infile, data, ROM_SIZE * sizeof(unsigned short));
+  if( bytes < 0 ) {
+    // comparing against an all-zero buffer would only produce noise
+    perror("Unable to read input file");
+    return;
+  }
+  if( bytes != (int)(ROM_SIZE * sizeof(unsigned short)) ) {
+    // read() counts bytes; the message reports 16-bit words
+    printf( "Note: read only %d shorts from input file\n",
+            bytes / (int)sizeof(unsigned short) );
+  }
+
+  // release the previous window and clear the globals, so that a failure
+  // below never leaves a stale pointer or descriptor behind
+  if(mem_32) {
+    munmap(mem_32, 0xFFFF);
+    mem_32 = 0;
+  }
+  if(fd) {
+    close(fd);
+    fd = 0;
+  }
+
+  fd = open("/dev/mem", O_RDWR);
+  if( fd < 0 ) {
+    perror("Unable to open /dev/mem");
+    fd = 0;
+    return;
+  }
+
+  // mmap() reports failure with MAP_FAILED, not NULL
+  map = mmap(0, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0x08000000);
+  if( map == MAP_FAILED ) {
+    perror("Unable to mmap /dev/mem");
+    close(fd);
+    fd = 0;
+    return;
+  }
+  mem_32 = map;
+  cs0 = (volatile unsigned short *)map;
+
+  // check it
+  for( i = 0, errors = 0; i < ROM_SIZE; i++ ) {
+    if( cs0[i] != data[i] )
+      errors++;
+  }
+  if( errors )
+    printf( "Found %d errors on readback check\n", errors );
+  else
+    printf( "No errors found on readback compare\n" );
+}
+
+
+// Write/read smoke test of the window at physical address 0x0C040000.
+//
+// Calls setup_fpga_cs1(), maps the window through /dev/mem, stores two
+// fixed 64-bit patterns into its first two 64-bit words, copies those two
+// words back out and prints, for each word, the value written followed by
+// the value read back.  The two are not compared here; that is left to
+// whoever reads the output.
+//
+// Takes over the file-level 'fd' / 'mem_32' mapping state.
+//
+// Returns 0 in every case, including when /dev/mem cannot be opened or
+// mapped (an error is then reported with perror()).
+int testcs1() {
+  long long i;
+  void *map;
+  volatile unsigned long long *cs1;
+  unsigned long long testbuf[2];
+  unsigned long long origbuf[2];
+
+  setup_fpga_cs1();
+
+  // release the previous window and clear the globals, so that a failure
+  // below never leaves a stale pointer or descriptor behind
+  if(mem_32) {
+    munmap(mem_32, 0xFFFF);
+    mem_32 = 0;
+  }
+  if(fd) {
+    close(fd);
+    fd = 0;
+  }
+
+  fd = open("/dev/mem", O_RDWR);
+  if( fd < 0 ) {
+    perror("Unable to open /dev/mem");
+    fd = 0;
+    return 0;
+  }
+
+  // mmap() reports failure with MAP_FAILED, not NULL
+  map = mmap(0, 0xffff, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0x0C040000);
+  if( map == MAP_FAILED ) {
+    perror("Unable to mmap /dev/mem");
+    close(fd);
+    fd = 0;
+    return 0;
+  }
+  mem_32 = map;
+  cs1 = (volatile unsigned long long *)map;
+
+  origbuf[0] = 0xdeadbeeffeedfaceLL;
+  origbuf[1] = 0x5555aaaa33339999LL;
+
+  // one 64-bit store per word
+  cs1[0] = origbuf[0];
+  cs1[1] = origbuf[1];
+
+  // clear the receive buffer so stale stack contents cannot look like a pass
+  for( i = 0; i < 2; i++ ) {
+    testbuf[i] = 0;
+  }
+
+  // read each word back with its own 8-byte memcpy; the second source is
+  // addressed as &cs1[1] instead of doing arithmetic on a void pointer
+  memcpy(&testbuf[0], (void *)&cs1[0], 8);
+  memcpy(&testbuf[1], (void *)&cs1[1], 8);
+
+  for( i = 0; i < 2; i++ ) {
+    printf( "%lld: %016llx\n", i, origbuf[i] );  // written
+    printf( "%lld: %016llx\n", i, testbuf[i] );  // read back
+  }
+
+  return 0;
+}
+
+
+// Open the ROM image named by the next command-line word.
+// 'argc' / 'argv' describe the words still to be processed, with argv[0]
+// being the expected file name.  Returns a read-only descriptor, or -1
+// after printing a message when the name is missing or cannot be opened.
+static int open_image_arg(int argc, char **argv) {
+  int infile;
+
+  if( argc < 1 || !*argv ) {
+    // option was the last word on the command line
+    printf("Missing ROM image file name\n");
+    return -1;
+  }
+
+  infile = open(*argv, O_RDONLY );
+  if( infile == -1 )
+    printf("Unable to open %s\n", *argv );
+  return infile;
+}
+
+// Command-line entry point.
+//
+// Calls setup_fpga() unconditionally, then processes the options left to
+// right, running the matching action for each one.  -rul, -rcheck and
+// -rul2 consume the following word as the name of a ROM image file.
+//
+// Returns 1 when no option is given, when an option is not recognised, or
+// when a ROM image file name is missing or cannot be opened; 0 otherwise.
+int main(int argc, char **argv) {
+  int infile = -1;
+
+  char *prog = argv[0];
+  argv++;
+  argc--;
+
+  setup_fpga();
+
+  if(!argc) {
+    print_usage(prog);
+    return 1;
+  }
+
+  while(argc > 0) {
+    if(!strcmp(*argv, "-h")) {
+      argc--;
+      argv++;
+      print_usage(prog);
+    }
+    else if(!strcmp(*argv, "-s")) {
+      argc--;
+      argv++;
+      setup_fpga();
+      //      setup_fpga_cs1();
+    }
+    else if(!strcmp(*argv, "-t")) {
+      argc--;
+      argv++;
+      test_fpga();
+    }
+    else if(!strcmp(*argv, "-v")) {
+      argc--;
+      argv++;
+      printf( "FPGA version code: %04hx.%04hx\n",
+              read_kernel_memory(FPGA_R_DDR3_V_MINOR, 0, 2),
+              read_kernel_memory(FPGA_R_DDR3_V_MAJOR, 0, 2) );
+    }
+    else if(!strcmp(*argv, "-rd")) { // dump ROM contents
+      argc--;
+      argv++;
+      rom_dump();
+    }
+    else if(!strcmp(*argv, "-rul")) { // upload a ROM image
+      argc--;
+      argv++;
+      infile = open_image_arg(argc, argv);
+      if( infile == -1 )
+        return 1;
+      argc--;
+      argv++;
+      rom_upload(infile);
+      close(infile);
+    }
+    else if(!strcmp(*argv, "-rcheck")) { // check a ROM image
+      argc--;
+      argv++;
+      infile = open_image_arg(argc, argv);
+      if( infile == -1 )
+        return 1;
+      argc--;
+      argv++;
+      rom_verify(infile);
+      close(infile);
+    }
+    else if(!strcmp(*argv, "-rul2")) { // upload a ROM image with ECC holes
+      argc--;
+      argv++;
+      infile = open_image_arg(argc, argv);
+      if( infile == -1 )
+        return 1;
+      argc--;
+      argv++;
+      rom_upload2(infile);
+      close(infile);
+    }
+
+    else if(!strcmp(*argv, "-uk")) {
+      argc--;
+      argv++;
+      rom_uk(2);
+    }
+    else if(!strcmp(*argv, "-uk_reset")) {
+      argc--;
+      argv++;
+      rom_uk(1);
+    }
+    else if(!strcmp(*argv, "-uk_stat")) {
+      argc--;
+      argv++;
+      rom_uk(0);
+    }
+
+    else if(!strcmp(*argv, "-adr")) {
+      argc--;
+      argv++;
+      rom_adr(2);
+    }
+    else if(!strcmp(*argv, "-adr_reset")) {
+      argc--;
+      argv++;
+      rom_adr(1);
+    }
+    else if(!strcmp(*argv, "-adr_stat")) {
+      argc--;
+      argv++;
+      rom_adr(0);
+    }
+
+    else if(!strcmp(*argv, "-out")) {
+      argc--;
+      argv++;
+      rom_out(2);
+    }
+    else if(!strcmp(*argv, "-out_reset")) {
+      argc--;
+      argv++;
+      rom_out(1);
+    }
+    else if(!strcmp(*argv, "-out_stat")) {
+      argc--;
+      argv++;
+      rom_out(0);
+    }
+
+    else if(!strcmp(*argv, "-testcs1")) {
+      argc--;
+      argv++;
+      testcs1();
+    }
+    else if(!strcmp(*argv, "-romulate")) {
+      argc--;
+      argv++;
+      romulate(1);
+    }
+    else if(!strcmp(*argv, "-bypass")) {
+      argc--;
+      argv++;
+      romulate(0);
+    }
+    else {
+      print_usage(prog);
+      return 1;
+    }
+  }
+
+  return 0;
+}



More information about the Commits mailing list