From 1f7314552530d976377a661434b48540673229fd Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Tue, 14 Mar 2017 23:12:36 -0700
Subject: [PATCH] [VERILOG] VPI RAM controller to access python memory.

---
 include/tvm/runtime/c_runtime_api.h           |   7 +
 include/tvm/runtime/packed_func.h             |   2 +
 python/tvm/__init__.py                        |   2 +-
 python/tvm/_ctypes/_ndarray.py                |  13 +-
 python/tvm/addon/verilog.py                   |  14 +-
 python/tvm/ndarray.py                         |   2 +-
 src/codegen/verilog/vpi_device_api.cc         | 319 ++++++++++++++++++
 src/codegen/verilog/vpi_session.cc            | 171 +++++-----
 src/codegen/verilog/vpi_session.h             |  48 ++-
 src/pass/make_api.cc                          |   2 +-
 src/runtime/c_runtime_api.cc                  |   2 +-
 src/runtime/cpu_device_api.cc                 |   7 +-
 src/runtime/device_api.h                      |   1 +
 tests/python/unittest/test_runtime_ndarray.py |   5 +-
 tests/verilog/test_counter.py                 |   1 -
 tests/verilog/test_loop.py                    |   1 -
 tests/verilog/test_vpi_ram.py                 | 131 +++++++
 tests/verilog/test_vpi_ram.v                  |  52 +++
 verilog/tvm_vpi.cc                            |  58 ++--
 verilog/tvm_vpi.h                             |  17 +-
 verilog/tvm_vpi_ram.v                         |  49 +++
 21 files changed, 785 insertions(+), 119 deletions(-)
 create mode 100644 src/codegen/verilog/vpi_device_api.cc
 create mode 100644 tests/verilog/test_vpi_ram.py
 create mode 100644 tests/verilog/test_vpi_ram.v
 create mode 100644 verilog/tvm_vpi_ram.v
diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h
index 9b10f16008c2..91f33d495cc8 100644
--- a/include/tvm/runtime/c_runtime_api.h
+++ b/include/tvm/runtime/c_runtime_api.h
@@ -38,6 +38,13 @@
 TVM_EXTERN_C {
 /*! \brief type of array index. */
 typedef int64_t tvm_index_t;
+
+/*! \brief Extension device type codes defined by TVM. */
+typedef enum {
+  /*! \brief Simulated on-board RAM reached through VPI; the Python TVMContext uses the same code (9). */
+  kVPI = 9
+} TVMDeviceExtType;
+
 /*!
  * \brief The type code in TVMType
  * \note TVMType is used in two places.
diff --git a/include/tvm/runtime/packed_func.h b/include/tvm/runtime/packed_func.h
index 592b418e5918..0d5064dcff00 100644
--- a/include/tvm/runtime/packed_func.h
+++ b/include/tvm/runtime/packed_func.h
@@ -274,6 +274,7 @@ class TVMArgValue : public TVMPODValue_ {
     return value_.v_type;
   }
   operator PackedFunc() const {
+    if (type_code_ == kNull) return PackedFunc();
     TVM_CHECK_TYPE_CODE(type_code_, kFuncHandle);
     return *ptr<PackedFunc>();
   }
@@ -350,6 +351,7 @@ class TVMRetValue : public TVMPODValue_ {
     return value_.v_type;
   }
   operator PackedFunc() const {
+    if (type_code_ == kNull) return PackedFunc();
     TVM_CHECK_TYPE_CODE(type_code_, kFuncHandle);
     return *ptr<PackedFunc>();
   }
diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py
index 4e294bdafdf2..51cb4a179436 100644
--- a/python/tvm/__init__.py
+++ b/python/tvm/__init__.py
@@ -15,7 +15,7 @@
 from . import module
 
 from . import ndarray as nd
-from .ndarray import cpu, gpu, opencl, cl
+from .ndarray import cpu, gpu, opencl, cl, vpi
 
 from ._base import TVMError
 from .api import *
diff --git a/python/tvm/_ctypes/_ndarray.py b/python/tvm/_ctypes/_ndarray.py
index 05f300ef4062..72ba6402c150 100644
--- a/python/tvm/_ctypes/_ndarray.py
+++ b/python/tvm/_ctypes/_ndarray.py
@@ -18,7 +18,8 @@ class TVMContext(ctypes.Structure):
     MASK2STR = {
         1 : 'cpu',
         2 : 'gpu',
-        4 : 'opencl'
+        4 : 'opencl',
+        9 : 'vpi'
     }
     def __init__(self, device_id, device_type):
         super(TVMContext, self).__init__()
@@ -76,6 +77,16 @@ def opencl(dev_id=0):
     """
     return TVMContext(dev_id, 4)
 
+def vpi(dev_id=0):
+    """Construct a TVMContext for the VPI simulated device (device type 9).
+
+    Parameters
+    ----------
+    dev_id : int, optional
+        The integer device id (defaults to 0).
+    """
+    return TVMContext(dev_id, 9)  # 9 is the 'vpi' entry of TVMContext.MASK2STR
+
 
 def numpyasarray(np_data):
     """Return a TVMArray representation of a numpy array.
diff --git a/python/tvm/addon/verilog.py b/python/tvm/addon/verilog.py
index 1321c966e2fd..9ec36fa880ae 100644
--- a/python/tvm/addon/verilog.py
+++ b/python/tvm/addon/verilog.py
@@ -17,6 +17,7 @@ def __init__(self, handle):
         super(VPISession, self).__init__(handle)
         self.proc = None
         self.execpath = None
+        self.yield_callbacks = []
 
     def __del__(self):
         self.proc.kill()
@@ -47,6 +48,8 @@ def __getattr__(self, name):
 
     def yield_until_posedge(self):
         """Yield until next posedge"""
+        for f in self.yield_callbacks:
+            f()
         return _api_internal._vpi_SessYield(self)
 
     def shutdown(self):
@@ -222,7 +225,16 @@ def session(file_name):
     env['TVM_HREAD_PIPE'] = str(read_host)
     env['TVM_HWRITE_PIPE'] = str(write_host)
 
-    proc = subprocess.Popen(cmd, env=env, close_fds=False)
+    try:
+        # close_fds does not work well for all python3
+        # Use pass_fds instead.
+        # pylint: disable=unexpected-keyword-arg
+        pass_fds = (read_device, write_device, read_host, write_host)
+        proc = subprocess.Popen(cmd, pass_fds=pass_fds, env=env)
+    except TypeError:
+        # This is effective for python2
+        proc = subprocess.Popen(cmd, close_fds=False, env=env)
+
     # close device side pipe
     os.close(read_device)
     os.close(write_device)
diff --git a/python/tvm/ndarray.py b/python/tvm/ndarray.py
index 1d56e60b6ce9..4a88968143a8 100644
--- a/python/tvm/ndarray.py
+++ b/python/tvm/ndarray.py
@@ -7,7 +7,7 @@
 import numpy as _np
 
 from ._ctypes._ndarray import TVMContext, TVMType, NDArrayBase
-from ._ctypes._ndarray import cpu, gpu, opencl, empty, sync
+from ._ctypes._ndarray import cpu, gpu, opencl, vpi, empty, sync
 from ._ctypes._ndarray import _init_ndarray_module
 from ._ctypes._function import Function
 
diff --git a/src/codegen/verilog/vpi_device_api.cc b/src/codegen/verilog/vpi_device_api.cc
new file mode 100644
index 000000000000..6471f2e1d9eb
--- /dev/null
+++ b/src/codegen/verilog/vpi_device_api.cc
@@ -0,0 +1,319 @@
+/*!
+ *  Copyright (c) 2017 by Contributors
+ * \file vpi_device.cc
+ * \brief Simulated VPI RAM device.
+ */
+#include <tvm/runtime/registry.h>
+#include <tvm/packed_func_ext.h>
+#include <cstdlib>
+#include <unordered_map>
+#include <map>
+#include <queue>
+#include "../../runtime/device_api.h"
+#include "./vpi_session.h"
+
+namespace tvm {
+namespace codegen {
+
+/*! \brief Simulated device RAM: a host buffer addressed by byte offsets, with a simple block-reuse allocator. */
+class VPIDeviceAPI : public runtime::DeviceAPI {
+ public:
+  VPIDeviceAPI() {
+    static const size_t kAllocAlign = 32U;
+    const char* s_ram_size = getenv("TVM_VPI_RAM_SIZE_MB");
+    // Default to 32 MB of RAM; the TVM_VPI_RAM_SIZE_MB environment variable overrides it.
+    int ram_size = 32;
+    if (s_ram_size != nullptr) {
+      ram_size = atoi(s_ram_size);
+    }
+    ram_.resize(ram_size << 17);  // MB -> number of int64_t words (2^20 / 8 == 2^17)
+    ram_head_ = kAllocAlign;  // fresh allocations start at offset 32, so offset 0 is never handed out
+    ram_max_ = ram_.size() * sizeof(int64_t);  // capacity in bytes
+    LOG(INFO) << "Initialize VPI simulated ram " << ram_size << "MB ...";
+  }
+  // Convert a device address (byte offset into ram_) to a host pointer; CHECKs that addr + size fits.
+  void* RealAddr(const void* addr, size_t size) const {
+    int64_t ptr = reinterpret_cast<int64_t>(addr);
+    CHECK_LE(ptr + size, ram_max_)
+        << "VPI: Illegal memory access";
+    return (char*)(&ram_[0]) + ptr;  // NOLINT(*)
+  }
+  void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final {
+    static const size_t kAllocAlign = 32U;
+    // Requests for more than 32-byte alignment are rejected; everything else is aligned to 32 bytes.
+    CHECK_LE(alignment, kAllocAlign);
+    alignment = kAllocAlign;
+    // Pad the size up to a multiple of the alignment; an already-aligned size still grows by one unit.
+    size += alignment - (size % alignment);
+    // This is not thread safe, but fine for simulation.
+    auto it = free_blocks_.lower_bound(size);  // smallest freed block whose size is >= the request
+    if (it != free_blocks_.end()) {
+      size_t head = it->second;
+      free_blocks_.erase(it);
+      Block& b = block_map_.at(head);
+      CHECK(b.is_free);
+      b.is_free = false;
+      return reinterpret_cast<void*>(head);  // the returned "pointer" is the block's byte offset
+    } else {
+      CHECK_EQ(ram_head_ % kAllocAlign, 0U);
+      Block b;
+      b.size = size;
+      b.is_free = false;
+      CHECK_LE(ram_head_ + size, ram_max_)
+          << "VPI: Out of memory";
+      block_map_[ram_head_] = b;
+      void* ret = reinterpret_cast<void*>(ram_head_);
+      ram_head_ += size;  // bump the top of the used region past the new block
+      return ret;
+    }
+  }
+  void FreeDataSpace(TVMContext ctx, void* ptr) final {
+    size_t head = reinterpret_cast<size_t>(ptr);
+    Block& b = block_map_.at(head);  // at() throws if ptr was not returned by AllocDataSpace
+    b.is_free = true;
+    free_blocks_.insert({b.size, head});  // blocks are kept whole for reuse; no splitting or merging
+  }
+  void CopyDataFromTo(const void* from,
+                      void* to,
+                      size_t size,
+                      TVMContext ctx_from,
+                      TVMContext ctx_to,
+                      TVMStreamHandle stream) final {
+    if (static_cast<int>(ctx_from.device_type) == kVPI) {  // VPI-side pointers are offsets: translate
+      from = RealAddr(from, size);
+    }
+    if (static_cast<int>(ctx_to.device_type) == kVPI) {
+      to = RealAddr(to, size);
+    }
+    memcpy(to, from, size);  // the stream argument is not used
+  }
+  void StreamSync(TVMContext ctx, TVMStreamHandle stream) final {  // no-op
+  }
+  static VPIDeviceAPI* Global() {
+    static VPIDeviceAPI inst;  // single shared instance, constructed on first use
+    return &inst;
+  }
+
+ private:
+  // allocator block for reuse
+  struct Block {
+    // The (padded) size of the block in bytes
+    size_t size;
+    // Whether this block is currently free.
+    bool is_free{true};
+  };
+  // block head offset -> block record
+  std::unordered_map<size_t, Block> block_map_;
+  // block size -> head offsets of free blocks of that size
+  std::multimap<size_t, size_t> free_blocks_;
+  // ram_head_: offset of the first never-allocated byte; ram_max_: capacity in bytes
+  size_t ram_head_, ram_max_;
+  // The ram space.
+  std::vector<int64_t> ram_;
+};
+
+/*!
+ * \brief Memory controller FSM that serves FIFO reads/writes against the simulated VPI RAM.
+ */
+class VPIMemoryController {
+ public:
+  // Bind the controller to the signals of `module` and derive the FIFO word sizes.
+  void Init(VPIHandle module) {
+    device_ = VPIDeviceAPI::Global();
+    // initialize the connections
+    in_clk_ = module["clk"];
+    in_rst_ = module["rst"];
+    in_read_dequeue_ = module["in_read_dequeue"];
+    in_write_enable_ = module["in_write_enable"];
+    in_write_data_ = module["in_write_data"];
+    // output registers
+    out_reg_read_data_ = module["out_reg_read_data"];
+    out_reg_read_valid_ = module["out_reg_read_valid"];
+    out_reg_write_full_ = module["out_reg_write_full"];
+    // memory control signal
+    ctrl_read_req_ = module["ctrl_read_req"];
+    ctrl_read_addr_ = module["ctrl_read_addr"];
+    ctrl_read_size_ = module["ctrl_read_size"];
+    ctrl_write_req_ = module["ctrl_write_req"];
+    ctrl_write_addr_ = module["ctrl_write_addr"];
+    ctrl_write_size_ = module["ctrl_write_size"];
+    // Width of the read/write data ports in bits, converted to bytes below.
+    size_t read_bits =  out_reg_read_data_.size();
+    size_t write_bits =  in_write_data_.size();
+    CHECK_EQ(read_bits % 8U, 0)
+        << "Read/write unit have to be multiple of 8 bit(bytes)";
+    CHECK_EQ(write_bits % 8U, 0)
+        << "Read/write unit have to be multiple of 8 bit(bytes)";
+    read_unit_bytes_ = read_bits / 8U;
+    write_unit_bytes_ = write_bits / 8U;
+  }
+  // Callback run at each posedge, before the session yields control back to the simulator.
+  void AtPosEedge() {
+    // During reset nothing may be in flight; report write-full and no valid read data.
+    if (in_rst_.get_int()) {
+      CHECK_EQ(pending_read_.size, 0U);
+      CHECK_EQ(pending_write_.size, 0U);
+      CHECK(read_tasks_.empty());
+      CHECK(write_tasks_.empty());
+      out_reg_write_full_.put_int(1);
+      out_reg_read_valid_.put_int(0);
+      return;
+    }
+    // Present the next read word when the current one was dequeued or none is valid yet.
+    if (in_read_dequeue_.get_int() || !out_reg_read_valid_.get_int()) {
+      ReadFromFIFO();
+    }
+    // Accept one word from the writer; it must not write while full is asserted.
+    if (in_write_enable_.get_int()) {
+      CHECK(!out_reg_write_full_.get_int());
+      WriteToFIFO();
+    }
+    if (pending_write_.size || write_tasks_.size()) {  // room only while a write task is outstanding
+      out_reg_write_full_.put_int(0);
+    } else {
+      out_reg_write_full_.put_int(1);
+    }
+    // Queue a new read task when one is requested on the control port.
+    if (ctrl_read_req_.get_int()) {
+      FIFOTask tsk;
+      tsk.addr = reinterpret_cast<char*>(ctrl_read_addr_.get_int());
+      tsk.size = static_cast<size_t>(ctrl_read_size_.get_int());
+      read_tasks_.push(tsk);
+    }
+    // Queue a new write task when one is requested on the control port.
+    if (ctrl_write_req_.get_int()) {
+      FIFOTask tsk;
+      tsk.addr = reinterpret_cast<char*>(ctrl_write_addr_.get_int());
+      tsk.size = static_cast<size_t>(ctrl_write_size_.get_int());
+      write_tasks_.push(tsk);
+    }
+  }
+
+ private:
+  // A transfer request: device address while queued, host pointer once it becomes pending.
+  struct FIFOTask {
+    char* addr{nullptr};
+    size_t size{0};
+  };
+  // Drive the next word of the pending read task onto the read-data register.
+  void ReadFromFIFO() {
+    if (pending_read_.size == 0) {
+      if (!read_tasks_.empty()) {
+        pending_read_ = read_tasks_.front();
+        read_tasks_.pop();
+        // translate to real memory addr
+        pending_read_.addr = static_cast<char*>(
+            device_->RealAddr(
+                pending_read_.addr, pending_read_.size));
+      }
+    }
+    if (pending_read_.size != 0) {
+      // The size to be read
+      size_t nread = std::min(pending_read_.size, read_unit_bytes_);
+      // Stage the bytes in zeroed 32-bit words so a short final read never exposes stale buffer bytes.
+      size_t nwords = (read_unit_bytes_ + 3) / 4;
+      wbuf_.assign(nwords, 0);
+      vbuf_.resize(nwords);
+      memcpy(&wbuf_[0], pending_read_.addr, nread);
+      for (size_t i = 0; i < nwords; ++i) {
+        vbuf_[i].aval = wbuf_[i];
+        vbuf_[i].bval = 0;
+      }
+      out_reg_read_data_.put_vec(vbuf_);
+      // Update the pointer
+      pending_read_.size -= nread;
+      pending_read_.addr += nread;
+      // the register now holds a valid word
+      out_reg_read_valid_.put_int(1);
+    } else {
+      out_reg_read_valid_.put_int(0);
+    }
+  }
+  // Store the word on the write-data port into the pending write task's memory.
+  void WriteToFIFO() {
+    if (pending_write_.size == 0) {
+      if (!write_tasks_.empty()) {
+        pending_write_ = write_tasks_.front();
+        write_tasks_.pop();
+        // translate to real memory addr
+        pending_write_.addr = static_cast<char*>(
+            device_->RealAddr(
+                pending_write_.addr, pending_write_.size));
+      }
+    }
+    if (pending_write_.size != 0) {
+      // write to the ram.
+      size_t nwrite = std::min(pending_write_.size, write_unit_bytes_);
+      size_t nwords = (write_unit_bytes_ + 3) / 4;
+      in_write_data_.get_vec(&vbuf_);
+      CHECK_EQ(vbuf_.size(), nwords);
+      wbuf_.resize(nwords);
+      for (size_t i = 0; i < nwords; ++i) {
+        wbuf_[i] = vbuf_[i].aval;
+        CHECK_EQ(vbuf_[i].bval, 0)
+            << "Write indetermined value to RAM";
+      }
+      memcpy(pending_write_.addr, &wbuf_[0], nwrite);
+      // Update the pointer
+      pending_write_.size -= nwrite;
+      pending_write_.addr += nwrite;
+    }
+  }
+  // Device API
+  VPIDeviceAPI* device_{nullptr};
+  // Input clock and reset
+  VPIHandle in_clk_;
+  VPIHandle in_rst_;
+  // Read FIFO signal
+  VPIHandle in_read_dequeue_;
+  // Write FIFO signal
+  VPIHandle in_write_enable_;
+  VPIHandle in_write_data_;
+  // Read memory controller signals
+  VPIHandle ctrl_read_req_;
+  VPIHandle ctrl_read_addr_;
+  VPIHandle ctrl_read_size_;
+  // Write memory controller signals
+  VPIHandle ctrl_write_req_;
+  VPIHandle ctrl_write_addr_;
+  VPIHandle ctrl_write_size_;
+  // Read FIFO outputs
+  VPIHandle out_reg_read_data_;
+  VPIHandle out_reg_read_valid_;
+  // Write FIFO outputs
+  VPIHandle out_reg_write_full_;
+  // The read and write tasks currently being served (size == 0 means none).
+  FIFOTask pending_read_;
+  FIFOTask pending_write_;
+  // The read/write task queues.
+  std::queue<FIFOTask> read_tasks_;
+  std::queue<FIFOTask> write_tasks_;
+  // Unit bytes for read/writing
+  size_t read_unit_bytes_;
+  size_t write_unit_bytes_;
+  // Temporary buffers shared by the read and write paths.
+  std::vector<int32_t> wbuf_;
+  std::vector<vpi::VPIVecVal> vbuf_;
+};
+
+TVM_REGISTER_GLOBAL(_device_api_vpi)  // global function returning the VPI DeviceAPI instance
+.set_body([](runtime::TVMArgs args, runtime::TVMRetValue* rv) {
+    runtime::DeviceAPI* ptr = VPIDeviceAPI::Global();  // convert to the base pointer before erasing the type
+    *rv = static_cast<void*>(ptr);  // returned as an opaque handle
+  });
+
+TVM_REGISTER_GLOBAL(_vpi_module_tvm_vpi_ram)  // VPISession::make looks up "_vpi_module_" + module def name
+.set_body([](runtime::TVMArgs args, runtime::TVMRetValue* rv) {
+    VPIHandle m = args[0];  // handle of the module instance to hook up
+    std::shared_ptr<VPIMemoryController> ctrl =
+        std::make_shared<VPIMemoryController>();
+    ctrl->Init(m);
+    LOG(INFO) << "Hook up " << m.name()
+              << " to simulated memory controller...";
+    PackedFunc pf([ctrl](const runtime::TVMArgs&, runtime::TVMRetValue*) {  // capture keeps ctrl alive
+        ctrl->AtPosEedge();
+      });
+    *rv = pf;  // the returned function is the per-posedge callback for this module
+  });
+}  // namespace codegen
+}  // namespace tvm
diff --git a/src/codegen/verilog/vpi_session.cc b/src/codegen/verilog/vpi_session.cc
index 14cc815debd1..7a3190b5beec 100644
--- a/src/codegen/verilog/vpi_session.cc
+++ b/src/codegen/verilog/vpi_session.cc
@@ -11,20 +11,19 @@ namespace codegen {
 
 using namespace vpi;
 
-/*! \brief Container for session. */
-class VPISessionNode : public Node {
+// helper class to get the node.
+class VPISessionEntry {
  public:
   // Whether in control.
   bool in_control{false};
   // Internal reader and writer.
   common::Pipe reader;
   common::Pipe writer;
-
   // internal constructor
-  VPISessionNode(int h_pipe_read, int h_pipe_write)
+  VPISessionEntry(int h_pipe_read, int h_pipe_write)
       : reader(h_pipe_read), writer(h_pipe_write) {
   }
-  ~VPISessionNode() {
+  ~VPISessionEntry() {
     if (in_control) {
       VPIReturnCode cd;
       writer.Write(kShutDown);
@@ -33,40 +32,11 @@ class VPISessionNode : public Node {
     reader.Close();
     writer.Close();
   }
-  // visit all attributes
-  void VisitAttrs(AttrVisitor* v) final {
-  }
   void ReadExpect(VPIReturnCode rcode) {
     VPIReturnCode code;
     CHECK(reader.Read(&code));
     CHECK_EQ(code, rcode) << "Error in simulation";
   }
-
-  static constexpr const char* _type_key = "VPISession";
-  TVM_DECLARE_NODE_TYPE_INFO(VPISessionNode, Node);
-};
-
-/*! \brief Container for handle */
-class VPIHandleNode : public Node {
- public:
-  // The internal session.
-  VPISession sess;
-  // Internal handle
-  VPIRawHandle handle;
-
-  void VisitAttrs(AttrVisitor* v) final {
-    v->Visit("sess", &sess);
-  }
-  static VPIHandle make(const VPISession& sess, VPIRawHandle handle) {
-    std::shared_ptr<VPIHandleNode> n =
-        std::make_shared<VPIHandleNode>();
-    n->sess = sess;
-    n->handle = handle;
-    return VPIHandle(n);
-  }
-
-  static constexpr const char* _type_key = "VPIHandle";
-  TVM_DECLARE_NODE_TYPE_INFO(VPIHandleNode, Node);
 };
 
 // Inline implementations
@@ -77,34 +47,99 @@ inline VPIHandleNode* VPIHandle::get() const {
   return static_cast<VPIHandleNode*>(node_.get());
 }
 
-VPISession VPISession::make(int h_pipe_read, int h_pipe_write) {
-  std::shared_ptr<VPISessionNode> n = std::make_shared<VPISessionNode>(
-      h_pipe_read, h_pipe_write);
-  n->ReadExpect(kPosEdgeTrigger);
-  n->in_control = true;
-  return VPISession(n);
+VPIHandle VPIHandleCreate(  // wrap a raw simulator handle together with the session it belongs to
+    const std::shared_ptr<VPISessionEntry>& sess,
+    VPIRawHandle handle) {
+  std::shared_ptr<VPIHandleNode> n = std::make_shared<VPIHandleNode>();
+  n->sess = sess;  // the handle shares ownership of the session entry
+  n->handle = handle;
+  return VPIHandle(n);
 }
 
-VPIHandle VPISession::operator[](const std::string& name) const {
-  return GetByName(name, nullptr);
-}
-
-VPIHandle VPISession::GetByName(const std::string& name, VPIRawHandle handle) const {
-  VPISessionNode* n = get();
+VPIHandle GetHandleByName(  // look up `name` relative to `handle`; session-level lookups pass nullptr
+    const std::shared_ptr<VPISessionEntry>& sess,
+    const std::string& name,
+    VPIRawHandle handle,
+    bool allow_undefined) {
+  VPISessionEntry* n = sess.get();
+  CHECK(n->in_control);
+  n->writer.Write(kGetHandleByName);  // request: opcode, name, parent handle
+  n->writer.Write(name);
+  n->writer.Write(handle);
+  n->ReadExpect(kSuccess);
+  CHECK(n->reader.Read(&handle));  // the reply overwrites `handle` with the lookup result
+  if (handle != nullptr) {
+    return VPIHandleCreate(sess, handle);
+  } else {
+    CHECK(allow_undefined)  // a missing name is fatal unless the caller opted in
+        << "Cannot find handle with name=" << name;
+    return VPIHandle();  // default-constructed handle signals "not found"
+  }
+}
+
+std::string VPIGetStrProp(VPIHandleNode* h, int code) {
+  VPISessionEntry* n = h->sess.get();
+
+std::string VPIGetStrProp(VPIHandleNode* h, int code) {
+  VPISessionEntry* n = h->sess.get();
+  CHECK(n->in_control);
+  n->writer.Write(kGetStrProp);
+  n->writer.Write(code);
+  n->writer.Write(h->handle);
+  n->ReadExpect(kSuccess);
+  std::string str;
+  CHECK(n->reader.Read(&str));
+  return str;
+}
+
+int VPIGetIntProp(VPIHandleNode* h, int code) {
+  VPISessionEntry* n = h->sess.get();
+  CHECK(n->in_control);
+  n->writer.Write(kGetIntProp);
+  n->writer.Write(code);
+  n->writer.Write(h->handle);
+  n->ReadExpect(kSuccess);
+  int value;
+  CHECK(n->reader.Read(&value));
+  return value;
+}
+
+VPISession VPISession::make(int h_pipe_read, int h_pipe_write) {
+  std::shared_ptr<VPISessionNode> n = std::make_shared<VPISessionNode>();
+  n->sess = std::make_shared<VPISessionEntry>(h_pipe_read, h_pipe_write);
+  n->sess->in_control = true;
+  VPISession sess(n);
+  // The simulator first sends the custom module handles; fail fast if that read does not succeed.
+  std::vector<VPIRawHandle> mod_handles;
+  CHECK(n->sess->reader.Read(&mod_handles));
+  n->sess->ReadExpect(kPosEdgeTrigger);
+  // Create one posedge callback per module from its registered "_vpi_module_<defname>" factory.
+  for (VPIRawHandle raw_h : mod_handles) {
+    VPIHandle h = VPIHandleCreate(n->sess, raw_h);
+    CHECK_EQ(VPIGetIntProp(h.get(), kVPIType), kVPIModule)
+        << "Expect pass modules to $tvm_session after clk";
+    std::string def = VPIGetStrProp(h.get(), kVPIDefName);
+    std::string callback_name = "_vpi_module_" + def;
+    const PackedFunc* f = runtime::Registry::Get(callback_name);
+    CHECK(f != nullptr)
+        << "Cannot find definition for tvm vpi module " << def;
+    PackedFunc cb = (*f)(h);  // the factory returns the function to run at each posedge
+    n->posedge_end_callbacks.push_back(cb);
+  }
+  return sess;
+}
+
+VPIHandle VPISession::operator[](const std::string& name) const {
+  return GetHandleByName(get()->sess, name, nullptr, false);  // strict: a missing name fails a CHECK
+}
+VPIHandle VPISession::GetByName(const std::string& name, bool allow_undefined) const {
+  // Honor allow_undefined: when false, a missing name fails the CHECK in GetHandleByName.
+  return GetHandleByName(get()->sess, name, nullptr, allow_undefined);
 }
 
 void VPISession::yield() {
-  VPISessionNode* n = get();
+  VPISessionEntry* n = get()->sess.get();
   CHECK(n->in_control);
+  for (const PackedFunc& f : get()->posedge_end_callbacks) {
+    f();
+  }
   n->writer.Write(kYield);
   n->ReadExpect(kSuccess);
   n->in_control = false;
@@ -113,7 +148,7 @@ void VPISession::yield() {
 }
 
 void VPISession::shutdown() {
-  VPISessionNode* n = get();
+  VPISessionEntry* n = get()->sess.get();
   if (n->in_control) {
     n->writer.Write(kShutDown);
     n->ReadExpect(kSuccess);
@@ -122,20 +157,12 @@ void VPISession::shutdown() {
 }
 
 int VPIHandle::size() const {
-  VPIHandleNode* h = get();
-  VPISessionNode* n = h->sess.get();
-  CHECK(n->in_control);
-  n->writer.Write(kGetSize);
-  n->writer.Write(h->handle);
-  n->ReadExpect(kSuccess);
-  int value;
-  CHECK(n->reader.Read(&value));
-  return value;
+  return VPIGetIntProp(get(), kVPISize);
 }
 
 void VPIHandle::put_int(int value) {
   VPIHandleNode* h = get();
-  VPISessionNode* n = h->sess.get();
+  VPISessionEntry* n = h->sess.get();
   CHECK(n->in_control);
   n->writer.Write(kPutInt32);
   n->writer.Write(h->handle);
@@ -145,7 +172,7 @@ void VPIHandle::put_int(int value) {
 
 int VPIHandle::get_int() const {
   VPIHandleNode* h = get();
-  VPISessionNode* n = h->sess.get();
+  VPISessionEntry* n = h->sess.get();
   CHECK(n->in_control);
   n->writer.Write(kGetInt32);
   n->writer.Write(h->handle);
@@ -156,20 +183,12 @@ int VPIHandle::get_int() const {
 }
 
 std::string VPIHandle::name() const {
-  VPIHandleNode* h = get();
-  VPISessionNode* n = h->sess.get();
-  CHECK(n->in_control);
-  n->writer.Write(kGetName);
-  n->writer.Write(h->handle);
-  n->ReadExpect(kSuccess);
-  std::string str;
-  CHECK(n->reader.Read(&str));
-  return str;
+  return VPIGetStrProp(get(), kVPIFullName);
 }
 
 void VPIHandle::put_vec(const std::vector<VPIVecVal>& vec) const {
   VPIHandleNode* h = get();
-  VPISessionNode* n = h->sess.get();
+  VPISessionEntry* n = h->sess.get();
   CHECK(n->in_control);
   n->writer.Write(kPutVec);
   n->writer.Write(h->handle);
@@ -179,17 +198,17 @@ void VPIHandle::put_vec(const std::vector<VPIVecVal>& vec) const {
 
 void VPIHandle::get_vec(std::vector<VPIVecVal>* vec) const {
   VPIHandleNode* h = get();
-  VPISessionNode* n = h->sess.get();
+  VPISessionEntry* n = h->sess.get();
   CHECK(n->in_control);
-  n->writer.Write(kPutVec);
+  n->writer.Write(kGetVec);
   n->writer.Write(h->handle);
   n->ReadExpect(kSuccess);
-  CHECK(n->reader.Read(&vec));
+  CHECK(n->reader.Read(vec));
 }
 
 VPIHandle VPIHandle::operator[](const std::string& name) const {
   VPIHandleNode* h = get();
-  return h->sess.GetByName(name, h->handle);
+  return GetHandleByName(h->sess, name, h->handle, false);
 }
 
 // API registration
diff --git a/src/codegen/verilog/vpi_session.h b/src/codegen/verilog/vpi_session.h
index bc648adf7cba..88a7f2f1906e 100644
--- a/src/codegen/verilog/vpi_session.h
+++ b/src/codegen/verilog/vpi_session.h
@@ -14,10 +14,14 @@
 
 namespace tvm {
 namespace codegen {
+
 // node containers
 class VPISessionNode;
 class VPIHandleNode;
 class VPIHandle;
+class VPISessionEntry;
+
+using runtime::PackedFunc;
 
 /*! \brief Environment */
 class VPISession : public NodeRef {
@@ -29,6 +33,12 @@ class VPISession : public NodeRef {
    * \param name The name of the handle.
    */
   VPIHandle operator[](const std::string& name) const;
+  /*!
+   * \brief Get handle by name.
+   * \param name The name of the handle.
+   * \param allow_undefined whether allow undefined
+   */
+  VPIHandle GetByName(const std::string& name, bool allow_undefined) const;
   /*!
    * \brief Yield control back to the simulator
    *  Block until next cycle.
@@ -46,12 +56,7 @@ class VPISession : public NodeRef {
   static VPISession make(int h_pipe_read, int h_pipe_write);
   // Internal methods.
   using ContainerType = VPISessionNode;
-
- private:
-  friend class VPIHandle;
   inline VPISessionNode* get() const;
-  // Get handle by name
-  VPIHandle GetByName(const std::string& name, vpi::VPIRawHandle handle) const;
 };
 
 /*! \brief VPI Handle */
@@ -91,10 +96,39 @@ class VPIHandle : public NodeRef {
   void get_vec(std::vector<vpi::VPIVecVal>* vec) const;
   // Internal methods
   using ContainerType = VPIHandleNode;
-
- private:
   inline VPIHandleNode* get() const;
 };
+
+/*! \brief Node container behind VPISession. */
+class VPISessionNode : public Node {
+ public:
+  // Shared session entry (only forward-declared in this header).
+  std::shared_ptr<VPISessionEntry> sess;
+  // Callbacks to run at the end of each posedge.
+  std::vector<PackedFunc> posedge_end_callbacks;
+
+  // No attributes are visited.
+  void VisitAttrs(AttrVisitor* v) final {
+  }
+  static constexpr const char* _type_key = "VPISession";
+  TVM_DECLARE_NODE_TYPE_INFO(VPISessionNode, Node);
+};
+
+/*! \brief Node container behind VPIHandle. */
+class VPIHandleNode : public Node {
+ public:
+  // Shared session entry this handle belongs to.
+  std::shared_ptr<VPISessionEntry> sess;
+  // Raw handle value identifying the object on the simulator side.
+  vpi::VPIRawHandle handle;
+
+  void VisitAttrs(AttrVisitor* v) final {  // no attributes are visited
+  }
+
+  static constexpr const char* _type_key = "VPIHandle";
+  TVM_DECLARE_NODE_TYPE_INFO(VPIHandleNode, Node);
+};
+
 }  // namespace codegen
 }  // namespace tvm
 #endif  // TVM_CODEGEN_VERILOG_VPI_SESSION_H_
diff --git a/src/pass/make_api.cc b/src/pass/make_api.cc
index ead3d38d8aec..33c2a93615fa 100644
--- a/src/pass/make_api.cc
+++ b/src/pass/make_api.cc
@@ -106,7 +106,7 @@ LoweredFunc MakeAPI(Stmt body,
 
   for (int i = 0; i < static_cast<int>(api_args.size()); ++i) {
     Var v_arg = f_arg_decl(i);
-    if (i < static_cast<size_t>(num_packed_args)) {
+    if (i < num_packed_args) {
       seq_init.emplace_back(LetStmt::make(
           v_arg, f_arg_value(v_arg.type(), i), nop));
     } else {
diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc
index 63a324027294..c0b876f772ff 100644
--- a/src/runtime/c_runtime_api.cc
+++ b/src/runtime/c_runtime_api.cc
@@ -23,7 +23,7 @@ namespace runtime {
 
 class DeviceAPIManager {
  public:
-  static const int kMaxDeviceAPI = 16;
+  static const int kMaxDeviceAPI = 32;
   // Get API
   static DeviceAPI* Get(TVMContext ctx) {
     return Global()->GetAPI(ctx.device_type);
diff --git a/src/runtime/cpu_device_api.cc b/src/runtime/cpu_device_api.cc
index 3204c4e27f5e..7cb29bd2e99f 100644
--- a/src/runtime/cpu_device_api.cc
+++ b/src/runtime/cpu_device_api.cc
@@ -1,11 +1,7 @@
 /*!
  *  Copyright (c) 2016 by Contributors
- * \file device_api_gpu.h
- * \brief GPU specific API
+ * \file cpu_device_api.cc
  */
-#ifndef TVM_RUNTIME_DEVICE_API_CPU_H_
-#define TVM_RUNTIME_DEVICE_API_CPU_H_
-
 #include <dmlc/logging.h>
 #include <tvm/runtime/registry.h>
 #include <cstdlib>
@@ -58,4 +54,3 @@ TVM_REGISTER_GLOBAL(_device_api_cpu)
   });
 }  // namespace runtime
 }  // namespace tvm
-#endif  // TVM_RUNTIME_DEVICE_API_CPU_H_
diff --git a/src/runtime/device_api.h b/src/runtime/device_api.h
index 9d626075d484..699655b0c4b3 100644
--- a/src/runtime/device_api.h
+++ b/src/runtime/device_api.h
@@ -64,6 +64,7 @@ inline std::string DeviceName(DLDeviceType type) {
     case kCPU: return "cpu";
     case kGPU: return "gpu";
     case kOpenCL: return "opencl";
+    case kVPI: return "vpi";
     default: LOG(FATAL) << "unknown type =" << type; return "Unknown";
   }
 }
diff --git a/tests/python/unittest/test_runtime_ndarray.py b/tests/python/unittest/test_runtime_ndarray.py
index 1c3e5f70d178..0dc6e3f1f93d 100644
--- a/tests/python/unittest/test_runtime_ndarray.py
+++ b/tests/python/unittest/test_runtime_ndarray.py
@@ -5,7 +5,10 @@ def enabled_ctx_list():
     if tvm.module.enabled("opencl"):
         tvm.module.init_opencl()
 
-    ctx_list = [('cpu', tvm.cpu(0)), ('gpu', tvm.gpu(0)), ('cl', tvm.opencl(0))]
+    ctx_list = [('cpu', tvm.cpu(0)),
+                ('gpu', tvm.gpu(0)),
+                ('cl', tvm.opencl(0)),
+                ('cpu', tvm.vpi(0))]
     ctx_list = [x[1] for x in ctx_list if tvm.module.enabled(x[0])]
     return ctx_list
 
diff --git a/tests/verilog/test_counter.py b/tests/verilog/test_counter.py
index 795b2d691a24..340e27afcf26 100644
--- a/tests/verilog/test_counter.py
+++ b/tests/verilog/test_counter.py
@@ -1,5 +1,4 @@
 import tvm
-import os
 from tvm.addon import verilog
 
 def test_counter():
diff --git a/tests/verilog/test_loop.py b/tests/verilog/test_loop.py
index 2ba19f771cc4..65a7ffaa1651 100644
--- a/tests/verilog/test_loop.py
+++ b/tests/verilog/test_loop.py
@@ -1,5 +1,4 @@
 import tvm
-import os
 from tvm.addon import verilog
 
 def test_loop():
diff --git a/tests/verilog/test_vpi_ram.py b/tests/verilog/test_vpi_ram.py
new file mode 100644
index 000000000000..43160ef315f4
--- /dev/null
+++ b/tests/verilog/test_vpi_ram.py
@@ -0,0 +1,131 @@
+import tvm
+import numpy as np
+from tvm.addon import verilog
+
+class FIFOReader(object):
+    """Callable that collects FIFO output: each call appends read_data if read_valid is non-zero."""
+    def __init__(self, read_data, read_valid):
+        self.read_data = read_data  # signal handle; its value is fetched with get_int()
+        self.read_valid = read_valid  # signal handle; a non-zero get_int() triggers a capture
+        self.data = []  # captured values, in the order they were observed
+
+    def __call__(self):
+        if self.read_valid.get_int():  # sample read_data only while it is flagged valid
+            self.data.append(self.read_data.get_int())
+
+class FIFOWriter(object):
+    """Callable that feeds a FIFO: each call pushes the next pending item unless write_full is set."""
+    def __init__(self, write_data, write_enable, write_full, data):
+        self.write_data = write_data  # signal handle that receives the item via put_int()
+        self.write_enable = write_enable  # signal handle set to 1 when pushing, otherwise 0
+        self.write_full = write_full  # signal handle; a non-zero get_int() blocks the push
+        self.data = data  # pending items; consumed in place, so the caller's sequence shrinks
+
+    def __call__(self):
+        if self.data and not self.write_full.get_int():
+            self.write_enable.put_int(1)
+            self.write_data.put_int(int(self.data[0]))
+            del self.data[0]  # requires a mutable sequence such as a list
+        else:
+            self.write_enable.put_int(0)  # nothing left to send, or write_full is set
+
+
+def test_ram_read():  # two read requests issued on consecutive posedges must stream out in order
+    n = 10
+    # context for VPI RAM
+    ctx = tvm.vpi(0)
+    a_np = np.arange(n).astype('int8')
+    a = tvm.nd.array(a_np, ctx)
+
+    # head ptr of a, used as the base address of the read requests
+    a_ptr = int(a.handle[0].data)
+    sess = verilog.session([
+        verilog.find_file("test_vpi_ram.v"),
+        verilog.find_file("tvm_vpi_ram.v")
+    ])
+    rst = sess.main.rst
+    read_data = sess.main.read_data
+    read_valid = sess.main.read_valid
+    read_dequeue = sess.main.read_dequeue
+    ctrl_read_req = sess.main.ctrl_read_req
+    ctrl_read_addr = sess.main.ctrl_read_addr
+    ctrl_read_size = sess.main.ctrl_read_size
+    rst.put_int(1)  # keep reset high across one posedge, then release it
+    sess.yield_until_posedge()
+    rst.put_int(0)
+    # hook up reader (presumably invoked by the session on every yield - confirm)
+    reader = FIFOReader(read_data, read_valid)
+    sess.yield_callbacks.append(reader)
+    # first read request: address a_ptr, size n
+    ctrl_read_req.put_int(1)
+    ctrl_read_addr.put_int(a_ptr)
+    ctrl_read_size.put_int(a.shape[0])
+
+    sess.yield_until_posedge()
+    # second read request (ctrl_read_req is still 1): address a_ptr + 2, size n - 2
+    ctrl_read_addr.put_int(a_ptr + 2)
+    ctrl_read_size.put_int(a.shape[0] - 2)
+
+    sess.yield_until_posedge()
+    ctrl_read_req.put_int(0)  # stop issuing requests
+    read_dequeue.put_int(1)  # read_dequeue stays asserted from here on
+
+    # yield until read is done
+    for i in range(a.shape[0] * 2):
+        sess.yield_until_posedge()
+    # the reader must have collected all of a_np followed by a_np[2:]
+    r = np.concatenate((a_np, a_np[2:]))
+    np.testing.assert_equal(np.array(reader.data), r)
+
+
+def test_ram_write():
+    """Stream values through the write FIFO and check they land in VPI RAM at the offset."""
+    n = 10
+    # offset inside `a` at which the write starts
+    offset = 2
+    # context for VPI RAM
+    ctx = tvm.vpi(0)
+    a_np = np.zeros(n).astype('int8')
+    a = tvm.nd.array(a_np, ctx)
+    # a list, not a range: FIFOWriter runs `del data[0]`, which a Python 3 range rejects
+    w_data = list(range(offset, n))
+    r_data = np.array(w_data, dtype='int8')  # snapshot taken before the writer drains w_data
+
+    # head ptr of a
+    a_ptr = int(a.handle[0].data)
+    sess = verilog.session([
+        verilog.find_file("test_vpi_ram.v"),
+        verilog.find_file("tvm_vpi_ram.v")
+    ])
+    rst = sess.main.rst
+    write_data = sess.main.write_data
+    write_enable = sess.main.write_enable
+    write_full = sess.main.write_full
+    ctrl_write_req = sess.main.ctrl_write_req
+    ctrl_write_addr = sess.main.ctrl_write_addr
+    ctrl_write_size = sess.main.ctrl_write_size
+
+    rst.put_int(1)
+    sess.yield_until_posedge()
+    rst.put_int(0)
+    # hook up writer
+    writer = FIFOWriter(write_data, write_enable, write_full, w_data)
+    sess.yield_callbacks.append(writer)
+    # request write
+    ctrl_write_req.put_int(1)
+    ctrl_write_addr.put_int(a_ptr + offset)
+    ctrl_write_size.put_int(a.shape[0] - offset)
+
+    sess.yield_until_posedge()
+    ctrl_write_req.put_int(0)
+
+    # yield until write is done
+    for _ in range(a.shape[0] + 2):
+        sess.yield_until_posedge()
+
+    # check if result matches
+    np.testing.assert_equal(a.asnumpy()[offset:], r_data)
+
+
+if __name__ == "__main__":
+    test_ram_write()  # NOTE(review): only the write test runs here; test_ram_read is not called
diff --git a/tests/verilog/test_vpi_ram.v b/tests/verilog/test_vpi_ram.v
new file mode 100644
index 000000000000..8398d3dabd36
--- /dev/null
+++ b/tests/verilog/test_vpi_ram.v
@@ -0,0 +1,52 @@
+`include "tvm_marcos.v"
+
+module main();  // Testbench: one tvm_vpi_ram instance plus a free-running clock.
+   parameter PER = 10;    // clk toggles every PER/2 time units
+   parameter WIDTH = 8;   // used for both READ_WIDTH and WRITE_WIDTH of myram
+   reg clk;
+   reg rst;
+   reg read_dequeue;
+   reg write_enable;
+   reg [WIDTH-1:0] write_data;
+   reg             ctrl_read_req;
+   reg [31:0]      ctrl_read_addr;
+   reg [31:0]      ctrl_read_size;
+   reg             ctrl_write_req;
+   reg [31:0]      ctrl_write_addr;
+   reg [31:0]      ctrl_write_size;
+   wire [WIDTH-1:0] read_data;
+   wire             read_valid;
+   wire             write_full;
+   // None of the regs above except clk is assigned anywhere in this file.
+   // NOTE(review): clk has no initial value here; presumably $tvm_session sets it - confirm.
+   always begin
+      #(PER/2) clk =~ clk;
+   end
+
+   tvm_vpi_ram #
+     (
+      .READ_WIDTH(WIDTH),
+      .WRITE_WIDTH(WIDTH))
+   myram
+     (
+      .clk(clk),
+      .rst(rst),
+      .in_read_dequeue(read_dequeue),
+      .in_write_enable(write_enable),
+      .in_write_data(write_data),
+      .ctrl_read_req(ctrl_read_req),
+      .ctrl_read_addr(ctrl_read_addr),
+      .ctrl_read_size(ctrl_read_size),
+      .ctrl_write_req(ctrl_write_req),
+      .ctrl_write_addr(ctrl_write_addr),
+      .ctrl_write_size(ctrl_write_size),
+      .out_read_data(read_data),
+      .out_read_valid(read_valid),
+      .out_write_full(write_full)
+      );
+
+   initial begin
+      // pass the clock first, then myram, so the session can hook the RAM up with the simulation
+      $tvm_session(clk, myram);
+   end
+endmodule
diff --git a/verilog/tvm_vpi.cc b/verilog/tvm_vpi.cc
index d1a41e0bdd2d..3c654b095fb3 100644
--- a/verilog/tvm_vpi.cc
+++ b/verilog/tvm_vpi.cc
@@ -13,8 +13,17 @@
 
 namespace tvm {
 namespace vpi {
+// standard consistency checks
 static_assert(sizeof(vpiHandle) == sizeof(VPIRawHandle),
-              "VPI handle condition");
+              "VPI standard");
+// type codes
+static_assert(vpiModule == kVPIModule, "VPI standard");
+// Property code
+static_assert(vpiType == kVPIType, "VPI standard");
+static_assert(vpiFullName == kVPIFullName, "VPI standard");
+static_assert(vpiSize == kVPISize, "VPI standard");
+static_assert(vpiDefName == kVPIDefName, "VPI standard");
+
 // IPC client for VPI
 class IPCClient {
  public:
@@ -26,8 +35,11 @@ class IPCClient {
     vpiHandle argv = vpi_handle(vpiSysTfCall, 0);
     vpiHandle arg_iter = vpi_iterate(vpiArgument, argv);
     clock_ = vpi_scan(arg_iter);
-    CHECK(vpi_scan(arg_iter) == nullptr)
-        << "tvm_session can only take in one clock";
+    std::vector<VPIRawHandle> handles;
+    while (vpiHandle h = vpi_scan(arg_iter)) {
+      handles.push_back(h);
+    }
+    writer_.Write(handles);
     PutInt(clock_, 0);
   }
   int Callback() {
@@ -74,12 +86,21 @@ class IPCClient {
           writer_.Write(handle);
           break;
         }
-        case kGetName: {
+        case kGetStrProp: {
+          CHECK(reader_.Read(&value));
+          CHECK(reader_.Read(&handle));
+          std::string prop = vpi_get_str(
+              value, static_cast<vpiHandle>(handle));
+          writer_.Write(kSuccess);
+          writer_.Write(prop);
+          break;
+        }
+        case kGetIntProp: {
+          CHECK(reader_.Read(&value));
           CHECK(reader_.Read(&handle));
-          std::string name = vpi_get_str(
-              vpiFullName, static_cast<vpiHandle>(handle));
+          value = vpi_get(value, static_cast<vpiHandle>(handle));
           writer_.Write(kSuccess);
-          writer_.Write(name);
+          writer_.Write(value);
           break;
         }
         case kGetInt32: {
@@ -97,13 +118,6 @@ class IPCClient {
           writer_.Write(kSuccess);
           break;
         }
-        case kGetSize: {
-          CHECK(reader_.Read(&handle));
-          value = vpi_get(vpiSize, static_cast<vpiHandle>(handle));
-          writer_.Write(kSuccess);
-          writer_.Write(value);
-          break;
-        }
         case kGetVec: {
           CHECK(reader_.Read(&handle));
           vpiHandle h = static_cast<vpiHandle>(handle);
@@ -126,17 +140,19 @@ class IPCClient {
           CHECK(reader_.Read(&vec_buf_));
           CHECK(handle != clock_) << "Cannot write to clock";
           vpiHandle h = static_cast<vpiHandle>(handle);
-          size_t nwords = vec_buf_.size();
-          svec_buf_.resize(nwords);
-          reader_.Read(&vec_buf_[0], nwords * sizeof(s_vpi_vecval));
+          svec_buf_.resize(vec_buf_.size());
           for (size_t i = 0; i < vec_buf_.size(); ++i) {
             svec_buf_[i].aval = vec_buf_[i].aval;
             svec_buf_[i].bval = vec_buf_[i].bval;
           }
           s_vpi_value  value_s;
+          s_vpi_time time_s;
+          time_s.type = vpiSimTime;
+          time_s.high = 0;
+          time_s.low  = 0;
           value_s.format = vpiVectorVal;
           value_s.value.vector = &svec_buf_[0];
-          vpi_put_value(h, &value_s, 0, vpiNoDelay);
+          vpi_put_value(h, &value_s, &time_s, vpiInertialDelay);
           writer_.Write(kSuccess);
           break;
         }
@@ -183,9 +199,13 @@ class IPCClient {
   // Put integer into handle.
   static void PutInt(vpiHandle h, int value) {
     s_vpi_value  value_s;
+    s_vpi_time time_s;
+    time_s.type = vpiSimTime;
+    time_s.high = 0;
+    time_s.low  = 0;
     value_s.format = vpiIntVal;
     value_s.value.integer = value;
-    vpi_put_value(h, &value_s, 0, vpiNoDelay);
+    vpi_put_value(h, &value_s, &time_s, vpiInertialDelay);
   }
   // Handles
   vpiHandle clock_;
diff --git a/verilog/tvm_vpi.h b/verilog/tvm_vpi.h
index 5696438f371d..3925e8aedda4 100644
--- a/verilog/tvm_vpi.h
+++ b/verilog/tvm_vpi.h
@@ -12,10 +12,10 @@ namespace vpi {
 enum VPICallCode : int {
   kGetHandleByName,
   kGetHandleByIndex,
-  kGetName,
+  kGetStrProp,
+  kGetIntProp,
   kGetInt32,
   kPutInt32,
-  kGetSize,
   kGetVec,
   kPutVec,
   kYield,
@@ -28,6 +28,19 @@ enum VPIReturnCode : int {
   kFail = 2
 };
 
+// VPI type code as in IEEE standard.
+enum VPITypeCode {
+  kVPIModule = 32  // checked against vpiModule by a static_assert in verilog/tvm_vpi.cc
+};
+
+// VPI property code as in IEEE standard.
+enum VPIPropCode {
+  kVPIType = 1,      // checked against vpiType by a static_assert in verilog/tvm_vpi.cc
+  kVPIFullName = 3,  // checked against vpiFullName
+  kVPISize = 4,      // checked against vpiSize
+  kVPIDefName = 9    // checked against vpiDefName
+};
+
 /*! \brief The vector value used in trasmission */
 struct VPIVecVal {
   int aval;
diff --git a/verilog/tvm_vpi_ram.v b/verilog/tvm_vpi_ram.v
new file mode 100644
index 000000000000..5733ed10e9cc
--- /dev/null
+++ b/verilog/tvm_vpi_ram.v
@@ -0,0 +1,49 @@
+// Module to access TVM VPI simulated RAM.
+//
+// You only see the wires and registers here, but no logic.
+// The real computation is implemented via TVM VPI.
+//
+// Usage: create an instance and pass it as an additional argument to $tvm_session.
+// The RAM logic is then hooked up automatically.
+//
+module tvm_vpi_ram  // Declares ports and output registers only; contains no procedural code.
+  # ( parameter READ_WIDTH = 8,   // bit width of out_read_data
+      parameter WRITE_WIDTH = 8   // bit width of in_write_data
+      )
+   ( clk,
+     rst,
+     in_read_dequeue,
+     in_write_enable,
+     in_write_data,
+     ctrl_read_req,
+     ctrl_read_addr,
+     ctrl_read_size,
+     ctrl_write_req,
+     ctrl_write_addr,
+     ctrl_write_size,
+     out_read_data,
+     out_read_valid,
+     out_write_full
+     );
+   input clk;
+   input rst;
+   input in_read_dequeue;
+   input in_write_enable;
+   input [WRITE_WIDTH-1:0] in_write_data;
+   input                   ctrl_read_req;
+   input [31:0]            ctrl_read_addr;
+   input [31:0]            ctrl_read_size;
+   input                   ctrl_write_req;
+   input [31:0]            ctrl_write_addr;
+   input [31:0]            ctrl_write_size;
+   output [READ_WIDTH-1:0] out_read_data;
+   output                  out_read_valid;
+   output                  out_write_full;
+   reg [READ_WIDTH-1:0]    out_reg_read_data;   // the out_reg_* registers are never assigned in this module
+   reg                     out_reg_read_valid;
+   reg                     out_reg_write_full;
+   // Each output port continuously follows its out_reg_* register.
+   assign out_read_data = out_reg_read_data;
+   assign out_read_valid = out_reg_read_valid;
+   assign out_write_full = out_reg_write_full;
+endmodule