From 1f7314552530d976377a661434b48540673229fd Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 14 Mar 2017 23:12:36 -0700 Subject: [PATCH] [VERILOG] VPI RAM controler to access python memory. --- include/tvm/runtime/c_runtime_api.h | 7 + include/tvm/runtime/packed_func.h | 2 + python/tvm/__init__.py | 2 +- python/tvm/_ctypes/_ndarray.py | 13 +- python/tvm/addon/verilog.py | 14 +- python/tvm/ndarray.py | 2 +- src/codegen/verilog/vpi_device_api.cc | 319 ++++++++++++++++++ src/codegen/verilog/vpi_session.cc | 171 +++++----- src/codegen/verilog/vpi_session.h | 48 ++- src/pass/make_api.cc | 2 +- src/runtime/c_runtime_api.cc | 2 +- src/runtime/cpu_device_api.cc | 7 +- src/runtime/device_api.h | 1 + tests/python/unittest/test_runtime_ndarray.py | 5 +- tests/verilog/test_counter.py | 1 - tests/verilog/test_loop.py | 1 - tests/verilog/test_vpi_ram.py | 131 +++++++ tests/verilog/test_vpi_ram.v | 52 +++ verilog/tvm_vpi.cc | 58 ++-- verilog/tvm_vpi.h | 17 +- verilog/tvm_vpi_ram.v | 49 +++ 21 files changed, 785 insertions(+), 119 deletions(-) create mode 100644 src/codegen/verilog/vpi_device_api.cc create mode 100644 tests/verilog/test_vpi_ram.py create mode 100644 tests/verilog/test_vpi_ram.v create mode 100644 verilog/tvm_vpi_ram.v diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h index 9b10f16008c2..91f33d495cc8 100644 --- a/include/tvm/runtime/c_runtime_api.h +++ b/include/tvm/runtime/c_runtime_api.h @@ -38,6 +38,13 @@ TVM_EXTERN_C { /*! \brief type of array index. */ typedef int64_t tvm_index_t; + +/*! \brief Extension device types in TVM */ +typedef enum { + /*! \brief Simulated on board RAM */ + kVPI = 9 +} TVMDeviceExtType; + /*! * \brief The type code in TVMType * \note TVMType is used in two places. 
diff --git a/include/tvm/runtime/packed_func.h b/include/tvm/runtime/packed_func.h index 592b418e5918..0d5064dcff00 100644 --- a/include/tvm/runtime/packed_func.h +++ b/include/tvm/runtime/packed_func.h @@ -274,6 +274,7 @@ class TVMArgValue : public TVMPODValue_ { return value_.v_type; } operator PackedFunc() const { + if (type_code_ == kNull) return PackedFunc(); TVM_CHECK_TYPE_CODE(type_code_, kFuncHandle); return *ptr(); } @@ -350,6 +351,7 @@ class TVMRetValue : public TVMPODValue_ { return value_.v_type; } operator PackedFunc() const { + if (type_code_ == kNull) return PackedFunc(); TVM_CHECK_TYPE_CODE(type_code_, kFuncHandle); return *ptr(); } diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index 4e294bdafdf2..51cb4a179436 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -15,7 +15,7 @@ from . import module from . import ndarray as nd -from .ndarray import cpu, gpu, opencl, cl +from .ndarray import cpu, gpu, opencl, cl, vpi from ._base import TVMError from .api import * diff --git a/python/tvm/_ctypes/_ndarray.py b/python/tvm/_ctypes/_ndarray.py index 05f300ef4062..72ba6402c150 100644 --- a/python/tvm/_ctypes/_ndarray.py +++ b/python/tvm/_ctypes/_ndarray.py @@ -18,7 +18,8 @@ class TVMContext(ctypes.Structure): MASK2STR = { 1 : 'cpu', 2 : 'gpu', - 4 : 'opencl' + 4 : 'opencl', + 9 : 'vpi' } def __init__(self, device_id, device_type): super(TVMContext, self).__init__() @@ -76,6 +77,16 @@ def opencl(dev_id=0): """ return TVMContext(dev_id, 4) +def vpi(dev_id=0): + """Construct a VPI simulated device + + Parameters + ---------- + dev_id : int, optional + The integer device id + """ + return TVMContext(dev_id, 9) + def numpyasarray(np_data): """Return a TVMArray representation of a numpy array. 
diff --git a/python/tvm/addon/verilog.py b/python/tvm/addon/verilog.py index 1321c966e2fd..9ec36fa880ae 100644 --- a/python/tvm/addon/verilog.py +++ b/python/tvm/addon/verilog.py @@ -17,6 +17,7 @@ def __init__(self, handle): super(VPISession, self).__init__(handle) self.proc = None self.execpath = None + self.yield_callbacks = [] def __del__(self): self.proc.kill() @@ -47,6 +48,8 @@ def __getattr__(self, name): def yield_until_posedge(self): """Yield until next posedge""" + for f in self.yield_callbacks: + f() return _api_internal._vpi_SessYield(self) def shutdown(self): @@ -222,7 +225,16 @@ def session(file_name): env['TVM_HREAD_PIPE'] = str(read_host) env['TVM_HWRITE_PIPE'] = str(write_host) - proc = subprocess.Popen(cmd, env=env, close_fds=False) + try: + # close_fds does not work well for all python3 + # Use pass_fds instead. + # pylint: disable=unexpected-keyword-arg + pass_fds = (read_device, write_device, read_host, write_host) + proc = subprocess.Popen(cmd, pass_fds=pass_fds, env=env) + except TypeError: + # This is effective for python2 + proc = subprocess.Popen(cmd, close_fds=False, env=env) + # close device side pipe os.close(read_device) os.close(write_device) diff --git a/python/tvm/ndarray.py b/python/tvm/ndarray.py index 1d56e60b6ce9..4a88968143a8 100644 --- a/python/tvm/ndarray.py +++ b/python/tvm/ndarray.py @@ -7,7 +7,7 @@ import numpy as _np from ._ctypes._ndarray import TVMContext, TVMType, NDArrayBase -from ._ctypes._ndarray import cpu, gpu, opencl, empty, sync +from ._ctypes._ndarray import cpu, gpu, opencl, vpi, empty, sync from ._ctypes._ndarray import _init_ndarray_module from ._ctypes._function import Function diff --git a/src/codegen/verilog/vpi_device_api.cc b/src/codegen/verilog/vpi_device_api.cc new file mode 100644 index 000000000000..6471f2e1d9eb --- /dev/null +++ b/src/codegen/verilog/vpi_device_api.cc @@ -0,0 +1,319 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file vpi_device.cc + * \brief Simulated VPI RAM device. 
+ */ +#include +#include +#include +#include +#include +#include +#include "../../runtime/device_api.h" +#include "./vpi_session.h" + +namespace tvm { +namespace codegen { + +/*! \brief Simulated device ram */ +class VPIDeviceAPI : public runtime::DeviceAPI { + public: + VPIDeviceAPI() { + static const size_t kAllocAlign = 32U; + const char* s_ram_size = getenv("TVM_VPI_RAM_SIZE_MB"); + // default to 32 MB ram. + int ram_size = 32; + if (s_ram_size != nullptr) { + ram_size = atoi(s_ram_size); + } + ram_.resize(ram_size << 17); + ram_head_ = kAllocAlign; + ram_max_ = ram_.size() * sizeof(int64_t); + LOG(INFO) << "Initialize VPI simulated ram " << ram_size << "MB ..."; + } + // convert address to real address + void* RealAddr(const void* addr, size_t size) const { + int64_t ptr = reinterpret_cast(addr); + CHECK_LE(ptr + size, ram_max_) + << "VPI: Illegal memory access"; + return (char*)(&ram_[0]) + ptr; // NOLINT(*) + } + void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final { + static const size_t kAllocAlign = 32U; + // always align to 32 bytes at least. + CHECK_LE(alignment, kAllocAlign); + alignment = kAllocAlign; + // always allocate block with aligned size. + size += alignment - (size % alignment); + // This is not thread safe, but fine for simulation.
+ auto it = free_blocks_.lower_bound(size); + if (it != free_blocks_.end()) { + size_t head = it->second; + free_blocks_.erase(it); + Block& b = block_map_.at(head); + CHECK(b.is_free); + b.is_free = false; + return reinterpret_cast(head); + } else { + CHECK_EQ(ram_head_ % kAllocAlign, 0U); + Block b; + b.size = size; + b.is_free = false; + CHECK_LE(ram_head_ + size, ram_max_) + << "VPI: Out of memory"; + block_map_[ram_head_] = b; + void* ret = reinterpret_cast(ram_head_); + ram_head_ += size; + return ret; + } + } + void FreeDataSpace(TVMContext ctx, void* ptr) final { + size_t head = reinterpret_cast(ptr); + Block& b = block_map_.at(head); + b.is_free = true; + free_blocks_.insert({b.size, head}); + } + void CopyDataFromTo(const void* from, + void* to, + size_t size, + TVMContext ctx_from, + TVMContext ctx_to, + TVMStreamHandle stream) final { + if (static_cast(ctx_from.device_type) == kVPI) { + from = RealAddr(from, size); + } + if (static_cast(ctx_to.device_type) == kVPI) { + to = RealAddr(to, size); + } + memcpy(to, from, size); + } + void StreamSync(TVMContext ctx, TVMStreamHandle stream) final { + } + static VPIDeviceAPI* Global() { + static VPIDeviceAPI inst; + return &inst; + } + + private: + // allocator block for reuse + struct Block { + // The size of the block + size_t size; + // Whether this is already freed. + bool is_free{true}; + }; + // head -> blocks + std::unordered_map block_map_; + // size -> free heads. + std::multimap free_blocks_; + // top of the ram + size_t ram_head_, ram_max_; + // The ram space. + std::vector ram_; +}; + +/*! + * \brief Memory controller for VPI memory. + */ +class VPIMemoryController { + public: + // Initialize the FSM.
+ void Init(VPIHandle module) { + device_ = VPIDeviceAPI::Global(); + // initialize the connections + in_clk_ = module["clk"]; + in_rst_ = module["rst"]; + in_read_dequeue_ = module["in_read_dequeue"]; + in_write_enable_ = module["in_write_enable"]; + in_write_data_ = module["in_write_data"]; + // output registers + out_reg_read_data_ = module["out_reg_read_data"]; + out_reg_read_valid_ = module["out_reg_read_valid"]; + out_reg_write_full_ = module["out_reg_write_full"]; + // memory control signal + ctrl_read_req_ = module["ctrl_read_req"]; + ctrl_read_addr_ = module["ctrl_read_addr"]; + ctrl_read_size_ = module["ctrl_read_size"]; + ctrl_write_req_ = module["ctrl_write_req"]; + ctrl_write_addr_ = module["ctrl_write_addr"]; + ctrl_write_size_ = module["ctrl_write_size"]; + // The bit and bytes; + size_t read_bits = out_reg_read_data_.size(); + size_t write_bits = in_write_data_.size(); + CHECK_EQ(read_bits % 8U, 0) + << "Read/write unit have to be multiple of 8 bit(bytes)"; + CHECK_EQ(write_bits % 8U, 0) + << "Read/write unit have to be multiple of 8 bit(bytes)"; + read_unit_bytes_ = read_bits / 8U; + write_unit_bytes_ = write_bits / 8U; + } + // Callback at posedge.
+ void AtPosEedge() { + // reset + if (in_rst_.get_int()) { + CHECK_EQ(pending_read_.size, 0U); + CHECK_EQ(pending_write_.size, 0U); + CHECK(read_tasks_.empty()); + CHECK(write_tasks_.empty()); + out_reg_write_full_.put_int(1); + out_reg_read_valid_.put_int(0); + return; + } + // read write tasks + if (in_read_dequeue_.get_int() || !out_reg_read_valid_.get_int()) { + ReadFromFIFO(); + } + // update write full + if (in_write_enable_.get_int()) { + CHECK(!out_reg_write_full_.get_int()); + WriteToFIFO(); + } + if (pending_write_.size || write_tasks_.size()) { + out_reg_write_full_.put_int(0); + } else { + out_reg_write_full_.put_int(1); + } + // Control tasks + if (ctrl_read_req_.get_int()) { + FIFOTask tsk; + tsk.addr = reinterpret_cast(ctrl_read_addr_.get_int()); + tsk.size = static_cast(ctrl_read_size_.get_int()); + read_tasks_.push(tsk); + } + // Control tasks + if (ctrl_write_req_.get_int()) { + FIFOTask tsk; + tsk.addr = reinterpret_cast(ctrl_write_addr_.get_int()); + tsk.size = static_cast(ctrl_write_size_.get_int()); + write_tasks_.push(tsk); + } + } + + private: + // The FIFO tasks + struct FIFOTask { + char* addr{nullptr}; + size_t size{0}; + }; + // handle dequeue event + void ReadFromFIFO() { + if (pending_read_.size == 0) { + if (!read_tasks_.empty()) { + pending_read_ = read_tasks_.front(); + read_tasks_.pop(); + // translate to real memory addr + pending_read_.addr = static_cast( + device_->RealAddr( + pending_read_.addr, pending_read_.size)); + } + } + if (pending_read_.size != 0) { + // The size to be read + size_t nread = std::min(pending_read_.size, read_unit_bytes_); + // Read from the data + size_t nwords = (read_unit_bytes_ + 3) / 4; + wbuf_.resize(nwords); + vbuf_.resize(nwords); + memcpy(&wbuf_[0], pending_read_.addr, nread); + for (size_t i = 0; i < nwords; ++i) { + vbuf_[i].aval = wbuf_[i]; + vbuf_[i].bval = 0; + } + out_reg_read_data_.put_vec(vbuf_); + // Update the pointer + pending_read_.size -= nread; + pending_read_.addr += nread; + // 
read into the vector + out_reg_read_valid_.put_int(1); + } else { + out_reg_read_valid_.put_int(0); + } + } + // handle write event + void WriteToFIFO() { + if (pending_write_.size == 0) { + if (!write_tasks_.empty()) { + pending_write_ = write_tasks_.front(); + write_tasks_.pop(); + // translate to real memory addr + pending_write_.addr = static_cast( + device_->RealAddr( + pending_write_.addr, pending_write_.size)); + } + } + if (pending_write_.size != 0) { + // write to the ram. + size_t nwrite = std::min(pending_write_.size, write_unit_bytes_); + size_t nwords = (write_unit_bytes_ + 3) / 4; + in_write_data_.get_vec(&vbuf_); + CHECK_EQ(vbuf_.size(), nwords); + wbuf_.resize(nwords); + for (size_t i = 0; i < nwords; ++i) { + wbuf_[i] = vbuf_[i].aval; + CHECK_EQ(vbuf_[i].bval, 0) + << "Write indetermined value to RAM"; + } + memcpy(pending_write_.addr, &wbuf_[0], nwrite); + // Update the pointer + pending_write_.size -= nwrite; + pending_write_.addr += nwrite; + } + } + // Device API + VPIDeviceAPI* device_{nullptr}; + // Input clock and reset + VPIHandle in_clk_; + VPIHandle in_rst_; + // Read FIFO signal + VPIHandle in_read_dequeue_; + // Write FIFO signal + VPIHandle in_write_enable_; + VPIHandle in_write_data_; + // Read memory controler signals + VPIHandle ctrl_read_req_; + VPIHandle ctrl_read_addr_; + VPIHandle ctrl_read_size_; + // Write memory controler signal signals + VPIHandle ctrl_write_req_; + VPIHandle ctrl_write_addr_; + VPIHandle ctrl_write_size_; + // Read FIFO outputs + VPIHandle out_reg_read_data_; + VPIHandle out_reg_read_valid_; + // Write FIFO outputs + VPIHandle out_reg_write_full_; + // Size of current pending read. + FIFOTask pending_read_; + FIFOTask pending_write_; + // The read/write task queues. + std::queue read_tasks_; + std::queue write_tasks_; + // Unit bytes for read/writing + size_t read_unit_bytes_; + size_t write_unit_bytes_; + // Temporal buffers. 
+ std::vector wbuf_; + std::vector vbuf_; +}; + +TVM_REGISTER_GLOBAL(_device_api_vpi) +.set_body([](runtime::TVMArgs args, runtime::TVMRetValue* rv) { + runtime::DeviceAPI* ptr = VPIDeviceAPI::Global(); + *rv = static_cast(ptr); + }); + +TVM_REGISTER_GLOBAL(_vpi_module_tvm_vpi_ram) +.set_body([](runtime::TVMArgs args, runtime::TVMRetValue* rv) { + VPIHandle m = args[0]; + std::shared_ptr ctrl = + std::make_shared(); + ctrl->Init(m); + LOG(INFO) << "Hook up " << m.name() + << " to simulated memory controller..."; + PackedFunc pf([ctrl](const runtime::TVMArgs&, runtime::TVMRetValue*) { + ctrl->AtPosEedge(); + }); + *rv = pf; + }); +} // namespace codegen +} // namespace tvm diff --git a/src/codegen/verilog/vpi_session.cc b/src/codegen/verilog/vpi_session.cc index 14cc815debd1..7a3190b5beec 100644 --- a/src/codegen/verilog/vpi_session.cc +++ b/src/codegen/verilog/vpi_session.cc @@ -11,20 +11,19 @@ namespace codegen { using namespace vpi; -/*! \brief Container for session. */ -class VPISessionNode : public Node { +// helper class to get the node. +class VPISessionEntry { public: // Whether in control. bool in_control{false}; // Internal reader and writer. common::Pipe reader; common::Pipe writer; - // internal constructor - VPISessionNode(int h_pipe_read, int h_pipe_write) + VPISessionEntry(int h_pipe_read, int h_pipe_write) : reader(h_pipe_read), writer(h_pipe_write) { } - ~VPISessionNode() { + ~VPISessionEntry() { if (in_control) { VPIReturnCode cd; writer.Write(kShutDown); @@ -33,40 +32,11 @@ class VPISessionNode : public Node { reader.Close(); writer.Close(); } - // visit all attributes - void VisitAttrs(AttrVisitor* v) final { - } void ReadExpect(VPIReturnCode rcode) { VPIReturnCode code; CHECK(reader.Read(&code)); CHECK_EQ(code, rcode) << "Error in simulation"; } - - static constexpr const char* _type_key = "VPISession"; - TVM_DECLARE_NODE_TYPE_INFO(VPISessionNode, Node); -}; - -/*! 
\brief Container for handle */ -class VPIHandleNode : public Node { - public: - // The internal session. - VPISession sess; - // Internal handle - VPIRawHandle handle; - - void VisitAttrs(AttrVisitor* v) final { - v->Visit("sess", &sess); - } - static VPIHandle make(const VPISession& sess, VPIRawHandle handle) { - std::shared_ptr n = - std::make_shared(); - n->sess = sess; - n->handle = handle; - return VPIHandle(n); - } - - static constexpr const char* _type_key = "VPIHandle"; - TVM_DECLARE_NODE_TYPE_INFO(VPIHandleNode, Node); }; // Inline implementations @@ -77,34 +47,99 @@ inline VPIHandleNode* VPIHandle::get() const { return static_cast(node_.get()); } -VPISession VPISession::make(int h_pipe_read, int h_pipe_write) { - std::shared_ptr n = std::make_shared( - h_pipe_read, h_pipe_write); - n->ReadExpect(kPosEdgeTrigger); - n->in_control = true; - return VPISession(n); +VPIHandle VPIHandleCreate( + const std::shared_ptr& sess, + VPIRawHandle handle) { + std::shared_ptr n = std::make_shared(); + n->sess = sess; + n->handle = handle; + return VPIHandle(n); } -VPIHandle VPISession::operator[](const std::string& name) const { - return GetByName(name, nullptr); -} - -VPIHandle VPISession::GetByName(const std::string& name, VPIRawHandle handle) const { - VPISessionNode* n = get(); +VPIHandle GetHandleByName( + const std::shared_ptr& sess, + const std::string& name, + VPIRawHandle handle, + bool allow_undefined) { + VPISessionEntry* n = sess.get(); CHECK(n->in_control); n->writer.Write(kGetHandleByName); n->writer.Write(name); n->writer.Write(handle); n->ReadExpect(kSuccess); CHECK(n->reader.Read(&handle)); - CHECK(handle != nullptr) - << "Cannot find handle with name=" << name; - return VPIHandleNode::make(*this, handle); + if (handle != nullptr) { + return VPIHandleCreate(sess, handle); + } else { + CHECK(allow_undefined) + << "Cannot find handle with name=" << name; + return VPIHandle(); + } +} + +std::string VPIGetStrProp(VPIHandleNode* h, int code) { + 
+ VPISessionEntry* n = h->sess.get(); + CHECK(n->in_control); + n->writer.Write(kGetStrProp); + n->writer.Write(code); + n->writer.Write(h->handle); + n->ReadExpect(kSuccess); + std::string str; + CHECK(n->reader.Read(&str)); + return str; +} + +int VPIGetIntProp(VPIHandleNode* h, int code) { + VPISessionEntry* n = h->sess.get(); + CHECK(n->in_control); + n->writer.Write(kGetIntProp); + n->writer.Write(code); + n->writer.Write(h->handle); + n->ReadExpect(kSuccess); + int value; + CHECK(n->reader.Read(&value)); + return value; +} + +VPISession VPISession::make(int h_pipe_read, int h_pipe_write) { + std::shared_ptr n = std::make_shared(); + n->sess = std::make_shared(h_pipe_read, h_pipe_write); + n->sess->in_control = true; + VPISession sess(n); + // The custom module handles + std::vector mod_handles; + n->sess->reader.Read(&mod_handles); + n->sess->ReadExpect(kPosEdgeTrigger); + // start Initialize the callbacks + for (VPIRawHandle raw_h : mod_handles) { + VPIHandle h = VPIHandleCreate(n->sess, raw_h); + CHECK_EQ(VPIGetIntProp(h.get(), kVPIType), kVPIModule) + << "Expect pass modules to $tvm_session after clk"; + std::string def = VPIGetStrProp(h.get(), kVPIDefName); + std::string callback_name = "_vpi_module_" + def; + const PackedFunc* f = runtime::Registry::Get(callback_name); + CHECK(f != nullptr) + << "Cannot find definition for tvm vpi module " << def; + PackedFunc cb = (*f)(h); + n->posedge_end_callbacks.push_back(cb); + } + return sess; +} + +VPIHandle VPISession::operator[](const std::string& name) const { + return GetHandleByName(get()->sess, name, nullptr, false); +} +VPIHandle VPISession::GetByName(const std::string& name, + bool allow_undefined) const { + return GetHandleByName(get()->sess, name, nullptr, allow_undefined); } void VPISession::yield() { - VPISessionNode* n = get(); + VPISessionEntry* n = get()->sess.get(); CHECK(n->in_control); + for (const PackedFunc& f : get()->posedge_end_callbacks) { + f(); + } n->writer.Write(kYield); n->ReadExpect(kSuccess);
n->in_control = false; @@ -113,7 +148,7 @@ void VPISession::yield() { } void VPISession::shutdown() { - VPISessionNode* n = get(); + VPISessionEntry* n = get()->sess.get(); if (n->in_control) { n->writer.Write(kShutDown); n->ReadExpect(kSuccess); @@ -122,20 +157,12 @@ void VPISession::shutdown() { } int VPIHandle::size() const { - VPIHandleNode* h = get(); - VPISessionNode* n = h->sess.get(); - CHECK(n->in_control); - n->writer.Write(kGetSize); - n->writer.Write(h->handle); - n->ReadExpect(kSuccess); - int value; - CHECK(n->reader.Read(&value)); - return value; + return VPIGetIntProp(get(), kVPISize); } void VPIHandle::put_int(int value) { VPIHandleNode* h = get(); - VPISessionNode* n = h->sess.get(); + VPISessionEntry* n = h->sess.get(); CHECK(n->in_control); n->writer.Write(kPutInt32); n->writer.Write(h->handle); @@ -145,7 +172,7 @@ void VPIHandle::put_int(int value) { int VPIHandle::get_int() const { VPIHandleNode* h = get(); - VPISessionNode* n = h->sess.get(); + VPISessionEntry* n = h->sess.get(); CHECK(n->in_control); n->writer.Write(kGetInt32); n->writer.Write(h->handle); @@ -156,20 +183,12 @@ int VPIHandle::get_int() const { } std::string VPIHandle::name() const { - VPIHandleNode* h = get(); - VPISessionNode* n = h->sess.get(); - CHECK(n->in_control); - n->writer.Write(kGetName); - n->writer.Write(h->handle); - n->ReadExpect(kSuccess); - std::string str; - CHECK(n->reader.Read(&str)); - return str; + return VPIGetStrProp(get(), kVPIFullName); } void VPIHandle::put_vec(const std::vector& vec) const { VPIHandleNode* h = get(); - VPISessionNode* n = h->sess.get(); + VPISessionEntry* n = h->sess.get(); CHECK(n->in_control); n->writer.Write(kPutVec); n->writer.Write(h->handle); @@ -179,17 +198,17 @@ void VPIHandle::put_vec(const std::vector& vec) const { void VPIHandle::get_vec(std::vector* vec) const { VPIHandleNode* h = get(); - VPISessionNode* n = h->sess.get(); + VPISessionEntry* n = h->sess.get(); CHECK(n->in_control); - n->writer.Write(kPutVec); + 
n->writer.Write(kGetVec); n->writer.Write(h->handle); n->ReadExpect(kSuccess); - CHECK(n->reader.Read(&vec)); + CHECK(n->reader.Read(vec)); } VPIHandle VPIHandle::operator[](const std::string& name) const { VPIHandleNode* h = get(); - return h->sess.GetByName(name, h->handle); + return GetHandleByName(h->sess, name, h->handle, false); } // API registration diff --git a/src/codegen/verilog/vpi_session.h b/src/codegen/verilog/vpi_session.h index bc648adf7cba..88a7f2f1906e 100644 --- a/src/codegen/verilog/vpi_session.h +++ b/src/codegen/verilog/vpi_session.h @@ -14,10 +14,14 @@ namespace tvm { namespace codegen { + // node containers class VPISessionNode; class VPIHandleNode; class VPIHandle; +class VPISessionEntry; + +using runtime::PackedFunc; /*! \brief Environment */ class VPISession : public NodeRef { @@ -29,6 +33,12 @@ class VPISession : public NodeRef { * \param name The name of the handle. */ VPIHandle operator[](const std::string& name) const; + /*! + * \brief Get handle by name. + * \param name The name of the handle. + * \param allow_undefined whether allow undefined + */ + VPIHandle GetByName(const std::string& name, bool allow_undefined) const; /*! * \brief Yield control back to the simulator * Block until next cycle. @@ -46,12 +56,7 @@ class VPISession : public NodeRef { static VPISession make(int h_pipe_read, int h_pipe_write); // Internal methods. using ContainerType = VPISessionNode; - - private: - friend class VPIHandle; inline VPISessionNode* get() const; - // Get handle by name - VPIHandle GetByName(const std::string& name, vpi::VPIRawHandle handle) const; }; /*! \brief VPI Handle */ @@ -91,10 +96,39 @@ class VPIHandle : public NodeRef { void get_vec(std::vector* vec) const; // Internal methods using ContainerType = VPIHandleNode; - - private: inline VPIHandleNode* get() const; }; + +/*! \brief Container for session. */ +class VPISessionNode : public Node { + public: + // internal session. + std::shared_ptr sess; + // callbacks at pos edge end. 
+ std::vector posedge_end_callbacks; + + // visit all attributes + void VisitAttrs(AttrVisitor* v) final { + } + static constexpr const char* _type_key = "VPISession"; + TVM_DECLARE_NODE_TYPE_INFO(VPISessionNode, Node); +}; + +/*! \brief Container for handle */ +class VPIHandleNode : public Node { + public: + // internal session. + std::shared_ptr sess; + // Internal handle + vpi::VPIRawHandle handle; + + void VisitAttrs(AttrVisitor* v) final { + } + + static constexpr const char* _type_key = "VPIHandle"; + TVM_DECLARE_NODE_TYPE_INFO(VPIHandleNode, Node); +}; + } // namespace codegen } // namespace tvm #endif // TVM_CODEGEN_VERILOG_VPI_SESSION_H_ diff --git a/src/pass/make_api.cc b/src/pass/make_api.cc index ead3d38d8aec..33c2a93615fa 100644 --- a/src/pass/make_api.cc +++ b/src/pass/make_api.cc @@ -106,7 +106,7 @@ LoweredFunc MakeAPI(Stmt body, for (int i = 0; i < static_cast(api_args.size()); ++i) { Var v_arg = f_arg_decl(i); - if (i < static_cast(num_packed_args)) { + if (i < num_packed_args) { seq_init.emplace_back(LetStmt::make( v_arg, f_arg_value(v_arg.type(), i), nop)); } else { diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc index 63a324027294..c0b876f772ff 100644 --- a/src/runtime/c_runtime_api.cc +++ b/src/runtime/c_runtime_api.cc @@ -23,7 +23,7 @@ namespace runtime { class DeviceAPIManager { public: - static const int kMaxDeviceAPI = 16; + static const int kMaxDeviceAPI = 32; // Get API static DeviceAPI* Get(TVMContext ctx) { return Global()->GetAPI(ctx.device_type); diff --git a/src/runtime/cpu_device_api.cc b/src/runtime/cpu_device_api.cc index 3204c4e27f5e..7cb29bd2e99f 100644 --- a/src/runtime/cpu_device_api.cc +++ b/src/runtime/cpu_device_api.cc @@ -1,11 +1,7 @@ /*! 
* Copyright (c) 2016 by Contributors - * \file device_api_gpu.h - * \brief GPU specific API + * \file cpu_device_api.cc */ -#ifndef TVM_RUNTIME_DEVICE_API_CPU_H_ -#define TVM_RUNTIME_DEVICE_API_CPU_H_ - #include #include #include @@ -58,4 +54,3 @@ TVM_REGISTER_GLOBAL(_device_api_cpu) }); } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_DEVICE_API_CPU_H_ diff --git a/src/runtime/device_api.h b/src/runtime/device_api.h index 9d626075d484..699655b0c4b3 100644 --- a/src/runtime/device_api.h +++ b/src/runtime/device_api.h @@ -64,6 +64,7 @@ inline std::string DeviceName(DLDeviceType type) { case kCPU: return "cpu"; case kGPU: return "gpu"; case kOpenCL: return "opencl"; + case kVPI: return "vpi"; default: LOG(FATAL) << "unknown type =" << type; return "Unknown"; } } diff --git a/tests/python/unittest/test_runtime_ndarray.py b/tests/python/unittest/test_runtime_ndarray.py index 1c3e5f70d178..0dc6e3f1f93d 100644 --- a/tests/python/unittest/test_runtime_ndarray.py +++ b/tests/python/unittest/test_runtime_ndarray.py @@ -5,7 +5,10 @@ def enabled_ctx_list(): if tvm.module.enabled("opencl"): tvm.module.init_opencl() - ctx_list = [('cpu', tvm.cpu(0)), ('gpu', tvm.gpu(0)), ('cl', tvm.opencl(0))] + ctx_list = [('cpu', tvm.cpu(0)), + ('gpu', tvm.gpu(0)), + ('cl', tvm.opencl(0)), + ('cpu', tvm.vpi(0))] ctx_list = [x[1] for x in ctx_list if tvm.module.enabled(x[0])] return ctx_list diff --git a/tests/verilog/test_counter.py b/tests/verilog/test_counter.py index 795b2d691a24..340e27afcf26 100644 --- a/tests/verilog/test_counter.py +++ b/tests/verilog/test_counter.py @@ -1,5 +1,4 @@ import tvm -import os from tvm.addon import verilog def test_counter(): diff --git a/tests/verilog/test_loop.py b/tests/verilog/test_loop.py index 2ba19f771cc4..65a7ffaa1651 100644 --- a/tests/verilog/test_loop.py +++ b/tests/verilog/test_loop.py @@ -1,5 +1,4 @@ import tvm -import os from tvm.addon import verilog def test_loop(): diff --git a/tests/verilog/test_vpi_ram.py 
b/tests/verilog/test_vpi_ram.py new file mode 100644 index 000000000000..43160ef315f4 --- /dev/null +++ b/tests/verilog/test_vpi_ram.py @@ -0,0 +1,131 @@ +import tvm +import numpy as np +from tvm.addon import verilog + +class FIFOReader(object): + """Auxiliary class to read from FIFO """ + def __init__(self, read_data, read_valid): + self.read_data = read_data + self.read_valid = read_valid + self.data = [] + + def __call__(self): + if self.read_valid.get_int(): + self.data.append(self.read_data.get_int()) + +class FIFOWriter(object): + """Auxiliary class to write to FIFO """ + def __init__(self, write_data, write_enable, write_full, data): + self.write_data = write_data + self.write_enable = write_enable + self.write_full = write_full + self.data = data + + def __call__(self): + if self.data and not self.write_full.get_int(): + self.write_enable.put_int(1) + self.write_data.put_int(int(self.data[0])) + del self.data[0] + else: + self.write_enable.put_int(0) + + +def test_ram_read(): + n = 10 + # context for VPI RAM + ctx = tvm.vpi(0) + a_np = np.arange(n).astype('int8') + a = tvm.nd.array(a_np, ctx) + + # head ptr of a + a_ptr = int(a.handle[0].data) + sess = verilog.session([ + verilog.find_file("test_vpi_ram.v"), + verilog.find_file("tvm_vpi_ram.v") + ]) + rst = sess.main.rst + read_data = sess.main.read_data + read_valid = sess.main.read_valid + read_dequeue = sess.main.read_dequeue + ctrl_read_req = sess.main.ctrl_read_req + ctrl_read_addr = sess.main.ctrl_read_addr + ctrl_read_size = sess.main.ctrl_read_size + rst.put_int(1) + sess.yield_until_posedge() + rst.put_int(0) + # hook up reader + reader = FIFOReader(read_data, read_valid) + sess.yield_callbacks.append(reader) + # request read + ctrl_read_req.put_int(1) + ctrl_read_addr.put_int(a_ptr) + ctrl_read_size.put_int(a.shape[0]) + + sess.yield_until_posedge() + # second read request + ctrl_read_addr.put_int(a_ptr + 2) + ctrl_read_size.put_int(a.shape[0] - 2) + + sess.yield_until_posedge() + 
+ ctrl_read_req.put_int(0) + read_dequeue.put_int(1) + + # yield until read is done + for i in range(a.shape[0] * 2): + sess.yield_until_posedge() + # check if result matches + r = np.concatenate((a_np, a_np[2:])) + np.testing.assert_equal(np.array(reader.data), r) + + +def test_ram_write(): + n = 10 + # write starting from this offset + offset = 2 + # context for VPI RAM + ctx = tvm.vpi(0) + a_np = np.zeros(n).astype('int8') + a = tvm.nd.array(a_np, ctx) + w_data = list(range(2, n)) + r_data = np.array(w_data, dtype='int8') + + # head ptr of a + a_ptr = int(a.handle[0].data) + + sess = verilog.session([ + verilog.find_file("test_vpi_ram.v"), + verilog.find_file("tvm_vpi_ram.v") + ]) + rst = sess.main.rst + write_data = sess.main.write_data + write_enable = sess.main.write_enable + write_full = sess.main.write_full + ctrl_write_req = sess.main.ctrl_write_req + ctrl_write_addr = sess.main.ctrl_write_addr + ctrl_write_size = sess.main.ctrl_write_size + + rst.put_int(1) + sess.yield_until_posedge() + rst.put_int(0) + # hook up writer + writer = FIFOWriter(write_data, write_enable, write_full, w_data) + + sess.yield_callbacks.append(writer) + # request write + ctrl_write_req.put_int(1) + ctrl_write_addr.put_int(a_ptr + offset) + ctrl_write_size.put_int(a.shape[0] - offset) + + sess.yield_until_posedge() + ctrl_write_req.put_int(0) + + # yield until write is done + for i in range(a.shape[0]+2): + sess.yield_until_posedge() + + # check if result matches + np.testing.assert_equal(a.asnumpy()[2:],r_data) + + +if __name__ == "__main__": + test_ram_write() diff --git a/tests/verilog/test_vpi_ram.v b/tests/verilog/test_vpi_ram.v new file mode 100644 index 000000000000..8398d3dabd36 --- /dev/null +++ b/tests/verilog/test_vpi_ram.v @@ -0,0 +1,52 @@ +`include "tvm_marcos.v" + +module main(); + parameter PER = 10; + parameter WIDTH = 8; + reg clk; + reg rst; + reg read_dequeue; + reg write_enable; + reg [WIDTH-1:0] write_data; + reg ctrl_read_req; + reg [31:0] ctrl_read_addr; + reg [31:0]
ctrl_read_size; + reg ctrl_write_req; + reg [31:0] ctrl_write_addr; + reg [31:0] ctrl_write_size; + wire [WIDTH-1:0] read_data; + wire read_valid; + wire write_full; + + + always begin + #(PER/2) clk =~ clk; + end + + tvm_vpi_ram # + ( + .READ_WIDTH(WIDTH), + .WRITE_WIDTH(WIDTH)) + myram + ( + .clk(clk), + .rst(rst), + .in_read_dequeue(read_dequeue), + .in_write_enable(write_enable), + .in_write_data(write_data), + .ctrl_read_req(ctrl_read_req), + .ctrl_read_addr(ctrl_read_addr), + .ctrl_read_size(ctrl_read_size), + .ctrl_write_req(ctrl_write_req), + .ctrl_write_addr(ctrl_write_addr), + .ctrl_write_size(ctrl_write_size), + .out_read_data(read_data), + .out_read_valid(read_valid), + .out_write_full(write_full) + ); + + initial begin + // pass myram to session to hook it up with simulation + $tvm_session(clk, myram); + end +endmodule diff --git a/verilog/tvm_vpi.cc b/verilog/tvm_vpi.cc index d1a41e0bdd2d..3c654b095fb3 100644 --- a/verilog/tvm_vpi.cc +++ b/verilog/tvm_vpi.cc @@ -13,8 +13,17 @@ namespace tvm { namespace vpi { +// standard consistency checks static_assert(sizeof(vpiHandle) == sizeof(VPIRawHandle), - "VPI handle condition"); + "VPI standard"); +// type codes +static_assert(vpiModule == kVPIModule, "VPI standard"); +// Property code +static_assert(vpiType == kVPIType, "VPI standard"); +static_assert(vpiFullName == kVPIFullName, "VPI standard"); +static_assert(vpiSize == kVPISize, "VPI standard"); +static_assert(vpiDefName == kVPIDefName, "VPI standard"); + // IPC client for VPI class IPCClient { public: @@ -26,8 +35,11 @@ class IPCClient { vpiHandle argv = vpi_handle(vpiSysTfCall, 0); vpiHandle arg_iter = vpi_iterate(vpiArgument, argv); clock_ = vpi_scan(arg_iter); - CHECK(vpi_scan(arg_iter) == nullptr) - << "tvm_session can only take in one clock"; + std::vector handles; + while (vpiHandle h = vpi_scan(arg_iter)) { + handles.push_back(h); + } + writer_.Write(handles); PutInt(clock_, 0); } int Callback() { @@ -74,12 +86,21 @@ class IPCClient { 
writer_.Write(handle); break; } - case kGetName: { + case kGetStrProp: { + CHECK(reader_.Read(&value)); + CHECK(reader_.Read(&handle)); + std::string prop = vpi_get_str( + value, static_cast(handle)); + writer_.Write(kSuccess); + writer_.Write(prop); + break; + } + case kGetIntProp: { + CHECK(reader_.Read(&value)); CHECK(reader_.Read(&handle)); - std::string name = vpi_get_str( - vpiFullName, static_cast(handle)); + value = vpi_get(value, static_cast(handle)); writer_.Write(kSuccess); - writer_.Write(name); + writer_.Write(value); break; } case kGetInt32: { @@ -97,13 +118,6 @@ class IPCClient { writer_.Write(kSuccess); break; } - case kGetSize: { - CHECK(reader_.Read(&handle)); - value = vpi_get(vpiSize, static_cast(handle)); - writer_.Write(kSuccess); - writer_.Write(value); - break; - } case kGetVec: { CHECK(reader_.Read(&handle)); vpiHandle h = static_cast(handle); @@ -126,17 +140,19 @@ class IPCClient { CHECK(reader_.Read(&vec_buf_)); CHECK(handle != clock_) << "Cannot write to clock"; vpiHandle h = static_cast(handle); - size_t nwords = vec_buf_.size(); - svec_buf_.resize(nwords); - reader_.Read(&vec_buf_[0], nwords * sizeof(s_vpi_vecval)); + svec_buf_.resize(vec_buf_.size()); for (size_t i = 0; i < vec_buf_.size(); ++i) { svec_buf_[i].aval = vec_buf_[i].aval; svec_buf_[i].bval = vec_buf_[i].bval; } s_vpi_value value_s; + s_vpi_time time_s; + time_s.type = vpiSimTime; + time_s.high = 0; + time_s.low = 0; value_s.format = vpiVectorVal; value_s.value.vector = &svec_buf_[0]; - vpi_put_value(h, &value_s, 0, vpiNoDelay); + vpi_put_value(h, &value_s, &time_s, vpiInertialDelay); writer_.Write(kSuccess); break; } @@ -183,9 +199,13 @@ class IPCClient { // Put integer into handle. 
static void PutInt(vpiHandle h, int value) {
     s_vpi_value value_s;
+    s_vpi_time time_s;  // time argument for vpi_put_value: vpiSimTime with a zero value
+    time_s.type = vpiSimTime;
+    time_s.high = 0;
+    time_s.low = 0;
     value_s.format = vpiIntVal;
     value_s.value.integer = value;
-    vpi_put_value(h, &value_s, 0, vpiNoDelay);
+    vpi_put_value(h, &value_s, &time_s, vpiInertialDelay);
   }
   // Handles
   vpiHandle clock_;
diff --git a/verilog/tvm_vpi.h b/verilog/tvm_vpi.h
index 5696438f371d..3925e8aedda4 100644
--- a/verilog/tvm_vpi.h
+++ b/verilog/tvm_vpi.h
@@ -12,10 +12,10 @@ namespace vpi {
 enum VPICallCode : int {
   kGetHandleByName,
   kGetHandleByIndex,
-  kGetName,
+  kGetStrProp,  // string property lookup; the request carries a property code and a handle
+  kGetIntProp,  // integer property lookup; the request carries a property code and a handle
   kGetInt32,
   kPutInt32,
-  kGetSize,
   kGetVec,
   kPutVec,
   kYield,
@@ -28,6 +28,19 @@ enum VPIReturnCode : int {
   kFail = 2
 };
+// VPI object type codes, numbered as in the IEEE standard.
+enum VPITypeCode {
+  kVPIModule = 32  // checked against vpiModule by a static_assert in tvm_vpi.cc
+};
+
+// VPI property codes, numbered as in the IEEE standard.
+enum VPIPropCode {
+  kVPIType = 1,      // checked against vpiType in tvm_vpi.cc
+  kVPIFullName = 3,  // checked against vpiFullName in tvm_vpi.cc
+  kVPISize = 4,      // checked against vpiSize in tvm_vpi.cc
+  kVPIDefName = 9    // checked against vpiDefName in tvm_vpi.cc
+};
+
 /*! \brief The vector value used in trasmission */
 struct VPIVecVal {
   int aval;
diff --git a/verilog/tvm_vpi_ram.v b/verilog/tvm_vpi_ram.v
new file mode 100644
index 000000000000..5733ed10e9cc
--- /dev/null
+++ b/verilog/tvm_vpi_ram.v
@@ -0,0 +1,49 @@
+// Module to access TVM VPI simulated RAM.
+//
+// This module contains only wires and registers, no logic.
+// The real computation is implemented via TVM VPI.
+//
+// Usage: create an instance and pass it as an additional argument of $tvm_session.
+// The RAM logic is then hooked up automatically.
+//
+module tvm_vpi_ram
+  # ( parameter READ_WIDTH = 8,   // bit width of out_read_data
+      parameter WRITE_WIDTH = 8   // bit width of in_write_data
+      )
+   ( clk,
+     rst,
+     in_read_dequeue,
+     in_write_enable,
+     in_write_data,
+     ctrl_read_req,
+     ctrl_read_addr,
+     ctrl_read_size,
+     ctrl_write_req,
+     ctrl_write_addr,
+     ctrl_write_size,
+     out_read_data,
+     out_read_valid,
+     out_write_full
+     );
+   input clk;
+   input rst;
+   input in_read_dequeue;  // no logic in this module reads the inputs; presumably sampled via VPI - confirm
+   input in_write_enable;
+   input [WRITE_WIDTH-1:0] in_write_data;
+   input ctrl_read_req;
+   input [31:0] ctrl_read_addr;  // 32-bit; addressing unit is not shown here - confirm against the VPI side
+   input [31:0] ctrl_read_size;
+   input ctrl_write_req;
+   input [31:0] ctrl_write_addr;
+   input [31:0] ctrl_write_size;
+   output [READ_WIDTH-1:0] out_read_data;
+   output out_read_valid;
+   output out_write_full;
+   reg [READ_WIDTH-1:0] out_reg_read_data;  // regs behind the outputs; no procedural code in this module writes them
+   reg out_reg_read_valid;
+   reg out_reg_write_full;
+   // Continuously drive each output port from its backing reg.
+   assign out_read_data = out_reg_read_data;
+   assign out_read_valid = out_reg_read_valid;
+   assign out_write_full = out_reg_write_full;
+endmodule