-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Add Hexagon VTCM and discontiguous allocation support #9525
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9bb3da5
7122d2b
0d68507
387d358
43176a6
fba53c5
2cea7b5
643e9ad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,83 +23,151 @@ | |
|
|
||
| #include <tvm/runtime/module.h> | ||
|
|
||
| #include "hexagon_common.h" | ||
|
|
||
| #if defined(__hexagon__) | ||
| #include "HAP_compute_res.h" | ||
| #endif | ||
|
|
||
| #include <string> | ||
| #include <utility> | ||
|
|
||
| #include "hexagon_common.h" | ||
|
|
||
| namespace tvm { | ||
| namespace runtime { | ||
| namespace hexagon { | ||
|
|
||
| static size_t GetDataAlignment(const DLDataType dtype) { | ||
| size_t align = (dtype.bits / 8) * dtype.lanes; | ||
| if (align < kAllocAlignment) return kAllocAlignment; | ||
| return align; | ||
| } | ||
| struct Allocation { | ||
| Allocation(size_t nbytes, size_t alignment) : nbytes_(nbytes), alignment_(alignment) {} | ||
| virtual ~Allocation() {} | ||
| Allocation(const Allocation&) = delete; | ||
| Allocation& operator=(const Allocation&) = delete; | ||
| Allocation(Allocation&&) = delete; | ||
| Allocation& operator=(Allocation&&) = delete; | ||
|
|
||
| HexagonBuffer::HexagonBuffer(int ndim, const int64_t* shape, DLDataType dtype, | ||
| Optional<String> scope) { | ||
| // TODO(csullivan): Re-enable check on ndim <= 2 when physical layout support | ||
| // in MakePackedAPI is added. | ||
| // ICHECK_LE(ndim, 1) << "Hexagon currently only supports flat allocations " | ||
| // << "and arrays of flat allocations."; | ||
|
|
||
| DLTensor t; | ||
| t.shape = const_cast<int64_t*>(shape); | ||
| t.ndim = ndim; | ||
| t.dtype = dtype; | ||
| size_t nbytes = GetDataSize(t); | ||
| size_t alignment = GetDataAlignment(dtype); | ||
| // TODO(csullivan): Extend to support arrays of allocations. | ||
| // Move assignment from r-value constructed flat allocation. | ||
| *this = HexagonBuffer(nbytes, alignment, scope); | ||
| } | ||
| void* data_{nullptr}; | ||
| size_t nbytes_; | ||
| size_t alignment_; | ||
| }; | ||
|
|
||
| HexagonBuffer::HexagonBuffer(size_t nbytes, size_t alignment, Optional<String> scope) { | ||
| void* ptr = nullptr; | ||
| int ret = posix_memalign(&ptr, alignment, nbytes); | ||
| if (ret != 0) { | ||
| throw std::bad_alloc(); | ||
| struct DDRAllocation : public Allocation { | ||
| DDRAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) { | ||
| #ifdef _WIN32 | ||
| data_ = _aligned_malloc(nbytes, alignment); | ||
| CHECK(data_ != nullptr); | ||
| #else | ||
| int ret = posix_memalign(&data_, alignment, nbytes); | ||
| CHECK_EQ(ret, 0); | ||
| #endif | ||
| } | ||
| allocations_.push_back(ptr); | ||
| SetStorageScope(scope); | ||
| ~DDRAllocation() { | ||
| #ifdef _WIN32 | ||
| _aligned_free(data_); | ||
| #else | ||
| free(data_); | ||
| #endif | ||
| } | ||
| }; | ||
|
|
||
| #if defined(__hexagon__) | ||
| struct VTCMAllocation : public Allocation { | ||
| VTCMAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) { | ||
| compute_res_attr_t res_info; | ||
| HEXAGON_SAFE_CALL(HAP_compute_res_attr_init(&res_info)); | ||
|
|
||
| // allocate nbytes of vtcm on a single page | ||
| HEXAGON_SAFE_CALL(HAP_compute_res_attr_set_vtcm_param(&res_info, /*vtcm_size = */ nbytes, | ||
| /*b_single_page = */ 1)); | ||
| context_id_ = HAP_compute_res_acquire(&res_info, /*timeout = */ 10000); | ||
|
|
||
| if (context_id_) { | ||
| data_ = HAP_compute_res_attr_get_vtcm_ptr(&res_info); | ||
| if (!data_) { | ||
| HEXAGON_PRINT(ERROR, "ERROR: Allocated VTCM ptr is null."); | ||
| HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_)); | ||
| return; | ||
| } | ||
| } else { | ||
| HEXAGON_PRINT(ERROR, "ERROR: Unable to acquire requeisted resource."); | ||
| return; | ||
| } | ||
| // HEXAGON_PRINT(ALWAYS, "VTCMAllocation() - Context ID: %u, VTCM ptr: %p", context_id_, data_); | ||
| } | ||
| ~VTCMAllocation() { | ||
| // HEXAGON_PRINT(ALWAYS, "~VTCMAllocation() - Context ID: %u, VTCM ptr: %p", context_id_, | ||
| // data_); | ||
| HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_)); | ||
| data_ = nullptr; | ||
| } | ||
| unsigned int context_id_{0}; | ||
| }; | ||
| #else | ||
| struct VTCMAllocation : public DDRAllocation { | ||
| VTCMAllocation(size_t nbytes, size_t alignment) : DDRAllocation(nbytes, alignment) {} | ||
| }; | ||
| #endif | ||
|
|
||
| template <HexagonBuffer::StorageScope S> | ||
| std::unique_ptr<Allocation> Allocator(size_t nbytes, size_t alignment); | ||
|
|
||
| template <> | ||
| std::unique_ptr<Allocation> Allocator<HexagonBuffer::StorageScope::kDDR>(size_t nbytes, | ||
| size_t alignment) { | ||
| return std::make_unique<DDRAllocation>(nbytes, alignment); | ||
| } | ||
|
|
||
| HexagonBuffer::HexagonBuffer(void* data, Optional<String> scope) : managed_{false} { | ||
| template <> | ||
| std::unique_ptr<Allocation> Allocator<HexagonBuffer::StorageScope::kVTCM>(size_t nbytes, | ||
| size_t alignment) { | ||
| return std::make_unique<VTCMAllocation>(nbytes, alignment); | ||
| } | ||
|
|
||
| HexagonBuffer::HexagonBuffer(size_t nbytes, size_t alignment, Optional<String> scope) | ||
| : nallocs_(1), nbytes_(nbytes) { | ||
| SetStorageScope(scope); | ||
| allocations_.push_back(data); | ||
|
|
||
| std::unique_ptr<Allocation> alloca = nullptr; | ||
| if (GetStorageScope() == StorageScope::kDDR) { | ||
| alloca = Allocator<StorageScope::kDDR>(nbytes, alignment); | ||
| } else if (GetStorageScope() == StorageScope::kVTCM) { | ||
| alloca = Allocator<StorageScope::kVTCM>(nbytes, alignment); | ||
| } | ||
| CHECK(alloca != nullptr); | ||
| allocations_.push_back(alloca->data_); | ||
| managed_allocations_.push_back(std::move(alloca)); | ||
| } | ||
|
|
||
| HexagonBuffer::~HexagonBuffer() { | ||
| if (managed_) { | ||
| for (auto& ptr : allocations_) { | ||
| free(ptr); | ||
| HexagonBuffer::HexagonBuffer(size_t nallocs, size_t nbytes, size_t alignment, | ||
| Optional<String> scope) | ||
| : nallocs_(nallocs), nbytes_(nallocs * nbytes) { | ||
| SetStorageScope(scope); | ||
| for (size_t i = 0; i < nallocs; ++i) { | ||
| std::unique_ptr<Allocation> alloca = nullptr; | ||
| if (GetStorageScope() == StorageScope::kDDR) { | ||
| alloca = Allocator<StorageScope::kDDR>(nbytes, alignment); | ||
| } else if (GetStorageScope() == StorageScope::kVTCM) { | ||
| alloca = Allocator<StorageScope::kVTCM>(nbytes, alignment); | ||
| } | ||
| CHECK(alloca != nullptr); | ||
| allocations_.push_back(alloca->data_); | ||
| managed_allocations_.push_back(std::move(alloca)); | ||
| } | ||
| } | ||
|
|
||
| HexagonBuffer::HexagonBuffer(HexagonBuffer&& other) | ||
| : allocations_(other.allocations_), | ||
| managed_(other.managed_), | ||
| storage_scope_(other.storage_scope_) { | ||
| other.allocations_.clear(); | ||
| other.managed_ = false; | ||
| other.storage_scope_ = StorageScope::kDDR; | ||
| HexagonBuffer::HexagonBuffer(void* data, size_t nbytes, Optional<String> scope) | ||
| : nallocs_(1), nbytes_(nbytes) { | ||
| SetStorageScope(scope); | ||
| // disallow external VTCM allocations | ||
| CHECK(GetStorageScope() != HexagonBuffer::StorageScope::kVTCM); | ||
| allocations_.push_back(data); | ||
| } | ||
|
|
||
| HexagonBuffer& HexagonBuffer::operator=(HexagonBuffer&& other) { | ||
| std::swap(allocations_, other.allocations_); | ||
| std::swap(managed_, other.managed_); | ||
| std::swap(storage_scope_, other.storage_scope_); | ||
| return *this; | ||
| } | ||
| HexagonBuffer::~HexagonBuffer() { managed_allocations_.clear(); } | ||
|
|
||
| void* HexagonBuffer::GetPointer() { | ||
| void** HexagonBuffer::GetPointer() { | ||
|
Contributor
Author
Returning a `void**` here removes the ambiguity between the single-allocation and multiple-allocation cases: we always return a pointer to a pointer. This may be controversial.
Contributor
Did we decide to revert this change so that the tests pass, and revisit it once codegen supports pointer-array indexing?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I decided to keep |
||
| if (!allocations_.size()) { | ||
| return nullptr; | ||
| } | ||
| return (allocations_.size() > 1) ? allocations_.data() : allocations_[0]; | ||
| return allocations_.data(); | ||
| } | ||
|
|
||
| HexagonBuffer::StorageScope HexagonBuffer::GetStorageScope() const { return storage_scope_; } | ||
|
|
@@ -119,11 +187,70 @@ void HexagonBuffer::SetStorageScope(Optional<String> scope) { | |
| } | ||
| } | ||
|
|
||
| HexagonBuffer* IsHexagonBuffer(DLTensor* tensor) { | ||
| if (TVMDeviceExtType(tensor->device.device_type) == kDLHexagon) { | ||
| return static_cast<HexagonBuffer*>(tensor->data); | ||
| void HexagonBuffer::CopyTo(void* data, size_t nbytes) { | ||
| CHECK(nbytes_ == nbytes); | ||
| size_t offset = 0; | ||
| for (size_t i = 0; i < nallocs_; ++i) { | ||
| CHECK(nbytes / nallocs_ == managed_allocations_[i]->nbytes_); | ||
|
|
||
| memcpy(static_cast<char*>(data) + offset, | ||
| static_cast<const char*>(managed_allocations_[i]->data_), | ||
| managed_allocations_[i]->nbytes_); | ||
|
|
||
| offset += managed_allocations_[i]->nbytes_; | ||
| } | ||
| } | ||
|
|
||
| void HexagonBuffer::CopyFrom(void* data, size_t nbytes) { | ||
| CHECK(nbytes_ == nbytes); | ||
| size_t offset = 0; | ||
| for (size_t i = 0; i < nallocs_; ++i) { | ||
| CHECK(nbytes / nallocs_ == managed_allocations_[i]->nbytes_); | ||
|
|
||
| memcpy(static_cast<char*>(managed_allocations_[i]->data_), | ||
| static_cast<const char*>(data) + offset, managed_allocations_[i]->nbytes_); | ||
|
|
||
| offset += managed_allocations_[i]->nbytes_; | ||
| } | ||
| } | ||
|
|
||
| void HexagonBuffer::CopyFrom(const HexagonBuffer& other) { | ||
| CHECK(nbytes_ == other.nbytes_); | ||
|
|
||
| if (nallocs_ == other.nallocs_) { | ||
| for (size_t i = 0; i < nallocs_; ++i) { | ||
| CHECK(managed_allocations_[i]->nbytes_ == other.managed_allocations_[i]->nbytes_); | ||
|
|
||
| memcpy(static_cast<char*>(managed_allocations_[i]->data_), | ||
| static_cast<const char*>(other.managed_allocations_[i]->data_), | ||
| managed_allocations_[i]->nbytes_); | ||
| } | ||
| } else if (nallocs_ == 1) { | ||
| size_t offset = 0; | ||
| for (size_t i = 0; i < other.nallocs_; ++i) { | ||
| CHECK(nbytes_ / other.nallocs_ == other.managed_allocations_[i]->nbytes_); | ||
|
|
||
| memcpy(static_cast<char*>(managed_allocations_[0]->data_) + offset, | ||
| static_cast<const char*>(other.managed_allocations_[i]->data_), | ||
| other.managed_allocations_[i]->nbytes_); | ||
|
|
||
| offset += other.managed_allocations_[i]->nbytes_; | ||
| } | ||
| } else if (other.nallocs_ == 1) { | ||
| size_t offset = 0; | ||
| for (size_t i = 0; i < nallocs_; ++i) { | ||
| CHECK(other.nbytes_ / nallocs_ == managed_allocations_[i]->nbytes_); | ||
|
|
||
| memcpy(static_cast<char*>(managed_allocations_[i]->data_), | ||
| static_cast<const char*>(other.managed_allocations_[0]->data_) + offset, | ||
| managed_allocations_[i]->nbytes_); | ||
|
|
||
| offset += managed_allocations_[i]->nbytes_; | ||
| } | ||
| } else { | ||
| CHECK(false) << "To copy between Hexagon Buffers they must either have the same number of " | ||
| "dimensions or one of the Hexagon Buffers must have a single dimension."; | ||
| } | ||
| return nullptr; | ||
| } | ||
|
|
||
| } // namespace hexagon | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.