Skip to content

Commit c573525

Browse files
adstraw, csullivan
authored and committed
Add Hexagon VTCM and discontiguous allocation support (apache#9525)
* WIP Allocation abstraction for VTCM and DDR.
* Add Hexagon VTCM and discontiguous allocation support
* differentiate between dimensions and allocations
* remove change to llvm codegen
* add integration test_add_vtcm to demo vtcm alloc
* remove cmake change
* forcing contiguous allocation in device API, for now

Co-authored-by: Chris Sullivan <[email protected]>
1 parent 52a7c59 commit c573525

File tree

6 files changed

+352
-119
lines changed

6 files changed

+352
-119
lines changed

src/runtime/hexagon/hexagon/hexagon_buffer.cc

Lines changed: 183 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -23,83 +23,151 @@
2323

2424
#include <tvm/runtime/module.h>
2525

26+
#include "hexagon_common.h"
27+
28+
#if defined(__hexagon__)
29+
#include "HAP_compute_res.h"
30+
#endif
31+
2632
#include <string>
2733
#include <utility>
2834

29-
#include "hexagon_common.h"
30-
3135
namespace tvm {
3236
namespace runtime {
3337
namespace hexagon {
3438

35-
static size_t GetDataAlignment(const DLDataType dtype) {
36-
size_t align = (dtype.bits / 8) * dtype.lanes;
37-
if (align < kAllocAlignment) return kAllocAlignment;
38-
return align;
39-
}
39+
// Base record for a single block of memory: the data pointer plus the size and
// alignment it was requested with. The derived types in this file set data_ in
// their constructor and release it in their destructor. Copy and move are
// disabled.
struct Allocation {
  Allocation(size_t nbytes, size_t alignment) : nbytes_{nbytes}, alignment_{alignment} {}
  virtual ~Allocation() = default;

  Allocation(const Allocation&) = delete;
  Allocation(Allocation&&) = delete;
  Allocation& operator=(const Allocation&) = delete;
  Allocation& operator=(Allocation&&) = delete;

  // Start of the block; stays null until a derived constructor assigns it.
  void* data_ = nullptr;
  // Size in bytes passed to the constructor.
  size_t nbytes_;
  // Alignment in bytes passed to the constructor.
  size_t alignment_;
};
5851

59-
HexagonBuffer::HexagonBuffer(size_t nbytes, size_t alignment, Optional<String> scope) {
60-
void* ptr = nullptr;
61-
int ret = posix_memalign(&ptr, alignment, nbytes);
62-
if (ret != 0) {
63-
throw std::bad_alloc();
52+
struct DDRAllocation : public Allocation {
53+
DDRAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) {
54+
#ifdef _WIN32
55+
data_ = _aligned_malloc(nbytes, alignment);
56+
CHECK(data_ != nullptr);
57+
#else
58+
int ret = posix_memalign(&data_, alignment, nbytes);
59+
CHECK_EQ(ret, 0);
60+
#endif
6461
}
65-
allocations_.push_back(ptr);
66-
SetStorageScope(scope);
62+
~DDRAllocation() {
63+
#ifdef _WIN32
64+
_aligned_free(data_);
65+
#else
66+
free(data_);
67+
#endif
68+
}
69+
};
70+
71+
#if defined(__hexagon__)
72+
struct VTCMAllocation : public Allocation {
73+
VTCMAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) {
74+
compute_res_attr_t res_info;
75+
HEXAGON_SAFE_CALL(HAP_compute_res_attr_init(&res_info));
76+
77+
// allocate nbytes of vtcm on a single page
78+
HEXAGON_SAFE_CALL(HAP_compute_res_attr_set_vtcm_param(&res_info, /*vtcm_size = */ nbytes,
79+
/*b_single_page = */ 1));
80+
context_id_ = HAP_compute_res_acquire(&res_info, /*timeout = */ 10000);
81+
82+
if (context_id_) {
83+
data_ = HAP_compute_res_attr_get_vtcm_ptr(&res_info);
84+
if (!data_) {
85+
HEXAGON_PRINT(ERROR, "ERROR: Allocated VTCM ptr is null.");
86+
HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
87+
return;
88+
}
89+
} else {
90+
HEXAGON_PRINT(ERROR, "ERROR: Unable to acquire requeisted resource.");
91+
return;
92+
}
93+
// HEXAGON_PRINT(ALWAYS, "VTCMAllocation() - Context ID: %u, VTCM ptr: %p", context_id_, data_);
94+
}
95+
~VTCMAllocation() {
96+
// HEXAGON_PRINT(ALWAYS, "~VTCMAllocation() - Context ID: %u, VTCM ptr: %p", context_id_,
97+
// data_);
98+
HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
99+
data_ = nullptr;
100+
}
101+
unsigned int context_id_{0};
102+
};
103+
#else
104+
struct VTCMAllocation : public DDRAllocation {
105+
VTCMAllocation(size_t nbytes, size_t alignment) : DDRAllocation(nbytes, alignment) {}
106+
};
107+
#endif
108+
109+
template <HexagonBuffer::StorageScope S>
110+
std::unique_ptr<Allocation> Allocator(size_t nbytes, size_t alignment);
111+
112+
template <>
113+
std::unique_ptr<Allocation> Allocator<HexagonBuffer::StorageScope::kDDR>(size_t nbytes,
114+
size_t alignment) {
115+
return std::make_unique<DDRAllocation>(nbytes, alignment);
67116
}
68117

69-
HexagonBuffer::HexagonBuffer(void* data, Optional<String> scope) : managed_{false} {
118+
template <>
119+
std::unique_ptr<Allocation> Allocator<HexagonBuffer::StorageScope::kVTCM>(size_t nbytes,
120+
size_t alignment) {
121+
return std::make_unique<VTCMAllocation>(nbytes, alignment);
122+
}
123+
124+
HexagonBuffer::HexagonBuffer(size_t nbytes, size_t alignment, Optional<String> scope)
125+
: nallocs_(1), nbytes_(nbytes) {
70126
SetStorageScope(scope);
71-
allocations_.push_back(data);
127+
128+
std::unique_ptr<Allocation> alloca = nullptr;
129+
if (GetStorageScope() == StorageScope::kDDR) {
130+
alloca = Allocator<StorageScope::kDDR>(nbytes, alignment);
131+
} else if (GetStorageScope() == StorageScope::kVTCM) {
132+
alloca = Allocator<StorageScope::kVTCM>(nbytes, alignment);
133+
}
134+
CHECK(alloca != nullptr);
135+
allocations_.push_back(alloca->data_);
136+
managed_allocations_.push_back(std::move(alloca));
72137
}
73138

74-
HexagonBuffer::~HexagonBuffer() {
75-
if (managed_) {
76-
for (auto& ptr : allocations_) {
77-
free(ptr);
139+
// Creates a buffer made of nallocs separate managed allocations, each of
// nbytes with the given alignment, in the storage scope named by `scope`.
// The recorded total size is nallocs * nbytes. A scope that is neither kDDR
// nor kVTCM leaves the allocation pointer null and trips the CHECK.
HexagonBuffer::HexagonBuffer(size_t nallocs, size_t nbytes, size_t alignment,
                             Optional<String> scope)
    : nallocs_(nallocs), nbytes_(nallocs * nbytes) {
  SetStorageScope(scope);
  for (size_t idx = 0; idx < nallocs; ++idx) {
    std::unique_ptr<Allocation> alloca;
    switch (GetStorageScope()) {
      case StorageScope::kDDR:
        alloca = Allocator<StorageScope::kDDR>(nbytes, alignment);
        break;
      case StorageScope::kVTCM:
        alloca = Allocator<StorageScope::kVTCM>(nbytes, alignment);
        break;
      default:
        break;
    }
    CHECK(alloca != nullptr);
    // Record the raw pointer first, then hand ownership to the managed list.
    allocations_.push_back(alloca->data_);
    managed_allocations_.push_back(std::move(alloca));
  }
}
81155

82-
HexagonBuffer::HexagonBuffer(HexagonBuffer&& other)
83-
: allocations_(other.allocations_),
84-
managed_(other.managed_),
85-
storage_scope_(other.storage_scope_) {
86-
other.allocations_.clear();
87-
other.managed_ = false;
88-
other.storage_scope_ = StorageScope::kDDR;
156+
// Wraps caller-provided memory of nbytes as a single-allocation buffer. Only
// the raw pointer is recorded; nothing is added to managed_allocations_, so
// this object does not free `data`.
HexagonBuffer::HexagonBuffer(void* data, size_t nbytes, Optional<String> scope)
    : nallocs_(1), nbytes_(nbytes) {
  SetStorageScope(scope);

  // disallow external VTCM allocations
  CHECK(GetStorageScope() != HexagonBuffer::StorageScope::kVTCM);

  allocations_.emplace_back(data);
}
90163

91-
HexagonBuffer& HexagonBuffer::operator=(HexagonBuffer&& other) {
92-
std::swap(allocations_, other.allocations_);
93-
std::swap(managed_, other.managed_);
94-
std::swap(storage_scope_, other.storage_scope_);
95-
return *this;
96-
}
164+
// Destroys every owned Allocation; each one releases its storage in its own
// destructor.
HexagonBuffer::~HexagonBuffer() {
  managed_allocations_.clear();
}
97165

98-
void* HexagonBuffer::GetPointer() {
166+
void** HexagonBuffer::GetPointer() {
99167
if (!allocations_.size()) {
100168
return nullptr;
101169
}
102-
return (allocations_.size() > 1) ? allocations_.data() : allocations_[0];
170+
return allocations_.data();
103171
}
104172

105173
// Returns the storage scope currently recorded for this buffer.
HexagonBuffer::StorageScope HexagonBuffer::GetStorageScope() const {
  return storage_scope_;
}
@@ -119,11 +187,70 @@ void HexagonBuffer::SetStorageScope(Optional<String> scope) {
119187
}
120188
}
121189

122-
HexagonBuffer* IsHexagonBuffer(DLTensor* tensor) {
123-
if (TVMDeviceExtType(tensor->device.device_type) == kDLHexagon) {
124-
return static_cast<HexagonBuffer*>(tensor->data);
190+
void HexagonBuffer::CopyTo(void* data, size_t nbytes) {
191+
CHECK(nbytes_ == nbytes);
192+
size_t offset = 0;
193+
for (size_t i = 0; i < nallocs_; ++i) {
194+
CHECK(nbytes / nallocs_ == managed_allocations_[i]->nbytes_);
195+
196+
memcpy(static_cast<char*>(data) + offset,
197+
static_cast<const char*>(managed_allocations_[i]->data_),
198+
managed_allocations_[i]->nbytes_);
199+
200+
offset += managed_allocations_[i]->nbytes_;
201+
}
202+
}
203+
204+
void HexagonBuffer::CopyFrom(void* data, size_t nbytes) {
205+
CHECK(nbytes_ == nbytes);
206+
size_t offset = 0;
207+
for (size_t i = 0; i < nallocs_; ++i) {
208+
CHECK(nbytes / nallocs_ == managed_allocations_[i]->nbytes_);
209+
210+
memcpy(static_cast<char*>(managed_allocations_[i]->data_),
211+
static_cast<const char*>(data) + offset, managed_allocations_[i]->nbytes_);
212+
213+
offset += managed_allocations_[i]->nbytes_;
214+
}
215+
}
216+
217+
void HexagonBuffer::CopyFrom(const HexagonBuffer& other) {
218+
CHECK(nbytes_ == other.nbytes_);
219+
220+
if (nallocs_ == other.nallocs_) {
221+
for (size_t i = 0; i < nallocs_; ++i) {
222+
CHECK(managed_allocations_[i]->nbytes_ == other.managed_allocations_[i]->nbytes_);
223+
224+
memcpy(static_cast<char*>(managed_allocations_[i]->data_),
225+
static_cast<const char*>(other.managed_allocations_[i]->data_),
226+
managed_allocations_[i]->nbytes_);
227+
}
228+
} else if (nallocs_ == 1) {
229+
size_t offset = 0;
230+
for (size_t i = 0; i < other.nallocs_; ++i) {
231+
CHECK(nbytes_ / other.nallocs_ == other.managed_allocations_[i]->nbytes_);
232+
233+
memcpy(static_cast<char*>(managed_allocations_[0]->data_) + offset,
234+
static_cast<const char*>(other.managed_allocations_[i]->data_),
235+
other.managed_allocations_[i]->nbytes_);
236+
237+
offset += other.managed_allocations_[i]->nbytes_;
238+
}
239+
} else if (other.nallocs_ == 1) {
240+
size_t offset = 0;
241+
for (size_t i = 0; i < nallocs_; ++i) {
242+
CHECK(other.nbytes_ / nallocs_ == managed_allocations_[i]->nbytes_);
243+
244+
memcpy(static_cast<char*>(managed_allocations_[i]->data_),
245+
static_cast<const char*>(other.managed_allocations_[0]->data_) + offset,
246+
managed_allocations_[i]->nbytes_);
247+
248+
offset += managed_allocations_[i]->nbytes_;
249+
}
250+
} else {
251+
CHECK(false) << "To copy between Hexagon Buffers they must either have the same number of "
252+
"dimensions or one of the Hexagon Buffers must have a single dimension.";
125253
}
126-
return nullptr;
127254
}
128255

129256
} // namespace hexagon

0 commit comments

Comments
 (0)