1717 * under the License.
1818 */
1919
20- #include < HAP_farf.h>
2120#include < tvm/runtime/c_runtime_api.h>
2221#include < tvm/runtime/device_api.h>
2322
2625#ifndef TVM_RUNTIME_HEXAGON_OPS_CONV2D_H_
2726#define TVM_RUNTIME_HEXAGON_OPS_CONV2D_H_
2827
29- #ifdef DEBUG_CONV
30- #define DEBUG_BLOCK (X ) \
31- { X }
32- #define debug (...) FARF(ALWAYS, ##__VA_ARGS__)
33- #else
34- #define DEBUG_BLOCK (X )
35- #define debug (...)
36- #endif
37-
38- #define HAP_CALL (hap_fn, ...) \
39- { \
40- int rc = hap_fn (__VA_ARGS__); \
41- if (rc != 0 ) { \
42- debug (" %s failed: rc=%x" , #hap_fn, rc); \
43- } \
44- }
45-
46- namespace detail {
28+ namespace tvm {
29+ namespace runtime {
30+ namespace hexagon {
4731static constexpr auto hexagon_device = DLDevice{static_cast <DLDeviceType>(kDLHexagon ), 0 };
4832
4933// Standalone DLTensor: the standalone-ness means that this object owns the shape
5034// (as opposed to a DLTensor).
51- template <size_t N >
35+ template <size_t NDIM >
5236class SDLTensor : public DLTensor {
5337 public:
5438 SDLTensor (void * data_ptr, DLDataType data_type, void * data_space, const int64_t * data_dims)
5539 : SDLTensor(data_ptr, data_type, data_space) {
56- for (size_t i = 0 ; i != N ; ++i) dims[i] = data_dims[i];
40+ for (size_t i = 0 ; i < NDIM ; ++i) dims[i] = data_dims[i];
5741 }
5842
5943 SDLTensor (void * data_ptr, DLDataType data_type, void * data_space,
@@ -66,53 +50,56 @@ class SDLTensor : public DLTensor {
6650 SDLTensor (void * data_ptr, DLDataType data_type, void * data_space) : data_space(data_space) {
6751 data = data_ptr;
6852 device = hexagon_device;
69- ndim = N ;
53+ ndim = NDIM ;
7054 dtype = data_type;
7155 shape = dims;
7256 strides = nullptr ;
7357 byte_offset = 0 ;
7458 }
7559
7660 void * data_space = nullptr ;
77- int64_t dims[N ];
61+ int64_t dims[NDIM ];
7862};
7963
// Reinterpret an integer address as an untyped pointer.
inline void* to_ptr(uintptr_t v) { return reinterpret_cast<void*>(v); }
8165
// Reinterpret a pointer as its integer address.
inline uintptr_t to_uint(void* ptr) { return reinterpret_cast<uintptr_t>(ptr); }
8367
constexpr int xyc_to_sm_16b(int y, int x, int c) {
  // Map y,x,c coordinates within a block to the offset (in 16-bit elements)
  // from the beginning of the block in spatial-major layout.
  // 10-bit spatial mask:  yyyxcccccx
  //   y         -> bits 7 and up
  //   x (bit 1) -> bit 6
  //   c         -> bits 1..5
  //   x (bit 0) -> bit 0
  // Only non-negativity is checked; upper bounds are the caller's
  // responsibility (out-of-range values overlap neighbouring bit fields).
  assert(y >= 0 && x >= 0 && c >= 0);
  return y << 7 | (x & 2) << 5 | c << 1 | (x & 1);
}
9075
constexpr int hwio_to_sm_16b(int width, int y, int x, int i, int o) {
  // Map y,x,i,o coordinates within a chunk (assuming the origin at the
  // top-left spatial corner) to the offset (in 16-bit elements) from the
  // beginning of the chunk in spatial-major layout.
  // Spatial mask: p..piiiioooooi,  where p..p are position bits.
  //   p         -> bits 10 and up
  //   i (1..4)  -> bits 6..9
  //   o         -> bits 1..5
  //   i (bit 0) -> bit 0
  assert(width >= 1);
  assert(y >= 0 && x >= 0 && i >= 0 && o >= 0);
  // The position index counts x from the right edge of each row:
  // x == width - 1 gives the lowest position within row y.
  int p = y * width + (width - 1 - x);
  return p << 10 | (i & 0x1e) << 5 | o << 1 | (i & 1);
}
9986
// Round `v` up to the nearest multiple of `p2`.
// The mask `-p2` clears the low bits only when `p2` is a power of two;
// other values of `p2` give results that are not multiples of `p2`.
inline constexpr int round_up(int v, int p2) { return (v + p2 - 1) & -p2; }
10188
102- constexpr uintptr_t nhwc_at (const DLTensor& a, int n, int y, int x, int c) {
89+ inline uintptr_t nhwc_at (const DLTensor& a, int n, int y, int x, int c) {
10390 if (y < 0 || y >= a.shape [1 ]) return uintptr_t (0 );
10491 auto p = static_cast <uintptr_t *>(a.data );
10592 assert (n == 0 );
10693 return p[y * a.shape [2 ] * a.shape [3 ] + x * a.shape [3 ] + c];
10794}
10895
109- constexpr uintptr_t hwio_at (const DLTensor& f, int y, int x, int i, int o) {
96+ inline uintptr_t hwio_at (const DLTensor& f, int y, int x, int i, int o) {
11097 auto p = static_cast <uintptr_t *>(f.data );
11198 return p[y * f.shape [1 ] * f.shape [2 ] * f.shape [3 ] + x * f.shape [2 ] * f.shape [3 ] + i * f.shape [3 ] +
11299 o];
113100}
114101
115- constexpr uint32_t * bias_at (const DLTensor& b, int d) {
102+ inline uint32_t * bias_at (const DLTensor& b, int d) {
116103 auto p = static_cast <uint32_t *>(b.data );
117104 return p + d;
118105}
@@ -139,6 +126,8 @@ void release(tvm::runtime::DeviceAPI* device_api, const SDLTensor<N>& tensor) {
139126 }
140127}
141128
142- } // namespace detail
129+ } // namespace hexagon
130+ } // namespace runtime
131+ } // namespace tvm
143132
144133#endif // TVM_RUNTIME_HEXAGON_OPS_CONV2D_H_
0 commit comments