Skip to content

Commit b87ae97

Browse files
author
Peter Yeh
committed
fix lint
1 parent 05f4fcc commit b87ae97

File tree

1 file changed

+155
-206
lines changed

1 file changed

+155
-206
lines changed

src/contrib/miopen/conv_forward.cc

Lines changed: 155 additions & 206 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
* to you under the Apache License, Version 2.0 (the
77
* "License"); you may not use this file except in compliance
88
* with the License. You may obtain a copy of the License at
9-
*
9+
*
1010
* http://www.apache.org/licenses/LICENSE-2.0
11-
*
11+
*
1212
* Unless required by applicable law or agreed to in writing,
1313
* software distributed under the License is distributed on an
1414
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -21,9 +21,9 @@
2121
* Copyright (c) 2017 by Contributors
2222
* \file Use external miopen utils function
2323
*/
24+
#include <tvm/runtime/device_api.h>
2425
#include <tvm/runtime/registry.h>
2526
#include <tvm/runtime/util.h>
26-
#include <tvm/runtime/device_api.h>
2727
#include "miopen_utils.h"
2828

2929
namespace tvm {
@@ -33,211 +33,160 @@ namespace miopen {
3333
using namespace runtime;
3434

3535
TVM_REGISTER_GLOBAL("tvm.contrib.miopen.conv2d.setup")
36-
.set_body([](TVMArgs args, TVMRetValue *ret) {
37-
const int mode = args[0];
38-
const int dtype = args[1];
39-
const int pad_h = args[2];
40-
const int pad_w = args[3];
41-
const int stride_h = args[4];
42-
const int stride_w = args[5];
43-
const int dilation_h = args[6];
44-
const int dilation_w = args[7];
45-
const int x_dim0 = args[8];
46-
const int x_dim1 = args[9];
47-
const int x_dim2 = args[10];
48-
const int x_dim3 = args[11];
49-
const int w_dim0 = args[12];
50-
const int w_dim1 = args[13];
51-
const int w_dim2 = args[14];
52-
const int w_dim3 = args[15];
53-
void *out_shape = args[16];
54-
55-
MIOpenThreadEntry* entry_ptr = MIOpenThreadEntry::ThreadLocal();
56-
// Set Mode
57-
entry_ptr->conv_entry.mode = static_cast<miopenConvolutionMode_t>(mode);
58-
// Set Ctx
59-
entry_ptr->conv_entry.ctx = TVMContext{kDLROCM, 0};
60-
// Set Data Type
61-
entry_ptr->conv_entry.data_type = static_cast<miopenDataType_t>(dtype); // MIOpen suppports fp32(miopenFloat), fp16(miopenHalf) at this moment.
62-
// Set Desc
63-
MIOPEN_CALL(miopenInitConvolutionDescriptor(entry_ptr->conv_entry.conv_desc,
64-
entry_ptr->conv_entry.mode,
65-
pad_h,
66-
pad_w,
67-
stride_h,
68-
stride_w,
69-
dilation_h,
70-
dilation_w));
71-
// Set Filter
72-
MIOPEN_CALL(miopenSet4dTensorDescriptor(entry_ptr->conv_entry.filter_desc,
73-
entry_ptr->conv_entry.data_type,
74-
w_dim0,
75-
w_dim1,
76-
w_dim2,
77-
w_dim3));
78-
// Set Input
79-
MIOPEN_CALL(miopenSet4dTensorDescriptor(entry_ptr->conv_entry.input_desc,
80-
entry_ptr->conv_entry.data_type,
81-
x_dim0,
82-
x_dim1,
83-
x_dim2,
84-
x_dim3));
85-
86-
// Set Output shape
87-
MIOPEN_CALL(miopenGetConvolutionForwardOutputDim(entry_ptr->conv_entry.conv_desc,
88-
entry_ptr->conv_entry.input_desc,
89-
entry_ptr->conv_entry.filter_desc,
90-
static_cast<int*>(out_shape),
91-
static_cast<int*>(out_shape) + 1,
92-
static_cast<int*>(out_shape) + 2,
93-
static_cast<int*>(out_shape) + 3));
94-
95-
const int *oshape = static_cast<int*>(out_shape);
96-
// Set Output
97-
MIOPEN_CALL(miopenSet4dTensorDescriptor(entry_ptr->conv_entry.output_desc,
98-
entry_ptr->conv_entry.data_type,
99-
oshape[0],
100-
oshape[1],
101-
oshape[2],
102-
oshape[3]));
103-
104-
// Set workspace
105-
size_t workspace_size = 0;
106-
MIOPEN_CALL(miopenConvolutionForwardGetWorkSpaceSize(entry_ptr->handle,
107-
entry_ptr->conv_entry.filter_desc,
108-
entry_ptr->conv_entry.input_desc,
109-
entry_ptr->conv_entry.conv_desc,
110-
entry_ptr->conv_entry.output_desc,
111-
&workspace_size));
112-
entry_ptr->conv_entry.UpdateWorkspace(workspace_size);
113-
114-
const size_t input_size = x_dim0 * x_dim1 * x_dim2 * x_dim3;
115-
const size_t filter_size = w_dim0 * w_dim1 * w_dim2 * w_dim3;
116-
const size_t output_size = oshape[0] * oshape[1] * oshape[2] * oshape[3];
117-
118-
runtime::DeviceAPI* rocm_api = entry_ptr->conv_entry.rocm_api;
119-
float* input_buf = static_cast<float*>(rocm_api->AllocWorkspace(entry_ptr->conv_entry.ctx,
120-
input_size * sizeof(float)));
121-
float* filter_buf = static_cast<float*>(rocm_api->AllocWorkspace(entry_ptr->conv_entry.ctx,
122-
filter_size * sizeof(float)));
123-
float* output_buf = static_cast<float*>(rocm_api->AllocWorkspace(entry_ptr->conv_entry.ctx,
124-
output_size * sizeof(float)));
125-
126-
const int request_algo_count = 4;
127-
const bool exhaustive_search = false;
128-
void* workspace = entry_ptr->conv_entry.workspace;
129-
if (workspace_size == 0) workspace = nullptr;
130-
int returned_algo_count = 0;
131-
miopenConvAlgoPerf_t perfs[4];
132-
133-
MIOPEN_CALL(miopenFindConvolutionForwardAlgorithm(entry_ptr->handle,
134-
entry_ptr->conv_entry.input_desc,
135-
input_buf,
136-
entry_ptr->conv_entry.filter_desc,
137-
filter_buf,
138-
entry_ptr->conv_entry.conv_desc,
139-
entry_ptr->conv_entry.output_desc,
140-
output_buf,
141-
request_algo_count,
142-
&returned_algo_count,
143-
perfs,
144-
workspace,
145-
workspace_size,
146-
exhaustive_search));
147-
148-
rocm_api->FreeWorkspace(entry_ptr->conv_entry.ctx, input_buf);
149-
rocm_api->FreeWorkspace(entry_ptr->conv_entry.ctx, filter_buf);
150-
rocm_api->FreeWorkspace(entry_ptr->conv_entry.ctx, output_buf);
151-
152-
const std::vector<std::string> fwd_algo_names{
153-
"miopenConvolutionFwdAlgoGEMM",
154-
"miopenConvolutionFwdAlgoDirect",
155-
"miopenConvolutionFwdAlgoFFT",
156-
"miopenConvolutionFwdAlgoWinograd",
157-
};
158-
const auto best_algo = perfs[0].fwd_algo;
159-
LOG(INFO) << "\tMIOpen Found " << returned_algo_count
160-
<< " fwd algorithms, choosing " << fwd_algo_names[best_algo];
161-
for (int i = 0; i < returned_algo_count; ++i) {
162-
LOG(INFO) << "\t\t" << i << ") " << fwd_algo_names[perfs[i].fwd_algo]
163-
<< " - time: " << perfs[i].time << " ms"
164-
<< ", Memory: " << perfs[i].memory;
165-
}
166-
// Set Algo
167-
ret[0] = static_cast<int>(best_algo);
168-
});
169-
36+
.set_body([](TVMArgs args, TVMRetValue* ret) {
37+
const int mode = args[0];
38+
const int dtype = args[1];
39+
const int pad_h = args[2];
40+
const int pad_w = args[3];
41+
const int stride_h = args[4];
42+
const int stride_w = args[5];
43+
const int dilation_h = args[6];
44+
const int dilation_w = args[7];
45+
const int x_dim0 = args[8];
46+
const int x_dim1 = args[9];
47+
const int x_dim2 = args[10];
48+
const int x_dim3 = args[11];
49+
const int w_dim0 = args[12];
50+
const int w_dim1 = args[13];
51+
const int w_dim2 = args[14];
52+
const int w_dim3 = args[15];
53+
void* out_shape = args[16];
54+
55+
MIOpenThreadEntry* entry_ptr = MIOpenThreadEntry::ThreadLocal();
56+
// Set Mode
57+
entry_ptr->conv_entry.mode = static_cast<miopenConvolutionMode_t>(mode);
58+
// Set Ctx
59+
entry_ptr->conv_entry.ctx = TVMContext{kDLROCM, 0};
60+
// Set Data Type
61+
entry_ptr->conv_entry.data_type = static_cast<miopenDataType_t>(
62+
dtype); // MIOpen suppports fp32(miopenFloat), fp16(miopenHalf) at this moment.
63+
// Set Desc
64+
MIOPEN_CALL(miopenInitConvolutionDescriptor(entry_ptr->conv_entry.conv_desc,
65+
entry_ptr->conv_entry.mode, pad_h, pad_w,
66+
stride_h, stride_w, dilation_h, dilation_w));
67+
// Set Filter
68+
MIOPEN_CALL(miopenSet4dTensorDescriptor(entry_ptr->conv_entry.filter_desc,
69+
entry_ptr->conv_entry.data_type, w_dim0, w_dim1,
70+
w_dim2, w_dim3));
71+
// Set Input
72+
MIOPEN_CALL(miopenSet4dTensorDescriptor(entry_ptr->conv_entry.input_desc,
73+
entry_ptr->conv_entry.data_type, x_dim0, x_dim1,
74+
x_dim2, x_dim3));
75+
76+
// Set Output shape
77+
MIOPEN_CALL(miopenGetConvolutionForwardOutputDim(
78+
entry_ptr->conv_entry.conv_desc, entry_ptr->conv_entry.input_desc,
79+
entry_ptr->conv_entry.filter_desc, static_cast<int*>(out_shape),
80+
static_cast<int*>(out_shape) + 1, static_cast<int*>(out_shape) + 2,
81+
static_cast<int*>(out_shape) + 3));
82+
83+
const int* oshape = static_cast<int*>(out_shape);
84+
// Set Output
85+
MIOPEN_CALL(miopenSet4dTensorDescriptor(entry_ptr->conv_entry.output_desc,
86+
entry_ptr->conv_entry.data_type, oshape[0], oshape[1],
87+
oshape[2], oshape[3]));
88+
89+
// Set workspace
90+
size_t workspace_size = 0;
91+
MIOPEN_CALL(miopenConvolutionForwardGetWorkSpaceSize(
92+
entry_ptr->handle, entry_ptr->conv_entry.filter_desc, entry_ptr->conv_entry.input_desc,
93+
entry_ptr->conv_entry.conv_desc, entry_ptr->conv_entry.output_desc, &workspace_size));
94+
entry_ptr->conv_entry.UpdateWorkspace(workspace_size);
95+
96+
const size_t input_size = x_dim0 * x_dim1 * x_dim2 * x_dim3;
97+
const size_t filter_size = w_dim0 * w_dim1 * w_dim2 * w_dim3;
98+
const size_t output_size = oshape[0] * oshape[1] * oshape[2] * oshape[3];
99+
100+
runtime::DeviceAPI* rocm_api = entry_ptr->conv_entry.rocm_api;
101+
float* input_buf = static_cast<float*>(
102+
rocm_api->AllocWorkspace(entry_ptr->conv_entry.ctx, input_size * sizeof(float)));
103+
float* filter_buf = static_cast<float*>(
104+
rocm_api->AllocWorkspace(entry_ptr->conv_entry.ctx, filter_size * sizeof(float)));
105+
float* output_buf = static_cast<float*>(
106+
rocm_api->AllocWorkspace(entry_ptr->conv_entry.ctx, output_size * sizeof(float)));
107+
108+
const int request_algo_count = 4;
109+
const bool exhaustive_search = false;
110+
void* workspace = entry_ptr->conv_entry.workspace;
111+
if (workspace_size == 0) workspace = nullptr;
112+
int returned_algo_count = 0;
113+
miopenConvAlgoPerf_t perfs[4];
114+
115+
MIOPEN_CALL(miopenFindConvolutionForwardAlgorithm(
116+
entry_ptr->handle, entry_ptr->conv_entry.input_desc, input_buf,
117+
entry_ptr->conv_entry.filter_desc, filter_buf, entry_ptr->conv_entry.conv_desc,
118+
entry_ptr->conv_entry.output_desc, output_buf, request_algo_count, &returned_algo_count,
119+
perfs, workspace, workspace_size, exhaustive_search));
120+
121+
rocm_api->FreeWorkspace(entry_ptr->conv_entry.ctx, input_buf);
122+
rocm_api->FreeWorkspace(entry_ptr->conv_entry.ctx, filter_buf);
123+
rocm_api->FreeWorkspace(entry_ptr->conv_entry.ctx, output_buf);
124+
125+
const std::vector<std::string> fwd_algo_names{
126+
"miopenConvolutionFwdAlgoGEMM", "miopenConvolutionFwdAlgoDirect",
127+
"miopenConvolutionFwdAlgoFFT", "miopenConvolutionFwdAlgoWinograd",
128+
};
129+
const auto best_algo = perfs[0].fwd_algo;
130+
LOG(INFO) << "\tMIOpen Found " << returned_algo_count << " fwd algorithms, choosing "
131+
<< fwd_algo_names[best_algo];
132+
for (int i = 0; i < returned_algo_count; ++i) {
133+
LOG(INFO) << "\t\t" << i << ") " << fwd_algo_names[perfs[i].fwd_algo]
134+
<< " - time: " << perfs[i].time << " ms"
135+
<< ", Memory: " << perfs[i].memory;
136+
}
137+
// Set Algo
138+
ret[0] = static_cast<int>(best_algo);
139+
});
170140

171141
TVM_REGISTER_GLOBAL("tvm.contrib.miopen.conv2d.forward")
172-
.set_body([](TVMArgs args, TVMRetValue *ret) {
173-
const int mode = args[0];
174-
const int dtype = args[1];
175-
const int pad_h = args[2];
176-
const int pad_w = args[3];
177-
const int stride_h = args[4];
178-
const int stride_w = args[5];
179-
const int dilation_h = args[6];
180-
const int dilation_w = args[7];
181-
const int algo = args[8];
182-
const DLTensor *x = args[9];
183-
const DLTensor *w = args[10];
184-
const DLTensor *y = args[11];
185-
186-
MIOpenThreadEntry* entry_ptr = MIOpenThreadEntry::ThreadLocal();
187-
entry_ptr->conv_entry.fwd_algo = static_cast<miopenConvFwdAlgorithm_t>(algo);
188-
// Set Mode
189-
entry_ptr->conv_entry.mode = static_cast<miopenConvolutionMode_t>(mode);
190-
// Set Ctx
191-
entry_ptr->conv_entry.ctx = x->ctx;
192-
// Set Data Type
193-
entry_ptr->conv_entry.data_type = static_cast<miopenDataType_t>(dtype); // MIOpen suppports fp32(miopenFloat), fp16(miopenHalf) at this moment.
194-
// Set Desc
195-
MIOPEN_CALL(miopenInitConvolutionDescriptor(entry_ptr->conv_entry.conv_desc,
196-
entry_ptr->conv_entry.mode,
197-
pad_h,
198-
pad_w,
199-
stride_h,
200-
stride_w,
201-
dilation_h,
202-
dilation_w));
203-
// Set Filter
204-
MIOPEN_CALL(miopenSet4dTensorDescriptor(entry_ptr->conv_entry.filter_desc,
205-
entry_ptr->conv_entry.data_type,
206-
w->shape[0],
207-
w->shape[1],
208-
w->shape[2],
209-
w->shape[3]));
210-
// Set Input
211-
MIOPEN_CALL(miopenSet4dTensorDescriptor(entry_ptr->conv_entry.input_desc,
212-
entry_ptr->conv_entry.data_type,
213-
x->shape[0],
214-
x->shape[1],
215-
x->shape[2],
216-
x->shape[3]));
217-
// Set Output
218-
MIOPEN_CALL(miopenSet4dTensorDescriptor(entry_ptr->conv_entry.output_desc,
219-
entry_ptr->conv_entry.data_type,
220-
y->shape[0],
221-
y->shape[1],
222-
y->shape[2],
223-
y->shape[3]));
224-
225-
const float alpha = 1.f;
226-
const float beta = 0.f;
227-
MIOPEN_CALL(miopenConvolutionForward(entry_ptr->handle,
228-
&alpha,
229-
entry_ptr->conv_entry.input_desc,
230-
x->data,
231-
entry_ptr->conv_entry.filter_desc,
232-
w->data,
233-
entry_ptr->conv_entry.conv_desc,
234-
entry_ptr->conv_entry.fwd_algo,
235-
&beta,
236-
entry_ptr->conv_entry.output_desc,
237-
y->data,
238-
entry_ptr->conv_entry.workspace,
239-
entry_ptr->conv_entry.workspace_size));
240-
});
142+
.set_body([](TVMArgs args, TVMRetValue* ret) {
143+
const int mode = args[0];
144+
const int dtype = args[1];
145+
const int pad_h = args[2];
146+
const int pad_w = args[3];
147+
const int stride_h = args[4];
148+
const int stride_w = args[5];
149+
const int dilation_h = args[6];
150+
const int dilation_w = args[7];
151+
const int algo = args[8];
152+
const DLTensor* x = args[9];
153+
const DLTensor* w = args[10];
154+
const DLTensor* y = args[11];
155+
156+
MIOpenThreadEntry* entry_ptr = MIOpenThreadEntry::ThreadLocal();
157+
entry_ptr->conv_entry.fwd_algo = static_cast<miopenConvFwdAlgorithm_t>(algo);
158+
// Set Mode
159+
entry_ptr->conv_entry.mode = static_cast<miopenConvolutionMode_t>(mode);
160+
// Set Ctx
161+
entry_ptr->conv_entry.ctx = x->ctx;
162+
// Set Data Type
163+
entry_ptr->conv_entry.data_type = static_cast<miopenDataType_t>(
164+
dtype); // MIOpen suppports fp32(miopenFloat), fp16(miopenHalf) at this moment.
165+
// Set Desc
166+
MIOPEN_CALL(miopenInitConvolutionDescriptor(entry_ptr->conv_entry.conv_desc,
167+
entry_ptr->conv_entry.mode, pad_h, pad_w,
168+
stride_h, stride_w, dilation_h, dilation_w));
169+
// Set Filter
170+
MIOPEN_CALL(miopenSet4dTensorDescriptor(entry_ptr->conv_entry.filter_desc,
171+
entry_ptr->conv_entry.data_type, w->shape[0],
172+
w->shape[1], w->shape[2], w->shape[3]));
173+
// Set Input
174+
MIOPEN_CALL(miopenSet4dTensorDescriptor(entry_ptr->conv_entry.input_desc,
175+
entry_ptr->conv_entry.data_type, x->shape[0],
176+
x->shape[1], x->shape[2], x->shape[3]));
177+
// Set Output
178+
MIOPEN_CALL(miopenSet4dTensorDescriptor(entry_ptr->conv_entry.output_desc,
179+
entry_ptr->conv_entry.data_type, y->shape[0],
180+
y->shape[1], y->shape[2], y->shape[3]));
181+
182+
const float alpha = 1.f;
183+
const float beta = 0.f;
184+
MIOPEN_CALL(miopenConvolutionForward(
185+
entry_ptr->handle, &alpha, entry_ptr->conv_entry.input_desc, x->data,
186+
entry_ptr->conv_entry.filter_desc, w->data, entry_ptr->conv_entry.conv_desc,
187+
entry_ptr->conv_entry.fwd_algo, &beta, entry_ptr->conv_entry.output_desc, y->data,
188+
entry_ptr->conv_entry.workspace, entry_ptr->conv_entry.workspace_size));
189+
});
241190

242191
} // namespace miopen
243192
} // namespace contrib

0 commit comments

Comments
 (0)