66 * to you under the Apache License, Version 2.0 (the
77 * "License"); you may not use this file except in compliance
88 * with the License. You may obtain a copy of the License at
9- *
9+ *
1010 * http://www.apache.org/licenses/LICENSE-2.0
11- *
11+ *
1212 * Unless required by applicable law or agreed to in writing,
1313 * software distributed under the License is distributed on an
1414 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
2121 * Copyright (c) 2017 by Contributors
2222 * \file Use external miopen utils function
2323 */
24+ #include < tvm/runtime/device_api.h>
2425#include < tvm/runtime/registry.h>
2526#include < tvm/runtime/util.h>
26- #include < tvm/runtime/device_api.h>
2727#include " miopen_utils.h"
2828
2929namespace tvm {
@@ -33,211 +33,160 @@ namespace miopen {
3333using namespace runtime ;
3434
3535TVM_REGISTER_GLOBAL (" tvm.contrib.miopen.conv2d.setup" )
36- .set_body([](TVMArgs args, TVMRetValue *ret) {
37- const int mode = args[0 ];
38- const int dtype = args[1 ];
39- const int pad_h = args[2 ];
40- const int pad_w = args[3 ];
41- const int stride_h = args[4 ];
42- const int stride_w = args[5 ];
43- const int dilation_h = args[6 ];
44- const int dilation_w = args[7 ];
45- const int x_dim0 = args[8 ];
46- const int x_dim1 = args[9 ];
47- const int x_dim2 = args[10 ];
48- const int x_dim3 = args[11 ];
49- const int w_dim0 = args[12 ];
50- const int w_dim1 = args[13 ];
51- const int w_dim2 = args[14 ];
52- const int w_dim3 = args[15 ];
53- void *out_shape = args[16 ];
54-
55- MIOpenThreadEntry* entry_ptr = MIOpenThreadEntry::ThreadLocal ();
56- // Set Mode
57- entry_ptr->conv_entry .mode = static_cast <miopenConvolutionMode_t>(mode);
58- // Set Ctx
59- entry_ptr->conv_entry .ctx = TVMContext{kDLROCM , 0 };
60- // Set Data Type
61- entry_ptr->conv_entry .data_type = static_cast <miopenDataType_t>(dtype); // MIOpen suppports fp32(miopenFloat), fp16(miopenHalf) at this moment.
62- // Set Desc
63- MIOPEN_CALL (miopenInitConvolutionDescriptor (entry_ptr->conv_entry .conv_desc ,
64- entry_ptr->conv_entry .mode ,
65- pad_h,
66- pad_w,
67- stride_h,
68- stride_w,
69- dilation_h,
70- dilation_w));
71- // Set Filter
72- MIOPEN_CALL (miopenSet4dTensorDescriptor (entry_ptr->conv_entry .filter_desc ,
73- entry_ptr->conv_entry .data_type ,
74- w_dim0,
75- w_dim1,
76- w_dim2,
77- w_dim3));
78- // Set Input
79- MIOPEN_CALL (miopenSet4dTensorDescriptor (entry_ptr->conv_entry .input_desc ,
80- entry_ptr->conv_entry .data_type ,
81- x_dim0,
82- x_dim1,
83- x_dim2,
84- x_dim3));
85-
86- // Set Output shape
87- MIOPEN_CALL (miopenGetConvolutionForwardOutputDim (entry_ptr->conv_entry .conv_desc ,
88- entry_ptr->conv_entry .input_desc ,
89- entry_ptr->conv_entry .filter_desc ,
90- static_cast <int *>(out_shape),
91- static_cast <int *>(out_shape) + 1 ,
92- static_cast <int *>(out_shape) + 2 ,
93- static_cast <int *>(out_shape) + 3 ));
94-
95- const int *oshape = static_cast <int *>(out_shape);
96- // Set Output
97- MIOPEN_CALL (miopenSet4dTensorDescriptor (entry_ptr->conv_entry .output_desc ,
98- entry_ptr->conv_entry .data_type ,
99- oshape[0 ],
100- oshape[1 ],
101- oshape[2 ],
102- oshape[3 ]));
103-
104- // Set workspace
105- size_t workspace_size = 0 ;
106- MIOPEN_CALL (miopenConvolutionForwardGetWorkSpaceSize (entry_ptr->handle ,
107- entry_ptr->conv_entry .filter_desc ,
108- entry_ptr->conv_entry .input_desc ,
109- entry_ptr->conv_entry .conv_desc ,
110- entry_ptr->conv_entry .output_desc ,
111- &workspace_size));
112- entry_ptr->conv_entry .UpdateWorkspace (workspace_size);
113-
114- const size_t input_size = x_dim0 * x_dim1 * x_dim2 * x_dim3;
115- const size_t filter_size = w_dim0 * w_dim1 * w_dim2 * w_dim3;
116- const size_t output_size = oshape[0 ] * oshape[1 ] * oshape[2 ] * oshape[3 ];
117-
118- runtime::DeviceAPI* rocm_api = entry_ptr->conv_entry .rocm_api ;
119- float * input_buf = static_cast <float *>(rocm_api->AllocWorkspace (entry_ptr->conv_entry .ctx ,
120- input_size * sizeof (float )));
121- float * filter_buf = static_cast <float *>(rocm_api->AllocWorkspace (entry_ptr->conv_entry .ctx ,
122- filter_size * sizeof (float )));
123- float * output_buf = static_cast <float *>(rocm_api->AllocWorkspace (entry_ptr->conv_entry .ctx ,
124- output_size * sizeof (float )));
125-
126- const int request_algo_count = 4 ;
127- const bool exhaustive_search = false ;
128- void * workspace = entry_ptr->conv_entry .workspace ;
129- if (workspace_size == 0 ) workspace = nullptr ;
130- int returned_algo_count = 0 ;
131- miopenConvAlgoPerf_t perfs[4 ];
132-
133- MIOPEN_CALL (miopenFindConvolutionForwardAlgorithm (entry_ptr->handle ,
134- entry_ptr->conv_entry .input_desc ,
135- input_buf,
136- entry_ptr->conv_entry .filter_desc ,
137- filter_buf,
138- entry_ptr->conv_entry .conv_desc ,
139- entry_ptr->conv_entry .output_desc ,
140- output_buf,
141- request_algo_count,
142- &returned_algo_count,
143- perfs,
144- workspace,
145- workspace_size,
146- exhaustive_search));
147-
148- rocm_api->FreeWorkspace (entry_ptr->conv_entry .ctx , input_buf);
149- rocm_api->FreeWorkspace (entry_ptr->conv_entry .ctx , filter_buf);
150- rocm_api->FreeWorkspace (entry_ptr->conv_entry .ctx , output_buf);
151-
152- const std::vector<std::string> fwd_algo_names{
153- " miopenConvolutionFwdAlgoGEMM" ,
154- " miopenConvolutionFwdAlgoDirect" ,
155- " miopenConvolutionFwdAlgoFFT" ,
156- " miopenConvolutionFwdAlgoWinograd" ,
157- };
158- const auto best_algo = perfs[0 ].fwd_algo ;
159- LOG (INFO) << " \t MIOpen Found " << returned_algo_count
160- << " fwd algorithms, choosing " << fwd_algo_names[best_algo];
161- for (int i = 0 ; i < returned_algo_count; ++i) {
162- LOG (INFO) << " \t\t " << i << " ) " << fwd_algo_names[perfs[i].fwd_algo ]
163- << " - time: " << perfs[i].time << " ms"
164- << " , Memory: " << perfs[i].memory ;
165- }
166- // Set Algo
167- ret[0 ] = static_cast <int >(best_algo);
168- });
169-
36+ .set_body([](TVMArgs args, TVMRetValue* ret) {
37+ const int mode = args[0 ];
38+ const int dtype = args[1 ];
39+ const int pad_h = args[2 ];
40+ const int pad_w = args[3 ];
41+ const int stride_h = args[4 ];
42+ const int stride_w = args[5 ];
43+ const int dilation_h = args[6 ];
44+ const int dilation_w = args[7 ];
45+ const int x_dim0 = args[8 ];
46+ const int x_dim1 = args[9 ];
47+ const int x_dim2 = args[10 ];
48+ const int x_dim3 = args[11 ];
49+ const int w_dim0 = args[12 ];
50+ const int w_dim1 = args[13 ];
51+ const int w_dim2 = args[14 ];
52+ const int w_dim3 = args[15 ];
53+ void * out_shape = args[16 ];
54+
55+ MIOpenThreadEntry* entry_ptr = MIOpenThreadEntry::ThreadLocal ();
56+ // Set Mode
57+ entry_ptr->conv_entry .mode = static_cast <miopenConvolutionMode_t>(mode);
58+ // Set Ctx
59+ entry_ptr->conv_entry .ctx = TVMContext{kDLROCM , 0 };
60+ // Set Data Type
61+ entry_ptr->conv_entry .data_type = static_cast <miopenDataType_t>(
62+ dtype); // MIOpen suppports fp32(miopenFloat), fp16(miopenHalf) at this moment.
63+ // Set Desc
64+ MIOPEN_CALL (miopenInitConvolutionDescriptor (entry_ptr->conv_entry .conv_desc ,
65+ entry_ptr->conv_entry .mode , pad_h, pad_w,
66+ stride_h, stride_w, dilation_h, dilation_w));
67+ // Set Filter
68+ MIOPEN_CALL (miopenSet4dTensorDescriptor (entry_ptr->conv_entry .filter_desc ,
69+ entry_ptr->conv_entry .data_type , w_dim0, w_dim1,
70+ w_dim2, w_dim3));
71+ // Set Input
72+ MIOPEN_CALL (miopenSet4dTensorDescriptor (entry_ptr->conv_entry .input_desc ,
73+ entry_ptr->conv_entry .data_type , x_dim0, x_dim1,
74+ x_dim2, x_dim3));
75+
76+ // Set Output shape
77+ MIOPEN_CALL (miopenGetConvolutionForwardOutputDim (
78+ entry_ptr->conv_entry .conv_desc , entry_ptr->conv_entry .input_desc ,
79+ entry_ptr->conv_entry .filter_desc , static_cast <int *>(out_shape),
80+ static_cast <int *>(out_shape) + 1 , static_cast <int *>(out_shape) + 2 ,
81+ static_cast <int *>(out_shape) + 3 ));
82+
83+ const int * oshape = static_cast <int *>(out_shape);
84+ // Set Output
85+ MIOPEN_CALL (miopenSet4dTensorDescriptor (entry_ptr->conv_entry .output_desc ,
86+ entry_ptr->conv_entry .data_type , oshape[0 ], oshape[1 ],
87+ oshape[2 ], oshape[3 ]));
88+
89+ // Set workspace
90+ size_t workspace_size = 0 ;
91+ MIOPEN_CALL (miopenConvolutionForwardGetWorkSpaceSize (
92+ entry_ptr->handle , entry_ptr->conv_entry .filter_desc , entry_ptr->conv_entry .input_desc ,
93+ entry_ptr->conv_entry .conv_desc , entry_ptr->conv_entry .output_desc , &workspace_size));
94+ entry_ptr->conv_entry .UpdateWorkspace (workspace_size);
95+
96+ const size_t input_size = x_dim0 * x_dim1 * x_dim2 * x_dim3;
97+ const size_t filter_size = w_dim0 * w_dim1 * w_dim2 * w_dim3;
98+ const size_t output_size = oshape[0 ] * oshape[1 ] * oshape[2 ] * oshape[3 ];
99+
100+ runtime::DeviceAPI* rocm_api = entry_ptr->conv_entry .rocm_api ;
101+ float * input_buf = static_cast <float *>(
102+ rocm_api->AllocWorkspace (entry_ptr->conv_entry .ctx , input_size * sizeof (float )));
103+ float * filter_buf = static_cast <float *>(
104+ rocm_api->AllocWorkspace (entry_ptr->conv_entry .ctx , filter_size * sizeof (float )));
105+ float * output_buf = static_cast <float *>(
106+ rocm_api->AllocWorkspace (entry_ptr->conv_entry .ctx , output_size * sizeof (float )));
107+
108+ const int request_algo_count = 4 ;
109+ const bool exhaustive_search = false ;
110+ void * workspace = entry_ptr->conv_entry .workspace ;
111+ if (workspace_size == 0 ) workspace = nullptr ;
112+ int returned_algo_count = 0 ;
113+ miopenConvAlgoPerf_t perfs[4 ];
114+
115+ MIOPEN_CALL (miopenFindConvolutionForwardAlgorithm (
116+ entry_ptr->handle , entry_ptr->conv_entry .input_desc , input_buf,
117+ entry_ptr->conv_entry .filter_desc , filter_buf, entry_ptr->conv_entry .conv_desc ,
118+ entry_ptr->conv_entry .output_desc , output_buf, request_algo_count, &returned_algo_count,
119+ perfs, workspace, workspace_size, exhaustive_search));
120+
121+ rocm_api->FreeWorkspace (entry_ptr->conv_entry .ctx , input_buf);
122+ rocm_api->FreeWorkspace (entry_ptr->conv_entry .ctx , filter_buf);
123+ rocm_api->FreeWorkspace (entry_ptr->conv_entry .ctx , output_buf);
124+
125+ const std::vector<std::string> fwd_algo_names{
126+ " miopenConvolutionFwdAlgoGEMM" , " miopenConvolutionFwdAlgoDirect" ,
127+ " miopenConvolutionFwdAlgoFFT" , " miopenConvolutionFwdAlgoWinograd" ,
128+ };
129+ const auto best_algo = perfs[0 ].fwd_algo ;
130+ LOG (INFO) << " \t MIOpen Found " << returned_algo_count << " fwd algorithms, choosing "
131+ << fwd_algo_names[best_algo];
132+ for (int i = 0 ; i < returned_algo_count; ++i) {
133+ LOG (INFO) << " \t\t " << i << " ) " << fwd_algo_names[perfs[i].fwd_algo ]
134+ << " - time: " << perfs[i].time << " ms"
135+ << " , Memory: " << perfs[i].memory ;
136+ }
137+ // Set Algo
138+ ret[0 ] = static_cast <int >(best_algo);
139+ });
170140
171141TVM_REGISTER_GLOBAL (" tvm.contrib.miopen.conv2d.forward" )
172- .set_body([](TVMArgs args, TVMRetValue *ret) {
173- const int mode = args[0 ];
174- const int dtype = args[1 ];
175- const int pad_h = args[2 ];
176- const int pad_w = args[3 ];
177- const int stride_h = args[4 ];
178- const int stride_w = args[5 ];
179- const int dilation_h = args[6 ];
180- const int dilation_w = args[7 ];
181- const int algo = args[8 ];
182- const DLTensor *x = args[9 ];
183- const DLTensor *w = args[10 ];
184- const DLTensor *y = args[11 ];
185-
186- MIOpenThreadEntry* entry_ptr = MIOpenThreadEntry::ThreadLocal ();
187- entry_ptr->conv_entry .fwd_algo = static_cast <miopenConvFwdAlgorithm_t>(algo);
188- // Set Mode
189- entry_ptr->conv_entry .mode = static_cast <miopenConvolutionMode_t>(mode);
190- // Set Ctx
191- entry_ptr->conv_entry .ctx = x->ctx ;
192- // Set Data Type
193- entry_ptr->conv_entry .data_type = static_cast <miopenDataType_t>(dtype); // MIOpen suppports fp32(miopenFloat), fp16(miopenHalf) at this moment.
194- // Set Desc
195- MIOPEN_CALL (miopenInitConvolutionDescriptor (entry_ptr->conv_entry .conv_desc ,
196- entry_ptr->conv_entry .mode ,
197- pad_h,
198- pad_w,
199- stride_h,
200- stride_w,
201- dilation_h,
202- dilation_w));
203- // Set Filter
204- MIOPEN_CALL (miopenSet4dTensorDescriptor (entry_ptr->conv_entry .filter_desc ,
205- entry_ptr->conv_entry .data_type ,
206- w->shape [0 ],
207- w->shape [1 ],
208- w->shape [2 ],
209- w->shape [3 ]));
210- // Set Input
211- MIOPEN_CALL (miopenSet4dTensorDescriptor (entry_ptr->conv_entry .input_desc ,
212- entry_ptr->conv_entry .data_type ,
213- x->shape [0 ],
214- x->shape [1 ],
215- x->shape [2 ],
216- x->shape [3 ]));
217- // Set Output
218- MIOPEN_CALL (miopenSet4dTensorDescriptor (entry_ptr->conv_entry .output_desc ,
219- entry_ptr->conv_entry .data_type ,
220- y->shape [0 ],
221- y->shape [1 ],
222- y->shape [2 ],
223- y->shape [3 ]));
224-
225- const float alpha = 1 .f ;
226- const float beta = 0 .f ;
227- MIOPEN_CALL (miopenConvolutionForward (entry_ptr->handle ,
228- &alpha,
229- entry_ptr->conv_entry .input_desc ,
230- x->data ,
231- entry_ptr->conv_entry .filter_desc ,
232- w->data ,
233- entry_ptr->conv_entry .conv_desc ,
234- entry_ptr->conv_entry .fwd_algo ,
235- &beta,
236- entry_ptr->conv_entry .output_desc ,
237- y->data ,
238- entry_ptr->conv_entry .workspace ,
239- entry_ptr->conv_entry .workspace_size ));
240- });
142+ .set_body([](TVMArgs args, TVMRetValue* ret) {
143+ const int mode = args[0 ];
144+ const int dtype = args[1 ];
145+ const int pad_h = args[2 ];
146+ const int pad_w = args[3 ];
147+ const int stride_h = args[4 ];
148+ const int stride_w = args[5 ];
149+ const int dilation_h = args[6 ];
150+ const int dilation_w = args[7 ];
151+ const int algo = args[8 ];
152+ const DLTensor* x = args[9 ];
153+ const DLTensor* w = args[10 ];
154+ const DLTensor* y = args[11 ];
155+
156+ MIOpenThreadEntry* entry_ptr = MIOpenThreadEntry::ThreadLocal ();
157+ entry_ptr->conv_entry .fwd_algo = static_cast <miopenConvFwdAlgorithm_t>(algo);
158+ // Set Mode
159+ entry_ptr->conv_entry .mode = static_cast <miopenConvolutionMode_t>(mode);
160+ // Set Ctx
161+ entry_ptr->conv_entry .ctx = x->ctx ;
162+ // Set Data Type
163+ entry_ptr->conv_entry .data_type = static_cast <miopenDataType_t>(
164+ dtype); // MIOpen suppports fp32(miopenFloat), fp16(miopenHalf) at this moment.
165+ // Set Desc
166+ MIOPEN_CALL (miopenInitConvolutionDescriptor (entry_ptr->conv_entry .conv_desc ,
167+ entry_ptr->conv_entry .mode , pad_h, pad_w,
168+ stride_h, stride_w, dilation_h, dilation_w));
169+ // Set Filter
170+ MIOPEN_CALL (miopenSet4dTensorDescriptor (entry_ptr->conv_entry .filter_desc ,
171+ entry_ptr->conv_entry .data_type , w->shape [0 ],
172+ w->shape [1 ], w->shape [2 ], w->shape [3 ]));
173+ // Set Input
174+ MIOPEN_CALL (miopenSet4dTensorDescriptor (entry_ptr->conv_entry .input_desc ,
175+ entry_ptr->conv_entry .data_type , x->shape [0 ],
176+ x->shape [1 ], x->shape [2 ], x->shape [3 ]));
177+ // Set Output
178+ MIOPEN_CALL (miopenSet4dTensorDescriptor (entry_ptr->conv_entry .output_desc ,
179+ entry_ptr->conv_entry .data_type , y->shape [0 ],
180+ y->shape [1 ], y->shape [2 ], y->shape [3 ]));
181+
182+ const float alpha = 1 .f ;
183+ const float beta = 0 .f ;
184+ MIOPEN_CALL (miopenConvolutionForward (
185+ entry_ptr->handle , &alpha, entry_ptr->conv_entry .input_desc , x->data ,
186+ entry_ptr->conv_entry .filter_desc , w->data , entry_ptr->conv_entry .conv_desc ,
187+ entry_ptr->conv_entry .fwd_algo , &beta, entry_ptr->conv_entry .output_desc , y->data ,
188+ entry_ptr->conv_entry .workspace , entry_ptr->conv_entry .workspace_size ));
189+ });
241190
242191} // namespace miopen
243192} // namespace contrib
0 commit comments