|
32 | 32 | # PyTorch. |
33 | 33 |
|
34 | 34 | import os |
35 | | -import sys |
36 | 35 | import numpy as np |
37 | 36 | import torch |
38 | 37 | from torch import fx |
|
101 | 100 |
|
102 | 101 | # Skip the expensive tuning and GPU steps in the CI environment
103 | 102 | IS_IN_CI = os.getenv("CI", "") == "true" |
104 | | -if IS_IN_CI: |
105 | | - sys.exit(0) |
106 | | - |
107 | | -with target: |
108 | | - mod = tvm.ir.transform.Sequential( |
109 | | - [ |
110 | | - # Convert BatchNorm into a sequence of simpler ops for fusion |
111 | | - relax.transform.DecomposeOpsForInference(), |
112 | | - # Canonicalize the bindings |
113 | | - relax.transform.CanonicalizeBindings(), |
114 | | -            # Run the default optimization pipeline
115 | | - relax.get_pipeline("zero"), |
116 | | -            # Tune the model and store the tuning log in the database
117 | | - relax.transform.MetaScheduleTuneIRMod({}, work_dir, TOTAL_TRIALS), |
118 | | -            # Apply the tuned schedules from the database
119 | | - relax.transform.MetaScheduleApplyDatabase(work_dir), |
120 | | - ] |
121 | | - )(mod) |
122 | | - |
123 | | -# Only show the main function |
124 | | -mod["main"].show() |
| 103 | +if not IS_IN_CI: |
| 104 | + with target: |
| 105 | + mod = tvm.ir.transform.Sequential( |
| 106 | + [ |
| 107 | + # Convert BatchNorm into a sequence of simpler ops for fusion |
| 108 | + relax.transform.DecomposeOpsForInference(), |
| 109 | + # Canonicalize the bindings |
| 110 | + relax.transform.CanonicalizeBindings(), |
 | 111 | +                # Run the default optimization pipeline
| 112 | + relax.get_pipeline("zero"), |
 | 113 | +                # Tune the model and store the tuning log in the database
| 114 | + relax.transform.MetaScheduleTuneIRMod({}, work_dir, TOTAL_TRIALS), |
 | 115 | +                # Apply the tuned schedules from the database
| 116 | + relax.transform.MetaScheduleApplyDatabase(work_dir), |
| 117 | + ] |
| 118 | + )(mod) |
| 119 | + |
| 120 | + # Only show the main function |
| 121 | + mod["main"].show() |
125 | 122 |
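The two MetaSchedule passes dominate the runtime of this pipeline. As a minimal sketch (not part of the change above), the same Sequential pipeline can be run with the tuning passes omitted for a quick, untuned build; `quick_mod` is a hypothetical name, and `mod` and `target` are assumed to be defined as in the script:

    # Same pipeline without tuning -- useful for fast iteration.
    # Assumes `mod` and `target` are defined as in the script above.
    with target:
        quick_mod = tvm.ir.transform.Sequential(
            [
                relax.transform.DecomposeOpsForInference(),
                relax.transform.CanonicalizeBindings(),
                relax.get_pipeline("zero"),
            ]
        )(mod)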
|
126 | 123 | ###################################################################### |
127 | 124 | # Build and Deploy |
128 | 125 | # ---------------- |
129 | 126 | # Finally, we build the optimized model and deploy it to the target device. |
130 | | - |
131 | | -ex = relax.build(mod, target="cuda") |
132 | | -dev = tvm.device("cuda", 0) |
133 | | -vm = relax.VirtualMachine(ex, dev) |
134 | | -# Allocate the input data and parameters on the GPU device
135 | | -gpu_data = tvm.nd.array(np.random.rand(1, 3, 224, 224).astype("float32"), dev) |
136 | | -gpu_params = [tvm.nd.array(p, dev) for p in params["main"]] |
137 | | -gpu_out = vm["main"](gpu_data, *gpu_params).numpy() |
138 | | - |
139 | | -print(gpu_out.shape) |
| 127 | +# We skip this step in the CI environment. |
| 128 | + |
| 129 | +if not IS_IN_CI: |
| 130 | + ex = relax.build(mod, target="cuda") |
| 131 | + dev = tvm.device("cuda", 0) |
| 132 | + vm = relax.VirtualMachine(ex, dev) |
 | 133 | +    # Allocate the input data and parameters on the GPU device
| 134 | + gpu_data = tvm.nd.array(np.random.rand(1, 3, 224, 224).astype("float32"), dev) |
| 135 | + gpu_params = [tvm.nd.array(p, dev) for p in params["main"]] |
| 136 | + gpu_out = vm["main"](gpu_data, *gpu_params).numpy() |
| 137 | + |
| 138 | + print(gpu_out.shape) |
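For deployment beyond this script, the built executable can be exported to a shared library and reloaded later without recompiling. A minimal sketch, assuming `ex`, `gpu_data`, and `gpu_params` from the block above; the filename is a hypothetical placeholder:

    # Export the compiled artifact and reload it in a fresh session.
    ex.export_library("compiled_model.so")  # path is illustrative
    loaded = tvm.runtime.load_module("compiled_model.so")
    vm2 = relax.VirtualMachine(loaded, tvm.device("cuda", 0))
    out = vm2["main"](gpu_data, *gpu_params).numpy()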