|
3 | 3 |
|
4 | 4 | import tvm |
5 | 5 |
|
def gevm(env, mock=False):
    """Build the vector-matrix multiply (GEVM) tensor intrinsic.

    The declared computation is ``out[i] = sum_k inp[k] * wgt[i, k]``, with
    both operands cast to the accumulator dtype before multiplying.

    Parameters
    ----------
    env : Environment
        The Environment

    mock : bool
        Whether to create a mock version. When true, all three statements
        of the intrinsic are no-op ``Evaluate(0)`` nodes.

    Returns
    -------
    The result of ``tvm.decl_tensor_intrin`` for the computation above,
    registered under the name "GEVM".
    """
    # Lane counts are the ratio of element bits to scalar width for each
    # buffer kind; they are used below as offset factor and alignment.
    n_wgt_lanes = env.WGT_ELEM_BITS // env.WGT_WIDTH
    assert n_wgt_lanes == env.BLOCK_OUT * env.BLOCK_IN
    n_rows, n_cols = env.BLOCK_OUT, env.BLOCK_IN
    assert n_rows * n_cols == n_wgt_lanes
    n_inp_lanes = env.INP_ELEM_BITS // env.INP_WIDTH
    n_out_lanes = env.ACC_ELEM_BITS // env.ACC_WIDTH

    # Symbolic operands of the computation.
    matrix = tvm.placeholder(
        (n_rows, n_cols),
        dtype="int%d" % env.WGT_WIDTH,
        name=env.wgt_scope)
    vector = tvm.placeholder(
        (n_cols, ),
        dtype="int%d" % env.INP_WIDTH,
        name=env.inp_scope)
    red_axis = tvm.reduce_axis((0, n_cols), name="k")
    acc_dtype = "int%d" % env.ACC_WIDTH

    def row_dot(i):
        """Reduce one matrix row against the input vector."""
        # NOTE: the parameter name `i` and the operand order (vector first,
        # matrix second) are kept as in the original expression on purpose;
        # the unpacking order of `ins` in intrin_func relies on the latter.
        product = (vector[red_axis].astype(acc_dtype) *
                   matrix[i, red_axis].astype(acc_dtype))
        return tvm.sum(product, axis=[red_axis])

    result = tvm.compute((n_rows,), row_dot, name="out")

    # Buffer declarations binding each tensor to its on-chip scope.
    wgt_binding = tvm.decl_buffer(
        matrix.shape, matrix.dtype, env.wgt_scope,
        scope=env.wgt_scope,
        offset_factor=n_wgt_lanes,
        data_alignment=n_wgt_lanes)
    inp_binding = tvm.decl_buffer(
        vector.shape, vector.dtype, env.inp_scope,
        scope=env.inp_scope,
        offset_factor=n_inp_lanes,
        data_alignment=n_inp_lanes)
    out_binding = tvm.decl_buffer(
        result.shape, result.dtype, env.acc_scope,
        scope=env.acc_scope,
        offset_factor=n_out_lanes,
        data_alignment=n_out_lanes)

    def intrin_func(ins, outs):
        """Vector-matrix multiply intrinsic function"""
        vec_buf, mat_buf = ins
        acc_buf = outs[0]

        def instr(index):
            """Generate vector-matrix multiply VTA instruction"""
            builder = tvm.ir_builder.create()
            device = env.dev
            builder.scope_attr(
                device.vta_axis, "coproc_scope",
                device.get_task_qid(device.QID_COMPUTE))
            builder.scope_attr(
                device.vta_axis, "coproc_uop_scope",
                device.vta_push_uop)

            acc_ptr = acc_buf.access_ptr("rw", "int32")
            if index in (0, 2):
                # Normal-set (0) and update (2) push the real operands.
                flag = 0
                inp_arg = vec_buf.access_ptr("r", "int32")
                wgt_arg = mat_buf.access_ptr("r", "int32")
            else:
                # Reset variant: no input/weight operands are passed.
                # NOTE(review): `flag` is presumably the reset switch of
                # VTAUopPush -- confirm against the VTA runtime API.
                flag = 1
                inp_arg = 0
                wgt_arg = 0

            builder.emit(tvm.call_extern(
                "int32", "VTAUopPush",
                0, flag,
                acc_ptr,
                inp_arg,
                wgt_arg,
                0, 0, 0))
            return builder.get()

        # The intrinsic is described by a triple: normal-set, reset, update.
        nop = tvm.make.Evaluate(0)
        if not mock:
            return (instr(0), instr(1), instr(2))
        return (nop, nop, nop)

    bindings = {vector: inp_binding,
                matrix: wgt_binding,
                result: out_binding}
    return tvm.decl_tensor_intrin(result.op, intrin_func,
                                  name="GEVM",
                                  binds=bindings)
86 | | - |
87 | | - |
88 | 6 | def gemm(env, mock=False): |
89 | 7 | """Matrix-matrix multiply intrinsic |
90 | 8 |
|
|
0 commit comments