|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one |
| 3 | + * or more contributor license agreements. See the NOTICE file |
| 4 | + * distributed with this work for additional information |
| 5 | + * regarding copyright ownership. The ASF licenses this file |
| 6 | + * to you under the Apache License, Version 2.0 (the |
| 7 | + * "License"); you may not use this file except in compliance |
| 8 | + * with the License. You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, |
| 13 | + * software distributed under the License is distributed on an |
| 14 | + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | + * KIND, either express or implied. See the License for the |
| 16 | + * specific language governing permissions and limitations |
| 17 | + * under the License. |
| 18 | + */ |
| 19 | + |
| 20 | +package vta.core |
| 21 | + |
| 22 | +import chisel3._ |
| 23 | +import chisel3.util._ |
| 24 | +import vta.util.config._ |
| 25 | +import vta.shell._ |
| 26 | + |
/** Compute.
  *
  * Sequences instructions, one at a time, through the sub-modules it owns:
  *  - loadUop: loads micro-ops from memory
  *  - tensorAcc: loads biases (acc) from memory
  *  - tensorGemm: executes GEMM instructions
  *  - tensorAlu: executes ALU instructions
  *
  * Incoming instructions are buffered in a queue. The instruction at the head
  * of the queue is decoded and issued once every semaphore it asks to pop is
  * ready; it is dequeued when the selected sub-module reports done, or after
  * the single cycle spent in the sync state.
  *
  * @param debug when true, printf traces are generated for the start and the
  *              completion of every instruction
  * @param p     implicit configuration; ShellKey and CoreKey are read from it
  */
class Compute(debug: Boolean = false)(implicit p: Parameters) extends Module {
  val mp = p(ShellKey).memParams
  val io = IO(new Bundle {
    val i_post = Vec(2, Input(Bool()))
    val o_post = Vec(2, Output(Bool()))
    val inst = Flipped(Decoupled(UInt(INST_BITS.W)))
    val uop_baddr = Input(UInt(mp.addrBits.W))
    val acc_baddr = Input(UInt(mp.addrBits.W))
    val vme_rd = Vec(2, new VMEReadMaster)
    val inp = new TensorMaster(tensorType = "inp")
    val wgt = new TensorMaster(tensorType = "wgt")
    val out = new TensorMaster(tensorType = "out")
    val finish = Output(Bool())
  })

  // Three-state controller: idle -> (sync | execute) -> idle.
  val sIdle :: sSync :: sExe :: Nil = Enum(3)
  val state = RegInit(sIdle)

  // One semaphore per i_post input; index 0 gates pop_prev, index 1 gates pop_next.
  val s = Seq.fill(2)(Module(new Semaphore(counterBits = 8, counterInitValue = 0)))

  val loadUop = Module(new LoadUop)
  val tensorAcc = Module(new TensorLoad(tensorType = "acc"))
  val tensorGemm = Module(new TensorGemm)
  val tensorAlu = Module(new TensorAlu)

  val inst_q = Module(new Queue(UInt(INST_BITS.W), p(CoreKey).instQueueEntries))

  // Decode whatever currently sits at the head of the instruction queue.
  val dec = Module(new ComputeDecode)
  dec.io.inst := inst_q.io.deq.bits

  // One bit per instruction class; bit 0 is load-uop, bit 4 is finish.
  val inst_type = Cat(
    dec.io.isFinish,
    dec.io.isAlu,
    dec.io.isGemm,
    dec.io.isLoadAcc,
    dec.io.isLoadUop)

  // A semaphore only blocks the instruction when the instruction asks to pop it.
  val sprev = inst_q.io.deq.valid & Mux(dec.io.pop_prev, s(0).io.sready, true.B)
  val snext = inst_q.io.deq.valid & Mux(dec.io.pop_next, s(1).io.sready, true.B)
  val start = snext & sprev

  // Completion signal of the unit selected by the decoded instruction class.
  val done = MuxLookup(
    inst_type,
    false.B, // default
    Seq(
      "h_01".U -> loadUop.io.done,
      "h_02".U -> tensorAcc.io.done,
      "h_04".U -> tensorGemm.io.done,
      "h_08".U -> tensorAlu.io.done,
      "h_10".U -> true.B // Finish
    )
  )

  // High in the cycle an instruction is issued from the idle state.
  private val issue = state === sIdle & start
  // High in the cycle the head instruction completes and leaves the queue.
  private val retire = (state === sExe & done) | (state === sSync)

  // control
  switch (state) {
    is (sIdle) {
      when (start) {
        when (dec.io.isSync) {
          state := sSync
        } .elsewhen (inst_type.orR) {
          state := sExe
        }
      }
    }
    is (sSync) {
      // Sync takes exactly one cycle.
      state := sIdle
    }
    is (sExe) {
      when (done) {
        state := sIdle
      }
    }
  }

  // instructions
  inst_q.io.enq <> io.inst
  inst_q.io.deq.ready := retire

  // uop: the index request comes from whichever of GEMM / ALU is selected.
  loadUop.io.start := issue & dec.io.isLoadUop
  loadUop.io.inst := inst_q.io.deq.bits
  loadUop.io.baddr := io.uop_baddr
  io.vme_rd(0) <> loadUop.io.vme_rd
  loadUop.io.uop.idx <> Mux(dec.io.isGemm, tensorGemm.io.uop.idx, tensorAlu.io.uop.idx)

  // acc: read index and write port are likewise selected between GEMM and ALU.
  tensorAcc.io.start := issue & dec.io.isLoadAcc
  tensorAcc.io.inst := inst_q.io.deq.bits
  tensorAcc.io.baddr := io.acc_baddr
  tensorAcc.io.tensor.rd.idx <> Mux(dec.io.isGemm, tensorGemm.io.acc.rd.idx, tensorAlu.io.acc.rd.idx)
  tensorAcc.io.tensor.wr <> Mux(dec.io.isGemm, tensorGemm.io.acc.wr, tensorAlu.io.acc.wr)
  io.vme_rd(1) <> tensorAcc.io.vme_rd

  // gemm: read-data valids are masked so they are only seen for GEMM instructions.
  tensorGemm.io.start := issue & dec.io.isGemm
  tensorGemm.io.inst := inst_q.io.deq.bits
  tensorGemm.io.uop.data.valid := loadUop.io.uop.data.valid & dec.io.isGemm
  tensorGemm.io.uop.data.bits <> loadUop.io.uop.data.bits
  tensorGemm.io.inp <> io.inp
  tensorGemm.io.wgt <> io.wgt
  tensorGemm.io.acc.rd.data.valid := tensorAcc.io.tensor.rd.data.valid & dec.io.isGemm
  tensorGemm.io.acc.rd.data.bits <> tensorAcc.io.tensor.rd.data.bits
  tensorGemm.io.out.rd.data.valid := io.out.rd.data.valid & dec.io.isGemm
  tensorGemm.io.out.rd.data.bits <> io.out.rd.data.bits

  // alu: read-data valids are masked so they are only seen for ALU instructions.
  tensorAlu.io.start := issue & dec.io.isAlu
  tensorAlu.io.inst := inst_q.io.deq.bits
  tensorAlu.io.uop.data.valid := loadUop.io.uop.data.valid & dec.io.isAlu
  tensorAlu.io.uop.data.bits <> loadUop.io.uop.data.bits
  tensorAlu.io.acc.rd.data.valid := tensorAcc.io.tensor.rd.data.valid & dec.io.isAlu
  tensorAlu.io.acc.rd.data.bits <> tensorAcc.io.tensor.rd.data.bits
  tensorAlu.io.out.rd.data.valid := io.out.rd.data.valid & dec.io.isAlu
  tensorAlu.io.out.rd.data.bits <> io.out.rd.data.bits

  // out
  io.out.rd.idx <> Mux(dec.io.isGemm, tensorGemm.io.out.rd.idx, tensorAlu.io.out.rd.idx)
  io.out.wr <> Mux(dec.io.isGemm, tensorGemm.io.out.wr, tensorAlu.io.out.wr)

  // semaphore: posts come from outside, waits are consumed on issue, and
  // outgoing posts are raised in the cycle the instruction retires.
  s(0).io.spost := io.i_post(0)
  s(1).io.spost := io.i_post(1)
  s(0).io.swait := dec.io.pop_prev & issue
  s(1).io.swait := dec.io.pop_next & issue
  io.o_post(0) := dec.io.push_prev & retire
  io.o_post(1) := dec.io.push_next & retire

  // finish
  io.finish := state === sExe & done & dec.io.isFinish

  // debug
  if (debug) {
    // start
    when (issue) {
      when (dec.io.isSync) {
        printf("[Compute] start sync\n")
      } .elsewhen (dec.io.isLoadUop) {
        printf("[Compute] start load uop\n")
      } .elsewhen (dec.io.isLoadAcc) {
        printf("[Compute] start load acc\n")
      } .elsewhen (dec.io.isGemm) {
        printf("[Compute] start gemm\n")
      } .elsewhen (dec.io.isAlu) {
        printf("[Compute] start alu\n")
      } .elsewhen (dec.io.isFinish) {
        printf("[Compute] start finish\n")
      }
    }
    // done
    when (state === sSync) {
      printf("[Compute] done sync\n")
    }
    when (state === sExe & done) {
      when (dec.io.isLoadUop) {
        printf("[Compute] done load uop\n")
      } .elsewhen (dec.io.isLoadAcc) {
        printf("[Compute] done load acc\n")
      } .elsewhen (dec.io.isGemm) {
        printf("[Compute] done gemm\n")
      } .elsewhen (dec.io.isAlu) {
        printf("[Compute] done alu\n")
      } .elsewhen (dec.io.isFinish) {
        printf("[Compute] done finish\n")
      }
    }
  }
}
0 commit comments