Skip to content

Commit f46cb55

Browse files
vegaluisjose authored and tmoreau89 committed
[VTA][Chisel] scale dram base address in hardware instead of runtime (apache#3772)
* [VTA][Chisel] scale dram base address in hardware instead of runtime
* remove trailing spaces
1 parent d022b73 commit f46cb55

File tree

6 files changed

+23
-22
lines changed

6 files changed

+23
-22
lines changed

hardware/chisel/src/main/scala/core/LoadUop.scala

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -126,11 +126,12 @@ class LoadUop(debug: Boolean = false)(implicit p: Parameters) extends Module {
126126
}
127127

128128
// read-from-dram
129+
val maskOffset = VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt
129130
when (state === sIdle) {
130131
when (offsetIsEven) {
131-
raddr := io.baddr + dec.dram_offset
132+
raddr := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(uopBytes)))
132133
} .otherwise {
133-
raddr := io.baddr + dec.dram_offset - uopBytes.U
134+
raddr := (io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(uopBytes)))) - uopBytes.U
134135
}
135136
} .elsewhen (state === sReadData && xcnt === xlen && xrem =/= 0.U) {
136137
raddr := raddr + xmax_bytes

hardware/chisel/src/main/scala/core/TensorLoad.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)
4848
val strideFactor = tp.tensorLength * tp.tensorWidth
4949

5050
val dec = io.inst.asTypeOf(new MemDecode)
51-
val dataCtrl = Module(new TensorDataCtrl(sizeFactor, strideFactor))
51+
val dataCtrl = Module(new TensorDataCtrl(tensorType, sizeFactor, strideFactor))
5252
val dataCtrlDone = RegInit(false.B)
5353
val yPadCtrl0 = Module(new TensorPadCtrl(padType = "YPad0", sizeFactor))
5454
val yPadCtrl1 = Module(new TensorPadCtrl(padType = "YPad1", sizeFactor))

hardware/chisel/src/main/scala/core/TensorStore.scala

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -180,9 +180,11 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)
180180
val mdata = MuxLookup(set, 0.U.asTypeOf(chiselTypeOf(wdata_t)), tread)
181181

182182
// write-to-dram
183+
val maskOffset = VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt
184+
val elemBytes = (p(CoreKey).batch * p(CoreKey).blockOut * p(CoreKey).outBits) / 8
183185
when (state === sIdle) {
184-
waddr_cur := io.baddr + dec.dram_offset
185-
waddr_nxt := io.baddr + dec.dram_offset
186+
waddr_cur := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(elemBytes)))
187+
waddr_nxt := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(elemBytes)))
186188
} .elsewhen (state === sWriteAck && io.vme_wr.ack && xrem =/= 0.U) {
187189
waddr_cur := waddr_cur + xmax_bytes
188190
} .elsewhen (stride) {

hardware/chisel/src/main/scala/core/TensorUtil.scala

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -214,7 +214,7 @@ class TensorPadCtrl(padType: String = "none", sizeFactor: Int = 1) extends Modul
214214
}
215215

216216
/** TensorDataCtrl. Data controller for TensorLoad. */
217-
class TensorDataCtrl(sizeFactor: Int = 1, strideFactor: Int = 1)(implicit p: Parameters) extends Module {
217+
class TensorDataCtrl(tensorType: String = "none", sizeFactor: Int = 1, strideFactor: Int = 1)(implicit p: Parameters) extends Module {
218218
val mp = p(ShellKey).memParams
219219
val io = IO(new Bundle {
220220
val start = Input(Bool())
@@ -281,9 +281,19 @@ class TensorDataCtrl(sizeFactor: Int = 1, strideFactor: Int = 1)(implicit p: Par
281281
ycnt := ycnt + 1.U
282282
}
283283

284+
val maskOffset = VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt
285+
val elemBytes =
286+
if (tensorType == "inp") {
287+
(p(CoreKey).batch * p(CoreKey).blockIn * p(CoreKey).inpBits) / 8
288+
} else if (tensorType == "wgt") {
289+
(p(CoreKey).blockOut * p(CoreKey).blockIn * p(CoreKey).wgtBits) / 8
290+
} else {
291+
(p(CoreKey).batch * p(CoreKey).blockOut * p(CoreKey).accBits) / 8
292+
}
293+
284294
when (io.start) {
285-
caddr := io.baddr + dec.dram_offset
286-
baddr := io.baddr + dec.dram_offset
295+
caddr := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(elemBytes)))
296+
baddr := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(elemBytes)))
287297
} .elsewhen (io.yupdate) {
288298
when (split) {
289299
caddr := caddr + xmax_bytes

src/device_api.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,9 @@
66
* to you under the Apache License, Version 2.0 (the
77
* "License"); you may not use this file except in compliance
88
* with the License. You may obtain a copy of the License at
9-
*
9+
*
1010
* http://www.apache.org/licenses/LICENSE-2.0
11-
*
11+
*
1212
* Unless required by applicable law or agreed to in writing,
1313
* software distributed under the License is distributed on an
1414
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

src/runtime.cc

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -431,11 +431,7 @@ class UopQueue : public BaseQueue<VTAUop> {
431431
insn->memory_type = VTA_MEM_ID_UOP;
432432
insn->sram_base = sram_begin_;
433433
// Update cache idx to physical address map
434-
#ifdef USE_TSIM
435-
insn->dram_base = fpga_buff_phy_ + offset;
436-
#else
437434
insn->dram_base = (fpga_buff_phy_ + offset) / kElemBytes;
438-
#endif
439435
insn->y_size = 1;
440436
insn->x_size = (sram_end_ - sram_begin_);
441437
insn->x_stride = (sram_end_ - sram_begin_);
@@ -1011,11 +1007,7 @@ class CommandQueue {
10111007
insn->memory_type = dst_memory_type;
10121008
insn->sram_base = dst_sram_index;
10131009
DataBuffer* src = DataBuffer::FromHandle(src_dram_addr);
1014-
#ifdef USE_TSIM
1015-
insn->dram_base = (uint32_t) src->phy_addr() + src_elem_offset*GetElemBytes(dst_memory_type);
1016-
#else
10171010
insn->dram_base = src->phy_addr() / GetElemBytes(dst_memory_type) + src_elem_offset;
1018-
#endif
10191011
insn->y_size = y_size;
10201012
insn->x_size = x_size;
10211013
insn->x_stride = x_stride;
@@ -1038,11 +1030,7 @@ class CommandQueue {
10381030
insn->memory_type = src_memory_type;
10391031
insn->sram_base = src_sram_index;
10401032
DataBuffer* dst = DataBuffer::FromHandle(dst_dram_addr);
1041-
#ifdef USE_TSIM
1042-
insn->dram_base = (uint32_t) dst->phy_addr() + dst_elem_offset*GetElemBytes(src_memory_type);
1043-
#else
10441033
insn->dram_base = dst->phy_addr() / GetElemBytes(src_memory_type) + dst_elem_offset;
1045-
#endif
10461034
insn->y_size = y_size;
10471035
insn->x_size = x_size;
10481036
insn->x_stride = x_stride;

0 commit comments

Comments
 (0)