Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VTA] Enable streamlined GEMM execution #4392

Merged
merged 5 commits into from
Nov 27, 2019
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 33 additions & 13 deletions vta/hardware/chisel/src/main/scala/core/TensorGemm.scala
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,23 @@ class PipeAdder(aBits: Int = 8, bBits: Int = 8) extends Module {
io.y := add
}

/** Adder: signed addition of `a` and `b` with no registers; `y` is one bit wider than the wider input */
liangfu marked this conversation as resolved.
Show resolved Hide resolved
class Adder(aBits: Int = 8, bBits: Int = 8) extends Module {
  // Result gets one extra bit beyond the wider operand so the sum always fits.
  val outBits = aBits.max(bBits) + 1

  val io = IO(new Bundle {
    val a = Input(SInt(aBits.W))
    val b = Input(SInt(bBits.W))
    val y = Output(SInt(outBits.W))
  })

  // Operand wires simply mirror the inputs; no register is instantiated in this module.
  val rA = Wire(SInt(aBits.W))
  rA := io.a
  val rB = Wire(SInt(bBits.W))
  rB := io.b

  // `+&` is Chisel's width-expanding add, so the carry bit is kept in `add`.
  val add = Wire(SInt(outBits.W))
  add := rA +& rB

  io.y := add
}

/** Pipelined DotProduct based on MAC, PipeAdder (first adder layer) and Adder (remaining adder layers) */
class DotProduct(aBits: Int = 8, bBits: Int = 8, size: Int = 16)
extends Module {
Expand All @@ -80,9 +97,11 @@ class DotProduct(aBits: Int = 8, bBits: Int = 8, size: Int = 16)
val m = Seq.fill(s(0))(Module(new MAC(aBits, bBits, cBits = 1))) // # of total vector pairs
val a = Seq.tabulate(p)(
i =>
Seq.fill(s(i + 1))(Module(new PipeAdder(
aBits = (b + i + 1),
bBits = (b + i + 1))))) // # adders within each layer
Seq.fill(s(i + 1))(
if (i == 0)
Module(new PipeAdder(aBits = (b + i + 1), bBits = (b + i + 1)))
else
Module(new Adder(aBits = (b + i + 1), bBits = (b + i + 1))))) // # adders within each layer

// Vector MACs
for (i <- 0 until s(0)) {
Expand Down Expand Up @@ -126,8 +145,7 @@ class MatrixVectorMultiplication(implicit p: Parameters) extends Module {
})
val dot = Seq.fill(size)(
Module(new DotProduct(aBits = inpBits, bBits = wgtBits, size)))
val acc = Seq.fill(size)(
Module(new Pipe(UInt(accBits.W), latency = log2Ceil(size) + 1)))
val acc = Seq.fill(size)(Module(new Pipe(UInt(accBits.W), latency = 2)))
liangfu marked this conversation as resolved.
Show resolved Hide resolved
val add = Seq.fill(size)(Wire(SInt(accBits.W)))
val vld = Wire(Vec(size, Bool()))

Expand Down Expand Up @@ -188,7 +206,7 @@ class TensorGemm(debug: Boolean = false)(implicit p: Parameters)
val wgt_i = Reg(chiselTypeOf(dec.uop_end))
val pBits = log2Ceil(p(CoreKey).blockOut) + 1
val inflight = Reg(UInt(pBits.W))
val wrpipe = Module(new Pipe(chiselTypeOf(dec.uop_end), latency = pBits))
val wrpipe = Module(new Pipe(chiselTypeOf(dec.uop_end), latency = 2))
val done = inflight === 0.U &
((state === sExe &
cnt_o === dec.lp_0 - 1.U &
Expand Down Expand Up @@ -236,11 +254,14 @@ class TensorGemm(debug: Boolean = false)(implicit p: Parameters)
when(state === sIdle) {
inflight := 0.U
}.elsewhen(!dec.reset) {
when(state === sReadTensor) { // issue a tensor
inflight := inflight + 1.U
}.elsewhen(mvc.io.acc_o.data.valid) { // commit a tensor
inflight := inflight - 1.U
}
when((state === sReadTensor) && mvc.io.acc_o.data.valid) { // issue & commit
liangfu marked this conversation as resolved.
Show resolved Hide resolved
inflight := inflight
}.elsewhen(state === sReadTensor) { // issue a tensor
inflight := inflight + 1.U
}
.elsewhen(mvc.io.acc_o.data.valid) { // commit a tensor
inflight := inflight - 1.U
}
}

when(
Expand Down Expand Up @@ -278,8 +299,7 @@ class TensorGemm(debug: Boolean = false)(implicit p: Parameters)
inp_i := inp_o
wgt_i := wgt_o
}
.elsewhen(state === sExe &&
uop_idx === uop_end - 1.U) {
.elsewhen(state === sExe && uop_idx === uop_end - 1.U) {
cnt_i := cnt_i + 1.U
acc_i := acc_i + dec.acc_1
inp_i := inp_i + dec.inp_1
Expand Down