Skip to content

Commit

Permalink
zstd enc: Remove unneeded masks (#145)
Browse files: browse the repository at this point in the history
The compiler can figure out that the masks are not needed.

5-10% speed increase.
  • Loading branch information
klauspost authored Aug 3, 2019
1 parent 0e54620 commit 763de0b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 35 deletions.
36 changes: 12 additions & 24 deletions zstd/enc_dfast.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,11 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
stepSize++
}

// TEMPLATE

const kSearchStrength = 8

// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// nextHash is the hash at s
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)

// Relative offsets
offset1 := int32(blk.recentOffsets[0])
Expand Down Expand Up @@ -119,8 +114,8 @@ encodeLoop:
panic("offset0 was 0")
}

nextHashS = nextHashS & dFastShortTableMask
nextHashL = nextHashL & dFastLongTableMask
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]

Expand Down Expand Up @@ -172,8 +167,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
continue
}
const repOff2 = 1
Expand Down Expand Up @@ -221,8 +214,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
// Swap offsets
offset1, offset2 = offset2, offset1
continue
Expand Down Expand Up @@ -296,8 +287,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
}

// A 4-byte match has been found. Update recent offsets.
Expand Down Expand Up @@ -354,20 +343,18 @@ encodeLoop:
cv1 := load6432(src, index1)
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
e.longTable[hash8(cv0, dFastLongTableBits)&dFastLongTableMask] = te0
e.longTable[hash8(cv1, dFastLongTableBits)&dFastLongTableMask] = te1
e.longTable[hash8(cv0, dFastLongTableBits)] = te0
e.longTable[hash8(cv1, dFastLongTableBits)] = te1
cv0 >>= 8
cv1 >>= 8
te0.offset++
te1.offset++
te0.val = uint32(cv0)
te1.val = uint32(cv1)
e.table[hash5(cv0, dFastShortTableBits)&dFastShortTableMask] = te0
e.table[hash5(cv1, dFastShortTableBits)&dFastShortTableMask] = te1
e.table[hash5(cv0, dFastShortTableBits)] = te0
e.table[hash5(cv1, dFastShortTableBits)] = te1

cv = load6432(src, s)
nextHashS = hash5(cv1>>8, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)

if !canRepeat {
continue
Expand All @@ -381,14 +368,17 @@ encodeLoop:
break
}

// Store this, since we have it.
nextHashS := hash5(cv1>>8, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)

// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)

// Store this, since we have it.
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
e.longTable[nextHashL&dFastLongTableMask] = entry
e.table[nextHashS&dFastShortTableMask] = entry
e.longTable[nextHashL] = entry
e.table[nextHashS] = entry
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0

Expand All @@ -408,8 +398,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHashS = hash5(cv, dFastShortTableBits)
nextHashL = hash8(cv, dFastLongTableBits)
}
}

Expand Down
17 changes: 6 additions & 11 deletions zstd/enc_fast.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,6 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// nextHash is the hash at s
nextHash := hash6(cv, hashLog)

// Relative offsets
offset1 := int32(blk.recentOffsets[0])
Expand Down Expand Up @@ -157,8 +155,8 @@ encodeLoop:
panic("offset0 was 0")
}

nextHash2 := hash6(cv>>8, hashLog) & tableMask
nextHash = nextHash & tableMask
nextHash := hash6(cv, hashLog)
nextHash2 := hash6(cv>>8, hashLog)
candidate := e.table[nextHash]
candidate2 := e.table[nextHash2]
repIndex := s - offset1 + 2
Expand Down Expand Up @@ -207,8 +205,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
//nextHash = hashLen(cv, hashLog, mls)
nextHash = hash6(cv, hashLog)
continue
}
coffset0 := s - (candidate.offset - e.cur)
Expand Down Expand Up @@ -245,7 +241,6 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHash = hash6(cv, hashLog)
}
// A 4-byte match has been found. We'll later see if more than 4 bytes.
offset2 = offset1
Expand Down Expand Up @@ -292,15 +287,16 @@ encodeLoop:
break encodeLoop
}
cv = load6432(src, s)
nextHash = hash6(cv, hashLog)

// Check offset 2
if o2 := s - offset2; canRepeat && o2 > 0 && load3232(src, o2) == uint32(cv) {
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)

// Store this, since we have it.
e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: uint32(cv)}
nextHash := hash6(cv, hashLog)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
// Since litlen is always 0, this is offset 1.
Expand All @@ -319,7 +315,6 @@ encodeLoop:
}
// Prepare next loop.
cv = load6432(src, s)
nextHash = hash6(cv, hashLog)
}
}

Expand Down

0 comments on commit 763de0b

Please sign in to comment.