Skip to content

Commit

Permalink
zstd: Improve match speed of fastest level. (#241)
Browse files Browse the repository at this point in the history
Inline matchlen code and slightly simplify it. This loses a small amount of compression but gives a big speedup.

Before/after, best of 3 runs each (for each file, the first row is before this change and the second row is after):

```
file	out	level	insize	outsize	millis	mb/s
enwik9	zskp	1	1000000000	343831004	4202	226.91
enwik9	zskp	1	1000000000	343848582	3682	258.97

github-june-2days-2019.json	zskp	1	6273951764	698824137	10787	554.67
github-june-2days-2019.json	zskp	1	6273951764	699045015	10474	571.23

github-ranks-backup.bin	zskp	1	1862623243	454018274	4833	367.54
github-ranks-backup.bin	zskp	1	1862623243	454072815	4568	388.82

rawstudio-mint14.tar	zskp	1	8558382592	3667295557	21060	387.55
rawstudio-mint14.tar	zskp	1	8558382592	3667489370	20207	403.90

nyc-taxi-data-10M.csv	zskp	1	3325605752	641244049	10954	289.53
nyc-taxi-data-10M.csv	zskp	1	3325605752	641339945	9668	328.01

gob-stream	zskp	1	1911399616	234947276	3514	518.62
gob-stream	zskp	1	1911399616	235022249	3354	543.36

```
  • Loading branch information
klauspost authored Mar 9, 2020
1 parent 56999ed commit 970d4dc
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 13 deletions.
105 changes: 92 additions & 13 deletions zstd/enc_fast.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package zstd

import (
"fmt"
"math"
"math/bits"

"github.com/klauspost/compress/zstd/internal/xxhash"
Expand Down Expand Up @@ -173,9 +174,22 @@ encodeLoop:
if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
lenght := 4 + e.matchlen(s+6, repIndex+4, src)
var length int32
// length = 4 + e.matchlen(s+6, repIndex+4, src)
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}

seq.matchLen = uint32(lenght - zstdMinMatch)
seq.matchLen = uint32(length - zstdMinMatch)

// We might be able to match backwards.
// Extend as long as we can.
Expand All @@ -201,11 +215,11 @@ encodeLoop:
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
s += lenght + 2
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, lenght)
println("repeat ended", s, length)

}
break encodeLoop
Expand Down Expand Up @@ -261,7 +275,20 @@ encodeLoop:
}

// Extend the 4-byte match as long as possible.
l := e.matchlen(s+4, t+4, src) + 4
//l := e.matchlen(s+4, t+4, src) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}

// Extend backwards
tMin := s - e.maxMatchOff
Expand Down Expand Up @@ -298,7 +325,20 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
//l := 4 + e.matchlen(s+4, o2+4, src)
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}

// Store this, since we have it.
nextHash := hash6(cv, hashLog)
Expand Down Expand Up @@ -416,10 +456,23 @@ encodeLoop:
if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
// lenght := 4 + e.matchlen(s+6, repIndex+4, src)
lenght := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
// length := 4 + e.matchlen(s+6, repIndex+4, src)
// length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
var length int32
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}

seq.matchLen = uint32(lenght - zstdMinMatch)
seq.matchLen = uint32(length - zstdMinMatch)

// We might be able to match backwards.
// Extend as long as we can.
Expand All @@ -445,11 +498,11 @@ encodeLoop:
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
s += lenght + 2
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, lenght)
println("repeat ended", s, length)

}
break encodeLoop
Expand Down Expand Up @@ -502,7 +555,20 @@ encodeLoop:

// Extend the 4-byte match as long as possible.
//l := e.matchlenNoHist(s+4, t+4, src) + 4
l := int32(matchLen(src[s+4:], src[t+4:])) + 4
// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}

// Extend backwards
tMin := s - e.maxMatchOff
Expand Down Expand Up @@ -540,7 +606,20 @@ encodeLoop:
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
// l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}

// Store this, since we have it.
nextHash := hash6(cv, hashLog)
Expand Down
11 changes: 11 additions & 0 deletions zstd/zstd.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,17 @@ func printf(format string, a ...interface{}) {
}
}

// matchLenFast returns the number of leading bytes that a and b have in
// common, comparing one 8-byte word at a time.
//
// Only whole 8-byte words of a are examined: len(a) is rounded down to a
// multiple of 8 (and masked to the int32 range), so up to 7 trailing bytes
// of a are never compared and do not contribute to the result.
//
// b is read at the same offsets as a, so it has to cover every word that is
// compared; the function does not check len(b) itself.
// NOTE(review): the byte index derived from the trailing zero count assumes
// load64 reads little-endian — confirm against its definition.
func matchLenFast(a, b []byte) int {
	// Largest multiple of 8 not exceeding len(a), limited to 31 bits.
	limit := len(a) & (math.MaxInt32 - 7)
	pos := 0
	for pos < limit {
		x := load64(a, pos) ^ load64(b, pos)
		if x != 0 {
			// The lowest set bit marks the first differing byte of this word.
			return pos + bits.TrailingZeros64(x)/8
		}
		pos += 8
	}
	return limit
}

// matchLen returns the maximum length.
// a must be the shortest of the two.
// The function also returns whether all bytes matched.
Expand Down

0 comments on commit 970d4dc

Please sign in to comment.