Skip to content

Commit

Permalink
flate: add TryWriteCopy to inline dictionary operations
Browse files Browse the repository at this point in the history
Most LZ77 dictionary copies involve short distances and short lengths.
Making a full WriteCopy function call for every one of these cases is expensive.
Instead, we add a new method, TryWriteCopy, that handles only this case and is
simple enough to be inlined into the caller.

benchmark                              old MB/s     new MB/s     speedup
BenchmarkDecodeDigitsSpeed1e4-4        74.49        80.41        1.08x
BenchmarkDecodeDigitsSpeed1e5-4        84.39        94.42        1.12x
BenchmarkDecodeDigitsSpeed1e6-4        87.19        98.66        1.13x
BenchmarkDecodeDigitsDefault1e4-4      75.46        80.85        1.07x
BenchmarkDecodeDigitsDefault1e5-4      90.90        95.78        1.05x
BenchmarkDecodeDigitsDefault1e6-4      95.28        98.92        1.04x
BenchmarkDecodeDigitsCompress1e4-4     75.36        81.33        1.08x
BenchmarkDecodeDigitsCompress1e5-4     91.63        95.74        1.04x
BenchmarkDecodeDigitsCompress1e6-4     95.05        98.90        1.04x
BenchmarkDecodeTwainSpeed1e4-4         73.05        79.83        1.09x
BenchmarkDecodeTwainSpeed1e5-4         91.18        97.11        1.07x
BenchmarkDecodeTwainSpeed1e6-4         96.61        102.12       1.06x
BenchmarkDecodeTwainDefault1e4-4       78.37        82.23        1.05x
BenchmarkDecodeTwainDefault1e5-4       108.44       113.87       1.05x
BenchmarkDecodeTwainDefault1e6-4       118.41       120.97       1.02x
BenchmarkDecodeTwainCompress1e4-4      78.25        81.77        1.04x
BenchmarkDecodeTwainCompress1e5-4      110.09       112.78       1.02x
BenchmarkDecodeTwainCompress1e6-4      119.64       122.13       1.02x
  • Loading branch information
dsnet committed Dec 18, 2015
1 parent 27c4ea9 commit 22c4336
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 7 deletions.
39 changes: 33 additions & 6 deletions flate/dict_decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,31 +77,58 @@ func (dd *dictDecoder) WriteByte(c byte) {
dd.wrPos++
}

// TryWriteCopy tries to copy a string at a given (distance, length) to the
// output. This specialized version is optimized for short distances.
//
// It is a fast path only: it either copies all length bytes or copies nothing.
// It returns 0 without touching the buffer when the source would start before
// the beginning of hist (wrPos < dist) or when the destination would extend
// past the end of hist (wrPos+length > len(hist)). Otherwise it advances wrPos
// and returns the number of bytes copied, which equals length. On a 0 return
// the caller is expected to fall back to WriteCopy.
//
// This method is designed to be inlined for performance reasons.
//
// This invariant must be kept: 0 < dist <= HistSize()
// (with dist == 0 and length > 0 the copy loop below would never make progress,
// because its source slice would be empty).
func (dd *dictDecoder) TryWriteCopy(dist, length int) int {
wrPos := dd.wrPos
wrEnd := wrPos + length
if wrPos < dist || wrEnd > len(dd.hist) {
return 0
}

// Copy overlapping section before destination.
// Each copy is bounded by the source hist[rdPos:wrPos], so when length > dist
// the copy is repeated; the source grows as wrPos advances while rdPos stays
// fixed.
wrBase := wrPos
rdPos := wrPos - dist
loop:
wrPos += copy(dd.hist[wrPos:wrEnd], dd.hist[rdPos:wrPos])
if wrPos < wrEnd {
goto loop // Avoid for-loop so that this function can be inlined

This comment has been minimized.

Copy link
@klauspost

klauspost Dec 17, 2017

It is probably worth checking if this is still a problem with the current compiler.

}
dd.wrPos = wrPos
return wrPos - wrBase
}

// WriteCopy copies a string at a given (distance, length) to the output.
// This returns the number of bytes copied and may be less than the requested
// length if the available space in the output buffer is too small.
//
// The write end is clamped to len(hist), so a copy that would run past the end
// of the buffer is truncated there. The write position is advanced by the
// number of bytes copied.
//
// This invariant must be kept: 0 < dist <= HistSize()
func (dd *dictDecoder) WriteCopy(dist, length int) int {
wrBase := dd.wrPos
wrEnd := dd.wrPos + length
wrPos := wrBase
rdPos := wrPos - dist
wrEnd := wrPos + length
if wrEnd > len(dd.hist) {
wrEnd = len(dd.hist)
}

// Copy non-overlapping section after destination.
// A negative rdPos means the source starts before index 0 of hist; adding
// len(hist) wraps it around so those bytes are read from the tail of hist.
// Any remaining bytes are then read starting from index 0 (rdPos = 0).
rdPos := dd.wrPos - dist
if rdPos < 0 {
rdPos += len(dd.hist)
dd.wrPos += copy(dd.hist[dd.wrPos:wrEnd], dd.hist[rdPos:])
wrPos += copy(dd.hist[wrPos:wrEnd], dd.hist[rdPos:])
rdPos = 0
}

// Copy overlapping section before destination.
// Each copy is bounded by the source slice, which ends at the current write
// position, so the copy repeats until wrEnd is reached.
for dd.wrPos < wrEnd {
dd.wrPos += copy(dd.hist[dd.wrPos:wrEnd], dd.hist[rdPos:dd.wrPos])
for wrPos < wrEnd {
wrPos += copy(dd.hist[wrPos:wrEnd], dd.hist[rdPos:wrPos])
}
return dd.wrPos - wrBase
dd.wrPos = wrPos
return wrPos - wrBase
}

// ReadFlush returns a slice of the historical buffer that is ready to be
Expand Down
5 changes: 4 additions & 1 deletion flate/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,10 @@ readLiteral:
copyDistance:
// Perform a backwards copy according to RFC section 3.2.3.
{
cnt := fr.dict.WriteCopy(fr.dist, fr.cpyLen)
cnt := fr.dict.TryWriteCopy(fr.dist, fr.cpyLen)
if cnt == 0 {
cnt = fr.dict.WriteCopy(fr.dist, fr.cpyLen)
}
fr.cpyLen -= cnt

if fr.cpyLen > 0 {
Expand Down

0 comments on commit 22c4336

Please sign in to comment.