From e75965c52999d471918d0b155e0971b9f48b444e Mon Sep 17 00:00:00 2001 From: root <343636111@qq.com> Date: Tue, 14 Jan 2020 11:13:14 +0800 Subject: [PATCH 1/6] change mergesort --- flate/huffman_code.go | 183 +++++++++++++++++++++++++++++++++++------- 1 file changed, 155 insertions(+), 28 deletions(-) diff --git a/flate/huffman_code.go b/flate/huffman_code.go index 1810c6898d..f2850a05ce 100644 --- a/flate/huffman_code.go +++ b/flate/huffman_code.go @@ -7,7 +7,7 @@ package flate import ( "math" "math/bits" - "sort" + "sync" ) const ( @@ -25,8 +25,6 @@ type huffmanEncoder struct { codes []hcode freqcache []literalNode bitCount [17]int32 - lns byLiteral // stored to avoid repeated allocation in generate - lfs byFreq // stored to avoid repeated allocation in generate } type literalNode struct { @@ -270,7 +268,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN // assigned in literal order (not frequency order). chunk := list[len(list)-int(bits):] - h.lns.sort(chunk) + sortbyLiteral(chunk) for _, node := range chunk { h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)} code++ @@ -315,7 +313,7 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { } return } - h.lfs.sort(list) + sortbyFreq(list) // Get the number of literals for each bit count bitCount := h.bitCounts(list, maxBits) @@ -323,39 +321,168 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { h.assignEncodingAndSize(bitCount, list) } -type byLiteral []literalNode +var sortpool = sync.Pool{New: func() interface{} { + return &[]literalNode{} +}} -func (s *byLiteral) sort(a []literalNode) { - *s = byLiteral(a) - sort.Sort(s) -} +func sortbyLiteral(list []literalNode) { + max_len := len(list) + t := sortpool.Get().(*[]literalNode) + defer sortpool.Put(t) + if len(*t) < max_len { + *t = make([]literalNode, max_len) + } + tmp := *t + for i := 0; i < max_len-max_len&1; i += 2 { + if list[i+1].literal < list[i].literal { + list[i], list[i+1] = list[i+1], list[i] + } -func (s byLiteral) Len() int { return len(s) } + } + for i := 0; i < max_len-max_len&3; i += 4 { + if list[i+2].literal < list[i].literal { + list[i], list[i+2] = list[i+2], list[i] + } + if list[i+3].literal < list[i+1].literal { + list[i+1], list[i+3] = list[i+3], list[i+1] + } + if list[i+2].literal < list[i+1].literal { + list[i+1], list[i+2] = list[i+2], list[i+1] + } -func (s byLiteral) Less(i, j int) bool { - return s[i].literal < s[j].literal + } + if max_len&3 == 3 { + i := max_len - 3 + if list[i+2].literal < list[i].literal { + list[i+1], list[i+2] = list[i+2], list[i+1] + list[i], list[i+1] = list[i+1], list[i] + } else if list[i+2].literal < list[i+1].literal { + list[i+1], list[i+2] = list[i+2], list[i+1] + } + } + var step, l, max, r, index, n int + step = 4 + for step < max_len { + n++ + step <<= 1 + if n&1 == 1 { + for i := 0; i < max_len; i += step { + l, r, max = i, i+step/2, i+step + if max > max_len { + max = max_len + } + for index = i; index < max; index++ { + if l == step/2+i || (r < max && list[r].literal < list[l].literal) { + tmp[index] = list[r] + r++ + } else { + tmp[index] = list[l] + l++ + } + } + } + } else { + for i := 0; i < max_len; i += step { + l, r, max = i, i+step/2, i+step + if max > max_len { + max = max_len + } + for index = i; index < max; index++ { + if l == step/2+i || (r < max && tmp[r].literal < tmp[l].literal) { + list[index] = tmp[r] + r++ + } else { + list[index] = tmp[l] + l++ + } + } + } + } + } + if n&1 == 1 { + copy(list, tmp) + } } -func (s byLiteral) Swap(i, j int) { s[i], s[j] = s[j], s[i] } - -type byFreq []literalNode - -func (s *byFreq) sort(a []literalNode) { - *s = byFreq(a) - sort.Sort(s) -} +func sortbyFreq(list []literalNode) { + max_len := len(list) + t := sortpool.Get().(*[]literalNode) + defer sortpool.Put(t) + if len(*t) < max_len { + *t = make([]literalNode, max_len) + } + tmp := *t + for i := 0; i < max_len-max_len&1; i += 2 { + if list[i+1].freq == list[i].freq && list[i+1].literal < list[i].literal || list[i+1].freq < list[i].freq { + list[i], list[i+1] = list[i+1], list[i] + } -func (s byFreq) Len() int { return len(s) } + } + for i := 0; i < max_len-max_len&3; i += 4 { + if list[i+2].freq == list[i].freq && list[i+2].literal < list[i].literal || list[i+2].freq < list[i].freq { + list[i], list[i+2] = list[i+2], list[i] + } + if list[i+3].freq == list[i+1].freq && list[i+3].literal < list[i+1].literal || list[i+3].freq < list[i+1].freq { + list[i+1], list[i+3] = list[i+3], list[i+1] + } + if list[i+2].freq == list[i+1].freq && list[i+2].literal < list[i+1].literal || list[i+2].freq < list[i+1].freq { + list[i+1], list[i+2] = list[i+2], list[i+1] + } -func (s byFreq) Less(i, j int) bool { - if s[i].freq == s[j].freq { - return s[i].literal < s[j].literal } - return s[i].freq < s[j].freq + if max_len&3 == 3 { + i := max_len - 3 + if list[i+2].freq == list[i].freq && list[i+2].literal < list[i].literal || list[i+2].freq < list[i].freq { + list[i+1], list[i+2] = list[i+2], list[i+1] + list[i], list[i+1] = list[i+1], list[i] + } else if list[i+2].freq == list[i+1].freq && list[i+2].literal < list[i+1].literal || list[i+2].freq < list[i+1].freq { + list[i+1], list[i+2] = list[i+2], list[i+1] + } + } + var step, l, max, r, index, n int + step = 4 + for step < max_len { + n++ + step <<= 1 + if n&1 == 1 { + for i := 0; i < max_len; i += step { + l, r, max = i, i+step/2, i+step + if max > max_len { + max = max_len + } + for index = i; index < max; index++ { + if l == step/2+i || (r < max && (list[r].freq == list[l].freq && list[r].literal < list[l].literal || list[r].freq < list[l].freq)) { + tmp[index] = list[r] + r++ + } else { + tmp[index] = list[l] + l++ + } + } + } + } else { + for i := 0; i < max_len; i += step { + l, r, max = i, i+step/2, i+step + if max > max_len { + max = max_len + } + for index = i; index < max; index++ { + if l == step/2+i || (r < max && (tmp[r].freq == tmp[l].freq && tmp[r].literal < tmp[l].literal || tmp[r].freq < tmp[l].freq)) { + list[index] = tmp[r] + r++ + } else { + list[index] = tmp[l] + l++ + } + } + } + } + } + if n&1 == 1 { + copy(list, tmp) + } } -func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] } - // histogramSize accumulates a histogram of b in h. // An estimated size in bits is returned. // Unassigned values are assigned '1' in the histogram. From 7f562738c1a1cecf209311af788bb43cf6f4fc8b Mon Sep 17 00:00:00 2001 From: root <343636111@qq.com> Date: Wed, 15 Jan 2020 11:07:24 +0800 Subject: [PATCH 2/6] optimize iteration --- flate/huffman_code.go | 80 ++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 42 deletions(-) diff --git a/flate/huffman_code.go b/flate/huffman_code.go index f2850a05ce..b44e2b21c2 100644 --- a/flate/huffman_code.go +++ b/flate/huffman_code.go @@ -360,34 +360,32 @@ func sortbyLiteral(list []literalNode) { list[i+1], list[i+2] = list[i+2], list[i+1] } } - var step, l, max, r, index, n int + var step, l, max, r int step = 4 for step < max_len { - n++ step <<= 1 - if n&1 == 1 { - for i := 0; i < max_len; i += step { - l, r, max = i, i+step/2, i+step - if max > max_len { - max = max_len - } - for index = i; index < max; index++ { - if l == step/2+i || (r < max && list[r].literal < list[l].literal) { - tmp[index] = list[r] - r++ - } else { - tmp[index] = list[l] - l++ - } + for i := 0; i < max_len; i += step { + l, r, max = i, i+step/2, i+step + if max > max_len { + max = max_len + } + for index := i; index < max; index++ { + if l == step/2+i || (r < max && list[r].literal < list[l].literal) { + tmp[index] = list[r] + r++ + } else { + tmp[index] = list[l] + l++ } } - } else { + } + if step < max_len { for i := 0; i < max_len; i += step { l, r, max = i, i+step/2, i+step if max > max_len { max = max_len } - for index = i; index < max; index++ { + for index := i; index < max; index++ { if l == step/2+i || (r < max && tmp[r].literal < tmp[l].literal) { list[index] = tmp[r] r++ @@ -397,11 +395,11 @@ func sortbyLiteral(list []literalNode) { } } } + } else { + copy(list, tmp) } } - if n&1 == 1 { - copy(list, tmp) - } + } func sortbyFreq(list []literalNode) { @@ -439,34 +437,32 @@ func sortbyFreq(list []literalNode) { list[i+1], list[i+2] = list[i+2], list[i+1] } } - var step, l, max, r, index, n int + var step, l, max, r int step = 4 for step < max_len { - n++ step <<= 1 - if n&1 == 1 { - for i := 0; i < max_len; i += step { - l, r, max = i, i+step/2, i+step - if max > max_len { - max = max_len - } - for index = i; index < max; index++ { - if l == step/2+i || (r < max && (list[r].freq == list[l].freq && list[r].literal < list[l].literal || list[r].freq < list[l].freq)) { - tmp[index] = list[r] - r++ - } else { - tmp[index] = list[l] - l++ - } + for i := 0; i < max_len; i += step { + l, r, max = i, i+step/2, i+step + if max > max_len { + max = max_len + } + for index := i; index < max; index++ { + if l == step/2+i || (r < max && (list[r].freq == list[l].freq && list[r].literal < list[l].literal || list[r].freq < list[l].freq)) { + tmp[index] = list[r] + r++ + } else { + tmp[index] = list[l] + l++ } } - } else { + } + if step < max_len { for i := 0; i < max_len; i += step { l, r, max = i, i+step/2, i+step if max > max_len { max = max_len } - for index = i; index < max; index++ { + for index := i; index < max; index++ { if l == step/2+i || (r < max && (tmp[r].freq == tmp[l].freq && tmp[r].literal < tmp[l].literal || tmp[r].freq < tmp[l].freq)) { list[index] = tmp[r] r++ @@ -476,11 +472,11 @@ func sortbyFreq(list []literalNode) { } } } + } else { + copy(list, tmp) } } - if n&1 == 1 { - copy(list, tmp) - } + } // histogramSize accumulates a histogram of b in h. From 3669b5dde5b5317391ff11ef145c0fdcf7859c60 Mon Sep 17 00:00:00 2001 From: root <343636111@qq.com> Date: Wed, 15 Jan 2020 17:03:46 +0800 Subject: [PATCH 3/6] optimize quickSort --- flate/huffman_code.go | 165 +-------------------------- flate/huffman_sortByFreq.go | 174 +++++++++++++++++++++++++++++ flate/huffman_sortByLiteral.go | 197 +++++++++++++++++++++++++++++++++ 3 files changed, 375 insertions(+), 161 deletions(-) create mode 100644 flate/huffman_sortByFreq.go create mode 100644 flate/huffman_sortByLiteral.go diff --git a/flate/huffman_code.go b/flate/huffman_code.go index b44e2b21c2..afce90d7b3 100644 --- a/flate/huffman_code.go +++ b/flate/huffman_code.go @@ -7,7 +7,6 @@ package flate import ( "math" "math/bits" - "sync" ) const ( @@ -25,6 +24,8 @@ type huffmanEncoder struct { codes []hcode freqcache []literalNode bitCount [17]int32 + //lns byLiteral // stored to avoid repeated allocation in generate + //lfs byFreq // stored to avoid repeated allocation in generate } type literalNode struct { @@ -268,7 +269,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN // assigned in literal order (not frequency order). chunk := list[len(list)-int(bits):] - sortbyLiteral(chunk) + sortByLiteral(chunk) for _, node := range chunk { h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)} code++ @@ -313,7 +314,7 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { } return } - sortbyFreq(list) + sortByFreq(list) // Get the number of literals for each bit count bitCount := h.bitCounts(list, maxBits) @@ -321,164 +322,6 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { h.assignEncodingAndSize(bitCount, list) } -var sortpool = sync.Pool{New: func() interface{} { - return &[]literalNode{} -}} - -func sortbyLiteral(list []literalNode) { - max_len := len(list) - t := sortpool.Get().(*[]literalNode) - defer sortpool.Put(t) - if len(*t) < max_len { - *t = make([]literalNode, max_len) - } - tmp := *t - for i := 0; i < max_len-max_len&1; i += 2 { - if list[i+1].literal < list[i].literal { - list[i], list[i+1] = list[i+1], list[i] - } - - } - for i := 0; i < max_len-max_len&3; i += 4 { - if list[i+2].literal < list[i].literal { - list[i], list[i+2] = list[i+2], list[i] - } - if list[i+3].literal < list[i+1].literal { - list[i+1], list[i+3] = list[i+3], list[i+1] - } - if list[i+2].literal < list[i+1].literal { - list[i+1], list[i+2] = list[i+2], list[i+1] - } - - } - if max_len&3 == 3 { - i := max_len - 3 - if list[i+2].literal < list[i].literal { - list[i+1], list[i+2] = list[i+2], list[i+1] - list[i], list[i+1] = list[i+1], list[i] - } else if list[i+2].literal < list[i+1].literal { - list[i+1], list[i+2] = list[i+2], list[i+1] - } - } - var step, l, max, r int - step = 4 - for step < max_len { - step <<= 1 - for i := 0; i < max_len; i += step { - l, r, max = i, i+step/2, i+step - if max > max_len { - max = max_len - } - for index := i; index < max; index++ { - if l == step/2+i || (r < max && list[r].literal < list[l].literal) { - tmp[index] = list[r] - r++ - } else { - tmp[index] = list[l] - l++ - } - } - } - if step < max_len { - for i := 0; i < max_len; i += step { - l, r, max = i, i+step/2, i+step - if max > max_len { - max = max_len - } - for index := i; index < max; index++ { - if l == step/2+i || (r < max && tmp[r].literal < tmp[l].literal) { - list[index] = tmp[r] - r++ - } else { - list[index] = tmp[l] - l++ - } - } - } - } else { - copy(list, tmp) - } - } - -} - -func sortbyFreq(list []literalNode) { - max_len := len(list) - t := sortpool.Get().(*[]literalNode) - defer sortpool.Put(t) - if len(*t) < max_len { - *t = make([]literalNode, max_len) - } - tmp := *t - for i := 0; i < max_len-max_len&1; i += 2 { - if list[i+1].freq == list[i].freq && list[i+1].literal < list[i].literal || list[i+1].freq < list[i].freq { - list[i], list[i+1] = list[i+1], list[i] - } - - } - for i := 0; i < max_len-max_len&3; i += 4 { - if list[i+2].freq == list[i].freq && list[i+2].literal < list[i].literal || list[i+2].freq < list[i].freq { - list[i], list[i+2] = list[i+2], list[i] - } - if list[i+3].freq == list[i+1].freq && list[i+3].literal < list[i+1].literal || list[i+3].freq < list[i+1].freq { - list[i+1], list[i+3] = list[i+3], list[i+1] - } - if list[i+2].freq == list[i+1].freq && list[i+2].literal < list[i+1].literal || list[i+2].freq < list[i+1].freq { - list[i+1], list[i+2] = list[i+2], list[i+1] - } - - } - if max_len&3 == 3 { - i := max_len - 3 - if list[i+2].freq == list[i].freq && list[i+2].literal < list[i].literal || list[i+2].freq < list[i].freq { - list[i+1], list[i+2] = list[i+2], list[i+1] - list[i], list[i+1] = list[i+1], list[i] - } else if list[i+2].freq == list[i+1].freq && list[i+2].literal < list[i+1].literal || list[i+2].freq < list[i+1].freq { - list[i+1], list[i+2] = list[i+2], list[i+1] - } - } - var step, l, max, r int - step = 4 - for step < max_len { - step <<= 1 - for i := 0; i < max_len; i += step { - l, r, max = i, i+step/2, i+step - if max > max_len { - max = max_len - } - for index := i; index < max; index++ { - if l == step/2+i || (r < max && (list[r].freq == list[l].freq && list[r].literal < list[l].literal || list[r].freq < list[l].freq)) { - tmp[index] = list[r] - r++ - } else { - tmp[index] = list[l] - l++ - } - } - } - if step < max_len { - for i := 0; i < max_len; i += step { - l, r, max = i, i+step/2, i+step - if max > max_len { - max = max_len - } - for index := i; index < max; index++ { - if l == step/2+i || (r < max && (tmp[r].freq == tmp[l].freq && tmp[r].literal < tmp[l].literal || tmp[r].freq < tmp[l].freq)) { - list[index] = tmp[r] - r++ - } else { - list[index] = tmp[l] - l++ - } - } - } - } else { - copy(list, tmp) - } - } - -} - // histogramSize accumulates a histogram of b in h. // An estimated size in bits is returned. // Unassigned values are assigned '1' in the histogram. diff --git a/flate/huffman_sortByFreq.go b/flate/huffman_sortByFreq.go new file mode 100644 index 0000000000..17ae0d6eec --- /dev/null +++ b/flate/huffman_sortByFreq.go @@ -0,0 +1,174 @@ +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByFreq(data []literalNode) { + n := len(data) + quickSortByFreq(data, 0, n, maxDepth(n)) +} + +func quickSortByFreq(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivotByFreq(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSortByFreq(data, a, mlo, maxDepth) + a = mhi // i.e., quickSortByFreq(data, mhi, b) + } else { + quickSortByFreq(data, mhi, b, maxDepth) + b = mlo // i.e., quickSortByFreq(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSortByFreq(data, a, b) + } +} + +// siftDownByFreq implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDownByFreq(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) { + child++ + } + if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) + medianOfThreeSortByFreq(data, m, m-s, m+s) + medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThreeSortByFreq(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { + } + b := a + for { + for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot + } + for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot + } + for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSortByFreq(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// quickSortByFreq, loosely following Bentley and McIlroy, +// ``Engineering a Sort Function,'' SP&E November 1993. + +// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/flate/huffman_sortByLiteral.go b/flate/huffman_sortByLiteral.go new file mode 100644 index 0000000000..a5089b9bde --- /dev/null +++ b/flate/huffman_sortByLiteral.go @@ -0,0 +1,197 @@ +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByLiteral(data []literalNode) { + n := len(data) + quickSort(data, 0, n, maxDepth(n)) +} + +func quickSort(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivot(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSort(data, a, mlo, maxDepth) + a = mhi // i.e., quickSort(data, mhi, b) + } else { + quickSort(data, mhi, b, maxDepth) + b = mlo // i.e., quickSort(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].literal < data[i-6].literal { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSort(data, a, b) + } +} +func heapSort(data []literalNode, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown(data, i, hi, first) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + data[first], data[first+i] = data[first+i], data[first] + siftDown(data, lo, i, first) + } +} + +// siftDown implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDown(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && data[first+child].literal < data[first+child+1].literal { + child++ + } + if data[first+root].literal > data[first+child].literal { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThree(data, lo, lo+s, lo+2*s) + medianOfThree(data, m, m-s, m+s) + medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThree(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && data[a].literal < data[pivot].literal; a++ { + } + b := a + for { + for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot + } + for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].literal > data[pivot].literal { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot + } + for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSort(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && data[j].literal < data[j-1].literal; j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// maxDepth returns a threshold at which quicksort should switch +// to heapsort. It returns 2*ceil(lg(n+1)). +func maxDepth(n int) int { + var depth int + for i := n; i > 0; i >>= 1 { + depth++ + } + return depth * 2 +} + +// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThree(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].literal < data[m1].literal { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} From 5464dc6c48d6422327a40e53b7e5abf32298ca97 Mon Sep 17 00:00:00 2001 From: root <343636111@qq.com> Date: Wed, 15 Jan 2020 21:08:42 +0800 Subject: [PATCH 4/6] optimize quickSort --- flate/huffman_sortByFreq.go | 4 ++++ flate/huffman_sortByLiteral.go | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/flate/huffman_sortByFreq.go b/flate/huffman_sortByFreq.go index 17ae0d6eec..9e6edba42c 100644 --- a/flate/huffman_sortByFreq.go +++ b/flate/huffman_sortByFreq.go @@ -1,3 +1,7 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package flate // Sort sorts data. diff --git a/flate/huffman_sortByLiteral.go b/flate/huffman_sortByLiteral.go index a5089b9bde..ffcc176441 100644 --- a/flate/huffman_sortByLiteral.go +++ b/flate/huffman_sortByLiteral.go @@ -1,3 +1,7 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package flate // Sort sorts data. From 4fb1985cfafd9b4cef8be824c2cf9878baed2164 Mon Sep 17 00:00:00 2001 From: root <343636111@qq.com> Date: Wed, 15 Jan 2020 21:31:30 +0800 Subject: [PATCH 5/6] go format --- flate/huffman_sortByFreq.go | 356 ++++++++++++++--------------- flate/huffman_sortByLiteral.go | 402 ++++++++++++++++----------------- 2 files changed, 379 insertions(+), 379 deletions(-) diff --git a/flate/huffman_sortByFreq.go b/flate/huffman_sortByFreq.go index 9e6edba42c..2077802990 100644 --- a/flate/huffman_sortByFreq.go +++ b/flate/huffman_sortByFreq.go @@ -1,178 +1,178 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -// Sort sorts data. -// It makes one call to data.Len to determine n, and O(n*log(n)) calls to -// data.Less and data.Swap. The sort is not guaranteed to be stable. -func sortByFreq(data []literalNode) { - n := len(data) - quickSortByFreq(data, 0, n, maxDepth(n)) -} - -func quickSortByFreq(data []literalNode, a, b, maxDepth int) { - for b-a > 12 { // Use ShellSort for slices <= 12 elements - if maxDepth == 0 { - heapSort(data, a, b) - return - } - maxDepth-- - mlo, mhi := doPivotByFreq(data, a, b) - // Avoiding recursion on the larger subproblem guarantees - // a stack depth of at most lg(b-a). - if mlo-a < b-mhi { - quickSortByFreq(data, a, mlo, maxDepth) - a = mhi // i.e., quickSortByFreq(data, mhi, b) - } else { - quickSortByFreq(data, mhi, b, maxDepth) - b = mlo // i.e., quickSortByFreq(data, a, mlo) - } - } - if b-a > 1 { - // Do ShellSort pass with gap 6 - // It could be written in this simplified form cause b-a <= 12 - for i := a + 6; i < b; i++ { - if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { - data[i], data[i-6] = data[i-6], data[i] - } - } - insertionSortByFreq(data, a, b) - } -} - -// siftDownByFreq implements the heap property on data[lo, hi). -// first is an offset into the array where the root of the heap lies. -func siftDownByFreq(data []literalNode, lo, hi, first int) { - root := lo - for { - child := 2*root + 1 - if child >= hi { - break - } - if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) { - child++ - } - if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq { - return - } - data[first+root], data[first+child] = data[first+child], data[first+root] - root = child - } -} -func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { - m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. - if hi-lo > 40 { - // Tukey's ``Ninther,'' median of three medians of three. - s := (hi - lo) / 8 - medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) - medianOfThreeSortByFreq(data, m, m-s, m+s) - medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) - } - medianOfThreeSortByFreq(data, lo, m, hi-1) - - // Invariants are: - // data[lo] = pivot (set up by ChoosePivot) - // data[lo < i < a] < pivot - // data[a <= i < b] <= pivot - // data[b <= i < c] unexamined - // data[c <= i < hi-1] > pivot - // data[hi-1] >= pivot - pivot := lo - a, c := lo+1, hi-1 - - for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { - } - b := a - for { - for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot - } - for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot - } - if b >= c { - break - } - // data[b] > pivot; data[c-1] <= pivot - data[b], data[c-1] = data[c-1], data[b] - b++ - c-- - } - // If hi-c<3 then there are duplicates (by property of median of nine). - // Let's be a bit more conservative, and set border to 5. - protect := hi-c < 5 - if !protect && hi-c < (hi-lo)/4 { - // Lets test some points for equality to pivot - dups := 0 - if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot - data[c], data[hi-1] = data[hi-1], data[c] - c++ - dups++ - } - if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot - b-- - dups++ - } - // m-lo = (hi-lo)/2 > 6 - // b-lo > (hi-lo)*3/4-1 > 8 - // ==> m < b ==> data[m] <= pivot - if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot - data[m], data[b-1] = data[b-1], data[m] - b-- - dups++ - } - // if at least 2 points are equal to pivot, assume skewed distribution - protect = dups > 1 - } - if protect { - // Protect against a lot of duplicates - // Add invariant: - // data[a <= i < b] unexamined - // data[b <= i < c] = pivot - for { - for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot - } - for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot - } - if a >= b { - break - } - // data[a] == pivot; data[b-1] < pivot - data[a], data[b-1] = data[b-1], data[a] - a++ - b-- - } - } - // Swap pivot into middle - data[pivot], data[b-1] = data[b-1], data[pivot] - return b - 1, c -} - -// Insertion sort -func insertionSortByFreq(data []literalNode, a, b int) { - for i := a + 1; i < b; i++ { - for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { - data[j], data[j-1] = data[j-1], data[j] - } - } -} - -// quickSortByFreq, loosely following Bentley and McIlroy, -// ``Engineering a Sort Function,'' SP&E November 1993. - -// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. -func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { - // sort 3 elements - if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { - data[m1], data[m0] = data[m0], data[m1] - } - // data[m0] <= data[m1] - if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { - data[m2], data[m1] = data[m1], data[m2] - // data[m0] <= data[m2] && data[m1] < data[m2] - if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { - data[m1], data[m0] = data[m0], data[m1] - } - } - // now data[m0] <= data[m1] <= data[m2] -} +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByFreq(data []literalNode) { + n := len(data) + quickSortByFreq(data, 0, n, maxDepth(n)) +} + +func quickSortByFreq(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivotByFreq(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSortByFreq(data, a, mlo, maxDepth) + a = mhi // i.e., quickSortByFreq(data, mhi, b) + } else { + quickSortByFreq(data, mhi, b, maxDepth) + b = mlo // i.e., quickSortByFreq(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSortByFreq(data, a, b) + } +} + +// siftDownByFreq implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDownByFreq(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) { + child++ + } + if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) + medianOfThreeSortByFreq(data, m, m-s, m+s) + medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThreeSortByFreq(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { + } + b := a + for { + for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot + } + for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot + } + for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSortByFreq(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// quickSortByFreq, loosely following Bentley and McIlroy, +// ``Engineering a Sort Function,'' SP&E November 1993. + +// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/flate/huffman_sortByLiteral.go b/flate/huffman_sortByLiteral.go index ffcc176441..93f1aea109 100644 --- a/flate/huffman_sortByLiteral.go +++ b/flate/huffman_sortByLiteral.go @@ -1,201 +1,201 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -// Sort sorts data. -// It makes one call to data.Len to determine n, and O(n*log(n)) calls to -// data.Less and data.Swap. The sort is not guaranteed to be stable. -func sortByLiteral(data []literalNode) { - n := len(data) - quickSort(data, 0, n, maxDepth(n)) -} - -func quickSort(data []literalNode, a, b, maxDepth int) { - for b-a > 12 { // Use ShellSort for slices <= 12 elements - if maxDepth == 0 { - heapSort(data, a, b) - return - } - maxDepth-- - mlo, mhi := doPivot(data, a, b) - // Avoiding recursion on the larger subproblem guarantees - // a stack depth of at most lg(b-a). - if mlo-a < b-mhi { - quickSort(data, a, mlo, maxDepth) - a = mhi // i.e., quickSort(data, mhi, b) - } else { - quickSort(data, mhi, b, maxDepth) - b = mlo // i.e., quickSort(data, a, mlo) - } - } - if b-a > 1 { - // Do ShellSort pass with gap 6 - // It could be written in this simplified form cause b-a <= 12 - for i := a + 6; i < b; i++ { - if data[i].literal < data[i-6].literal { - data[i], data[i-6] = data[i-6], data[i] - } - } - insertionSort(data, a, b) - } -} -func heapSort(data []literalNode, a, b int) { - first := a - lo := 0 - hi := b - a - - // Build heap with greatest element at top. - for i := (hi - 1) / 2; i >= 0; i-- { - siftDown(data, i, hi, first) - } - - // Pop elements, largest first, into end of data. - for i := hi - 1; i >= 0; i-- { - data[first], data[first+i] = data[first+i], data[first] - siftDown(data, lo, i, first) - } -} - -// siftDown implements the heap property on data[lo, hi). -// first is an offset into the array where the root of the heap lies. -func siftDown(data []literalNode, lo, hi, first int) { - root := lo - for { - child := 2*root + 1 - if child >= hi { - break - } - if child+1 < hi && data[first+child].literal < data[first+child+1].literal { - child++ - } - if data[first+root].literal > data[first+child].literal { - return - } - data[first+root], data[first+child] = data[first+child], data[first+root] - root = child - } -} -func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) { - m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. - if hi-lo > 40 { - // Tukey's ``Ninther,'' median of three medians of three. - s := (hi - lo) / 8 - medianOfThree(data, lo, lo+s, lo+2*s) - medianOfThree(data, m, m-s, m+s) - medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) - } - medianOfThree(data, lo, m, hi-1) - - // Invariants are: - // data[lo] = pivot (set up by ChoosePivot) - // data[lo < i < a] < pivot - // data[a <= i < b] <= pivot - // data[b <= i < c] unexamined - // data[c <= i < hi-1] > pivot - // data[hi-1] >= pivot - pivot := lo - a, c := lo+1, hi-1 - - for ; a < c && data[a].literal < data[pivot].literal; a++ { - } - b := a - for { - for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot - } - for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot - } - if b >= c { - break - } - // data[b] > pivot; data[c-1] <= pivot - data[b], data[c-1] = data[c-1], data[b] - b++ - c-- - } - // If hi-c<3 then there are duplicates (by property of median of nine). - // Let's be a bit more conservative, and set border to 5. - protect := hi-c < 5 - if !protect && hi-c < (hi-lo)/4 { - // Lets test some points for equality to pivot - dups := 0 - if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot - data[c], data[hi-1] = data[hi-1], data[c] - c++ - dups++ - } - if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot - b-- - dups++ - } - // m-lo = (hi-lo)/2 > 6 - // b-lo > (hi-lo)*3/4-1 > 8 - // ==> m < b ==> data[m] <= pivot - if data[m].literal > data[pivot].literal { // data[m] = pivot - data[m], data[b-1] = data[b-1], data[m] - b-- - dups++ - } - // if at least 2 points are equal to pivot, assume skewed distribution - protect = dups > 1 - } - if protect { - // Protect against a lot of duplicates - // Add invariant: - // data[a <= i < b] unexamined - // data[b <= i < c] = pivot - for { - for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot - } - for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot - } - if a >= b { - break - } - // data[a] == pivot; data[b-1] < pivot - data[a], data[b-1] = data[b-1], data[a] - a++ - b-- - } - } - // Swap pivot into middle - data[pivot], data[b-1] = data[b-1], data[pivot] - return b - 1, c -} - -// Insertion sort -func insertionSort(data []literalNode, a, b int) { - for i := a + 1; i < b; i++ { - for j := i; j > a && data[j].literal < data[j-1].literal; j-- { - data[j], data[j-1] = data[j-1], data[j] - } - } -} - -// maxDepth returns a threshold at which quicksort should switch -// to heapsort. It returns 2*ceil(lg(n+1)). -func maxDepth(n int) int { - var depth int - for i := n; i > 0; i >>= 1 { - depth++ - } - return depth * 2 -} - -// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. -func medianOfThree(data []literalNode, m1, m0, m2 int) { - // sort 3 elements - if data[m1].literal < data[m0].literal { - data[m1], data[m0] = data[m0], data[m1] - } - // data[m0] <= data[m1] - if data[m2].literal < data[m1].literal { - data[m2], data[m1] = data[m1], data[m2] - // data[m0] <= data[m2] && data[m1] < data[m2] - if data[m1].literal < data[m0].literal { - data[m1], data[m0] = data[m0], data[m1] - } - } - // now data[m0] <= data[m1] <= data[m2] -} +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByLiteral(data []literalNode) { + n := len(data) + quickSort(data, 0, n, maxDepth(n)) +} + +func quickSort(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivot(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSort(data, a, mlo, maxDepth) + a = mhi // i.e., quickSort(data, mhi, b) + } else { + quickSort(data, mhi, b, maxDepth) + b = mlo // i.e., quickSort(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].literal < data[i-6].literal { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSort(data, a, b) + } +} +func heapSort(data []literalNode, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown(data, i, hi, first) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + data[first], data[first+i] = data[first+i], data[first] + siftDown(data, lo, i, first) + } +} + +// siftDown implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDown(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && data[first+child].literal < data[first+child+1].literal { + child++ + } + if data[first+root].literal > data[first+child].literal { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThree(data, lo, lo+s, lo+2*s) + medianOfThree(data, m, m-s, m+s) + medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThree(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && data[a].literal < data[pivot].literal; a++ { + } + b := a + for { + for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot + } + for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].literal > data[pivot].literal { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot + } + for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSort(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && data[j].literal < data[j-1].literal; j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// maxDepth returns a threshold at which quicksort should switch +// to heapsort. It returns 2*ceil(lg(n+1)). +func maxDepth(n int) int { + var depth int + for i := n; i > 0; i >>= 1 { + depth++ + } + return depth * 2 +} + +// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThree(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].literal < data[m1].literal { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} From edcf6f3f21127574d583b2298c82f71bcdd1c6b2 Mon Sep 17 00:00:00 2001 From: root <343636111@qq.com> Date: Thu, 16 Jan 2020 09:25:13 +0800 Subject: [PATCH 6/6] delete old fileds --- flate/huffman_code.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/flate/huffman_code.go b/flate/huffman_code.go index afce90d7b3..863ef8eedc 100644 --- a/flate/huffman_code.go +++ b/flate/huffman_code.go @@ -24,8 +24,6 @@ type huffmanEncoder struct { codes []hcode freqcache []literalNode bitCount [17]int32 - //lns byLiteral // stored to avoid repeated allocation in generate - //lfs byFreq // stored to avoid repeated allocation in generate } type literalNode struct {