From 8c1c18b2f2188eed3f72d402b01fd7509691d02b Mon Sep 17 00:00:00 2001 From: Juliusz Chroboczek Date: Sun, 26 May 2024 22:16:12 +0200 Subject: [PATCH] Use crypto/subtle.XORBytes The function XORBytes has been included in the Go standard library since Go version 1.20. This allows us to remove a bunch of assembler code, and instead rely on the stdlib. There are only three versions remaining: - xor_generic, which simply calls the stdlib function; - xor_old, which uses unsafe Go and is used with Go 1.19; - xor_arm, which is written in assembly and is used on 32-bit ARM, since the stdlib doesn't implement a fast version of XORBytes on that architecture. --- utils/xor/xor_amd64.go | 29 -------------- utils/xor/xor_amd64.s | 56 -------------------------- utils/xor/xor_arm64.go | 31 -------------- utils/xor/xor_arm64.s | 69 ------------------------------- utils/xor/xor_generic.go | 70 +++----------------------------- utils/xor/xor_old.go | 77 +++++++++++++++++++++++++++++++++++ utils/xor/xor_ppc64x.go | 29 -------------- utils/xor/xor_ppc64x.s | 87 ---------------------------------------- 8 files changed, 83 insertions(+), 365 deletions(-) delete mode 100644 utils/xor/xor_amd64.go delete mode 100644 utils/xor/xor_amd64.s delete mode 100644 utils/xor/xor_arm64.go delete mode 100644 utils/xor/xor_arm64.s create mode 100644 utils/xor/xor_old.go delete mode 100644 utils/xor/xor_ppc64x.go delete mode 100644 utils/xor/xor_ppc64x.s diff --git a/utils/xor/xor_amd64.go b/utils/xor/xor_amd64.go deleted file mode 100644 index ded8e0d..0000000 --- a/utils/xor/xor_amd64.go +++ /dev/null @@ -1,29 +0,0 @@ -// SPDX-FileCopyrightText: 2018 The Go Authors. All rights reserved. -// SPDX-License-Identifier: BSD-3-Clause - -//go:build !gccgo -// +build !gccgo - -// Package xor provides utility functions used by other Pion -// packages. AMD64 arch. -package xor - -// XorBytes xors the bytes in a and b. The destination should have enough -// space, otherwise xorBytes will panic. 
Returns the number of bytes xor'd. -// -//revive:disable-next-line -func XorBytes(dst, a, b []byte) int { - n := len(a) - if len(b) < n { - n = len(b) - } - if n == 0 { - return 0 - } - _ = dst[n-1] - xorBytesSSE2(&dst[0], &a[0], &b[0], n) // amd64 must have SSE2 - return n -} - -//go:noescape -func xorBytesSSE2(dst, a, b *byte, n int) diff --git a/utils/xor/xor_amd64.s b/utils/xor/xor_amd64.s deleted file mode 100644 index f66ac95..0000000 --- a/utils/xor/xor_amd64.s +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-FileCopyrightText: 2018 The Go Authors. All rights reserved. -// SPDX-License-Identifier: BSD-3-Clause - -// go:build !gccgo -// +build !gccgo - -#include "textflag.h" - -// func xorBytesSSE2(dst, a, b *byte, n int) -TEXT ·xorBytesSSE2(SB), NOSPLIT, $0 - MOVQ dst+0(FP), BX - MOVQ a+8(FP), SI - MOVQ b+16(FP), CX - MOVQ n+24(FP), DX - TESTQ $15, DX // AND 15 & len, if not zero jump to not_aligned. - JNZ not_aligned - -aligned: - MOVQ $0, AX // position in slices - -loop16b: - MOVOU (SI)(AX*1), X0 // XOR 16byte forwards. - MOVOU (CX)(AX*1), X1 - PXOR X1, X0 - MOVOU X0, (BX)(AX*1) - ADDQ $16, AX - CMPQ DX, AX - JNE loop16b - RET - -loop_1b: - SUBQ $1, DX // XOR 1byte backwards. - MOVB (SI)(DX*1), DI - MOVB (CX)(DX*1), AX - XORB AX, DI - MOVB DI, (BX)(DX*1) - TESTQ $7, DX // AND 7 & len, if not zero jump to loop_1b. - JNZ loop_1b - CMPQ DX, $0 // if len is 0, ret. - JE ret - TESTQ $15, DX // AND 15 & len, if zero jump to aligned. - JZ aligned - -not_aligned: - TESTQ $7, DX // AND $7 & len, if not zero jump to loop_1b. - JNE loop_1b - SUBQ $8, DX // XOR 8bytes backwards. - MOVQ (SI)(DX*1), DI - MOVQ (CX)(DX*1), AX - XORQ AX, DI - MOVQ DI, (BX)(DX*1) - CMPQ DX, $16 // if len is greater or equal 16 here, it must be aligned. - JGE aligned - -ret: - RET diff --git a/utils/xor/xor_arm64.go b/utils/xor/xor_arm64.go deleted file mode 100644 index 7002ab7..0000000 --- a/utils/xor/xor_arm64.go +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-FileCopyrightText: 2020 The Go Authors. 
All rights reserved. -// SPDX-License-Identifier: BSD-3-Clause - -//go:build !gccgo -// +build !gccgo - -// Package xor provides utility functions used by other Pion -// packages. ARM64 arch. -package xor - -// XorBytes xors the bytes in a and b. The destination should have enough -// space, otherwise xorBytes will panic. Returns the number of bytes xor'd. -// -//revive:disable-next-line -func XorBytes(dst, a, b []byte) int { - n := len(a) - if len(b) < n { - n = len(b) - } - if n == 0 { - return 0 - } - // make sure dst has enough space - _ = dst[n-1] - - xorBytesARM64(&dst[0], &a[0], &b[0], n) - return n -} - -//go:noescape -func xorBytesARM64(dst, a, b *byte, n int) diff --git a/utils/xor/xor_arm64.s b/utils/xor/xor_arm64.s deleted file mode 100644 index 0b82d09..0000000 --- a/utils/xor/xor_arm64.s +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-FileCopyrightText: 2020 The Go Authors. All rights reserved. -// SPDX-License-Identifier: BSD-3-Clause - -//go:build !gccgo -// +build !gccgo - -#include "textflag.h" - -// func xorBytesARM64(dst, a, b *byte, n int) -TEXT ·xorBytesARM64(SB), NOSPLIT|NOFRAME, $0 - MOVD dst+0(FP), R0 - MOVD a+8(FP), R1 - MOVD b+16(FP), R2 - MOVD n+24(FP), R3 - CMP $64, R3 - BLT tail -loop_64: - VLD1.P 64(R1), [V0.B16, V1.B16, V2.B16, V3.B16] - VLD1.P 64(R2), [V4.B16, V5.B16, V6.B16, V7.B16] - VEOR V0.B16, V4.B16, V4.B16 - VEOR V1.B16, V5.B16, V5.B16 - VEOR V2.B16, V6.B16, V6.B16 - VEOR V3.B16, V7.B16, V7.B16 - VST1.P [V4.B16, V5.B16, V6.B16, V7.B16], 64(R0) - SUBS $64, R3 - CMP $64, R3 - BGE loop_64 -tail: - // quick end - CBZ R3, end - TBZ $5, R3, less_than32 - VLD1.P 32(R1), [V0.B16, V1.B16] - VLD1.P 32(R2), [V2.B16, V3.B16] - VEOR V0.B16, V2.B16, V2.B16 - VEOR V1.B16, V3.B16, V3.B16 - VST1.P [V2.B16, V3.B16], 32(R0) -less_than32: - TBZ $4, R3, less_than16 - LDP.P 16(R1), (R11, R12) - LDP.P 16(R2), (R13, R14) - EOR R11, R13, R13 - EOR R12, R14, R14 - STP.P (R13, R14), 16(R0) -less_than16: - TBZ $3, R3, less_than8 - MOVD.P 8(R1), R11 - MOVD.P 
8(R2), R12 - EOR R11, R12, R12 - MOVD.P R12, 8(R0) -less_than8: - TBZ $2, R3, less_than4 - MOVWU.P 4(R1), R13 - MOVWU.P 4(R2), R14 - EORW R13, R14, R14 - MOVWU.P R14, 4(R0) -less_than4: - TBZ $1, R3, less_than2 - MOVHU.P 2(R1), R15 - MOVHU.P 2(R2), R16 - EORW R15, R16, R16 - MOVHU.P R16, 2(R0) -less_than2: - TBZ $0, R3, end - MOVBU (R1), R17 - MOVBU (R2), R19 - EORW R17, R19, R19 - MOVBU R19, (R0) -end: - RET diff --git a/utils/xor/xor_generic.go b/utils/xor/xor_generic.go index 967fed3..690549a 100644 --- a/utils/xor/xor_generic.go +++ b/utils/xor/xor_generic.go @@ -1,78 +1,20 @@ // SPDX-FileCopyrightText: 2013 The Go Authors. All rights reserved. // SPDX-License-Identifier: BSD-3-Clause -// SPDX-FileCopyrightText: 2022 The Pion community +// SPDX-FileCopyrightText: 2024 The Pion community // SPDX-License-Identifier: MIT -//go:build (!amd64 && !ppc64 && !ppc64le && !arm64 && !arm) || gccgo -// +build !amd64,!ppc64,!ppc64le,!arm64,!arm gccgo +//go:build go1.20 && !arm && !gccgo -// Package xor provides utility functions used by other Pion -// packages. Generic arch. +// Package xor provides the XorBytes function. package xor import ( - "runtime" - "unsafe" + "crypto/subtle" ) -const ( - wordSize = int(unsafe.Sizeof(uintptr(0))) // nolint:gosec - supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" // nolint:gochecknoglobals -) - -func isAligned(a *byte) bool { - return uintptr(unsafe.Pointer(a))%uintptr(wordSize) == 0 -} - -// XorBytes xors the bytes in a and b. The destination should have enough -// space, otherwise xorBytes will panic. Returns the number of bytes xor'd. +// XorBytes calls [crypto/subtle.XORBytes]. 
// //revive:disable-next-line func XorBytes(dst, a, b []byte) int { - n := len(a) - if len(b) < n { - n = len(b) - } - if n == 0 { - return 0 - } - - switch { - case supportsUnaligned: - fastXORBytes(dst, a, b, n) - case isAligned(&dst[0]) && isAligned(&a[0]) && isAligned(&b[0]): - fastXORBytes(dst, a, b, n) - default: - safeXORBytes(dst, a, b, n) - } - return n -} - -// fastXORBytes xors in bulk. It only works on architectures that -// support unaligned read/writes. -// n needs to be smaller or equal than the length of a and b. -func fastXORBytes(dst, a, b []byte, n int) { - // Assert dst has enough space - _ = dst[n-1] - - w := n / wordSize - if w > 0 { - dw := *(*[]uintptr)(unsafe.Pointer(&dst)) // nolint:gosec - aw := *(*[]uintptr)(unsafe.Pointer(&a)) // nolint:gosec - bw := *(*[]uintptr)(unsafe.Pointer(&b)) // nolint:gosec - for i := 0; i < w; i++ { - dw[i] = aw[i] ^ bw[i] - } - } - - for i := (n - n%wordSize); i < n; i++ { - dst[i] = a[i] ^ b[i] - } -} - -// n needs to be smaller or equal than the length of a and b. -func safeXORBytes(dst, a, b []byte, n int) { - for i := 0; i < n; i++ { - dst[i] = a[i] ^ b[i] - } + return subtle.XORBytes(dst, a, b) } diff --git a/utils/xor/xor_old.go b/utils/xor/xor_old.go new file mode 100644 index 0000000..bd14d7c --- /dev/null +++ b/utils/xor/xor_old.go @@ -0,0 +1,77 @@ +// SPDX-FileCopyrightText: 2013 The Go Authors. All rights reserved. +// SPDX-License-Identifier: BSD-3-Clause +// SPDX-FileCopyrightText: 2022 The Pion community +// SPDX-License-Identifier: MIT + +//go:build (!go1.20 && !arm) || gccgo + +// Package xor provides the XorBytes function. +// This version is only used on Go up to version 1.19. 
+package xor + +import ( + "runtime" + "unsafe" +) + +const ( + wordSize = int(unsafe.Sizeof(uintptr(0))) // nolint:gosec + supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "arm64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" // nolint:gochecknoglobals +) + +func isAligned(a *byte) bool { + return uintptr(unsafe.Pointer(a))%uintptr(wordSize) == 0 +} + +// XorBytes xors the bytes in a and b. The destination should have enough +// space, otherwise xorBytes will panic. Returns the number of bytes xor'd. +// +//revive:disable-next-line +func XorBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + if n == 0 { + return 0 + } + + switch { + case supportsUnaligned: + fastXORBytes(dst, a, b, n) + case isAligned(&dst[0]) && isAligned(&a[0]) && isAligned(&b[0]): + fastXORBytes(dst, a, b, n) + default: + safeXORBytes(dst, a, b, n) + } + return n +} + +// fastXORBytes xors in bulk. It only works on architectures that +// support unaligned read/writes. +// n needs to be smaller or equal than the length of a and b. +func fastXORBytes(dst, a, b []byte, n int) { + // Assert dst has enough space + _ = dst[n-1] + + w := n / wordSize + if w > 0 { + dw := *(*[]uintptr)(unsafe.Pointer(&dst)) // nolint:gosec + aw := *(*[]uintptr)(unsafe.Pointer(&a)) // nolint:gosec + bw := *(*[]uintptr)(unsafe.Pointer(&b)) // nolint:gosec + for i := 0; i < w; i++ { + dw[i] = aw[i] ^ bw[i] + } + } + + for i := (n - n%wordSize); i < n; i++ { + dst[i] = a[i] ^ b[i] + } +} + +// n needs to be smaller or equal than the length of a and b. +func safeXORBytes(dst, a, b []byte, n int) { + for i := 0; i < n; i++ { + dst[i] = a[i] ^ b[i] + } +} diff --git a/utils/xor/xor_ppc64x.go b/utils/xor/xor_ppc64x.go deleted file mode 100644 index bcc5926..0000000 --- a/utils/xor/xor_ppc64x.go +++ /dev/null @@ -1,29 +0,0 @@ -// SPDX-FileCopyrightText: 2018 The Go Authors. All rights reserved. 
-// SPDX-License-Identifier: BSD-3-Clause - -//go:build (ppc64 && !gccgo) || (ppc64le && !gccgo) -// +build ppc64,!gccgo ppc64le,!gccgo - -// Package xor provides utility functions used by other Pion -// packages. PPC64 arch. -package xor - -// XorBytes xors the bytes in a and b. The destination should have enough -// space, otherwise xorBytes will panic. Returns the number of bytes xor'd. -// -//revive:disable-next-line -func XorBytes(dst, a, b []byte) int { - n := len(a) - if len(b) < n { - n = len(b) - } - if n == 0 { - return 0 - } - _ = dst[n-1] - xorBytesVSX(&dst[0], &a[0], &b[0], n) - return n -} - -//go:noescape -func xorBytesVSX(dst, a, b *byte, n int) diff --git a/utils/xor/xor_ppc64x.s b/utils/xor/xor_ppc64x.s deleted file mode 100644 index 2276353..0000000 --- a/utils/xor/xor_ppc64x.s +++ /dev/null @@ -1,87 +0,0 @@ -// SPDX-FileCopyrightText: 2018 The Go Authors. All rights reserved. -// SPDX-License-Identifier: BSD-3-Clause - -//go:build (ppc64 && !gccgo) || (ppc64le && !gccgo) -//+build ppc64,!gccgo ppc64le,!gccgo - -#include "textflag.h" - -// func xorBytesVSX(dst, a, b *byte, n int) -TEXT ·xorBytesVSX(SB), NOSPLIT, $0 - MOVD dst+0(FP), R3 // R3 = dst - MOVD a+8(FP), R4 // R4 = a - MOVD b+16(FP), R5 // R5 = b - MOVD n+24(FP), R6 // R6 = n - - CMPU R6, $32, CR7 // Check if n ≥ 32 bytes - MOVD R0, R8 // R8 = index - CMPU R6, $8, CR6 // Check if 8 ≤ n < 32 bytes - BLT CR6, small // Smaller than 8 - BLT CR7, xor16 // Case for 16 ≤ n < 32 bytes - - // Case for n ≥ 32 bytes -preloop32: - SRD $5, R6, R7 // Setup loop counter - MOVD R7, CTR - MOVD $16, R10 - ANDCC $31, R6, R9 // Check for tailing bytes for later -loop32: - LXVD2X (R4)(R8), VS32 // VS32 = a[i,...,i+15] - LXVD2X (R4)(R10), VS34 - LXVD2X (R5)(R8), VS33 // VS33 = b[i,...,i+15] - LXVD2X (R5)(R10), VS35 - XXLXOR VS32, VS33, VS32 // VS34 = a[] ^ b[] - XXLXOR VS34, VS35, VS34 - STXVD2X VS32, (R3)(R8) // Store to dst - STXVD2X VS34, (R3)(R10) - ADD $32, R8 // Update index - ADD $32, R10 - BC 16, 0, 
loop32 // bdnz loop16 - - BEQ CR0, done - - MOVD R9, R6 - CMP R6, $8 - BLT small -xor16: - CMP R6, $16 - BLT xor8 - LXVD2X (R4)(R8), VS32 - LXVD2X (R5)(R8), VS33 - XXLXOR VS32, VS33, VS32 - STXVD2X VS32, (R3)(R8) - ADD $16, R8 - ADD $-16, R6 - CMP R6, $8 - BLT small -xor8: - // Case for 8 ≤ n < 16 bytes - MOVD (R4)(R8), R14 // R14 = a[i,...,i+7] - MOVD (R5)(R8), R15 // R15 = b[i,...,i+7] - XOR R14, R15, R16 // R16 = a[] ^ b[] - SUB $8, R6 // n = n - 8 - MOVD R16, (R3)(R8) // Store to dst - ADD $8, R8 - - // Check if we're finished - CMP R6, R0 - BGT small - RET - - // Case for n < 8 bytes and tailing bytes from the - // previous cases. -small: - CMP R6, R0 - BEQ done - MOVD R6, CTR // Setup loop counter - -loop: - MOVBZ (R4)(R8), R14 // R14 = a[i] - MOVBZ (R5)(R8), R15 // R15 = b[i] - XOR R14, R15, R16 // R16 = a[i] ^ b[i] - MOVB R16, (R3)(R8) // Store to dst - ADD $1, R8 - BC 16, 0, loop // bdnz loop - -done: - RET