Skip to content

Commit

Permalink
crypto/aes: speed up AES by reducing allocations
Browse files Browse the repository at this point in the history
By embedding the arrays into aesCipher directly, we can save a few allocations
when creating a new AES cipher. This also avoids a lot of pointer chasing when
encrypting and decrypting, leading to 3-4% faster performance.

Fixes #65507.

name                     old time/op    new time/op    delta
Encrypt/AES-128-16         4.70ns ± 3%    4.46ns ± 2%   -5.08%  (p=0.000 n=10+10)
Encrypt/AES-192-16         4.90ns ± 3%    4.71ns ± 2%   -3.98%  (p=0.000 n=10+10)
Encrypt/AES-256-16         5.18ns ± 3%    4.91ns ± 1%   -5.15%  (p=0.000 n=10+10)
Decrypt/AES-128-16         4.51ns ± 2%    4.37ns ± 1%   -3.01%  (p=0.000 n=10+10)
Decrypt/AES-192-16         4.77ns ± 2%    4.63ns ± 2%   -3.05%  (p=0.000 n=10+10)
Decrypt/AES-256-16         5.10ns ± 2%    4.93ns ± 3%   -3.27%  (p=0.000 n=10+10)
Expand/AES-128-16          52.5ns ± 4%    55.9ns ± 3%   +6.58%  (p=0.000 n=10+10)
Expand/AES-192-16          45.6ns ± 5%    48.1ns ± 2%   +5.56%  (p=0.000 n=10+10)
Expand/AES-256-16          69.2ns ± 5%    71.7ns ± 3%   +3.61%  (p=0.006 n=9+10)
CreateCipher/AES-128-16     130ns ± 3%      84ns ± 3%  -35.13%  (p=0.000 n=10+10)
CreateCipher/AES-192-16     128ns ± 2%      78ns ± 3%  -38.82%  (p=0.000 n=10+10)
CreateCipher/AES-256-16     156ns ± 3%     105ns ± 4%  -32.33%  (p=0.000 n=10+10)

name                     old speed      new speed      delta
Encrypt/AES-128-16       3.40GB/s ± 2%  3.59GB/s ± 2%   +5.35%  (p=0.000 n=10+10)
Encrypt/AES-192-16       3.27GB/s ± 3%  3.40GB/s ± 2%   +4.13%  (p=0.000 n=10+10)
Encrypt/AES-256-16       3.09GB/s ± 2%  3.26GB/s ± 1%   +5.42%  (p=0.000 n=10+10)
Decrypt/AES-128-16       3.55GB/s ± 2%  3.66GB/s ± 1%   +3.09%  (p=0.000 n=10+10)
Decrypt/AES-192-16       3.35GB/s ± 2%  3.46GB/s ± 2%   +3.14%  (p=0.000 n=10+10)
Decrypt/AES-256-16       3.14GB/s ± 2%  3.24GB/s ± 3%   +3.39%  (p=0.000 n=10+10)

name                     old alloc/op   new alloc/op   delta
Encrypt/AES-128-16          0.00B          0.00B          ~     (all equal)
Encrypt/AES-192-16          0.00B          0.00B          ~     (all equal)
Encrypt/AES-256-16          0.00B          0.00B          ~     (all equal)
Decrypt/AES-128-16          0.00B          0.00B          ~     (all equal)
Decrypt/AES-192-16          0.00B          0.00B          ~     (all equal)
Decrypt/AES-256-16          0.00B          0.00B          ~     (all equal)
Expand/AES-128-16           0.00B          0.00B          ~     (all equal)
Expand/AES-192-16           0.00B          0.00B          ~     (all equal)
Expand/AES-256-16           0.00B          0.00B          ~     (all equal)
CreateCipher/AES-128-16      448B ± 0%      512B ± 0%  +14.29%  (p=0.000 n=10+10)
CreateCipher/AES-192-16      512B ± 0%      512B ± 0%     ~     (all equal)
CreateCipher/AES-256-16      576B ± 0%      512B ± 0%  -11.11%  (p=0.000 n=10+10)

name                     old allocs/op  new allocs/op  delta
Encrypt/AES-128-16           0.00           0.00          ~     (all equal)
Encrypt/AES-192-16           0.00           0.00          ~     (all equal)
Encrypt/AES-256-16           0.00           0.00          ~     (all equal)
Decrypt/AES-128-16           0.00           0.00          ~     (all equal)
Decrypt/AES-192-16           0.00           0.00          ~     (all equal)
Decrypt/AES-256-16           0.00           0.00          ~     (all equal)
Expand/AES-128-16            0.00           0.00          ~     (all equal)
Expand/AES-192-16            0.00           0.00          ~     (all equal)
Expand/AES-256-16            0.00           0.00          ~     (all equal)
CreateCipher/AES-128-16      4.00 ± 0%      1.00 ± 0%  -75.00%  (p=0.000 n=10+10)
CreateCipher/AES-192-16      4.00 ± 0%      1.00 ± 0%  -75.00%  (p=0.000 n=10+10)
CreateCipher/AES-256-16      4.00 ± 0%      1.00 ± 0%  -75.00%  (p=0.000 n=10+10)

Change-Id: I0ea0b21cf84b11b6a5fc7c6ace144390eb55438b
Reviewed-on: https://go-review.googlesource.com/c/go/+/561080
Reviewed-by: Damien Neil <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Jorropo <[email protected]>
Reviewed-by: Roland Shoemaker <[email protected]>
  • Loading branch information
marten-seemann authored and neild committed Mar 25, 2024
1 parent c4792e6 commit 2c150f4
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 21 deletions.
2 changes: 1 addition & 1 deletion src/crypto/aes/aes_gcm.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ var _ gcmAble = (*aesCipherGCM)(nil)
// NewGCM returns the AES cipher wrapped in Galois Counter Mode. This is only
// called by [crypto/cipher.NewGCM] via the gcmAble interface.
func (c *aesCipherGCM) NewGCM(nonceSize, tagSize int) (cipher.AEAD, error) {
g := &gcmAsm{ks: c.enc, nonceSize: nonceSize, tagSize: tagSize}
g := &gcmAsm{ks: c.enc[:c.l], nonceSize: nonceSize, tagSize: tagSize}
gcmAesInit(&g.productTable, g.ks)
return g, nil
}
Expand Down
41 changes: 35 additions & 6 deletions src/crypto/aes/aes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,12 @@ func mustPanic(t *testing.T, msg string, f func()) {
}

func BenchmarkEncrypt(b *testing.B) {
tt := encryptTests[0]
b.Run("AES-128", func(b *testing.B) { benchmarkEncrypt(b, encryptTests[1]) })
b.Run("AES-192", func(b *testing.B) { benchmarkEncrypt(b, encryptTests[2]) })
b.Run("AES-256", func(b *testing.B) { benchmarkEncrypt(b, encryptTests[3]) })
}

func benchmarkEncrypt(b *testing.B, tt CryptTest) {
c, err := NewCipher(tt.key)
if err != nil {
b.Fatal("NewCipher:", err)
Expand All @@ -359,7 +364,12 @@ func BenchmarkEncrypt(b *testing.B) {
}

func BenchmarkDecrypt(b *testing.B) {
tt := encryptTests[0]
b.Run("AES-128", func(b *testing.B) { benchmarkDecrypt(b, encryptTests[1]) })
b.Run("AES-192", func(b *testing.B) { benchmarkDecrypt(b, encryptTests[2]) })
b.Run("AES-256", func(b *testing.B) { benchmarkDecrypt(b, encryptTests[3]) })
}

func benchmarkDecrypt(b *testing.B, tt CryptTest) {
c, err := NewCipher(tt.key)
if err != nil {
b.Fatal("NewCipher:", err)
Expand All @@ -373,11 +383,30 @@ func BenchmarkDecrypt(b *testing.B) {
}

func BenchmarkExpand(b *testing.B) {
tt := encryptTests[0]
n := len(tt.key) + 28
c := &aesCipher{make([]uint32, n), make([]uint32, n)}
b.Run("AES-128", func(b *testing.B) { benchmarkExpand(b, encryptTests[1]) })
b.Run("AES-192", func(b *testing.B) { benchmarkExpand(b, encryptTests[2]) })
b.Run("AES-256", func(b *testing.B) { benchmarkExpand(b, encryptTests[3]) })
}

func benchmarkExpand(b *testing.B, tt CryptTest) {
c := &aesCipher{l: uint8(len(tt.key) + 28)}
b.ResetTimer()
for i := 0; i < b.N; i++ {
expandKey(tt.key, c.enc, c.dec)
expandKey(tt.key, c.enc[:c.l], c.dec[:c.l])
}
}

func BenchmarkCreateCipher(b *testing.B) {
b.Run("AES-128", func(b *testing.B) { benchmarkCreateCipher(b, encryptTests[1]) })
b.Run("AES-192", func(b *testing.B) { benchmarkCreateCipher(b, encryptTests[2]) })
b.Run("AES-256", func(b *testing.B) { benchmarkCreateCipher(b, encryptTests[3]) })
}

func benchmarkCreateCipher(b *testing.B, tt CryptTest) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
if _, err := NewCipher(tt.key); err != nil {
b.Fatal(err)
}
}
}
14 changes: 7 additions & 7 deletions src/crypto/aes/cipher.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ const BlockSize = 16

// A cipher is an instance of AES encryption using a particular key.
type aesCipher struct {
enc []uint32
dec []uint32
l uint8 // only this length of the enc and dec array is actually used
enc [28 + 32]uint32
dec [28 + 32]uint32
}

type KeySizeError int
Expand Down Expand Up @@ -47,9 +48,8 @@ func NewCipher(key []byte) (cipher.Block, error) {
// newCipherGeneric creates and returns a new cipher.Block
// implemented in pure Go.
func newCipherGeneric(key []byte) (cipher.Block, error) {
n := len(key) + 28
c := aesCipher{make([]uint32, n), make([]uint32, n)}
expandKeyGo(key, c.enc, c.dec)
c := aesCipher{l: uint8(len(key) + 28)}
expandKeyGo(key, c.enc[:c.l], c.dec[:c.l])
return &c, nil
}

Expand All @@ -65,7 +65,7 @@ func (c *aesCipher) Encrypt(dst, src []byte) {
if alias.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
panic("crypto/aes: invalid buffer overlap")
}
encryptBlockGo(c.enc, dst, src)
encryptBlockGo(c.enc[:c.l], dst, src)
}

func (c *aesCipher) Decrypt(dst, src []byte) {
Expand All @@ -78,5 +78,5 @@ func (c *aesCipher) Decrypt(dst, src []byte) {
if alias.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
panic("crypto/aes: invalid buffer overlap")
}
decryptBlockGo(c.dec, dst, src)
decryptBlockGo(c.dec[:c.l], dst, src)
}
13 changes: 7 additions & 6 deletions src/crypto/aes/cipher_asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ func newCipher(key []byte) (cipher.Block, error) {
if !supportsAES {
return newCipherGeneric(key)
}
n := len(key) + 28
c := aesCipherAsm{aesCipher{make([]uint32, n), make([]uint32, n)}}
// Note that under certain circumstances, we only return the inner aesCipherAsm.
// This avoids an unnecessary allocation of the aesCipher struct.
c := aesCipherGCM{aesCipherAsm{aesCipher{l: uint8(len(key) + 28)}}}
var rounds int
switch len(key) {
case 128 / 8:
Expand All @@ -60,9 +61,9 @@ func newCipher(key []byte) (cipher.Block, error) {

expandKeyAsm(rounds, &key[0], &c.enc[0], &c.dec[0])
if supportsAES && supportsGFMUL {
return &aesCipherGCM{c}, nil
return &c, nil
}
return &c, nil
return &c.aesCipherAsm, nil
}

func (c *aesCipherAsm) BlockSize() int { return BlockSize }
Expand All @@ -78,7 +79,7 @@ func (c *aesCipherAsm) Encrypt(dst, src []byte) {
if alias.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
panic("crypto/aes: invalid buffer overlap")
}
encryptBlockAsm(len(c.enc)/4-1, &c.enc[0], &dst[0], &src[0])
encryptBlockAsm(int(c.l)/4-1, &c.enc[0], &dst[0], &src[0])
}

func (c *aesCipherAsm) Decrypt(dst, src []byte) {
Expand All @@ -92,7 +93,7 @@ func (c *aesCipherAsm) Decrypt(dst, src []byte) {
if alias.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
panic("crypto/aes: invalid buffer overlap")
}
decryptBlockAsm(len(c.dec)/4-1, &c.dec[0], &dst[0], &src[0])
decryptBlockAsm(int(c.l)/4-1, &c.dec[0], &dst[0], &src[0])
}

// expandKey is used by BenchmarkExpand to ensure that the asm implementation
Expand Down
2 changes: 1 addition & 1 deletion src/crypto/aes/gcm_ppc64x.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func counterCryptASM(nr int, out, in []byte, counter *[gcmBlockSize]byte, key *u
// called by [crypto/cipher.NewGCM] via the gcmAble interface.
func (c *aesCipherAsm) NewGCM(nonceSize, tagSize int) (cipher.AEAD, error) {
var h1, h2 uint64
g := &gcmAsm{cipher: c, ks: c.enc, nonceSize: nonceSize, tagSize: tagSize}
g := &gcmAsm{cipher: c, ks: c.enc[:c.l], nonceSize: nonceSize, tagSize: tagSize}

hle := make([]byte, gcmBlockSize)

Expand Down

0 comments on commit 2c150f4

Please sign in to comment.