Skip to content

Commit

Permalink
crypto: arm64/aes-ce-gcm - operate on two input blocks at a time
Browse files Browse the repository at this point in the history
Update the core AES/GCM transform and the associated plumbing to operate
on 2 AES/GHASH blocks at a time. By itself, this is not expected to
result in a noticeable speedup, but it paves the way for reimplementing
the GHASH component using 2-way aggregation.

Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
  • Loading branch information
Ard Biesheuvel authored and herbertx committed Aug 7, 2018
1 parent 3465893 commit 71e52c2
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 69 deletions.
127 changes: 97 additions & 30 deletions arch/arm64/crypto/ghash-ce-core.S
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,10 @@ ENTRY(pmull_ghash_update_p8)
__pmull_ghash p8
ENDPROC(pmull_ghash_update_p8)

// Vector register aliases used by the AES/GCM code that follows.
// NOTE(review): this listing looks like a diff rendered without +/- markers;
// KS/CTR/INP appear to be the pre-change single-block names and
// KS0/KS1/INP0/INP1 their two-block replacements (they alias the same
// registers) -- confirm against the actual file before relying on this.
KS .req v8	// keystream block that is XORed with INP
CTR .req v9	// counter block that is run through the AES rounds
INP .req v10	// input block loaded from [x3]
KS0 .req v8	// counter block, then keystream, for the first block of a pair
KS1 .req v9	// counter block, then keystream, for the second block of a pair
INP0 .req v10	// first input block of a pair, loaded from [x3]
INP1 .req v11	// second input block of a pair, loaded from [x3]

.macro load_round_keys, rounds, rk
cmp \rounds, #12
Expand Down Expand Up @@ -336,101 +337,167 @@ CPU_LE( rev x8, x8 )

.if \enc == 1
ldr x10, [sp]
ld1 {KS.16b}, [x10]
ld1 {KS0.16b-KS1.16b}, [x10]
.endif

0: ld1 {CTR.8b}, [x5] // load upper counter
ld1 {INP.16b}, [x3], #16
0: ld1 {INP0.16b-INP1.16b}, [x3], #32

rev x9, x8
add x8, x8, #1
sub w0, w0, #1
ins CTR.d[1], x9 // set lower counter
add x11, x8, #1
add x8, x8, #2

.if \enc == 1
eor INP.16b, INP.16b, KS.16b // encrypt input
st1 {INP.16b}, [x2], #16
eor INP0.16b, INP0.16b, KS0.16b // encrypt input
eor INP1.16b, INP1.16b, KS1.16b
.endif

rev64 T1.16b, INP.16b
ld1 {KS0.8b}, [x5] // load upper counter
rev x11, x11
sub w0, w0, #2
mov KS1.8b, KS0.8b
ins KS0.d[1], x9 // set lower counter
ins KS1.d[1], x11

rev64 T1.16b, INP0.16b

cmp w7, #12
b.ge 2f // AES-192/256?

1: enc_round CTR, v21
1: enc_round KS0, v21

ext T2.16b, XL.16b, XL.16b, #8
ext IN1.16b, T1.16b, T1.16b, #8

enc_round CTR, v22
enc_round KS1, v21

eor T1.16b, T1.16b, T2.16b
eor XL.16b, XL.16b, IN1.16b

enc_round CTR, v23
enc_round KS0, v22

pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1
eor T1.16b, T1.16b, XL.16b

enc_round CTR, v24
enc_round KS1, v22

pmull XL.1q, SHASH.1d, XL.1d // a0 * b0
pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0)

enc_round CTR, v25
enc_round KS0, v23

ext T1.16b, XL.16b, XH.16b, #8
eor T2.16b, XL.16b, XH.16b
eor XM.16b, XM.16b, T1.16b

enc_round CTR, v26
enc_round KS1, v23

eor XM.16b, XM.16b, T2.16b
pmull T2.1q, XL.1d, MASK.1d

enc_round CTR, v27
enc_round KS0, v24

mov XH.d[0], XM.d[1]
mov XM.d[1], XL.d[0]

enc_round CTR, v28
enc_round KS1, v24

eor XL.16b, XM.16b, T2.16b

enc_round CTR, v29
enc_round KS0, v25

ext T2.16b, XL.16b, XL.16b, #8

aese CTR.16b, v30.16b
enc_round KS1, v25

pmull XL.1q, XL.1d, MASK.1d
eor T2.16b, T2.16b, XH.16b

eor KS.16b, CTR.16b, v31.16b
enc_round KS0, v26

eor XL.16b, XL.16b, T2.16b
rev64 T1.16b, INP1.16b

enc_round KS1, v26

ext T2.16b, XL.16b, XL.16b, #8
ext IN1.16b, T1.16b, T1.16b, #8

enc_round KS0, v27

eor T1.16b, T1.16b, T2.16b
eor XL.16b, XL.16b, IN1.16b

enc_round KS1, v27

pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1
eor T1.16b, T1.16b, XL.16b

enc_round KS0, v28

pmull XL.1q, SHASH.1d, XL.1d // a0 * b0
pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0)

enc_round KS1, v28

ext T1.16b, XL.16b, XH.16b, #8
eor T2.16b, XL.16b, XH.16b
eor XM.16b, XM.16b, T1.16b

enc_round KS0, v29

eor XM.16b, XM.16b, T2.16b
pmull T2.1q, XL.1d, MASK.1d

enc_round KS1, v29

mov XH.d[0], XM.d[1]
mov XM.d[1], XL.d[0]

aese KS0.16b, v30.16b

eor XL.16b, XM.16b, T2.16b

aese KS1.16b, v30.16b

ext T2.16b, XL.16b, XL.16b, #8

eor KS0.16b, KS0.16b, v31.16b

pmull XL.1q, XL.1d, MASK.1d
eor T2.16b, T2.16b, XH.16b

eor KS1.16b, KS1.16b, v31.16b

eor XL.16b, XL.16b, T2.16b

.if \enc == 0
eor INP.16b, INP.16b, KS.16b
st1 {INP.16b}, [x2], #16
eor INP0.16b, INP0.16b, KS0.16b
eor INP1.16b, INP1.16b, KS1.16b
.endif

st1 {INP0.16b-INP1.16b}, [x2], #32

cbnz w0, 0b

CPU_LE( rev x8, x8 )
st1 {XL.2d}, [x1]
str x8, [x5, #8] // store lower counter

.if \enc == 1
st1 {KS.16b}, [x10]
st1 {KS0.16b-KS1.16b}, [x10]
.endif

ret

2: b.eq 3f // AES-192?
enc_round CTR, v17
enc_round CTR, v18
3: enc_round CTR, v19
enc_round CTR, v20
enc_round KS0, v17
enc_round KS1, v17
enc_round KS0, v18
enc_round KS1, v18
3: enc_round KS0, v19
enc_round KS1, v19
enc_round KS0, v20
enc_round KS1, v20
b 1b
.endm

Expand Down
Loading

0 comments on commit 71e52c2

Please sign in to comment.