Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make CIRCL run on non-amd64 architectures. #121

Merged
merged 12 commits into from
Jun 2, 2020
3 changes: 0 additions & 3 deletions .github/actions/golangci-lint/Dockerfile

This file was deleted.

5 changes: 0 additions & 5 deletions .github/actions/golangci-lint/action.yml

This file was deleted.

27 changes: 16 additions & 11 deletions .github/workflows/ci-actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:
- master
jobs:
amd64_job:
name: amd64/Go-${{matrix.GOVER}}
name: Go-${{matrix.GOVER}}/amd64
runs-on: ubuntu-18.04
strategy:
matrix:
Expand All @@ -17,9 +17,10 @@ jobs:
- name: Checkout
uses: actions/checkout@v2
- name: Linting
uses: ./.github/actions/golangci-lint
uses: golangci/golangci-lint-action@v1
with:
args: run --config=.etc/golangci.yml ./...
version: v1.25
args: --config=./.etc/golangci.yml ./...
- name: Setup Go-${{ matrix.GOVER }}
uses: actions/setup-go@v2
with:
Expand All @@ -32,22 +33,26 @@ jobs:
run: go build -v ./...
- name: Testing
run: go test -v ./...
arm64_job:
exotic_job:
name: Go-${{matrix.CFG[2]}}/${{matrix.CFG[0]}}
runs-on: ubuntu-18.04
name: arm64/Go-1.14
env:
GOVER: 1.14
SHA256: sha256:943fa6421fe7ca2b9fa40db22a5c14f99ed95afd0c18f9b7dec1e05b9ffce804
strategy:
matrix:
CFG: [ [s390x,s390x,1.14], [arm64,arm64v8,1.14] ]
steps:
- uses: actions/checkout@v2
- name: Enabling Docker Experimental
run: |
echo $'{\n "experimental": true\n}' | sudo tee /etc/docker/daemon.json
sudo service docker restart
- name: Pulling Images
run: |
docker pull -q multiarch/qemu-user-static
docker pull -q arm64v8/golang@$SHA256
docker pull -q --platform linux/${{matrix.CFG[0]}} ${{matrix.CFG[1]}}/golang:${{matrix.CFG[2]}}
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
- name: Testing
run: |
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
docker run --rm -v `pwd`:`pwd` -w `pwd` arm64v8/golang@$SHA256 go test -v ./...
docker run --rm -e "GODEBUG=asyncpreemptoff=1" -v `pwd`:`pwd` -w `pwd` ${{matrix.CFG[1]}}/golang:${{matrix.CFG[2]}} go test -v ./...
coverage_amd64_job:
needs: [ amd64_job ]
if: github.event_name == 'push'
Expand Down
17 changes: 9 additions & 8 deletions dh/csidh/csidh_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package csidh
import (
"bytes"
"crypto/rand"
"encoding/binary"
"encoding/hex"
"encoding/json"
"os"
Expand Down Expand Up @@ -246,14 +247,14 @@ func TestKAT(t *testing.T) {
if err != nil {
t.Fatal(err.Error())
}
// Loop over all test cases
for i := range tests.Vectors {
if !hasADXandBMI2 && i >= numIter {
// The algorithm is relatively slow, so on slow systems test
// against smaller number of test vectors (otherwise CI may break)
return
}
test := tests.Vectors[i]
// Loop over numIter test cases
// The algorithm is relatively slow, so it tests a smaller number.
N := len(tests.Vectors)
var buf [2]byte
for i := 0; i < numIter; i++ {
_, _ = rand.Read(buf[:])
idx := binary.LittleEndian.Uint16(buf[:]) % uint16(N)
test := tests.Vectors[idx]
switch test.Status {
case StatusValues[Valid]:
checkSharedSecret(&test, t, Valid)
Expand Down
72 changes: 0 additions & 72 deletions dh/csidh/fp511.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,6 @@ package csidh

import (
"math/bits"

"golang.org/x/sys/cpu"
)

// CPU Capabilities. Those flags are referred by assembly code. According to
// https://github.com/golang/go/issues/28230, variables referred from the
// assembly must be in the same package.
// We declare variables not constants, in order to facilitate testing.
var (
// Signals support for BMI2 (MULX)
hasBMI2 = cpu.X86.HasBMI2 //nolint
// Signals support for ADX and BMI2
hasADXandBMI2 = cpu.X86.HasBMI2 && cpu.X86.HasADX
)

// Constant time select.
Expand All @@ -35,65 +22,6 @@ func ctIsNonZero64(i uint64) int {
return int((i | (^(i - 1))) >> 63)
}

// mulGeneric multiplies x by y with a word-by-word interleaved
// multiply-and-reduce and writes the 8-word result to r.
//
// Every round i does the following:
//   - picks q from the low words of x[i], y, s and pNegInv[0]
//     (presumably pNegInv[0] is -p^-1 mod 2^64, which makes the low word of
//     the round's total zero; confirm against the constant's definition),
//   - forms the 9-word sum x[i]*y + q*p,
//   - adds the running value s and drops the lowest word, i.e. shifts the
//     total right by one 64-bit word.
//
// The first numWords-1 rounds keep the running value in s; the last round
// writes straight into r. r is only written after all reads of x and y for
// that round are done. No final subtraction of p is performed here.
func mulGeneric(r, x, y *fp) {
	var s fp // keeps intermediate results
	var t1, t2 [9]uint64
	var c, q uint64

	for i := 0; i < numWords-1; i++ {
		q = ((x[i] * y[0]) + s[0]) * pNegInv[0]
		mul576(&t1, &p, q)
		mul576(&t2, y, x[i])

		// x[i]*y + q_i*p
		t1[0], c = bits.Add64(t1[0], t2[0], 0)
		t1[1], c = bits.Add64(t1[1], t2[1], c)
		t1[2], c = bits.Add64(t1[2], t2[2], c)
		t1[3], c = bits.Add64(t1[3], t2[3], c)
		t1[4], c = bits.Add64(t1[4], t2[4], c)
		t1[5], c = bits.Add64(t1[5], t2[5], c)
		t1[6], c = bits.Add64(t1[6], t2[6], c)
		t1[7], c = bits.Add64(t1[7], t2[7], c)
		t1[8], _ = bits.Add64(t1[8], t2[8], c)

		// s = (s + x[i]*y + q_i * p) / R
		// The low word of the sum is discarded; only its carry is kept.
		_, c = bits.Add64(t1[0], s[0], 0)
		s[0], c = bits.Add64(t1[1], s[1], c)
		s[1], c = bits.Add64(t1[2], s[2], c)
		s[2], c = bits.Add64(t1[3], s[3], c)
		s[3], c = bits.Add64(t1[4], s[4], c)
		s[4], c = bits.Add64(t1[5], s[5], c)
		s[5], c = bits.Add64(t1[6], s[6], c)
		s[6], c = bits.Add64(t1[7], s[7], c)
		s[7], _ = bits.Add64(t1[8], 0, c)
	}

	// last iteration stores result in r
	q = ((x[numWords-1] * y[0]) + s[0]) * pNegInv[0]
	mul576(&t1, &p, q)
	mul576(&t2, y, x[numWords-1])

	// x[numWords-1]*y + q*p. This is a fresh addition chain, so it starts
	// with a carry-in of 0 exactly like the rounds inside the loop; the carry
	// left in c by the previous chain must not leak into it.
	t1[0], c = bits.Add64(t1[0], t2[0], 0)
	t1[1], c = bits.Add64(t1[1], t2[1], c)
	t1[2], c = bits.Add64(t1[2], t2[2], c)
	t1[3], c = bits.Add64(t1[3], t2[3], c)
	t1[4], c = bits.Add64(t1[4], t2[4], c)
	t1[5], c = bits.Add64(t1[5], t2[5], c)
	t1[6], c = bits.Add64(t1[6], t2[6], c)
	t1[7], c = bits.Add64(t1[7], t2[7], c)
	t1[8], _ = bits.Add64(t1[8], t2[8], c)

	// r = (s + x[numWords-1]*y + q*p) / R
	_, c = bits.Add64(t1[0], s[0], 0)
	r[0], c = bits.Add64(t1[1], s[1], c)
	r[1], c = bits.Add64(t1[2], s[2], c)
	r[2], c = bits.Add64(t1[3], s[3], c)
	r[3], c = bits.Add64(t1[4], s[4], c)
	r[4], c = bits.Add64(t1[5], s[5], c)
	r[5], c = bits.Add64(t1[6], s[6], c)
	r[6], c = bits.Add64(t1[7], s[7], c)
	r[7], _ = bits.Add64(t1[8], 0, c)
}

// Returns result of x<y operation.
func isLess(x, y *fp) bool {
for i := numWords - 1; i >= 0; i-- {
Expand Down
26 changes: 20 additions & 6 deletions dh/csidh/fp511_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,37 @@

package csidh

import "math/bits"
import (
"math/bits"

//go:noescape
func mul512(a, b *fp, c uint64)
"golang.org/x/sys/cpu"
)

// CPU capability flags, initialised from golang.org/x/sys/cpu when the
// package is loaded.
var (
// Signals support for BMI2 (MULX)
hasBMI2 = cpu.X86.HasBMI2
// Signals support for ADX and BMI2
hasADXandBMI2 = cpu.X86.HasBMI2 && cpu.X86.HasADX
)

// NOTE(review): blank use of hasBMI2, presumably so that linters do not
// report the flag as unused when it is only read from assembly code; confirm.
var _ = hasBMI2

// mul512 forwards to mul512Amd64, the body-less (assembly-backed)
// declaration in this file.
func mul512(r, m1 *fp, m2 uint64) {
	mul512Amd64(r, m1, m2)
}

// cswap512 forwards to cswap512Amd64, the body-less (assembly-backed)
// declaration in this file.
func cswap512(x, y *fp, choice uint8) {
	cswap512Amd64(x, y, choice)
}

// mulRdc forwards to mulRdcAmd64, the amd64 implementation of the reduced
// Montgomery multiplication r = x * y mod P.
func mulRdc(r, x, y *fp) {
	mulRdcAmd64(r, x, y)
}

//go:noescape
func mul576(a *[9]uint64, b *fp, c uint64)
func mul512Amd64(a, b *fp, c uint64)

//go:noescape
func cswap512(x, y *fp, choice uint8)
func cswap512Amd64(x, y *fp, choice uint8)

//go:noescape
func mulBmiAsm(res, x, y *fp)

// mulRdc performs montgomery multiplication r = x * y mod P.
// Returned result r is already reduced and in Montgomery domain.
func mulRdc(r, x, y *fp) {
func mulRdcAmd64(r, x, y *fp) {
var t fp
var c uint64

Expand Down
34 changes: 4 additions & 30 deletions dh/csidh/fp511_amd64.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build amd64,!noasm
// +build amd64

#include "textflag.h"

Expand All @@ -9,8 +9,8 @@
//
// Registers used: AX, CX, DX, SI, DI, R8
//
// func mul512(a, b *Fp, c uint64)
TEXT ·mul512(SB), NOSPLIT, $0-24
// func mul512Amd64(a, b *Fp, c uint64)
TEXT ·mul512Amd64(SB), NOSPLIT, $0-24
MOVQ a+0(FP), DI // result
MOVQ b+8(FP), SI // multiplicand

Expand Down Expand Up @@ -42,33 +42,7 @@ mul512_mulx:
MULXQ 56(SI), AX, R11; ADCQ R10, AX; MOVQ AX, 56(DI) // x[7]
RET

// Multiplies a 512-bit value by a 64-bit value and returns the full 576-bit
// product. Each step uses the MULQ instruction to multiply two 64-bit words
// and carries the high half of the product into the next word.
//
// Result: a = (b * c)
//
// Registers used: AX, DX, SI, DI, R10, R11
//
// func mul576(a *[9]uint64, b *fp, c uint64)
TEXT ·mul576(SB), NOSPLIT, $0-24
MOVQ a+0(FP), DI // result
MOVQ b+8(FP), SI // multiplicand

// R10 holds the multiplier; R11 carries the high word of the previous
// product into the next step.
MOVQ c+16(FP), R10 // 64 bit multiplier, used by MULQ
MOVQ R10, AX; MULQ 0(SI); MOVQ DX, R11; MOVQ AX, 0(DI) //x[0]
MOVQ R10, AX; MULQ 8(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 8(DI) //x[1]
MOVQ R10, AX; MULQ 16(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 16(DI) //x[2]
MOVQ R10, AX; MULQ 24(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 24(DI) //x[3]
MOVQ R10, AX; MULQ 32(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 32(DI) //x[4]
MOVQ R10, AX; MULQ 40(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 40(DI) //x[5]
MOVQ R10, AX; MULQ 48(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 48(DI) //x[6]
MOVQ R10, AX; MULQ 56(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ AX, 56(DI) //x[7]
MOVQ DX, 64(DI) //x[8], the final carry word

RET


TEXT ·cswap512(SB),NOSPLIT,$0-17
TEXT ·cswap512Amd64(SB),NOSPLIT,$0-17
MOVQ x+0(FP), DI
MOVQ y+8(FP), SI
MOVBLZX choice+16(FP), AX // AL = 0 or 1
Expand Down
75 changes: 66 additions & 9 deletions dh/csidh/fp511_generic.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
// +build noasm arm64

package csidh

import "math/bits"

// mul512Generic implements schoolbook multiplication of
// 64x512-bit integer. Returns result modulo 2^512.
// r = m1*m2
func mul512(r, m1 *fp, m2 uint64) {
// r = m1*m2.
func mul512Generic(r, m1 *fp, m2 uint64) {
var c, h, l uint64

c, r[0] = bits.Mul64(m2, m1[0])
Expand Down Expand Up @@ -43,8 +41,8 @@ func mul512(r, m1 *fp, m2 uint64) {
// mul576 implements schoolbook multiplication of
// 64x512-bit integer. Returns 576-bit result of
// multiplication.
// r = m1*m2
func mul576(r *[9]uint64, m1 *fp, m2 uint64) {
// r = m1*m2.
func mul576Generic(r *[9]uint64, m1 *fp, m2 uint64) {
var c, h, l uint64

c, r[0] = bits.Mul64(m2, m1[0])
Expand Down Expand Up @@ -82,7 +80,7 @@ func mul576(r *[9]uint64, m1 *fp, m2 uint64) {
// cswap512 implements constant time swap operation.
// If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x.
// If choice is neither 0 nor 1 then behaviour is undefined.
func cswap512(x, y *fp, choice uint8) {
func cswap512Generic(x, y *fp, choice uint8) {
var tmp uint64
mask64 := 0 - uint64(choice)

Expand All @@ -95,7 +93,7 @@ func cswap512(x, y *fp, choice uint8) {

// mulRdc performs montgomery multiplication r = x * y mod P.
// Returned result r is already reduced and in Montgomery domain.
func mulRdc(r, x, y *fp) {
func mulRdcGeneric(r, x, y *fp) {
var t fp
var c uint64

Expand All @@ -111,7 +109,7 @@ func mulRdc(r, x, y *fp) {
t[6], c = bits.Sub64(r[6], p[6], c)
t[7], c = bits.Sub64(r[7], p[7], c)

var w = uint64(0 - uint64(c))
w := 0 - c
r[0] = ctPick64(w, r[0], t[0])
r[1] = ctPick64(w, r[1], t[1])
r[2] = ctPick64(w, r[2], t[2])
Expand All @@ -121,3 +119,62 @@ func mulRdc(r, x, y *fp) {
r[6] = ctPick64(w, r[6], t[6])
r[7] = ctPick64(w, r[7], t[7])
}

// mulGeneric multiplies x by y with a word-by-word interleaved
// multiply-and-reduce and writes the 8-word result to r.
//
// Every round i does the following:
//   - picks q from the low words of x[i], y, s and pNegInv[0]
//     (presumably pNegInv[0] is -p^-1 mod 2^64, which makes the low word of
//     the round's total zero; confirm against the constant's definition),
//   - forms the 9-word sum x[i]*y + q*p,
//   - adds the running value s and drops the lowest word, i.e. shifts the
//     total right by one 64-bit word.
//
// The first numWords-1 rounds keep the running value in s; the last round
// writes straight into r. r is only written after all reads of x and y for
// that round are done. No final subtraction of p is performed here.
func mulGeneric(r, x, y *fp) {
	var s fp // keeps intermediate results
	var t1, t2 [9]uint64
	var c, q uint64

	for i := 0; i < numWords-1; i++ {
		q = ((x[i] * y[0]) + s[0]) * pNegInv[0]
		mul576Generic(&t1, &p, q)
		mul576Generic(&t2, y, x[i])

		// x[i]*y + q_i*p
		t1[0], c = bits.Add64(t1[0], t2[0], 0)
		t1[1], c = bits.Add64(t1[1], t2[1], c)
		t1[2], c = bits.Add64(t1[2], t2[2], c)
		t1[3], c = bits.Add64(t1[3], t2[3], c)
		t1[4], c = bits.Add64(t1[4], t2[4], c)
		t1[5], c = bits.Add64(t1[5], t2[5], c)
		t1[6], c = bits.Add64(t1[6], t2[6], c)
		t1[7], c = bits.Add64(t1[7], t2[7], c)
		t1[8], _ = bits.Add64(t1[8], t2[8], c)

		// s = (s + x[i]*y + q_i * p) / R
		// The low word of the sum is discarded; only its carry is kept.
		_, c = bits.Add64(t1[0], s[0], 0)
		s[0], c = bits.Add64(t1[1], s[1], c)
		s[1], c = bits.Add64(t1[2], s[2], c)
		s[2], c = bits.Add64(t1[3], s[3], c)
		s[3], c = bits.Add64(t1[4], s[4], c)
		s[4], c = bits.Add64(t1[5], s[5], c)
		s[5], c = bits.Add64(t1[6], s[6], c)
		s[6], c = bits.Add64(t1[7], s[7], c)
		s[7], _ = bits.Add64(t1[8], 0, c)
	}

	// last iteration stores result in r
	q = ((x[numWords-1] * y[0]) + s[0]) * pNegInv[0]
	mul576Generic(&t1, &p, q)
	mul576Generic(&t2, y, x[numWords-1])

	// x[numWords-1]*y + q*p. This is a fresh addition chain, so it starts
	// with a carry-in of 0 exactly like the rounds inside the loop; the carry
	// left in c by the previous chain must not leak into it.
	t1[0], c = bits.Add64(t1[0], t2[0], 0)
	t1[1], c = bits.Add64(t1[1], t2[1], c)
	t1[2], c = bits.Add64(t1[2], t2[2], c)
	t1[3], c = bits.Add64(t1[3], t2[3], c)
	t1[4], c = bits.Add64(t1[4], t2[4], c)
	t1[5], c = bits.Add64(t1[5], t2[5], c)
	t1[6], c = bits.Add64(t1[6], t2[6], c)
	t1[7], c = bits.Add64(t1[7], t2[7], c)
	t1[8], _ = bits.Add64(t1[8], t2[8], c)

	// r = (s + x[numWords-1]*y + q*p) / R
	_, c = bits.Add64(t1[0], s[0], 0)
	r[0], c = bits.Add64(t1[1], s[1], c)
	r[1], c = bits.Add64(t1[2], s[2], c)
	r[2], c = bits.Add64(t1[3], s[3], c)
	r[3], c = bits.Add64(t1[4], s[4], c)
	r[4], c = bits.Add64(t1[5], s[5], c)
	r[5], c = bits.Add64(t1[6], s[6], c)
	r[6], c = bits.Add64(t1[7], s[7], c)
	r[7], _ = bits.Add64(t1[8], 0, c)
}
Loading