From 363739eee30345d4dd4626e8d07a68f3d26e8dc3 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Tue, 7 Jan 2025 10:17:21 -0600 Subject: [PATCH 01/25] refactor: remove sis maker, and bn254 dead code --- ecc/bls12-377/fr/sis/sis.go | 15 - ecc/bn254/fr/sis/sis.go | 468 ---------------- ecc/bn254/fr/sis/sis.sage | 272 ---------- ecc/bn254/fr/sis/sis_test.go | 488 ----------------- ecc/bn254/fr/sis/test_cases.json | 86 --- ecc/bn254/fr/tensor-commitment/commitment.go | 475 ---------------- .../fr/tensor-commitment/commitment_test.go | 509 ------------------ field/babybear/sis/sis.go | 15 - .../internal/templates/sis/sis.go.tmpl | 15 - field/goldilocks/sis/sis.go | 15 - field/koalabear/sis/sis.go | 15 - 11 files changed, 2373 deletions(-) delete mode 100644 ecc/bn254/fr/sis/sis.go delete mode 100644 ecc/bn254/fr/sis/sis.sage delete mode 100644 ecc/bn254/fr/sis/sis_test.go delete mode 100644 ecc/bn254/fr/sis/test_cases.json delete mode 100644 ecc/bn254/fr/tensor-commitment/commitment.go delete mode 100644 ecc/bn254/fr/tensor-commitment/commitment_test.go diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 9d4e82fe45..d482377e8f 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -9,7 +9,6 @@ import ( "bytes" "encoding/binary" "errors" - "hash" "math/bits" "github.com/bits-and-blooms/bitset" @@ -229,20 +228,6 @@ func (r *RSis) BlockSize() int { return 0 } -// Construct a hasher generator. 
It takes as input the same parameters -// as `NewRingSIS` and outputs a function which returns fresh hasher -// everytime it is called -func NewRingSISMaker(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (func() hash.Hash, error) { - return func() hash.Hash { - h, err := NewRSis(seed, logTwoDegree, logTwoBound, maxNbElementsToHash) - if err != nil { - panic(err) - } - return h - }, nil - -} - func genRandom(seed, i, j int64, buf *bytes.Buffer) fr.Element { buf.Reset() diff --git a/ecc/bn254/fr/sis/sis.go b/ecc/bn254/fr/sis/sis.go deleted file mode 100644 index 5af87d5e1b..0000000000 --- a/ecc/bn254/fr/sis/sis.go +++ /dev/null @@ -1,468 +0,0 @@ -// Copyright 2020-2024 Consensys Software Inc. -// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. - -// Code generated by consensys/gnark-crypto DO NOT EDIT - -package sis - -import ( - "bytes" - "encoding/binary" - "errors" - "hash" - "math/bits" - - "github.com/bits-and-blooms/bitset" - "github.com/consensys/gnark-crypto/ecc/bn254/fr" - "github.com/consensys/gnark-crypto/ecc/bn254/fr/fft" - "github.com/consensys/gnark-crypto/internal/parallel" - "golang.org/x/crypto/blake2b" -) - -var ( - ErrNotAPowerOfTwo = errors.New("d must be a power of 2") -) - -// Ring-SIS instance -type RSis struct { - - // buffer storing the data to hash - buffer bytes.Buffer - - // Vectors in ℤ_{p}/Xⁿ+1 - // A[i] is the i-th polynomial. - // Ag the evaluation form of the polynomials in A on the coset √(g) * - A [][]fr.Element - Ag [][]fr.Element - - // LogTwoBound (Infinity norm) of the vector to hash. It means that each component in m - // is < 2^B, where m is the vector to hash (the hash being A*m). - // cf https://hackmd.io/7OODKWQZRRW9RxM5BaXtIw , B >= 3. 
- LogTwoBound int - - // domain for the polynomial multiplication - Domain *fft.Domain - twiddleCosets []fr.Element // see FFT64 and precomputeTwiddlesCoset - - // d, the degree of X^{d}+1 - Degree int - - // in bytes, represents the maximum number of bytes the .Write(...) will handle; - // ( maximum number of bytes to sum ) - capacity int - maxNbElementsToHash int - - // allocate memory once per instance (used in Sum()) - bufM, bufRes fr.Vector - bufMValues *bitset.BitSet -} - -// NewRSis creates an instance of RSis. -// seed: seed for the randomness for generating A. -// logTwoDegree: if d := logTwoDegree, the ring will be ℤ_{p}[X]/Xᵈ-1, where X^{2ᵈ} is the 2ᵈ⁺¹-th cyclotomic polynomial -// logTwoBound: the bound of the vector to hash (using the infinity norm). -// maxNbElementsToHash: maximum number of field elements the instance handles -// used to derived n, the number of polynomials in A, and max size of instance's internal buffer. -func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*RSis, error) { - - if logTwoBound > 64 { - return nil, errors.New("logTwoBound too large") - } - if bits.UintSize == 32 { - return nil, errors.New("unsupported architecture; need 64bit target") - } - - degree := 1 << logTwoDegree - capacity := maxNbElementsToHash * fr.Bytes - - // n: number of polynomials in A - // len(m) == degree * n - // with each element in m being logTwoBounds bits from the instance buffer. 
- // that is, to fill m, we need [degree * n * logTwoBound] bits of data - // capacity == [degree * n * logTwoBound] / 8 - // n == (capacity*8)/(degree*logTwoBound) - - // First n <- #limbs to represent a single field element - n := (fr.Bytes * 8) / logTwoBound - if n*logTwoBound < fr.Bytes*8 { - n++ - } - - // Then multiply by the number of field elements - n *= maxNbElementsToHash - - // And divide (+ ceil) to get the number of polynomials - if n%degree == 0 { - n /= degree - } else { - n /= degree // number of polynomials - n++ - } - - // domains (shift is √{gen} ) - shift, err := fr.Generator(uint64(2 * degree)) - if err != nil { - return nil, err - } - - r := &RSis{ - LogTwoBound: logTwoBound, - capacity: capacity, - Degree: degree, - Domain: fft.NewDomain(uint64(degree), fft.WithShift(shift)), - A: make([][]fr.Element, n), - Ag: make([][]fr.Element, n), - bufM: make(fr.Vector, degree*n), - bufRes: make(fr.Vector, degree), - bufMValues: bitset.New(uint(n)), - maxNbElementsToHash: maxNbElementsToHash, - } - if r.LogTwoBound == 8 && r.Degree == 64 { - // TODO @gbotrel fixme, that's dirty. - r.twiddleCosets = PrecomputeTwiddlesCoset(r.Domain.Generator, r.Domain.FrMultiplicativeGen) - } - - // filling A - a := make([]fr.Element, n*r.Degree) - ag := make([]fr.Element, n*r.Degree) - - parallel.Execute(n, func(start, end int) { - var buf bytes.Buffer - for i := start; i < end; i++ { - rstart, rend := i*r.Degree, (i+1)*r.Degree - r.A[i] = a[rstart:rend:rend] - r.Ag[i] = ag[rstart:rend:rend] - for j := 0; j < r.Degree; j++ { - r.A[i][j] = genRandom(seed, int64(i), int64(j), &buf) - } - - // fill Ag the evaluation form of the polynomials in A on the coset √(g) * - copy(r.Ag[i], r.A[i]) - r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset()) - } - }) - - return r, nil -} - -func (r *RSis) Write(p []byte) (n int, err error) { - r.buffer.Write(p) - return len(p), nil -} - -// Sum appends the current hash to b and returns the resulting slice. 
-// It does not change the underlying hash state. -// The instance buffer is interpreted as a sequence of coefficients of size r.Bound bits long. -// The function returns the hash of the polynomial as a a sequence []fr.Elements, interpreted as []bytes, -// corresponding to sum_i A[i]*m Mod X^{d}+1 -func (r *RSis) Sum(b []byte) []byte { - buf := r.buffer.Bytes() - if len(buf) > r.capacity { - panic("buffer too large") - } - - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - - // clear the buffers of the instance. - defer r.cleanupBuffers() - - m := r.bufM - mValues := r.bufMValues - - if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { - limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else if r.LogTwoBound >= 8 && (fr.Bytes*8)%r.LogTwoBound == 0 { - limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else { - limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) - } - - // we can hash now. - res := r.bufRes - - // method 1: fft - for i := 0; i < len(r.Ag); i++ { - if !mValues.Test(uint(i)) { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue - } - k := m[i*r.Degree : (i+1)*r.Degree] - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - mulModAcc(res, r.Ag[i], k) - } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 - - resBytes, err := res.MarshalBinary() - if err != nil { - panic(err) - } - - return append(b, resBytes[4:]...) // first 4 bytes are uint32(len(res)) -} - -// Reset resets the Hash to its initial state. -func (r *RSis) Reset() { - r.buffer.Reset() -} - -// Size returns the number of bytes Sum will return. -func (r *RSis) Size() int { - - // The size in bits is the size in bits of a polynomial in A. 
- degree := len(r.A[0]) - totalSize := degree * fr.Modulus().BitLen() / 8 - - return totalSize -} - -// BlockSize returns the hash's underlying block size. -// The Write method must be able to accept any amount -// of data, but it may operate more efficiently if all writes -// are a multiple of the block size. -func (r *RSis) BlockSize() int { - return 0 -} - -// Construct a hasher generator. It takes as input the same parameters -// as `NewRingSIS` and outputs a function which returns fresh hasher -// everytime it is called -func NewRingSISMaker(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (func() hash.Hash, error) { - return func() hash.Hash { - h, err := NewRSis(seed, logTwoDegree, logTwoBound, maxNbElementsToHash) - if err != nil { - panic(err) - } - return h - }, nil - -} - -func genRandom(seed, i, j int64, buf *bytes.Buffer) fr.Element { - - buf.Reset() - buf.WriteString("SIS") - binary.Write(buf, binary.BigEndian, seed) - binary.Write(buf, binary.BigEndian, i) - binary.Write(buf, binary.BigEndian, j) - - digest := blake2b.Sum256(buf.Bytes()) - - var res fr.Element - res.SetBytes(digest[:]) - - return res -} - -// mulMod computes p * q in ℤ_{p}[X]/Xᵈ+1. -// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes -// and that they are in evaluation form on √(g) * -// The result is not FFTinversed. The fft inverse is done once every -// multiplications are done. -func mulMod(pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []fr.Element) []fr.Element { - - res := make([]fr.Element, len(pLagrangeCosetBitReversed)) - for i := 0; i < len(pLagrangeCosetBitReversed); i++ { - res[i].Mul(&pLagrangeCosetBitReversed[i], &qLagrangeCosetBitReversed[i]) - } - - // NOT fft inv for now, wait until every part of the keys have been multiplied - // r.Domain.FFTInverse(res, fft.DIT, true) - - return res - -} - -// mulMod + accumulate in res. 
-func mulModAcc(res []fr.Element, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []fr.Element) { - var t fr.Element - for i := 0; i < len(pLagrangeCosetBitReversed); i++ { - t.Mul(&pLagrangeCosetBitReversed[i], &qLagrangeCosetBitReversed[i]) - res[i].Add(&res[i], &t) - } -} - -// Returns a clone of the RSis parameters with a fresh and empty buffer. Does not -// mutate the current instance. The keys and the public parameters of the SIS -// instance are not deep-copied. It is useful when we want to hash in parallel. -// Otherwise, we would have to generate an entire RSis for each thread. -func (r *RSis) CopyWithFreshBuffer() RSis { - res := *r - res.buffer = bytes.Buffer{} - res.bufM = make(fr.Vector, len(r.bufM)) - res.bufMValues = bitset.New(r.bufMValues.Len()) - res.bufRes = make(fr.Vector, len(r.bufRes)) - return res -} - -// Cleanup the buffers of the RSis instance -func (r *RSis) cleanupBuffers() { - r.bufMValues.ClearAll() - for i := 0; i < len(r.bufM); i++ { - r.bufM[i].SetZero() - } - for i := 0; i < len(r.bufRes); i++ { - r.bufRes[i].SetZero() - } -} - -// Split an slice of bytes representing an array of serialized field element in -// big-endian form into an array of limbs representing the same field elements -// in little-endian form. Namely, if our field is represented with 64 bits and we -// have the following field element 0x0123456789abcdef (0 being the most significant -// character and and f being the least significant one) and our log norm bound is -// 16 (so 1 hex character = 1 limb). The function assigns the values of m to [f, e, -// d, c, b, a, ..., 3, 2, 1, 0]. m should be preallocated and zeroized. Additionally, -// we have the guarantee that 2 bits contributing to different field elements cannot -// be part of the same limb. -func LimbDecomposeBytes(buf []byte, m fr.Vector, logTwoBound int) { - limbDecomposeBytes(buf, m, logTwoBound, 0, nil) -} - -// decomposes m as by taking chunks of logTwoBound bits at a time. 
The buffer is interpreted like this: -// [0xa, .. , 0x1 | 0xa ... ] -// -// <- #bytes in a field element -> -// <-0xa is the MSB, 0x1 the LSB-> -// <-we read this chunk from right -// to left -> -// -// This function is called when logTwoBound divides the number of bits used to represent a -// fr element. -// From a slice of field elements m:=[a_0, a_1, ...] -// Doing h.Sum(h.Write([Marshal[a_i] for i in len(m)])) is the same than -// writing the a_i in little endian, and then taking logTwoBound bits at a time. -// -// ex: m := [0x1, 0x3] -// in the hash buffer, it is interpreted like that as a stream of bits: -// [100...0 110...0] corresponding to [0x1, 0x3] in little endian, so first bit = LSbit -// then the stream of bits is splitted in chunks of logTwoBound bits. -// -// This function is called when logTwoBound divides 8. -func limbDecomposeBytesSmallBound(buf []byte, m fr.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - mask := byte((1 << logTwoBound) - 1) - nbChunksPerBytes := 8 / logTwoBound - nbFieldsElmts := len(buf) / fr.Bytes - for i := 0; i < nbFieldsElmts; i++ { - for j := fr.Bytes - 1; j >= 0; j-- { - curByte := buf[i*fr.Bytes+j] - curPos := i*fr.Bytes*nbChunksPerBytes + (fr.Bytes-1-j)*nbChunksPerBytes - for k := 0; k < nbChunksPerBytes; k++ { - - m[curPos+k][0] = uint64((curByte >> (k * logTwoBound)) & mask) - - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[curPos+k][0] != 0 && mValues != nil { - mValues.Set(uint((curPos + k) / degree)) - } - } - } - } -} - -// limbDecomposeBytesMiddleBound same function than limbDecomposeBytesSmallBound, but logTwoBound is -// a multiple of 8, and divides the number of bits of the fields. 
-func limbDecomposeBytesMiddleBound(buf []byte, m fr.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - nbFieldsElmts := len(buf) / fr.Bytes - nbChunksPerElements := fr.Bytes * 8 / logTwoBound - nbBytesInChunk := logTwoBound / 8 - curElmt := 0 - for i := 0; i < nbFieldsElmts; i++ { - for j := nbChunksPerElements; j > 0; j-- { - curPos := i*fr.Bytes + j*nbBytesInChunk - for k := 1; k <= nbBytesInChunk; k++ { - - m[curElmt][0] |= (uint64(buf[curPos-k]) << ((k - 1) * 8)) - - } - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[curElmt][0] != 0 && mValues != nil { - mValues.Set(uint((curElmt) / degree)) - } - curElmt += 1 - } - } -} - -// Split an slice of bytes representing an array of serialized field element in -// big-endian form into an array of limbs representing the same field elements -// in little-endian form. Namely, if our field is represented with 64 bits and we -// have the following field element 0x0123456789abcdef (0 being the most significant -// character and and f being the least significant one) and our log norm bound is -// 16 (so 1 hex character = 1 limb). The function assigns the values of m to [f, e, -// d, c, b, a, ..., 3, 2, 1, 0]. m should be preallocated and zeroized. mValues is -// an optional bitSet. If provided, it must be empty. The function will set bit "i" -// to indicate the that i-th SIS input polynomial should be non-zero. Recall, that a -// SIS polynomial corresponds to a chunk of limbs of size `degree`. Additionally, -// we have the guarantee that 2 bits contributing to different field elements cannot -// be part of the same limb. -func limbDecomposeBytes(buf []byte, m fr.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - - // bitwise decomposition of the buffer, in order to build m (the vector to hash) - // as a list of polynomials, whose coefficients are less than r.B bits long. - // Say buf=[0xbe,0x0f]. As a stream of bits it is interpreted like this: - // 10111110 00001111. 
getIthBit(0)=1 (=leftmost bit), getIthBit(1)=0 (=second leftmost bit), etc. - nbBits := len(buf) * 8 - getIthBit := func(i int) uint8 { - k := i / 8 - if k >= len(buf) { - return 0 - } - b := buf[k] - j := i % 8 - return b >> (7 - j) & 1 - } - - // we process the input buffer by blocks of r.LogTwoBound bits - // each of these block (<< 64bits) are interpreted as a coefficient - mPos := 0 - for fieldStart := 0; fieldStart < nbBits; { - for bitInField := 0; bitInField < fr.Bytes*8; { - - j := bitInField % logTwoBound - - // r.LogTwoBound < 64; we just use the first word of our element here, - // and set the bits from LSB to MSB. - at := fieldStart + fr.Bytes*8 - bitInField - 1 - - m[mPos][0] |= uint64(getIthBit(at) << j) - - bitInField++ - - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[mPos][0] != 0 && mValues != nil { - mValues.Set(uint(mPos / degree)) - } - - if j == logTwoBound-1 || bitInField == fr.Bytes*8 { - mPos++ - } - } - fieldStart += fr.Bytes * 8 - } -} - -// see limbDecomposeBytes; this function is optimized for the case where -// logTwoBound == 8 and degree == 64 -func limbDecomposeBytes8_64(buf []byte, m fr.Vector, mValues *bitset.BitSet) { - // with logTwoBound == 8, we can actually advance byte per byte. 
- const degree = 64 - j := 0 - - for startPos := fr.Bytes - 1; startPos < len(buf); startPos += fr.Bytes { - for i := startPos; i >= startPos-fr.Bytes+1; i-- { - - m[j][0] = uint64(buf[i]) - - if m[j][0] != 0 { - mValues.Set(uint(j / degree)) - } - j++ - } - } -} diff --git a/ecc/bn254/fr/sis/sis.sage b/ecc/bn254/fr/sis/sis.sage deleted file mode 100644 index 723749f386..0000000000 --- a/ecc/bn254/fr/sis/sis.sage +++ /dev/null @@ -1,272 +0,0 @@ -## "sage sis.sage" will generate test_cases.json -## tested with a fresh sage install on macOS (Feb 2023) - -import json - -# BN254 Fr -r = 21888242871839275222246405745257275088548364400416034343698204186575808495617 -frByteSize = 32 -countToDeath = int(5) -gfr = GF(r) -Fr = GF(r) -Fr. = Fr[] -rz = IntegerRing() - -# Montgomery constant -rr = Fr(2**256) - -# utils - - -def buildPoly(a): - """ Builds a poly from the array a - - Args: - a an array - - Returns: - a[0]+a[1]*X + .. + a[n]*X**n - """ - - res = Fr(0) - for i, v in enumerate(a): - res += Fr(v)*x**i - return res - - -def bitAt(i, b): - """ - Args: - i: index of the bit to retrieve - b: array of bytes - - Returns: - the i-th bit of b, when it is written b[0] || b[1] || ... - """ - k = i//8 - if k >= len(b): - return 0 - j = i % 8 - return (b[k] >> (7-j)) & 1 - - -def toBytes(m, s): - """ - - Args: - m: a bit int - s: the expected number of bytes of the result. If s is bigger than the - number of bytes in m, the remaining bytes are set to zero. - - Returns: - the byte representation of m as a byte array, as - in gnark-crypto. - """ - _m = rz(m) - res = s*[0] - mask = 255 - for i in range(s): - res[s-1-i] = _m & 255 - _m = _m >> 8 - return res - - -def splitCoeffs(b, logTwoBound): - """ - Args: - b: an array of bytes - logTwoBound: number of bits of the bound - - Returns: - an array of coeffs, each coeff being the i-th chunk of logTwoBounds bits of b. - The coeffs are formed as follow. 
The input byte string is implicitly parsed as - a slice of field elements of 32 bytes each in bigendian-natural form. the outputs - are in a little-endian form. That is, each chunk of size 256 / logTwoBounds of the - output can be seen as a polynomial, such that, when evaluated at 2 we get the original - field element. - """ - nbBits = len(b)*8 - res = [] - i = 0 - - if len(b) % frByteSize != 0: - exit("the length of b should divide the field size") - - # The number of fields that we are parsing. In case we have that - # logTwoBound does not divide the number of bits to represent a - # field element, we do not merge them. - nbField = len(b) / 32 - nbBitsInField = int(frByteSize * 8) - - for fieldID in range(nbField): - fieldStart = fieldID * 256 - e = 0 - for bitInField in range(nbBitsInField): - j = bitInField % logTwoBound - at = fieldStart + nbBitsInField - 1 - bitInField - e |= bitAt(at, b) << j - # Switch to a new limb - if j == logTwoBound - 1 or bitInField == frByteSize * 8 - 1: - res.append(e) - e = 0 - - # careful Montgomery constant... - return [Fr(e)*rr**-1 for e in res] - - -def polyRand(seed, n): - """ Generates a pseudo random polynomial of size n from seed. - - Args: - seed: seed for the pseudo random gen - n: degree of the polynomial - """ - seed = gfr(seed) - a = n*[0] - for i in range(n): - a[i] = seed**2 - seed = a[i] - return buildPoly(a) - - -# SIS -class SIS: - def __init__(self, seed, logTwoDegree, logTwoBound, maxNbElementsToHash): - """ - Args: - seed - logTwoDegree: - logTwoBound: bound of SIS - maxNbElementsToHash - """ - capacity = maxNbElementsToHash * frByteSize - degree = 1 << logTwoDegree - - n = capacity * 8 / logTwoBound # number of coefficients - if n % degree == 0: # check how sage / python rounds the int div. 
- n = n / degree - else: - n = n / degree - n = n + 1 - - n = int(n) - - self.logTwoBound = logTwoBound - self.degree = degree - self.size = n - self.key = n * [0] - for i in range(n): - self.key[i] = polyRand(seed, self.degree) - seed += 1 - - def hash(self, inputs): - """ - Args: - inputs is a vector of Fr elements - - Returns: - the sis hash of m. - """ - b = [] - for i in inputs: - b.extend(toBytes(i, 32)) - - return self.hash_bytes(b) - - def hash_bytes(self, b): - """ - Args: - b is a list of bytes to hash - - Returns: - the sis hash of m. - """ - # step 1: build the polynomials from m - c = splitCoeffs(b, self.logTwoBound) - mp = [buildPoly(c[self.degree*i:self.degree*(i+1)]) - for i in range(self.size)] - - # step 2: compute sum_i mp[i]*key[i] mod X^n+1 - modulo = x**self.degree+1 - res = 0 - for i in range(self.size): - res += self.key[i]*mp[i] - res = res % modulo - return res - - -def vectorToString(v): - # v is a vector of field elements - # we return a list of strings in base10 - r = [] - for e in v: - r.append("0x"+rz(e).hex()) - return r - - -def SISParams(seed, logTwoDegree, logTwoBound, maxNbElementsToHash): - p = {} - p['seed'] = int(seed) - p['logTwoDegree'] = int(logTwoDegree) - p['logTwoBound'] = int(logTwoBound) - p['maxNbElementsToHash'] = int(maxNbElementsToHash) - return p - -params = [ - SISParams(5, 2, 3, 10), - SISParams(5, 4, 3, 10), - SISParams(5, 4, 4, 10), - SISParams(5, 5, 4, 10), - SISParams(5, 6, 5, 10), - # SISParams(5, 8, 6, 10), - SISParams(5, 10, 6, 10), - SISParams(5, 11, 7, 10), - SISParams(5, 12, 7, 10), -] - -inputs = [ - [Fr(21888242871839275222246405745257275088548364400416034343698204186575808495614)], - [Fr(1)], - [Fr(42),Fr(8000)], - [Fr(1),Fr(2), Fr(0),Fr(21888242871839275222246405745257275088548364400416034343698204186575808495616)], - [Fr(1), Fr(0)], - [Fr(0), Fr(1)], - [Fr(0)], - [Fr(0),Fr(0),Fr(0),Fr(0)], - [Fr(0),Fr(0),Fr(8000),Fr(0)], -] - -# sprinkle some random elements -for i in range(10): - line = [] - for 
j in range(i): - line.append(gfr.random_element()) - inputs.append(line) - -testCases = {} -testCases['inputs'] = [] -testCases['entries'] = [] - - -for i, v in enumerate(inputs): - testCases['inputs'].append(vectorToString(v)) - - -for p in params: - entry = {} - entry['params'] = p - entry['expected'] = [] - - print("generating test cases with SIS params " + json.dumps(p)) - instance = SIS(p['seed'], p['logTwoDegree'], p['logTwoBound'], p['maxNbElementsToHash']) - for i, v in enumerate(inputs): - # hash the vector - hResult = instance.hash(v) - entry['expected'].append(vectorToString(hResult)) - - testCases['entries'].append(entry) - - -testCases_json = json.dumps(testCases, indent=4) -with open("test_cases.json", "w") as outfile: - outfile.write(testCases_json) diff --git a/ecc/bn254/fr/sis/sis_test.go b/ecc/bn254/fr/sis/sis_test.go deleted file mode 100644 index 08431ad778..0000000000 --- a/ecc/bn254/fr/sis/sis_test.go +++ /dev/null @@ -1,488 +0,0 @@ -// Copyright 2020-2024 Consensys Software Inc. -// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. 
- -// Code generated by consensys/gnark-crypto DO NOT EDIT - -package sis - -import ( - "bytes" - "crypto/rand" - "encoding/binary" - "encoding/json" - "fmt" - "io" - "math/big" - "math/bits" - "os" - "testing" - "time" - - "github.com/bits-and-blooms/bitset" - "github.com/consensys/gnark-crypto/ecc/bn254/fr" - "github.com/consensys/gnark-crypto/ecc/bn254/fr/fft" - "github.com/stretchr/testify/require" -) - -type sisParams struct { - logTwoBound, logTwoDegree int -} - -var params128Bits []sisParams = []sisParams{ - {logTwoBound: 2, logTwoDegree: 3}, - {logTwoBound: 4, logTwoDegree: 4}, - {logTwoBound: 6, logTwoDegree: 5}, - {logTwoBound: 8, logTwoDegree: 6}, - {logTwoBound: 10, logTwoDegree: 6}, - {logTwoBound: 16, logTwoDegree: 7}, - {logTwoBound: 32, logTwoDegree: 8}, -} - -type TestCases struct { - Inputs []fr.Element `json:"inputs"` - Entries []struct { - Params struct { - Seed int64 `json:"seed"` - LogTwoDegree int `json:"logTwoDegree"` - LogTwoBound int `json:"logTwoBound"` - MaxNbElementsToHash int `json:"maxNbElementsToHash"` - } `json:"params"` - Expected []fr.Element `json:"expected"` - } `json:"entries"` -} - -func TestReference(t *testing.T) { - if bits.UintSize == 32 { - t.Skip("skipping this test in 32bit.") - } - assert := require.New(t) - - // read the test case file - var testCases TestCases - data, err := os.ReadFile("test_cases.json") - assert.NoError(err, "reading test cases failed") - err = json.Unmarshal(data, &testCases) - assert.NoError(err, "reading test cases failed") - - inputs := testCases.Inputs - - for testCaseID, testCase := range testCases.Entries { - - // create the SIS instance - sis, err := NewRSis(testCase.Params.Seed, testCase.Params.LogTwoDegree, testCase.Params.LogTwoBound, testCase.Params.MaxNbElementsToHash) - assert.NoError(err) - - // key generation same than in sage - makeKeyDeterministic(t, sis, testCase.Params.Seed) - - sis.Reset() - - // hash test case entry input and compare with expected (computed by sage) - goHash, 
err := sis.Hash(inputs) - assert.NoError(err) - - assert.EqualValues( - testCase.Expected, goHash, - "mismatch between reference test and computed value (testcase %v)", - testCaseID, - ) - - } - -} - -func TestLimbDecomposeBytesMiddleBound(t *testing.T) { - - var montConstant fr.Element - var bMontConstant big.Int - bMontConstant.SetUint64(1) - bMontConstant.Lsh(&bMontConstant, fr.Bytes*8) - montConstant.SetBigInt(&bMontConstant) - - nbElmts := 10 - a := make([]fr.Element, nbElmts) - for i := 0; i < nbElmts; i++ { - a[i].SetUint64(33) - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) - } - - logTwoBound := 8 - - for cc := 0; cc < 3; cc++ { - m := make(fr.Vector, nbElmts*fr.Bytes*8/logTwoBound) - limbDecomposeBytesMiddleBound(buf.Bytes(), m, logTwoBound, 4, nil) - - for i := 0; i < len(m); i++ { - m[i].Mul(&m[i], &montConstant) - } - - var x fr.Element - x.SetUint64(1 << logTwoBound) - - coeffsPerFieldsElmt := fr.Bytes * 8 / logTwoBound - for i := 0; i < nbElmts; i++ { - r := eval(m[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytes failed") - } - } - logTwoBound *= 2 - } - -} - -func TestLimbDecomposeBytesSmallBound(t *testing.T) { - - var montConstant fr.Element - var bMontConstant big.Int - bMontConstant.SetUint64(1) - bMontConstant.Lsh(&bMontConstant, fr.Bytes*8) - montConstant.SetBigInt(&bMontConstant) - - nbElmts := 10 - a := make([]fr.Element, nbElmts) - for i := 0; i < nbElmts; i++ { - a[i].SetRandom() - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) - } - - logTwoBound := 2 - - for cc := 0; cc < 3; cc++ { - - m := make(fr.Vector, nbElmts*fr.Bytes*8/logTwoBound) - m2 := make(fr.Vector, nbElmts*fr.Bytes*8/logTwoBound) - - // the limbs are set as is, they are NOT converted in Montgomery form - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - limbDecomposeBytesSmallBound(buf.Bytes(), m2, logTwoBound, 4, nil) - - for i := 
0; i < len(m); i++ { - m[i].Mul(&m[i], &montConstant) - m2[i].Mul(&m2[i], &montConstant) - } - var x fr.Element - x.SetUint64(1 << logTwoBound) - - coeffsPerFieldsElmt := fr.Bytes * 8 / logTwoBound - for i := 0; i < nbElmts; i++ { - r := eval(m[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytes failed") - } - r = eval(m2[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytesSmallBound failed") - } - } - logTwoBound *= 2 - } - -} - -func eval(p []fr.Element, x fr.Element) fr.Element { - var res fr.Element - for i := len(p) - 1; i >= 0; i-- { - res.Mul(&res, &x).Add(&res, &p[i]) - } - return res -} - -func TestMulMod(t *testing.T) { - - size := 4 - - p := make([]fr.Element, size) - q := make([]fr.Element, size) - pCopy := make([]fr.Element, size) - qCopy := make([]fr.Element, size) - for i := 0; i < size; i++ { - p[i].SetRandom() - pCopy[i].Set(&p[i]) - q[i].SetRandom() - qCopy[i].Set(&q[i]) - } - - // creation of the domain - shift, err := fr.Generator(uint64(2 * size)) - if err != nil { - t.Fatal(err) - } - var g fr.Element - g.Square(&shift) - domain := fft.NewDomain(uint64(size), fft.WithShift(shift)) - - // mul mod - domain.FFT(p, fft.DIF, fft.OnCoset()) - domain.FFT(q, fft.DIF, fft.OnCoset()) - r := mulMod(p, q) - domain.FFTInverse(r, fft.DIT, fft.OnCoset()) - - // manually check the product on the zeroes of X^4+1 - for i := 0; i < 4; i++ { - u := eval(pCopy, shift) - v := eval(qCopy, shift) - w := eval(r, shift) - u.Mul(&u, &v) - if !w.Equal(&u) { - t.Fatal("mul mol failed") - } - shift.Mul(&shift, &g) - } - -} - -func makeKeyDeterministic(t *testing.T, sis *RSis, _seed int64) { - t.Helper() - // generate the key deterministically, the same way - // we do in sage to generate the test vectors. 
- - polyRand := func(seed fr.Element, deg int) []fr.Element { - res := make([]fr.Element, deg) - for i := 0; i < deg; i++ { - res[i].Square(&seed) - seed.Set(&res[i]) - } - return res - } - - var seed, one fr.Element - one.SetOne() - seed.SetInt64(_seed) - for i := 0; i < len(sis.A); i++ { - sis.A[i] = polyRand(seed, sis.Degree) - copy(sis.Ag[i], sis.A[i]) - sis.Domain.FFT(sis.Ag[i], fft.DIF, fft.OnCoset()) - seed.Add(&seed, &one) - } -} - -const ( - LATENCY_MUL_FIELD_NS int = 18 - LATENCY_ADD_FIELD_NS int = 4 -) - -// Estimate the theoretical performances that are achievable using ring-SIS -// operations. The time is obtained by counting the number of additions and -// multiplications occurring in the computation. This does not account for the -// possibilities to use SIMD instructions or for cache-locality issues. Thus, it -// does not represents a maximum even though it returns a good idea of what is -// achievable . This returns performances in term of ns/field. This also does not -// account for the time taken for "limb-splitting" the input. -func estimateSisTheory(p sisParams) float64 { - - // Since the FFT occurs over a coset, we need to multiply all the coefficients - // of the input by some coset factors (for an entire polynomial) - timeCosetShift := (1 << p.logTwoDegree) * LATENCY_MUL_FIELD_NS - - // The two additions are from the butterfly, and the multiplication represents - // the one by the twiddle. 
(for an entire polynomial) - timeFFT := (1 << p.logTwoDegree) * p.logTwoDegree * (2*LATENCY_ADD_FIELD_NS + LATENCY_MUL_FIELD_NS) - - // Time taken to multiply by the key and accumulate (for an entire polynomial) - timeMulAddKey := (1 << p.logTwoDegree) * (LATENCY_MUL_FIELD_NS + LATENCY_ADD_FIELD_NS) - - // Total computation time for an entire polynomial - totalTimePoly := timeCosetShift + timeFFT + timeMulAddKey - - // Convert this into a time per input field - r := totalTimePoly * fr.Bits / p.logTwoBound / (1 << p.logTwoDegree) - return float64(r) -} - -func BenchmarkDecomposition(b *testing.B) { - - nbElmts := 1000 - a := make([]fr.Element, nbElmts) - for i := 0; i < nbElmts; i++ { - a[i].SetRandom() - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) - } - logTwoBound := 4 - m := make(fr.Vector, nbElmts*fr.Bytes*8/logTwoBound) - - b.Run(fmt.Sprintf("limbDecomposeBytes logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - b.Run(fmt.Sprintf("limbDecomposeByteSmallBound logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytesSmallBound(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - logTwoBound = 16 - b.Run(fmt.Sprintf("limbDecomposeBytes logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - b.Run(fmt.Sprintf("limbDecomposeByteSmallBound logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytesMiddleBound(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - -} - -func BenchmarkSIS(b *testing.B) { - - // max nb field elements to hash - const nbInputs = 1 << 16 - - // Assign the input with random bytes. In practice, theses bytes encodes - // a string of field element. 
It would be more meaningful to take a slice - // of field element directly because otherwise the conversion time is not - // accounted for in the benchmark. - inputs := make(fr.Vector, nbInputs) - for i := 0; i < len(inputs); i++ { - inputs[i].SetRandom() - } - - for _, param := range params128Bits { - for n := 1 << 10; n <= nbInputs; n <<= 1 { - in := inputs[:n] - benchmarkSIS(b, in, false, param.logTwoBound, param.logTwoDegree, estimateSisTheory(param)) - } - - } -} - -func benchmarkSIS(b *testing.B, input []fr.Element, sparse bool, logTwoBound, logTwoDegree int, theoretical float64) { - b.Helper() - - n := len(input) - - benchName := "ring-sis/" - if sparse { - benchName += "sparse/" - } - benchName += fmt.Sprintf("inputs=%v/log2-bound=%v/log2-degree=%v", n, logTwoBound, logTwoDegree) - - b.Run(benchName, func(b *testing.B) { - instance, err := NewRSis(0, logTwoDegree, logTwoBound, n) - if err != nil { - b.Fatal(err) - } - - // We introduce a custom metric which is the time per field element - // Since the benchmark object allows to report extra meta but does - // not allow accessing them. We measure the time ourself. - - startTime := time.Now() - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err = instance.Hash(input) - if err != nil { - b.Fatal(err) - } - } - b.StopTimer() - - totalDuration := time.Since(startTime) - nsPerField := totalDuration.Nanoseconds() / int64(b.N) / int64(n) - - b.ReportMetric(float64(nsPerField), "ns/field") - - b.ReportMetric(theoretical, "ns/field(theory)") - - }) -} - -// Hash interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, -// and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 -// -// It is equivalent to calling r.Write(element.Marshal()); outBytes = r.Sum(nil); -// ! 
note @gbotrel: this is a place holder, may not make sense -func (r *RSis) Hash(v []fr.Element) ([]fr.Element, error) { - if len(v) > r.maxNbElementsToHash { - return nil, fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) - } - - r.Reset() - for _, e := range v { - r.Write(e.Marshal()) - } - sum := r.Sum(nil) - var rlen [4]byte - binary.BigEndian.PutUint32(rlen[:], uint32(len(sum)/fr.Bytes)) - reader := io.MultiReader(bytes.NewReader(rlen[:]), bytes.NewReader(sum)) - var result fr.Vector - _, err := result.ReadFrom(reader) - if err != nil { - return nil, err - } - return result, nil -} - -func TestLimbDecompositionFastPath(t *testing.T) { - assert := require.New(t) - - for size := fr.Bytes; size < 5*fr.Bytes; size += fr.Bytes { - // Test the fast path of limbDecomposeBytes8_64 - buf := make([]byte, size) - m := make([]fr.Element, size) - mValues := bitset.New(uint(size)) - n := make([]fr.Element, size) - nValues := bitset.New(uint(size)) - - // Generate a random buffer - _, err := rand.Read(buf) - assert.NoError(err) - - limbDecomposeBytes8_64(buf, m, mValues) - limbDecomposeBytes(buf, n, 8, 64, nValues) - - for i := 0; i < size; i++ { - assert.Equal(mValues.Test(uint(i)), nValues.Test(uint(i))) - assert.True(m[i].Equal(&n[i])) - } - } - -} - -func TestUnrolledFFT(t *testing.T) { - - var shift fr.Element - shift.SetRandom() - - const size = 64 - assert := require.New(t) - domain := fft.NewDomain(size, fft.WithShift(shift)) - - k1 := make([]fr.Element, size) - for i := 0; i < size; i++ { - k1[i].SetRandom() - } - k2 := make([]fr.Element, size) - copy(k2, k1) - - // default FFT - domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - - // unrolled FFT - twiddlesCoset := PrecomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) - FFT64(k2, twiddlesCoset) - - // compare results - for i := 0; i < size; i++ { - // fmt.Printf("i = %d, k1 = %v, k2 = %v\n", i, k1[i].String(), k2[i].String()) - 
assert.True(k1[i].Equal(&k2[i]), "i = %d", i) - } -} diff --git a/ecc/bn254/fr/sis/test_cases.json b/ecc/bn254/fr/sis/test_cases.json deleted file mode 100644 index 344e272497..0000000000 --- a/ecc/bn254/fr/sis/test_cases.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "inputs": [ - "5", - "25", - "125", - "625", - "3125", - "15625", - "78125", - "390625", - "1953125", - "9765625" - ], - "entries": [ - { - "expected": [ - "19052984516876627274640238270428312641066914502976343854130933042152395214437", - "19579499093159636168223528817491239872946408580551360482200183465178423634745", - "3708668380994841899936676613411331898010921342788081195849365322630935912434", - "5433511930939366077412770734404741296375787859263964469718544218782774942954" - ], - "params": { - "maxNbElementsToHash": 10, - "seed": 5, - "logTwoDegree": 2, - "logTwoBound": 2 - } - }, - { - "expected": [ - "12217087487196706286935392960359417153662545188297682064251163865575683215147", - "21548337336470889698231140636706520345756649225861573817734471546023502758709", - "9320394412515272050870390009764753183869476247430369751056203305656629612983", - "18570900243421525732330663177341878179250339015557480174214866645347627194504" - ], - "params": { - "maxNbElementsToHash": 10, - "seed": 5, - "logTwoDegree": 2, - "logTwoBound": 4 - } - }, - { - "expected": [ - "16692170502388384390674572361848717230910923132874717358201841963873546552628", - "10418559609645272031357396256918216874674189335083221341531922773216817537771", - "906042409609440161513541043605057525610952609830270797384406918592312743401", - "14580691556330400310069765056830001347584784126854837868214733018631095864290" - ], - "params": { - "maxNbElementsToHash": 10, - "seed": 5, - "logTwoDegree": 2, - "logTwoBound": 8 - } - }, - { - "expected": [ - "17810538377305826979727126885080619194828255122466505789376132059304678590982", - "13274549151241879186367017598419480441052851608459995571043019156210835065884", - 
"12217919828955713195296074461355704099077575572697456698819124326004773727872", - "3325213248998312940839248497345183525722445334666772759713346022206056968778" - ], - "params": { - "maxNbElementsToHash": 10, - "seed": 5, - "logTwoDegree": 2, - "logTwoBound": 16 - } - }, - { - "expected": [ - "1049896500426401685945922049369234922485914665192048996511968651754212430847", - "18925053839250027789719219571375251204394727876911726560469458538529452247978", - "9323628684818709197255012865135973461462156185665546071174101521851226715555", - "16161396862971423997723820824078836309904422899378926199292781195549690434491" - ], - "params": { - "maxNbElementsToHash": 10, - "seed": 5, - "logTwoDegree": 2, - "logTwoBound": 32 - } - } - ] -} \ No newline at end of file diff --git a/ecc/bn254/fr/tensor-commitment/commitment.go b/ecc/bn254/fr/tensor-commitment/commitment.go deleted file mode 100644 index 65542c5f40..0000000000 --- a/ecc/bn254/fr/tensor-commitment/commitment.go +++ /dev/null @@ -1,475 +0,0 @@ -// Copyright 2020-2024 Consensys Software Inc. -// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. - -package tensorcommitment - -import ( - "bytes" - "errors" - "hash" - "math/big" - - "github.com/consensys/gnark-crypto/ecc/bn254/fr" - "github.com/consensys/gnark-crypto/ecc/bn254/fr/fft" - "github.com/consensys/gnark-crypto/internal/parallel" -) - -var ( - ErrWrongSize = errors.New("polynomial is too large") - ErrNotSquare = errors.New("the size of the polynomial must be a square") - ErrProofFailedHash = errors.New("hash of one of the columns is wrong") - ErrProofFailedEncoding = errors.New("inconsistency with the code word") - ErrProofFailedOob = errors.New("the entry is out of bound") - ErrMaxNbColumns = errors.New("the state is full") - ErrCommitmentNotDone = errors.New("the proof cannot be built before the computation of the digest") -) - -// commitment (TODO Merkle tree for that...) 
-// The i-th entry is the hash of the i-th columns of P, -// where P is written as a matrix √(m) x √(m) -// (m = len(P)), and the ij-th entry of M is p[m*j + i]. -type Digest [][]byte - -// Proof that a commitment is correct -// cf https://eprint.iacr.org/2021/1043.pdf page 10 -type Proof struct { - - // list of entries of ̂{u} to query (see https://eprint.iacr.org/2021/1043.pdf for notations) - EntryList []int - - // columns on against which the linear combination is checked - // (the i-th entry is the EntryList[i]-th column) - Columns [][]fr.Element - - // Linear combination of the rows of the polynomial P written as a square matrix - LinearCombination []fr.Element - - // small domain, to retrieve the canonical form of the linear combination - Domain *fft.Domain - - // root of unity of the big domain - Generator fr.Element -} - -// TcParams stores the public parameters of the tensor commitment -type TcParams struct { - // NbColumns number of columns of the matrix storing the polynomials. The total size of - // the polynomials which are committed is NbColumns x NbRows. - // The Number of columns is a power of 2, it corresponds to the original size of the codewords - // of the Reed Solomon code. - NbColumns int - - // NbRows number of rows of the matrix storing the polynomials. If a polynomial p is appended - // whose size if not 0 mod NbRows, it is padded as p' so that len(p')=0 mod NbRows. - NbRows int - - // Domains[1] used for the Reed Solomon encoding - Domains [2]*fft.Domain - - // Rho⁻¹, rate of the RS code ( > 1) - Rho int - - // Function that returns a fresh hasher. The returned hash function is used for hashing the - // columns. We use this and not directly a hasher for threadsafety hasher. Indeed, if different - // thread share the same hasher, they will end up mixing hash inputs that should remain separate. 
- MakeHash func() hash.Hash -} - -// TensorCommitment stores the data to use a tensor commitment -type TensorCommitment struct { - // The public parameters of the tensor commitment - params *TcParams - - // State contains the polynomials that have been appended so far. - // when we append a polynomial p, it is stored in the state like this: - // state[i][j] = p[j*nbRows + i]: - // p[0] | p[nbRows] | p[2*nbRows] ... - // p[1] | p[nbRows+1] | p[2*nbRows+1] - // p[2] | p[nbRows+2] | p[2*nbRows+2] - // .. - // p[nbRows-1] | p[2*nbRows-1] | p[3*nbRows-1] .. - State [][]fr.Element - - // same content as state, but the polynomials are displayed as a matrix - // and the rows are encoded. - // encodedState = encodeRows(M_0 || .. || M_n) - // where M_i is the i-th polynomial laid out as a matrix, that is - // M_i_jk = p_i[i*m+j] where m = \sqrt(len(p)). - EncodedState [][]fr.Element - - // boolean telling if the commitment has already been done. - // The method BuildProof cannot be called before Commit(), - // because it would allow to build a proof before giving the commitment - // to a verifier, making the workflow not secure. - isCommitted bool - - // number of columns which have already been hashed (atomic) - NbColumnsHashed int - - // counts the number of time `Append` was called (atomic). - NbAppendsSoFar int -} - -// NewTensorCommitment returns a new TensorCommitment -// * ρ rate of the code ( > 1) -// * size size of the polynomial to be committed. 
The size of the commitment is -// then ρ * √(m) where m² = size -func NewTCParams(codeRate, NbColumns, NbRows int, makeHash func() hash.Hash) (*TcParams, error) { - var res TcParams - - // domain[0]: domain to perform the FFT^-1, of size capacity * sqrt - // domain[1]: domain to perform FFT, of size rho * capacity * sqrt - res.Domains[0] = fft.NewDomain(uint64(NbColumns)) - res.Domains[1] = fft.NewDomain(uint64(codeRate * NbColumns)) - - // size of the matrix - res.NbColumns = int(res.Domains[0].Cardinality) - res.NbRows = NbRows - - // rate - res.Rho = codeRate - - // Hash function - res.MakeHash = makeHash - - return &res, nil -} - -// Initializes an instance of tensor commitment that we can use start -// appending value into it -func NewTensorCommitment(params *TcParams) *TensorCommitment { - var res TensorCommitment - - // create the state. It's the matrix containing the polynomials, the ij-th - // entry of the matrix is state[i][j]. The polynomials are split and stacked - // columns per column. - res.State = make([][]fr.Element, params.NbRows) - for i := 0; i < params.NbRows; i++ { - res.State[i] = make([]fr.Element, params.NbColumns) - } - - // nothing has been committed... - res.isCommitted = false - res.params = params - return &res -} - -// Append appends p to the state. -// when we append a polynomial p, it is stored in the state like this: -// state[i][j] = p[j*nbRows + i]: -// p[0] | p[nbRows] | p[2*nbRows] ... -// p[1] | p[nbRows+1] | p[2*nbRows+1] -// p[2] | p[nbRows+2] | p[2*nbRows+2] -// .. -// p[nbRows-1] | p[2*nbRows-1] | p[3*nbRows-1] .. -// If p doesn't fill a full submatrix it is padded with zeroes. 
-func (tc *TensorCommitment) Append(ps ...[]fr.Element) ([][]byte, error) { - - nbColumnsTakenByPs := make([]int, len(ps)) - totalNumberOfColumnsTakenByPs := 0 - // Short-hand to avoid writing `tc.params.NbRows` all over the places - numRows := tc.params.NbRows - - /* - Precomputes the number of columns that will be taken by each colums - */ - for iPol, p := range ps { - // check if there is some room for p - nbColumnsTakenByP := len(p) / numRows - // Note, Alex. Really, if you want to not handle the padding and just - // panic whenever you receive "incomplete" columns this would be fine. - if len(p)%numRows != 0 { - // If the division has a remainder. Add an extra column - // Implicitly, it will be padded - nbColumnsTakenByP += 1 - } - - nbColumnsTakenByPs[iPol] = nbColumnsTakenByP - totalNumberOfColumnsTakenByPs += nbColumnsTakenByP - } - - // Position at which we need to start inserting columns in the state - currentColumnToFill := int(tc.NbColumnsHashed) - - // Check that we are not inserting more columns that we can handle - if currentColumnToFill+totalNumberOfColumnsTakenByPs > tc.params.NbColumns { - return nil, ErrMaxNbColumns - } - - // Update the internal state variables to keep track of how many poly - // have been appended so far and how many columns. 
- tc.NbAppendsSoFar += len(ps) - tc.NbColumnsHashed += totalNumberOfColumnsTakenByPs - - backupCurrentColumnToFill := currentColumnToFill - - // put p in the state - for iPol, p := range ps { - - pIsPadded := false - if len(p)%numRows != 0 { - pIsPadded = true - } - - // Number of column taken by P, ignoring the last one if it is padded - nbFullColumnsTakenByP := nbColumnsTakenByPs[iPol] - if pIsPadded { - nbFullColumnsTakenByP-- - } - - // Insert the "full columns" in the state - for i := 0; i < nbFullColumnsTakenByP; i++ { - for j := 0; j < numRows; j++ { - tc.State[j][currentColumnToFill+i] = p[i*numRows+j] - } - } - - // Insert the padded column in the state if any - currentColumnToFill += nbFullColumnsTakenByP - if pIsPadded { - offsetP := len(p) - len(p)%numRows - for j := offsetP; j < len(p); j++ { - tc.State[j-offsetP][currentColumnToFill] = p[j] - } - currentColumnToFill += 1 - } - } - - // Preallocate the result, and as well a buffer for the columns to hash - res := make([][]byte, totalNumberOfColumnsTakenByPs) - - parallel.Execute(totalNumberOfColumnsTakenByPs, func(start, stop int) { - hasher := tc.params.MakeHash() - for i := start; i < stop; i++ { - hasher.Reset() - for j := 0; j < tc.params.NbRows; j++ { - hasher.Write(tc.State[j][i+backupCurrentColumnToFill].Marshal()) - } - res[i] = hasher.Sum(nil) - } - }) - - return res, nil -} - -// Commit to p. The commitment procedure is the following: -// * Encode the rows of the state to get M' -// * Hash the columns of M' -func (tc *TensorCommitment) Commit() (Digest, error) { - - // we encode the rows of p using Reed Solomon - // encodedState[i][:] = i-th line of M. 
It is of size domain[1].Cardinality - tc.EncodedState = make([][]fr.Element, tc.params.NbRows) - for i := 0; i < tc.params.NbRows; i++ { // we fill encodedState line by line - tc.EncodedState[i] = make([]fr.Element, tc.params.Domains[1].Cardinality) // size = NbRows*rho*capacity - for j := 0; j < tc.params.NbColumns; j++ { // for each polynomial - tc.EncodedState[i][j].Set(&tc.State[i][j]) - } - tc.params.Domains[0].FFTInverse(tc.EncodedState[i][:tc.params.Domains[0].Cardinality], fft.DIF) - fft.BitReverse(tc.EncodedState[i][:tc.params.Domains[0].Cardinality]) - tc.params.Domains[1].FFT(tc.EncodedState[i], fft.DIF) - fft.BitReverse(tc.EncodedState[i]) - } - - // now we hash each columns of _p - res := make([][]byte, tc.params.Domains[1].Cardinality) - - parallel.Execute(int(tc.params.Domains[1].Cardinality), func(start, stop int) { - hasher := tc.params.MakeHash() - for i := start; i < stop; i++ { - hasher.Reset() - for j := 0; j < tc.params.NbRows; j++ { - hasher.Write(tc.EncodedState[j][i].Marshal()) - } - res[i] = hasher.Sum(nil) - } - }) - - // records that the commitment has been built - tc.isCommitted = true - - return res, nil - -} - -// BuildProofAtOnceForTest builds a proof to be tested against a previous commitment of a list of -// polynomials. -// * l the random linear coefficients used for the linear combination of size NbRows -// * entryList list of columns to hash -// l and entryList are supposed to be precomputed using Fiat Shamir -// -// The proof is the linear combination (using l) of the encoded rows of p written -// as a matrix. Only the entries contained in entryList are kept. 
-func (tc *TensorCommitment) BuildProofAtOnceForTest(l []fr.Element, entryList []int) (Proof, error) { - linComb, err := tc.ProverComputeLinComb(l) - if err != nil { - return Proof{}, err - } - - openedColumns, err := tc.ProverOpenColumns(entryList) - if err != nil { - return Proof{}, err - } - - return BuildProof(tc.params, linComb, entryList, openedColumns), nil -} - -// func printVector(v []fr.Element) { -// fmt.Printf("[") -// for i := 0; i < len(v); i++ { -// fmt.Printf("%s,", v[i].String()) -// } -// fmt.Printf("]\n") -// } - -// BuildProof builds a proof to be tested against a previous commitment of a list of -// polynomials. -// * l the random linear coefficients used for the linear combination of size NbRows -// * entryList list of columns to hash -// l and entryList are supposed to be precomputed using Fiat Shamir -// -// The proof is the linear combination (using l) of the encoded rows of p written -// as a matrix. Only the entries contained in entryList are kept. -func (tc *TensorCommitment) ProverComputeLinComb(l []fr.Element) ([]fr.Element, error) { - - // check that the digest has been computed - if !tc.isCommitted { - return []fr.Element{}, ErrCommitmentNotDone - } - - // since the digest has been computed, the encodedState is already stored. - // We use it to build the proof, without recomputing the ffts. 
- - // linear combination of the rows of the state - linComb := make([]fr.Element, tc.params.NbColumns) - for i := 0; i < tc.params.NbColumns; i++ { - var tmp fr.Element - for j := 0; j < tc.params.NbRows; j++ { - tmp.Mul(&tc.State[j][i], &l[j]) - linComb[i].Add(&linComb[i], &tmp) - } - } - - return linComb, nil -} - -func (tc *TensorCommitment) ProverOpenColumns(entryList []int) ([][]fr.Element, error) { - - // check that the digest has been computed - if !tc.isCommitted { - return [][]fr.Element{}, ErrCommitmentNotDone - } - - // columns of the state whose rows have been encoded, written as a matrix, - // corresponding to the indices in entryList (we will select the columns - // entryList[0], entryList[1], etc. - openedColumns := make([][]fr.Element, len(entryList)) - for i := 0; i < len(entryList); i++ { // for each column (corresponding to an elmt in entryList) - openedColumns[i] = make([]fr.Element, tc.params.NbRows) - for j := 0; j < tc.params.NbRows; j++ { - openedColumns[i][j] = tc.EncodedState[j][entryList[i]] - } - } - - return openedColumns, nil -} - -/* -Reconstruct the proof from the prover's outputs -*/ -func BuildProof(params *TcParams, linComb []fr.Element, entryList []int, openedCols [][]fr.Element) Proof { - - var res Proof - - // small domain to express the linear combination in canonical form - res.Domain = params.Domains[0] - - // generator g of the biggest domain, used to evaluate the canonical form of - // the linear combination at some powers of g. - res.Generator.Set(¶ms.Domains[1].Generator) - - res.Columns = openedCols - res.EntryList = entryList - res.LinearCombination = linComb - - return res -} - -// evalAtPower returns p(x**n) where p is interpreted as a polynomial -// p[0] + p[1]X + .. 
p[len(p)-1]xˡᵉⁿ⁽ᵖ⁾⁻¹ -func evalAtPower(p []fr.Element, x fr.Element, n int) fr.Element { - - var xexp fr.Element - xexp.Exp(x, big.NewInt(int64(n))) - - var res fr.Element - for i := 0; i < len(p); i++ { - res.Mul(&res, &xexp) - res.Add(&p[len(p)-1-i], &res) - } - - return res - -} - -// Verify a proof that digest is the hash of a polynomial given a proof -// proof: contains the linear combination of the non-encoded rows + the -// digest: hash of the polynomial -// l: random coefficients for the linear combination, chosen by the verifier -// h: hash function that is used for hashing the columns of the polynomial -// TODO make this function private and add a Verify function that derives -// the randomness using Fiat Shamir -// -// Note (alex), A more convenient API would be to expose two functions, -// one that does FS for you and what that let you do it for yourself. And likewise -// for the prover. -func Verify(proof Proof, digest Digest, l []fr.Element, h hash.Hash) error { - - // for each entry in the list -> it corresponds to the sampling - // set on which we probabilistically check that - // Encoded(linear_combination) = linear_combination(encoded) - for i := 0; i < len(proof.EntryList); i++ { - - // check that the hash of the columns correspond to what's in the digest - h.Reset() - for j := 0; j < len(proof.Columns[i]); j++ { - h.Write(proof.Columns[i][j].Marshal()) - } - s := h.Sum(nil) - if !bytes.Equal(s, digest[proof.EntryList[i]]) { - return ErrProofFailedHash - } - - if proof.EntryList[i] >= len(digest) { - return ErrProofFailedOob - } - - // linear combination of the i-th column, whose entries - // are the entryList[i]-th entries of the encoded lines - // of p - var linCombEncoded, tmp fr.Element - for j := 0; j < len(proof.Columns[i]); j++ { - - // linear combination of the encoded rows at column i - tmp.Mul(&proof.Columns[i][j], &l[j]) - linCombEncoded.Add(&linCombEncoded, &tmp) - } - - // entry i of the encoded linear combination - var 
encodedLinComb fr.Element - linCombCanonical := make([]fr.Element, proof.Domain.Cardinality) - copy(linCombCanonical, proof.LinearCombination) - proof.Domain.FFTInverse(linCombCanonical, fft.DIF) - fft.BitReverse(linCombCanonical) - encodedLinComb = evalAtPower(linCombCanonical, proof.Generator, proof.EntryList[i]) - - // compare both values - if !encodedLinComb.Equal(&linCombEncoded) { - return ErrProofFailedEncoding - - } - } - - return nil - -} diff --git a/ecc/bn254/fr/tensor-commitment/commitment_test.go b/ecc/bn254/fr/tensor-commitment/commitment_test.go deleted file mode 100644 index fa5c43299a..0000000000 --- a/ecc/bn254/fr/tensor-commitment/commitment_test.go +++ /dev/null @@ -1,509 +0,0 @@ -// Copyright 2020-2024 Consensys Software Inc. -// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. - -package tensorcommitment - -import ( - "bytes" - "hash" - "math/big" - "math/bits" - "strconv" - "testing" - - "github.com/consensys/gnark-crypto/ecc/bn254/fr" - "github.com/consensys/gnark-crypto/ecc/bn254/fr/sis" - "github.com/stretchr/testify/require" -) - -type DummyHash uint - -func (d DummyHash) Write(p []byte) (n int, err error) { - return 0, nil -} - -func (d DummyHash) Sum(b []byte) []byte { - return b -} - -func (d DummyHash) Reset() {} - -func (d DummyHash) Size() int { - return 0 -} - -func (d DummyHash) BlockSize() int { - return 0 -} - -func DummyHashMaker() hash.Hash { - var res DummyHash - return &res -} - -func TestAppend(t *testing.T) { - if bits.UintSize == 32 { - t.Skip("skipping this test in 32bit.") - } - - assert := require.New(t) - - // tensor commitment - const ( - rho = 4 - nbRows = 10 - nbColumns = 16 - ) - params, err := NewTCParams(rho, nbColumns, nbRows, DummyHashMaker) - assert.NoError(err) - - tc := NewTensorCommitment(params) - - { - // random Polynomial of size nbRows - p := make([]fr.Element, nbRows) - for i := 0; i < nbRows; i++ { - p[i].SetRandom() - } - _, err := tc.Append(p) - assert.NoError(err) 
- - // check if p corresponds to the first column of the state - for i := 0; i < nbRows; i++ { - assert.True(tc.State[i][0].Equal(&p[i]), "a column is not filled correctly") - } - - } - - // after a first polynomial has been filled - { - // random Polynomial of size nbRows - p := make([]fr.Element, nbRows) - for i := 0; i < nbRows; i++ { - p[i].SetRandom() - } - _, err := tc.Append(p) - assert.NoError(err) - - // check if p corresponds to the second column of the state - for i := 0; i < nbRows; i++ { - assert.True(tc.State[i][1].Equal(&p[i]), "a column is not filled correctly") - } - } - - // polynomial whose size is not a multiple of nbRows - { - // random Polynomial of size nbRows - offset := 4 - p := make([]fr.Element, nbRows+offset) - for i := 0; i < nbRows+offset; i++ { - p[i].SetRandom() - } - _, err := tc.Append(p) - assert.NoError(err) - - // check if p corresponds to the first column of the state - for i := 0; i < nbRows; i++ { - assert.True(tc.State[i][2].Equal(&p[i]), "a column is not filled correctly") - } - for i := 0; i < offset; i++ { - assert.True(tc.State[i][3].Equal(&p[i+nbRows]), "a column is not filled correctly") - } - } - - // same to see if the last column was correctly offset - { - // random Polynomial of size nbRows - offset := 4 - p := make([]fr.Element, nbRows+offset) - for i := 0; i < nbRows+offset; i++ { - p[i].SetRandom() - } - _, err := tc.Append(p) - assert.NoError(err) - - // check if p corresponds to the first column of the state - for i := 0; i < nbRows; i++ { - assert.True(tc.State[i][4].Equal(&p[i]), "a column is not filled correctly") - } - for i := 0; i < offset; i++ { - assert.True(tc.State[i][5].Equal(&p[i+nbRows]), "a column is not filled correctly") - } - } - -} - -func TestLinearCombination(t *testing.T) { - - rho := 4 - nbRows := 8 - nbColumns := 8 - params, err := NewTCParams(rho, nbColumns, nbRows, DummyHashMaker) - if err != nil { - t.Fatal(err) - } - tc := NewTensorCommitment(params) - - // build a random polynomial 
- p := make([]fr.Element, nbRows*nbColumns) - for i := 0; i < 64; i++ { - p[i].SetRandom() - } - - // we select all the entries for the test - entryList := make([]int, rho*nbColumns) - for i := 0; i < rho*nbColumns; i++ { - entryList[i] = i - } - - // append p and commit (otherwise the proof cannot be built) - tc.Append(p) - _, err = tc.Commit() - if err != nil { - t.Fatal(err) - } - - // at each trial, it's the i-th line which is selected - for i := 0; i < nbRows; i++ { - - // used for the random linear combination. - // it will act as a selector for the test: it selects the i-th - // row of p, when p is written as a matrix M_ij, where M_ij=p[i*m+j]. - // The i-th entry of l is 1, the others are 0. - l := make([]fr.Element, nbRows) - l[i].SetInt64(1) - - proof, err := tc.BuildProofAtOnceForTest(l, entryList) - if err != nil { - t.Fatal(err) - } - - // the i-th line of p is the one that is supposed to be selected - // (corresponding to the linear combination) - expected := make([]fr.Element, nbColumns) - for j := 0; j < nbColumns; j++ { - expected[j].Set(&p[j*nbRows+i]) - } - - for j := 0; j < nbColumns; j++ { - if !expected[j].Equal(&proof.LinearCombination[j]) { - t.Fatal("expected linear combination is incorrect") - } - } - - } -} - -// Test the verification of a correct proof using a mock hash -func TestCommitmentDummyHash(t *testing.T) { - - var rho, nbColumns, nbRows int - rho = 4 - nbColumns = 8 - nbRows = 8 - - var h DummyHash - params, err := NewTCParams(rho, nbColumns, nbRows, DummyHashMaker) - if err != nil { - t.Fatal(err) - } - tc := NewTensorCommitment(params) - - // random polynomial - p := make([]fr.Element, nbRows*nbColumns) - for i := 0; i < nbRows*nbColumns; i++ { - p[i].SetRandom() - } - - // coefficients for the linear combination - l := make([]fr.Element, nbRows) - for i := 0; i < nbRows; i++ { - l[i].SetRandom() - } - - // we select all the entries for the test - entryList := make([]int, rho*nbColumns) - for i := 0; i < rho*nbColumns; i++ { - 
entryList[i] = i - } - - // compute the digest... - _, err = tc.Append(p) - if err != nil { - t.Fatal(err) - } - digest, err := tc.Commit() - if err != nil { - t.Fatal(err) - } - - // build the proof... - proof, err := tc.BuildProofAtOnceForTest(l, entryList) - if err != nil { - t.Fatal(err) - } - - // verify that the proof is correct - err = Verify(proof, digest, l, h) - if err != nil { - t.Fatal(err) - } - -} - -// Test the opening using a dummy hash -func TestOpeningDummyHash(t *testing.T) { - - var rho, nbColumns, nbRows int - rho = 4 - nbColumns = 8 - nbRows = 8 - - params, err := NewTCParams(rho, nbColumns, nbRows, DummyHashMaker) - if err != nil { - t.Fatal(err) - } - tc := NewTensorCommitment(params) - - // random polynomial - p := make([]fr.Element, nbColumns*nbRows) - for i := 0; i < nbColumns*nbRows; i++ { - p[i].SetRandom() - } - - // the coefficients are (1,x,x^2,..,x^{n-1}) where x is the point - // at which the opening is done - var xm, x fr.Element - x.SetRandom() - hi := make([]fr.Element, nbColumns) // stores [1,x^{nbRows},..,x^{nbRows*nbColumns^-1}] - lo := make([]fr.Element, nbRows) // stores [1,x,..,x^{nbRows-1}] - lo[0].SetInt64(1) - hi[0].SetInt64(1) - xm.Exp(x, big.NewInt(int64(nbRows))) - for i := 1; i < nbColumns; i++ { - lo[i].Mul(&lo[i-1], &x) - hi[i].Mul(&hi[i-1], &xm) - } - - // create the digest before computing the proof - _, err = tc.Append(p) - if err != nil { - t.Fatal(err) - } - _, err = tc.Commit() - if err != nil { - t.Fatal(err) - } - - // build the proof - entryList := make([]int, rho*nbColumns) - for i := 0; i < rho*nbColumns; i++ { - entryList[i] = i - } - proof, err := tc.BuildProofAtOnceForTest(lo, entryList) - if err != nil { - t.Fatal(err) - } - - // finish the evaluation by computing - // [linearCombination] * [hi]^t - var eval, tmp fr.Element - for i := 0; i < nbColumns; i++ { - tmp.Mul(&proof.LinearCombination[i], &hi[i]) - eval.Add(&eval, &tmp) - } - - // compute the real evaluation of p at x manually - var 
expectedEval fr.Element - for i := 0; i < nbRows*nbColumns; i++ { - expectedEval.Mul(&expectedEval, &x) - expectedEval.Add(&expectedEval, &p[len(p)-i-1]) - } - - // the results coincide - if !expectedEval.Equal(&eval) { - t.Fatal("p(x) != [ lo ] x M x [ hi ]^t") - } - -} - -// Check the commitments are correctly formed when appending a polynomial -func TestAppendSis(t *testing.T) { - if bits.UintSize == 32 { - t.Skip("skipping this test in 32bit.") - } - const ( - rho = 4 - nbColumns = 8 - nbRows = 8 - logTwoDegree = 1 - logTwoBound = 4 - ) - - assert := require.New(t) - - // keySize := 256 - hMaker, err := sis.NewRingSISMaker(5, logTwoDegree, logTwoBound, 8) - assert.NoError(err) - - params, err := NewTCParams(rho, nbColumns, nbRows, hMaker) - assert.NoError(err) - - tc := NewTensorCommitment(params) - - // random polynomial (that does not fill the full matrix) - offset := 4 - p := make([]fr.Element, nbRows*nbColumns-offset) - for i := 0; i < nbRows*nbColumns-offset; i++ { - p[i].SetRandom() - } - - s, err := tc.Append(p) - assert.NoError(err) - - assert.Equal(nbColumns, len(s)) - - // check the hashes of the columns - h := hMaker() - for i := 0; i < nbColumns-1; i++ { - h.Reset() - for j := 0; j < nbRows; j++ { - h.Write(p[i*nbRows+j].Marshal()) - } - _s := h.Sum(nil) - assert.True(bytes.Equal(_s, s[i]), "error hash column when appending a polynomial for column", i) - } - - // last column - h.Reset() - for i := (nbColumns - 1) * nbRows; i < nbColumns*nbRows-offset; i++ { - h.Write(p[i].Marshal()) - } - var tmp fr.Element - for i := nbColumns*nbRows - offset; i < nbColumns*nbRows; i++ { - h.Write(tmp.Marshal()) - } - _s := h.Sum(nil) - assert.True(bytes.Equal(_s, s[nbColumns-1]), "error hash column when appending a polynomial") -} - -// Test the verification of a correct proof using SIS as hash -func TestCommitmentSis(t *testing.T) { - if bits.UintSize == 32 { - t.Skip("skipping this test in 32bit.") - } - var rho, nbColumns, nbRows int - rho = 4 - nbColumns = 8 - 
nbRows = 8 - - logTwoDegree := 1 - logTwoBound := 4 - hMaker, err := sis.NewRingSISMaker(5, logTwoDegree, logTwoBound, 8) - if err != nil { - t.Fatal(err) - } - - params, err := NewTCParams(rho, nbColumns, nbRows, hMaker) - if err != nil { - t.Fatal(err) - } - tc := NewTensorCommitment(params) - - // random polynomial - p := make([]fr.Element, nbRows*nbColumns) - for i := 0; i < nbRows*nbColumns; i++ { - p[i].SetRandom() - } - - // coefficients for the linear combination - l := make([]fr.Element, nbRows) - for i := 0; i < nbRows; i++ { - l[i].SetRandom() - } - - // compute the digest... - _, err = tc.Append(p) - if err != nil { - t.Fatal(err) - } - digest, err := tc.Commit() - if err != nil { - t.Fatal(err) - } - - // test 1: we select all the entries - { - entryList := make([]int, rho*nbColumns) - for i := 0; i < rho*nbColumns; i++ { - entryList[i] = i - } - - // build the proof... - proof, err := tc.BuildProofAtOnceForTest(l, entryList) - if err != nil { - t.Fatal(err) - } - - // verify that the proof is correct - err = Verify(proof, digest, l, hMaker()) - if err != nil { - t.Fatal(err) - } - } - // test 2: we select a subset of the entries - { - - entryList := make([]int, 2) - entryList[0] = 1 - entryList[1] = 4 - - // build the proof... 
- proof, err := tc.BuildProofAtOnceForTest(l, entryList) - if err != nil { - t.Fatal(err) - } - - // verify that the proof is correct - err = Verify(proof, digest, l, hMaker()) - if err != nil { - t.Fatal(err) - } - } -} - -// benches -func BenchmarkTensorCommitment(b *testing.B) { - - // prepare the tensor commitment - logTwoDegree := 4 - logTwoBound := 4 - rho := 4 - - for i := 0; i < 6; i++ { - - nbColumns := (1 << (3 + i)) - nbRows := nbColumns - - h, _ := sis.NewRingSISMaker(5, logTwoDegree, logTwoBound, nbRows) - params, _ := NewTCParams(rho, nbColumns, nbRows, h) - tc := NewTensorCommitment(params) - - // random polynomial - p := make([]fr.Element, nbRows*nbColumns) - for i := 0; i < nbRows*nbColumns; i++ { - p[i].SetRandom() - } - - // run the benchmark - b.Run("size poly"+strconv.Itoa(nbRows*nbColumns), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - tc.Append(p) - tc.Commit() - } - }) - - } - -} diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index d0d0ae505d..70e25c05e5 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -9,7 +9,6 @@ import ( "bytes" "encoding/binary" "errors" - "hash" "math/bits" "github.com/bits-and-blooms/bitset" @@ -229,20 +228,6 @@ func (r *RSis) BlockSize() int { return 0 } -// Construct a hasher generator. 
It takes as input the same parameters -// as `NewRingSIS` and outputs a function which returns fresh hasher -// everytime it is called -func NewRingSISMaker(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (func() hash.Hash, error) { - return func() hash.Hash { - h, err := NewRSis(seed, logTwoDegree, logTwoBound, maxNbElementsToHash) - if err != nil { - panic(err) - } - return h - }, nil - -} - func genRandom(seed, i, j int64, buf *bytes.Buffer) babybear.Element { buf.Reset() diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index 8f40014ff6..4f405ec356 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -2,7 +2,6 @@ import ( "bytes" "encoding/binary" "errors" - "hash" "math/bits" "github.com/bits-and-blooms/bitset" @@ -222,20 +221,6 @@ func (r *RSis) BlockSize() int { return 0 } -// Construct a hasher generator. It takes as input the same parameters -// as `NewRingSIS` and outputs a function which returns fresh hasher -// everytime it is called -func NewRingSISMaker(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (func() hash.Hash, error) { - return func() hash.Hash { - h, err := NewRSis(seed, logTwoDegree, logTwoBound, maxNbElementsToHash) - if err != nil { - panic(err) - } - return h - }, nil - -} - func genRandom(seed, i, j int64, buf *bytes.Buffer) {{ .FF }}.Element { buf.Reset() diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index fc773e91e3..7936fec96c 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -9,7 +9,6 @@ import ( "bytes" "encoding/binary" "errors" - "hash" "math/bits" "github.com/bits-and-blooms/bitset" @@ -229,20 +228,6 @@ func (r *RSis) BlockSize() int { return 0 } -// Construct a hasher generator. 
It takes as input the same parameters -// as `NewRingSIS` and outputs a function which returns fresh hasher -// everytime it is called -func NewRingSISMaker(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (func() hash.Hash, error) { - return func() hash.Hash { - h, err := NewRSis(seed, logTwoDegree, logTwoBound, maxNbElementsToHash) - if err != nil { - panic(err) - } - return h - }, nil - -} - func genRandom(seed, i, j int64, buf *bytes.Buffer) goldilocks.Element { buf.Reset() diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index 81a312f22e..54383e5f2e 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -9,7 +9,6 @@ import ( "bytes" "encoding/binary" "errors" - "hash" "math/bits" "github.com/bits-and-blooms/bitset" @@ -229,20 +228,6 @@ func (r *RSis) BlockSize() int { return 0 } -// Construct a hasher generator. It takes as input the same parameters -// as `NewRingSIS` and outputs a function which returns fresh hasher -// everytime it is called -func NewRingSISMaker(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (func() hash.Hash, error) { - return func() hash.Hash { - h, err := NewRSis(seed, logTwoDegree, logTwoBound, maxNbElementsToHash) - if err != nil { - panic(err) - } - return h - }, nil - -} - func genRandom(seed, i, j int64, buf *bytes.Buffer) koalabear.Element { buf.Reset() From 92d9dbe05365ad7dadd4adbef2d9b1e5170c7db8 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Tue, 7 Jan 2025 10:42:12 -0600 Subject: [PATCH 02/25] refactor: kill bufRes --- ecc/bls12-377/fr/sis/sis.go | 154 +++++++----------- ecc/bls12-377/fr/sis/sis_test.go | 81 +-------- field/babybear/sis/sis.go | 154 +++++++----------- field/babybear/sis/sis_test.go | 81 +-------- .../internal/templates/sis/sis.go.tmpl | 151 +++++++---------- .../internal/templates/sis/sis.test.go.tmpl | 81 +-------- field/goldilocks/sis/sis.go | 154 +++++++----------- field/goldilocks/sis/sis_test.go | 81 +-------- 
field/koalabear/sis/sis.go | 154 +++++++----------- field/koalabear/sis/sis_test.go | 81 +-------- 10 files changed, 334 insertions(+), 838 deletions(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index d482377e8f..c83503e129 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -9,6 +9,7 @@ import ( "bytes" "encoding/binary" "errors" + "fmt" "math/bits" "github.com/bits-and-blooms/bitset" @@ -52,8 +53,8 @@ type RSis struct { maxNbElementsToHash int // allocate memory once per instance (used in Sum()) - bufM, bufRes fr.Vector - bufMValues *bitset.BitSet + bufM fr.Vector + bufMValues *bitset.BitSet } // NewRSis creates an instance of RSis. @@ -112,7 +113,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R A: make([][]fr.Element, n), Ag: make([][]fr.Element, n), bufM: make(fr.Vector, degree*n), - bufRes: make(fr.Vector, degree), bufMValues: bitset.New(uint(n)), maxNbElementsToHash: maxNbElementsToHash, } @@ -144,88 +144,76 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return r, nil } -func (r *RSis) Write(p []byte) (n int, err error) { - r.buffer.Write(p) - return len(p), nil -} - -// Sum appends the current hash to b and returns the resulting slice. -// It does not change the underlying hash state. -// The instance buffer is interpreted as a sequence of coefficients of size r.Bound bits long. -// The function returns the hash of the polynomial as a a sequence []fr.Elements, interpreted as []bytes, -// corresponding to sum_i A[i]*m Mod X^{d}+1 -func (r *RSis) Sum(b []byte) []byte { - buf := r.buffer.Bytes() - if len(buf) > r.capacity { - panic("buffer too large") +// Hash interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, +// and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 +// +// It is equivalent to calling r.Write(element.Marshal()); outBytes = r.Sum(nil); +// ! 
note @gbotrel: this is a place holder, may not make sense +func (r *RSis) Hash(v, res []fr.Element) error { + if len(res) != r.Degree { + return fmt.Errorf("output vector must have length %d", r.Degree) + } + // TODO @gbotrel check that this is needed. + for i := 0; i < len(res); i++ { + res[i].SetZero() + } + if len(v) > r.maxNbElementsToHash { + return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - - // clear the buffers of the instance. - defer r.cleanupBuffers() - - m := r.bufM - mValues := r.bufMValues + // reset the buffer + r.buffer.Reset() - if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { - limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else if r.LogTwoBound >= 8 && (fr.Bytes*8)%r.LogTwoBound == 0 { - limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else { - limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + // write the elements to the buffer + // TODO @gbotrel for now we use a buffer, we will kill it later in the refactoring. + for _, e := range v { + r.buffer.Write(e.Marshal()) } - // we can hash now. - res := r.bufRes + { + // previous Sum() - // method 1: fft - for i := 0; i < len(r.Ag); i++ { - if !mValues.Test(uint(i)) { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue + buf := r.buffer.Bytes() + if len(buf) > r.capacity { + panic("buffer too large") } - k := m[i*r.Degree : (i+1)*r.Degree] - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - mulModAcc(res, r.Ag[i], k) - } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 - resBytes, err := res.MarshalBinary() - if err != nil { - panic(err) - } + fastPath := r.LogTwoBound == 8 && r.Degree == 64 - return append(b, resBytes[4:]...) 
// first 4 bytes are uint32(len(res)) -} - -// Reset resets the Hash to its initial state. -func (r *RSis) Reset() { - r.buffer.Reset() -} + // clear the buffers of the instance. + defer r.cleanupBuffers() -// Size returns the number of bytes Sum will return. -func (r *RSis) Size() int { + m := r.bufM + mValues := r.bufMValues - // The size in bits is the size in bits of a polynomial in A. - degree := len(r.A[0]) - totalSize := degree * fr.Modulus().BitLen() / 8 + if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { + limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) + } else if r.LogTwoBound >= 8 && (fr.Bytes*8)%r.LogTwoBound == 0 { + limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) + } else { + limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + } - return totalSize -} + // method 1: fft + for i := 0; i < len(r.Ag); i++ { + if !mValues.Test(uint(i)) { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + continue + } + k := m[i*r.Degree : (i+1)*r.Degree] + if fastPath { + // fast path. + FFT64(k, r.twiddleCosets) + } else { + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + } + mulModAcc(res, r.Ag[i], k) + } + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 -// BlockSize returns the hash's underlying block size. -// The Write method must be able to accept any amount -// of data, but it may operate more efficiently if all writes -// are a multiple of the block size. -func (r *RSis) BlockSize() int { - return 0 + return nil + } } func genRandom(seed, i, j int64, buf *bytes.Buffer) fr.Element { @@ -244,26 +232,12 @@ func genRandom(seed, i, j int64, buf *bytes.Buffer) fr.Element { return res } -// mulMod computes p * q in ℤ_{p}[X]/Xᵈ+1. +// mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. 
// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes // and that they are in evaluation form on √(g) * // The result is not FFTinversed. The fft inverse is done once every // multiplications are done. -func mulMod(pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []fr.Element) []fr.Element { - - res := make([]fr.Element, len(pLagrangeCosetBitReversed)) - for i := 0; i < len(pLagrangeCosetBitReversed); i++ { - res[i].Mul(&pLagrangeCosetBitReversed[i], &qLagrangeCosetBitReversed[i]) - } - - // NOT fft inv for now, wait until every part of the keys have been multiplied - // r.Domain.FFTInverse(res, fft.DIT, true) - - return res - -} - -// mulMod + accumulate in res. +// then accumulates the mulMod result in res. func mulModAcc(res []fr.Element, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []fr.Element) { var t fr.Element for i := 0; i < len(pLagrangeCosetBitReversed); i++ { @@ -281,7 +255,6 @@ func (r *RSis) CopyWithFreshBuffer() RSis { res.buffer = bytes.Buffer{} res.bufM = make(fr.Vector, len(r.bufM)) res.bufMValues = bitset.New(r.bufMValues.Len()) - res.bufRes = make(fr.Vector, len(r.bufRes)) return res } @@ -291,9 +264,6 @@ func (r *RSis) cleanupBuffers() { for i := 0; i < len(r.bufM); i++ { r.bufM[i].SetZero() } - for i := 0; i < len(r.bufRes); i++ { - r.bufRes[i].SetZero() - } } // Split an slice of bytes representing an array of serialized field element in diff --git a/ecc/bls12-377/fr/sis/sis_test.go b/ecc/bls12-377/fr/sis/sis_test.go index f4292af51f..cd5b261536 100644 --- a/ecc/bls12-377/fr/sis/sis_test.go +++ b/ecc/bls12-377/fr/sis/sis_test.go @@ -8,10 +8,8 @@ package sis import ( "bytes" "crypto/rand" - "encoding/binary" "encoding/json" "fmt" - "io" "math/big" "math/bits" "os" @@ -75,10 +73,9 @@ func TestReference(t *testing.T) { // key generation same than in sage makeKeyDeterministic(t, sis, testCase.Params.Seed) - sis.Reset() - // hash test case entry input and compare with expected (computed by sage) - goHash, 
err := sis.Hash(inputs) + goHash := make([]fr.Element, 1< r.maxNbElementsToHash { - return nil, fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) - } - - r.Reset() - for _, e := range v { - r.Write(e.Marshal()) - } - sum := r.Sum(nil) - var rlen [4]byte - binary.BigEndian.PutUint32(rlen[:], uint32(len(sum)/fr.Bytes)) - reader := io.MultiReader(bytes.NewReader(rlen[:]), bytes.NewReader(sum)) - var result fr.Vector - _, err := result.ReadFrom(reader) - if err != nil { - return nil, err - } - return result, nil -} - func TestLimbDecompositionFastPath(t *testing.T) { assert := require.New(t) diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index 70e25c05e5..a68bae1e68 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -9,6 +9,7 @@ import ( "bytes" "encoding/binary" "errors" + "fmt" "math/bits" "github.com/bits-and-blooms/bitset" @@ -52,8 +53,8 @@ type RSis struct { maxNbElementsToHash int // allocate memory once per instance (used in Sum()) - bufM, bufRes babybear.Vector - bufMValues *bitset.BitSet + bufM babybear.Vector + bufMValues *bitset.BitSet } // NewRSis creates an instance of RSis. @@ -112,7 +113,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R A: make([][]babybear.Element, n), Ag: make([][]babybear.Element, n), bufM: make(babybear.Vector, degree*n), - bufRes: make(babybear.Vector, degree), bufMValues: bitset.New(uint(n)), maxNbElementsToHash: maxNbElementsToHash, } @@ -144,88 +144,76 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return r, nil } -func (r *RSis) Write(p []byte) (n int, err error) { - r.buffer.Write(p) - return len(p), nil -} - -// Sum appends the current hash to b and returns the resulting slice. -// It does not change the underlying hash state. -// The instance buffer is interpreted as a sequence of coefficients of size r.Bound bits long. 
-// The function returns the hash of the polynomial as a a sequence []babybear.Elements, interpreted as []bytes, -// corresponding to sum_i A[i]*m Mod X^{d}+1 -func (r *RSis) Sum(b []byte) []byte { - buf := r.buffer.Bytes() - if len(buf) > r.capacity { - panic("buffer too large") +// Hash interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, +// and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 +// +// It is equivalent to calling r.Write(element.Marshal()); outBytes = r.Sum(nil); +// ! note @gbotrel: this is a place holder, may not make sense +func (r *RSis) Hash(v, res []babybear.Element) error { + if len(res) != r.Degree { + return fmt.Errorf("output vector must have length %d", r.Degree) + } + // TODO @gbotrel check that this is needed. + for i := 0; i < len(res); i++ { + res[i].SetZero() + } + if len(v) > r.maxNbElementsToHash { + return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - - // clear the buffers of the instance. - defer r.cleanupBuffers() - - m := r.bufM - mValues := r.bufMValues + // reset the buffer + r.buffer.Reset() - if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { - limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else if r.LogTwoBound >= 8 && (babybear.Bytes*8)%r.LogTwoBound == 0 { - limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else { - limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + // write the elements to the buffer + // TODO @gbotrel for now we use a buffer, we will kill it later in the refactoring. + for _, e := range v { + r.buffer.Write(e.Marshal()) } - // we can hash now. 
- res := r.bufRes + { + // previous Sum() - // method 1: fft - for i := 0; i < len(r.Ag); i++ { - if !mValues.Test(uint(i)) { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue + buf := r.buffer.Bytes() + if len(buf) > r.capacity { + panic("buffer too large") } - k := m[i*r.Degree : (i+1)*r.Degree] - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - mulModAcc(res, r.Ag[i], k) - } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 - resBytes, err := res.MarshalBinary() - if err != nil { - panic(err) - } + fastPath := r.LogTwoBound == 8 && r.Degree == 64 - return append(b, resBytes[4:]...) // first 4 bytes are uint32(len(res)) -} - -// Reset resets the Hash to its initial state. -func (r *RSis) Reset() { - r.buffer.Reset() -} + // clear the buffers of the instance. + defer r.cleanupBuffers() -// Size returns the number of bytes Sum will return. -func (r *RSis) Size() int { + m := r.bufM + mValues := r.bufMValues - // The size in bits is the size in bits of a polynomial in A. - degree := len(r.A[0]) - totalSize := degree * babybear.Modulus().BitLen() / 8 + if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { + limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) + } else if r.LogTwoBound >= 8 && (babybear.Bytes*8)%r.LogTwoBound == 0 { + limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) + } else { + limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + } - return totalSize -} + // method 1: fft + for i := 0; i < len(r.Ag); i++ { + if !mValues.Test(uint(i)) { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + continue + } + k := m[i*r.Degree : (i+1)*r.Degree] + if fastPath { + // fast path. 
+ FFT64(k, r.twiddleCosets) + } else { + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + } + mulModAcc(res, r.Ag[i], k) + } + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 -// BlockSize returns the hash's underlying block size. -// The Write method must be able to accept any amount -// of data, but it may operate more efficiently if all writes -// are a multiple of the block size. -func (r *RSis) BlockSize() int { - return 0 + return nil + } } func genRandom(seed, i, j int64, buf *bytes.Buffer) babybear.Element { @@ -244,26 +232,12 @@ func genRandom(seed, i, j int64, buf *bytes.Buffer) babybear.Element { return res } -// mulMod computes p * q in ℤ_{p}[X]/Xᵈ+1. +// mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. // Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes // and that they are in evaluation form on √(g) * // The result is not FFTinversed. The fft inverse is done once every // multiplications are done. -func mulMod(pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []babybear.Element) []babybear.Element { - - res := make([]babybear.Element, len(pLagrangeCosetBitReversed)) - for i := 0; i < len(pLagrangeCosetBitReversed); i++ { - res[i].Mul(&pLagrangeCosetBitReversed[i], &qLagrangeCosetBitReversed[i]) - } - - // NOT fft inv for now, wait until every part of the keys have been multiplied - // r.Domain.FFTInverse(res, fft.DIT, true) - - return res - -} - -// mulMod + accumulate in res. +// then accumulates the mulMod result in res. 
func mulModAcc(res []babybear.Element, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []babybear.Element) { var t babybear.Element for i := 0; i < len(pLagrangeCosetBitReversed); i++ { @@ -281,7 +255,6 @@ func (r *RSis) CopyWithFreshBuffer() RSis { res.buffer = bytes.Buffer{} res.bufM = make(babybear.Vector, len(r.bufM)) res.bufMValues = bitset.New(r.bufMValues.Len()) - res.bufRes = make(babybear.Vector, len(r.bufRes)) return res } @@ -291,9 +264,6 @@ func (r *RSis) cleanupBuffers() { for i := 0; i < len(r.bufM); i++ { r.bufM[i].SetZero() } - for i := 0; i < len(r.bufRes); i++ { - r.bufRes[i].SetZero() - } } // Split an slice of bytes representing an array of serialized field element in diff --git a/field/babybear/sis/sis_test.go b/field/babybear/sis/sis_test.go index 770c02f3e6..8e97db534e 100644 --- a/field/babybear/sis/sis_test.go +++ b/field/babybear/sis/sis_test.go @@ -8,10 +8,8 @@ package sis import ( "bytes" "crypto/rand" - "encoding/binary" "encoding/json" "fmt" - "io" "math/big" "math/bits" "os" @@ -75,10 +73,9 @@ func TestReference(t *testing.T) { // key generation same than in sage makeKeyDeterministic(t, sis, testCase.Params.Seed) - sis.Reset() - // hash test case entry input and compare with expected (computed by sage) - goHash, err := sis.Hash(inputs) + goHash := make([]babybear.Element, 1< r.maxNbElementsToHash { - return nil, fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) - } - - r.Reset() - for _, e := range v { - r.Write(e.Marshal()) - } - sum := r.Sum(nil) - var rlen [4]byte - binary.BigEndian.PutUint32(rlen[:], uint32(len(sum)/babybear.Bytes)) - reader := io.MultiReader(bytes.NewReader(rlen[:]), bytes.NewReader(sum)) - var result babybear.Vector - _, err := result.ReadFrom(reader) - if err != nil { - return nil, err - } - return result, nil -} - func TestLimbDecompositionFastPath(t *testing.T) { assert := require.New(t) diff --git 
a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index 4f405ec356..7c9def9431 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -3,6 +3,7 @@ import ( "encoding/binary" "errors" "math/bits" + "fmt" "github.com/bits-and-blooms/bitset" "{{ .FieldPackagePath }}" @@ -45,7 +46,7 @@ type RSis struct { maxNbElementsToHash int // allocate memory once per instance (used in Sum()) - bufM, bufRes {{ .FF }}.Vector + bufM {{ .FF }}.Vector bufMValues *bitset.BitSet } @@ -105,7 +106,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R A: make([][]{{ .FF }}.Element, n), Ag: make([][]{{ .FF }}.Element, n), bufM: make({{ .FF }}.Vector, degree*n), - bufRes: make({{ .FF }}.Vector, degree), bufMValues: bitset.New(uint(n)), maxNbElementsToHash: maxNbElementsToHash, } @@ -137,88 +137,77 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return r, nil } -func (r *RSis) Write(p []byte) (n int, err error) { - r.buffer.Write(p) - return len(p), nil -} -// Sum appends the current hash to b and returns the resulting slice. -// It does not change the underlying hash state. -// The instance buffer is interpreted as a sequence of coefficients of size r.Bound bits long. -// The function returns the hash of the polynomial as a a sequence []{{ .FF }}.Elements, interpreted as []bytes, -// corresponding to sum_i A[i]*m Mod X^{d}+1 -func (r *RSis) Sum(b []byte) []byte { - buf := r.buffer.Bytes() - if len(buf) > r.capacity { - panic("buffer too large") +// Hash interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, +// and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 +// +// It is equivalent to calling r.Write(element.Marshal()); outBytes = r.Sum(nil); +// ! 
note @gbotrel: this is a place holder, may not make sense +func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { + if len(res) != r.Degree { + return fmt.Errorf("output vector must have length %d", r.Degree) + } + // TODO @gbotrel check that this is needed. + for i := 0; i < len(res); i++ { + res[i].SetZero() + } + if len(v) > r.maxNbElementsToHash { + return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - - // clear the buffers of the instance. - defer r.cleanupBuffers() - - m := r.bufM - mValues := r.bufMValues + // reset the buffer + r.buffer.Reset() - if r.LogTwoBound<8 && (8%r.LogTwoBound==0) { - limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else if r.LogTwoBound>=8 && ({{ .FF }}.Bytes*8)%r.LogTwoBound == 0 { - limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else { - limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + // write the elements to the buffer + // TODO @gbotrel for now we use a buffer, we will kill it later in the refactoring. + for _, e := range v { + r.buffer.Write(e.Marshal()) } - // we can hash now. - res := r.bufRes + { + // previous Sum() - // method 1: fft - for i := 0; i < len(r.Ag); i++ { - if !mValues.Test(uint(i)) { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue + buf := r.buffer.Bytes() + if len(buf) > r.capacity { + panic("buffer too large") } - k := m[i*r.Degree : (i+1)*r.Degree] - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - mulModAcc(res, r.Ag[i], k) - } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 - - resBytes, err := res.MarshalBinary() - if err != nil { - panic(err) - } - return append(b, resBytes[4:]...) 
// first 4 bytes are uint32(len(res)) -} + fastPath := r.LogTwoBound == 8 && r.Degree == 64 -// Reset resets the Hash to its initial state. -func (r *RSis) Reset() { - r.buffer.Reset() -} + // clear the buffers of the instance. + defer r.cleanupBuffers() -// Size returns the number of bytes Sum will return. -func (r *RSis) Size() int { + m := r.bufM + mValues := r.bufMValues - // The size in bits is the size in bits of a polynomial in A. - degree := len(r.A[0]) - totalSize := degree * {{ .FF }}.Modulus().BitLen() / 8 + if r.LogTwoBound<8 && (8%r.LogTwoBound==0) { + limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) + } else if r.LogTwoBound>=8 && ({{ .FF }}.Bytes*8)%r.LogTwoBound == 0 { + limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) + } else { + limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + } - return totalSize -} + // method 1: fft + for i := 0; i < len(r.Ag); i++ { + if !mValues.Test(uint(i)) { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + continue + } + k := m[i*r.Degree : (i+1)*r.Degree] + if fastPath { + // fast path. + FFT64(k, r.twiddleCosets) + } else { + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + } + mulModAcc(res, r.Ag[i], k) + } + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 -// BlockSize returns the hash's underlying block size. -// The Write method must be able to accept any amount -// of data, but it may operate more efficiently if all writes -// are a multiple of the block size. -func (r *RSis) BlockSize() int { - return 0 + return nil + } } func genRandom(seed, i, j int64, buf *bytes.Buffer) {{ .FF }}.Element { @@ -237,26 +226,12 @@ func genRandom(seed, i, j int64, buf *bytes.Buffer) {{ .FF }}.Element { return res } -// mulMod computes p * q in ℤ_{p}[X]/Xᵈ+1. +// mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. 
// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes // and that they are in evaluation form on √(g) * // The result is not FFTinversed. The fft inverse is done once every // multiplications are done. -func mulMod(pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []{{ .FF }}.Element) []{{ .FF }}.Element { - - res := make([]{{ .FF }}.Element, len(pLagrangeCosetBitReversed)) - for i := 0; i < len(pLagrangeCosetBitReversed); i++ { - res[i].Mul(&pLagrangeCosetBitReversed[i], &qLagrangeCosetBitReversed[i]) - } - - // NOT fft inv for now, wait until every part of the keys have been multiplied - // r.Domain.FFTInverse(res, fft.DIT, true) - - return res - -} - -// mulMod + accumulate in res. +// then accumulates the mulMod result in res. func mulModAcc(res []{{ .FF }}.Element, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []{{ .FF }}.Element) { var t {{ .FF }}.Element for i := 0; i < len(pLagrangeCosetBitReversed); i++ { @@ -274,7 +249,6 @@ func (r *RSis) CopyWithFreshBuffer() RSis { res.buffer = bytes.Buffer{} res.bufM = make({{ .FF }}.Vector, len(r.bufM)) res.bufMValues = bitset.New(r.bufMValues.Len()) - res.bufRes = make({{ .FF }}.Vector, len(r.bufRes)) return res } @@ -284,9 +258,6 @@ func (r *RSis) cleanupBuffers() { for i := 0; i < len(r.bufM); i++ { r.bufM[i].SetZero() } - for i := 0; i < len(r.bufRes); i++ { - r.bufRes[i].SetZero() - } } // Split an slice of bytes representing an array of serialized field element in diff --git a/field/generator/internal/templates/sis/sis.test.go.tmpl b/field/generator/internal/templates/sis/sis.test.go.tmpl index 3433c4b07c..777fd33cb2 100644 --- a/field/generator/internal/templates/sis/sis.test.go.tmpl +++ b/field/generator/internal/templates/sis/sis.test.go.tmpl @@ -1,10 +1,8 @@ import ( "bytes" "crypto/rand" - "encoding/binary" "encoding/json" "fmt" - "io" "math/bits" "os" "testing" @@ -68,10 +66,9 @@ func TestReference(t *testing.T) { // key generation same than in sage 
makeKeyDeterministic(t, sis, testCase.Params.Seed) - sis.Reset() - // hash test case entry input and compare with expected (computed by sage) - goHash, err := sis.Hash(inputs) + goHash := make([]{{ .FF }}.Element, 1 << testCase.Params.LogTwoDegree) + err = sis.Hash(inputs, goHash) assert.NoError(err) assert.EqualValues( @@ -187,50 +184,6 @@ func eval(p []{{ .FF }}.Element, x {{ .FF }}.Element) {{ .FF }}.Element { return res } -func TestMulMod(t *testing.T) { - - size := 4 - - p := make([]{{ .FF }}.Element, size) - q := make([]{{ .FF }}.Element, size) - pCopy := make([]{{ .FF }}.Element, size) - qCopy := make([]{{ .FF }}.Element, size) - for i := 0; i < size; i++ { - p[i].SetRandom() - pCopy[i].Set(&p[i]) - q[i].SetRandom() - qCopy[i].Set(&q[i]) - } - - // creation of the domain - shift, err := {{ .FF }}.Generator(uint64(2 * size)) - if err != nil { - t.Fatal(err) - } - var g {{ .FF }}.Element - g.Square(&shift) - domain := fft.NewDomain(uint64(size), fft.WithShift(shift)) - - // mul mod - domain.FFT(p, fft.DIF, fft.OnCoset()) - domain.FFT(q, fft.DIF, fft.OnCoset()) - r := mulMod(p, q) - domain.FFTInverse(r, fft.DIT, fft.OnCoset()) - - // manually check the product on the zeroes of X^4+1 - for i := 0; i < 4; i++ { - u := eval(pCopy, shift) - v := eval(qCopy, shift) - w := eval(r, shift) - u.Mul(&u, &v) - if !w.Equal(&u) { - t.Fatal("mul mol failed") - } - shift.Mul(&shift, &g) - } - -} - func makeKeyDeterministic(t *testing.T, sis *RSis, _seed int64) { t.Helper() // generate the key deterministically, the same way @@ -374,6 +327,8 @@ func benchmarkSIS(b *testing.B, input []{{ .FF }}.Element, sparse bool, logTwoBo b.Fatal(err) } + res := make([]{{ .FF }}.Element, 1 << logTwoDegree) + // We introduce a custom metric which is the time per field element // Since the benchmark object allows to report extra meta but does // not allow accessing them. We measure the time ourself. 
@@ -381,7 +336,7 @@ func benchmarkSIS(b *testing.B, input []{{ .FF }}.Element, sparse bool, logTwoBo startTime := time.Now() b.ResetTimer() for i := 0; i < b.N; i++ { - _, err = instance.Hash(input) + err = instance.Hash(input, res) if err != nil { b.Fatal(err) } @@ -398,32 +353,6 @@ func benchmarkSIS(b *testing.B, input []{{ .FF }}.Element, sparse bool, logTwoBo }) } -// Hash interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, -// and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 -// -// It is equivalent to calling r.Write(element.Marshal()); outBytes = r.Sum(nil); -// ! note @gbotrel: this is a place holder, may not make sense -func (r *RSis) Hash(v []{{ .FF }}.Element) ([]{{ .FF }}.Element, error) { - if len(v) > r.maxNbElementsToHash { - return nil, fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) - } - - r.Reset() - for _, e := range v { - r.Write(e.Marshal()) - } - sum := r.Sum(nil) - var rlen [4]byte - binary.BigEndian.PutUint32(rlen[:], uint32(len(sum)/{{ .FF }}.Bytes)) - reader := io.MultiReader(bytes.NewReader(rlen[:]), bytes.NewReader(sum)) - var result {{ .FF }}.Vector - _, err := result.ReadFrom(reader) - if err != nil { - return nil, err - } - return result, nil -} - func TestLimbDecompositionFastPath(t *testing.T) { assert := require.New(t) diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index 7936fec96c..aa0321d98c 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -9,6 +9,7 @@ import ( "bytes" "encoding/binary" "errors" + "fmt" "math/bits" "github.com/bits-and-blooms/bitset" @@ -52,8 +53,8 @@ type RSis struct { maxNbElementsToHash int // allocate memory once per instance (used in Sum()) - bufM, bufRes goldilocks.Vector - bufMValues *bitset.BitSet + bufM goldilocks.Vector + bufMValues *bitset.BitSet } // NewRSis creates an instance of RSis. 
@@ -112,7 +113,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R A: make([][]goldilocks.Element, n), Ag: make([][]goldilocks.Element, n), bufM: make(goldilocks.Vector, degree*n), - bufRes: make(goldilocks.Vector, degree), bufMValues: bitset.New(uint(n)), maxNbElementsToHash: maxNbElementsToHash, } @@ -144,88 +144,76 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return r, nil } -func (r *RSis) Write(p []byte) (n int, err error) { - r.buffer.Write(p) - return len(p), nil -} - -// Sum appends the current hash to b and returns the resulting slice. -// It does not change the underlying hash state. -// The instance buffer is interpreted as a sequence of coefficients of size r.Bound bits long. -// The function returns the hash of the polynomial as a a sequence []goldilocks.Elements, interpreted as []bytes, -// corresponding to sum_i A[i]*m Mod X^{d}+1 -func (r *RSis) Sum(b []byte) []byte { - buf := r.buffer.Bytes() - if len(buf) > r.capacity { - panic("buffer too large") +// Hash interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, +// and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 +// +// It is equivalent to calling r.Write(element.Marshal()); outBytes = r.Sum(nil); +// ! note @gbotrel: this is a place holder, may not make sense +func (r *RSis) Hash(v, res []goldilocks.Element) error { + if len(res) != r.Degree { + return fmt.Errorf("output vector must have length %d", r.Degree) + } + // TODO @gbotrel check that this is needed. + for i := 0; i < len(res); i++ { + res[i].SetZero() + } + if len(v) > r.maxNbElementsToHash { + return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - - // clear the buffers of the instance. 
- defer r.cleanupBuffers() - - m := r.bufM - mValues := r.bufMValues + // reset the buffer + r.buffer.Reset() - if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { - limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else if r.LogTwoBound >= 8 && (goldilocks.Bytes*8)%r.LogTwoBound == 0 { - limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else { - limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + // write the elements to the buffer + // TODO @gbotrel for now we use a buffer, we will kill it later in the refactoring. + for _, e := range v { + r.buffer.Write(e.Marshal()) } - // we can hash now. - res := r.bufRes + { + // previous Sum() - // method 1: fft - for i := 0; i < len(r.Ag); i++ { - if !mValues.Test(uint(i)) { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue + buf := r.buffer.Bytes() + if len(buf) > r.capacity { + panic("buffer too large") } - k := m[i*r.Degree : (i+1)*r.Degree] - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - mulModAcc(res, r.Ag[i], k) - } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 - resBytes, err := res.MarshalBinary() - if err != nil { - panic(err) - } + fastPath := r.LogTwoBound == 8 && r.Degree == 64 - return append(b, resBytes[4:]...) // first 4 bytes are uint32(len(res)) -} - -// Reset resets the Hash to its initial state. -func (r *RSis) Reset() { - r.buffer.Reset() -} + // clear the buffers of the instance. + defer r.cleanupBuffers() -// Size returns the number of bytes Sum will return. -func (r *RSis) Size() int { + m := r.bufM + mValues := r.bufMValues - // The size in bits is the size in bits of a polynomial in A. 
- degree := len(r.A[0]) - totalSize := degree * goldilocks.Modulus().BitLen() / 8 + if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { + limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) + } else if r.LogTwoBound >= 8 && (goldilocks.Bytes*8)%r.LogTwoBound == 0 { + limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) + } else { + limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + } - return totalSize -} + // method 1: fft + for i := 0; i < len(r.Ag); i++ { + if !mValues.Test(uint(i)) { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + continue + } + k := m[i*r.Degree : (i+1)*r.Degree] + if fastPath { + // fast path. + FFT64(k, r.twiddleCosets) + } else { + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + } + mulModAcc(res, r.Ag[i], k) + } + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 -// BlockSize returns the hash's underlying block size. -// The Write method must be able to accept any amount -// of data, but it may operate more efficiently if all writes -// are a multiple of the block size. -func (r *RSis) BlockSize() int { - return 0 + return nil + } } func genRandom(seed, i, j int64, buf *bytes.Buffer) goldilocks.Element { @@ -244,26 +232,12 @@ func genRandom(seed, i, j int64, buf *bytes.Buffer) goldilocks.Element { return res } -// mulMod computes p * q in ℤ_{p}[X]/Xᵈ+1. +// mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. // Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes // and that they are in evaluation form on √(g) * // The result is not FFTinversed. The fft inverse is done once every // multiplications are done. 
-func mulMod(pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []goldilocks.Element) []goldilocks.Element { - - res := make([]goldilocks.Element, len(pLagrangeCosetBitReversed)) - for i := 0; i < len(pLagrangeCosetBitReversed); i++ { - res[i].Mul(&pLagrangeCosetBitReversed[i], &qLagrangeCosetBitReversed[i]) - } - - // NOT fft inv for now, wait until every part of the keys have been multiplied - // r.Domain.FFTInverse(res, fft.DIT, true) - - return res - -} - -// mulMod + accumulate in res. +// then accumulates the mulMod result in res. func mulModAcc(res []goldilocks.Element, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []goldilocks.Element) { var t goldilocks.Element for i := 0; i < len(pLagrangeCosetBitReversed); i++ { @@ -281,7 +255,6 @@ func (r *RSis) CopyWithFreshBuffer() RSis { res.buffer = bytes.Buffer{} res.bufM = make(goldilocks.Vector, len(r.bufM)) res.bufMValues = bitset.New(r.bufMValues.Len()) - res.bufRes = make(goldilocks.Vector, len(r.bufRes)) return res } @@ -291,9 +264,6 @@ func (r *RSis) cleanupBuffers() { for i := 0; i < len(r.bufM); i++ { r.bufM[i].SetZero() } - for i := 0; i < len(r.bufRes); i++ { - r.bufRes[i].SetZero() - } } // Split an slice of bytes representing an array of serialized field element in diff --git a/field/goldilocks/sis/sis_test.go b/field/goldilocks/sis/sis_test.go index ad9b370a2e..8c931677d5 100644 --- a/field/goldilocks/sis/sis_test.go +++ b/field/goldilocks/sis/sis_test.go @@ -8,10 +8,8 @@ package sis import ( "bytes" "crypto/rand" - "encoding/binary" "encoding/json" "fmt" - "io" "math/big" "math/bits" "os" @@ -75,10 +73,9 @@ func TestReference(t *testing.T) { // key generation same than in sage makeKeyDeterministic(t, sis, testCase.Params.Seed) - sis.Reset() - // hash test case entry input and compare with expected (computed by sage) - goHash, err := sis.Hash(inputs) + goHash := make([]goldilocks.Element, 1< r.maxNbElementsToHash { - return nil, fmt.Errorf("can't hash more than %d elements with params 
provided in constructor", r.maxNbElementsToHash) - } - - r.Reset() - for _, e := range v { - r.Write(e.Marshal()) - } - sum := r.Sum(nil) - var rlen [4]byte - binary.BigEndian.PutUint32(rlen[:], uint32(len(sum)/goldilocks.Bytes)) - reader := io.MultiReader(bytes.NewReader(rlen[:]), bytes.NewReader(sum)) - var result goldilocks.Vector - _, err := result.ReadFrom(reader) - if err != nil { - return nil, err - } - return result, nil -} - func TestLimbDecompositionFastPath(t *testing.T) { assert := require.New(t) diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index 54383e5f2e..ed10639ac9 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -9,6 +9,7 @@ import ( "bytes" "encoding/binary" "errors" + "fmt" "math/bits" "github.com/bits-and-blooms/bitset" @@ -52,8 +53,8 @@ type RSis struct { maxNbElementsToHash int // allocate memory once per instance (used in Sum()) - bufM, bufRes koalabear.Vector - bufMValues *bitset.BitSet + bufM koalabear.Vector + bufMValues *bitset.BitSet } // NewRSis creates an instance of RSis. @@ -112,7 +113,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R A: make([][]koalabear.Element, n), Ag: make([][]koalabear.Element, n), bufM: make(koalabear.Vector, degree*n), - bufRes: make(koalabear.Vector, degree), bufMValues: bitset.New(uint(n)), maxNbElementsToHash: maxNbElementsToHash, } @@ -144,88 +144,76 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return r, nil } -func (r *RSis) Write(p []byte) (n int, err error) { - r.buffer.Write(p) - return len(p), nil -} - -// Sum appends the current hash to b and returns the resulting slice. -// It does not change the underlying hash state. -// The instance buffer is interpreted as a sequence of coefficients of size r.Bound bits long. 
-// The function returns the hash of the polynomial as a a sequence []koalabear.Elements, interpreted as []bytes, -// corresponding to sum_i A[i]*m Mod X^{d}+1 -func (r *RSis) Sum(b []byte) []byte { - buf := r.buffer.Bytes() - if len(buf) > r.capacity { - panic("buffer too large") +// Hash interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, +// and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 +// +// It is equivalent to calling r.Write(element.Marshal()); outBytes = r.Sum(nil); +// ! note @gbotrel: this is a place holder, may not make sense +func (r *RSis) Hash(v, res []koalabear.Element) error { + if len(res) != r.Degree { + return fmt.Errorf("output vector must have length %d", r.Degree) + } + // TODO @gbotrel check that this is needed. + for i := 0; i < len(res); i++ { + res[i].SetZero() + } + if len(v) > r.maxNbElementsToHash { + return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - - // clear the buffers of the instance. - defer r.cleanupBuffers() - - m := r.bufM - mValues := r.bufMValues + // reset the buffer + r.buffer.Reset() - if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { - limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else if r.LogTwoBound >= 8 && (koalabear.Bytes*8)%r.LogTwoBound == 0 { - limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else { - limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + // write the elements to the buffer + // TODO @gbotrel for now we use a buffer, we will kill it later in the refactoring. + for _, e := range v { + r.buffer.Write(e.Marshal()) } - // we can hash now. 
- res := r.bufRes + { + // previous Sum() - // method 1: fft - for i := 0; i < len(r.Ag); i++ { - if !mValues.Test(uint(i)) { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue + buf := r.buffer.Bytes() + if len(buf) > r.capacity { + panic("buffer too large") } - k := m[i*r.Degree : (i+1)*r.Degree] - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - mulModAcc(res, r.Ag[i], k) - } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 - resBytes, err := res.MarshalBinary() - if err != nil { - panic(err) - } + fastPath := r.LogTwoBound == 8 && r.Degree == 64 - return append(b, resBytes[4:]...) // first 4 bytes are uint32(len(res)) -} - -// Reset resets the Hash to its initial state. -func (r *RSis) Reset() { - r.buffer.Reset() -} + // clear the buffers of the instance. + defer r.cleanupBuffers() -// Size returns the number of bytes Sum will return. -func (r *RSis) Size() int { + m := r.bufM + mValues := r.bufMValues - // The size in bits is the size in bits of a polynomial in A. - degree := len(r.A[0]) - totalSize := degree * koalabear.Modulus().BitLen() / 8 + if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { + limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) + } else if r.LogTwoBound >= 8 && (koalabear.Bytes*8)%r.LogTwoBound == 0 { + limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) + } else { + limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + } - return totalSize -} + // method 1: fft + for i := 0; i < len(r.Ag); i++ { + if !mValues.Test(uint(i)) { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + continue + } + k := m[i*r.Degree : (i+1)*r.Degree] + if fastPath { + // fast path. 
+ FFT64(k, r.twiddleCosets) + } else { + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + } + mulModAcc(res, r.Ag[i], k) + } + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 -// BlockSize returns the hash's underlying block size. -// The Write method must be able to accept any amount -// of data, but it may operate more efficiently if all writes -// are a multiple of the block size. -func (r *RSis) BlockSize() int { - return 0 + return nil + } } func genRandom(seed, i, j int64, buf *bytes.Buffer) koalabear.Element { @@ -244,26 +232,12 @@ func genRandom(seed, i, j int64, buf *bytes.Buffer) koalabear.Element { return res } -// mulMod computes p * q in ℤ_{p}[X]/Xᵈ+1. +// mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. // Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes // and that they are in evaluation form on √(g) * // The result is not FFTinversed. The fft inverse is done once every // multiplications are done. -func mulMod(pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []koalabear.Element) []koalabear.Element { - - res := make([]koalabear.Element, len(pLagrangeCosetBitReversed)) - for i := 0; i < len(pLagrangeCosetBitReversed); i++ { - res[i].Mul(&pLagrangeCosetBitReversed[i], &qLagrangeCosetBitReversed[i]) - } - - // NOT fft inv for now, wait until every part of the keys have been multiplied - // r.Domain.FFTInverse(res, fft.DIT, true) - - return res - -} - -// mulMod + accumulate in res. +// then accumulates the mulMod result in res. 
func mulModAcc(res []koalabear.Element, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []koalabear.Element) { var t koalabear.Element for i := 0; i < len(pLagrangeCosetBitReversed); i++ { @@ -281,7 +255,6 @@ func (r *RSis) CopyWithFreshBuffer() RSis { res.buffer = bytes.Buffer{} res.bufM = make(koalabear.Vector, len(r.bufM)) res.bufMValues = bitset.New(r.bufMValues.Len()) - res.bufRes = make(koalabear.Vector, len(r.bufRes)) return res } @@ -291,9 +264,6 @@ func (r *RSis) cleanupBuffers() { for i := 0; i < len(r.bufM); i++ { r.bufM[i].SetZero() } - for i := 0; i < len(r.bufRes); i++ { - r.bufRes[i].SetZero() - } } // Split an slice of bytes representing an array of serialized field element in diff --git a/field/koalabear/sis/sis_test.go b/field/koalabear/sis/sis_test.go index b66013d77d..6d4e135eb4 100644 --- a/field/koalabear/sis/sis_test.go +++ b/field/koalabear/sis/sis_test.go @@ -8,10 +8,8 @@ package sis import ( "bytes" "crypto/rand" - "encoding/binary" "encoding/json" "fmt" - "io" "math/big" "math/bits" "os" @@ -75,10 +73,9 @@ func TestReference(t *testing.T) { // key generation same than in sage makeKeyDeterministic(t, sis, testCase.Params.Seed) - sis.Reset() - // hash test case entry input and compare with expected (computed by sage) - goHash, err := sis.Hash(inputs) + goHash := make([]koalabear.Element, 1< r.maxNbElementsToHash { - return nil, fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) - } - - r.Reset() - for _, e := range v { - r.Write(e.Marshal()) - } - sum := r.Sum(nil) - var rlen [4]byte - binary.BigEndian.PutUint32(rlen[:], uint32(len(sum)/koalabear.Bytes)) - reader := io.MultiReader(bytes.NewReader(rlen[:]), bytes.NewReader(sum)) - var result koalabear.Vector - _, err := result.ReadFrom(reader) - if err != nil { - return nil, err - } - return result, nil -} - func TestLimbDecompositionFastPath(t *testing.T) { assert := require.New(t) From 9c60488e06d4c35658a1df5d3efe69dff50350ed Mon 
Sep 17 00:00:00 2001 From: Gautam Botrel Date: Tue, 7 Jan 2025 10:52:15 -0600 Subject: [PATCH 03/25] refactor: simplify genRandom --- ecc/bls12-377/fr/sis/sis.go | 18 ++++++++---------- field/babybear/sis/sis.go | 18 ++++++++---------- .../internal/templates/sis/sis.go.tmpl | 18 ++++++++---------- field/goldilocks/sis/sis.go | 18 ++++++++---------- field/koalabear/sis/sis.go | 18 ++++++++---------- 5 files changed, 40 insertions(+), 50 deletions(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index c83503e129..6b5e147449 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -126,13 +126,12 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R ag := make([]fr.Element, n*r.Degree) parallel.Execute(n, func(start, end int) { - var buf bytes.Buffer for i := start; i < end; i++ { rstart, rend := i*r.Degree, (i+1)*r.Degree r.A[i] = a[rstart:rend:rend] r.Ag[i] = ag[rstart:rend:rend] for j := 0; j < r.Degree; j++ { - r.A[i][j] = genRandom(seed, int64(i), int64(j), &buf) + r.A[i][j] = deriveRandomElementFromSeed(seed, int64(i), int64(j)) } // fill Ag the evaluation form of the polynomials in A on the coset √(g) * @@ -216,15 +215,14 @@ func (r *RSis) Hash(v, res []fr.Element) error { } } -func genRandom(seed, i, j int64, buf *bytes.Buffer) fr.Element { +func deriveRandomElementFromSeed(seed, i, j int64) fr.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) - buf.Reset() - buf.WriteString("SIS") - binary.Write(buf, binary.BigEndian, seed) - binary.Write(buf, binary.BigEndian, i) - binary.Write(buf, binary.BigEndian, j) - - digest := blake2b.Sum256(buf.Bytes()) + digest := blake2b.Sum256(buf[:]) var res fr.Element res.SetBytes(digest[:]) diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index a68bae1e68..e040a890f3 100644 --- 
a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -126,13 +126,12 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R ag := make([]babybear.Element, n*r.Degree) parallel.Execute(n, func(start, end int) { - var buf bytes.Buffer for i := start; i < end; i++ { rstart, rend := i*r.Degree, (i+1)*r.Degree r.A[i] = a[rstart:rend:rend] r.Ag[i] = ag[rstart:rend:rend] for j := 0; j < r.Degree; j++ { - r.A[i][j] = genRandom(seed, int64(i), int64(j), &buf) + r.A[i][j] = deriveRandomElementFromSeed(seed, int64(i), int64(j)) } // fill Ag the evaluation form of the polynomials in A on the coset √(g) * @@ -216,15 +215,14 @@ func (r *RSis) Hash(v, res []babybear.Element) error { } } -func genRandom(seed, i, j int64, buf *bytes.Buffer) babybear.Element { +func deriveRandomElementFromSeed(seed, i, j int64) babybear.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) - buf.Reset() - buf.WriteString("SIS") - binary.Write(buf, binary.BigEndian, seed) - binary.Write(buf, binary.BigEndian, i) - binary.Write(buf, binary.BigEndian, j) - - digest := blake2b.Sum256(buf.Bytes()) + digest := blake2b.Sum256(buf[:]) var res babybear.Element res.SetBytes(digest[:]) diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index 7c9def9431..b17890cb2a 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -119,13 +119,12 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R ag := make([]{{ .FF }}.Element, n*r.Degree) parallel.Execute(n, func(start, end int) { - var buf bytes.Buffer for i := start; i < end; i++ { rstart, rend := i*r.Degree, (i+1)*r.Degree r.A[i] = a[rstart:rend:rend] r.Ag[i] = ag[rstart:rend:rend] for j := 0; j < r.Degree; j++ { 
- r.A[i][j] = genRandom(seed, int64(i), int64(j), &buf) + r.A[i][j] = deriveRandomElementFromSeed(seed, int64(i), int64(j)) } // fill Ag the evaluation form of the polynomials in A on the coset √(g) * @@ -210,15 +209,14 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { } } -func genRandom(seed, i, j int64, buf *bytes.Buffer) {{ .FF }}.Element { +func deriveRandomElementFromSeed(seed, i, j int64) {{ .FF }}.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) - buf.Reset() - buf.WriteString("SIS") - binary.Write(buf, binary.BigEndian, seed) - binary.Write(buf, binary.BigEndian, i) - binary.Write(buf, binary.BigEndian, j) - - digest := blake2b.Sum256(buf.Bytes()) + digest := blake2b.Sum256(buf[:]) var res {{ .FF }}.Element res.SetBytes(digest[:]) diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index aa0321d98c..87a7af91a8 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -126,13 +126,12 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R ag := make([]goldilocks.Element, n*r.Degree) parallel.Execute(n, func(start, end int) { - var buf bytes.Buffer for i := start; i < end; i++ { rstart, rend := i*r.Degree, (i+1)*r.Degree r.A[i] = a[rstart:rend:rend] r.Ag[i] = ag[rstart:rend:rend] for j := 0; j < r.Degree; j++ { - r.A[i][j] = genRandom(seed, int64(i), int64(j), &buf) + r.A[i][j] = deriveRandomElementFromSeed(seed, int64(i), int64(j)) } // fill Ag the evaluation form of the polynomials in A on the coset √(g) * @@ -216,15 +215,14 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { } } -func genRandom(seed, i, j int64, buf *bytes.Buffer) goldilocks.Element { +func deriveRandomElementFromSeed(seed, i, j int64) goldilocks.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], 
uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) - buf.Reset() - buf.WriteString("SIS") - binary.Write(buf, binary.BigEndian, seed) - binary.Write(buf, binary.BigEndian, i) - binary.Write(buf, binary.BigEndian, j) - - digest := blake2b.Sum256(buf.Bytes()) + digest := blake2b.Sum256(buf[:]) var res goldilocks.Element res.SetBytes(digest[:]) diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index ed10639ac9..625a18706f 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -126,13 +126,12 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R ag := make([]koalabear.Element, n*r.Degree) parallel.Execute(n, func(start, end int) { - var buf bytes.Buffer for i := start; i < end; i++ { rstart, rend := i*r.Degree, (i+1)*r.Degree r.A[i] = a[rstart:rend:rend] r.Ag[i] = ag[rstart:rend:rend] for j := 0; j < r.Degree; j++ { - r.A[i][j] = genRandom(seed, int64(i), int64(j), &buf) + r.A[i][j] = deriveRandomElementFromSeed(seed, int64(i), int64(j)) } // fill Ag the evaluation form of the polynomials in A on the coset √(g) * @@ -216,15 +215,14 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { } } -func genRandom(seed, i, j int64, buf *bytes.Buffer) koalabear.Element { +func deriveRandomElementFromSeed(seed, i, j int64) koalabear.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) - buf.Reset() - buf.WriteString("SIS") - binary.Write(buf, binary.BigEndian, seed) - binary.Write(buf, binary.BigEndian, i) - binary.Write(buf, binary.BigEndian, j) - - digest := blake2b.Sum256(buf.Bytes()) + digest := blake2b.Sum256(buf[:]) var res koalabear.Element res.SetBytes(digest[:]) From f89a50f7fd86d4cee899c1c3bc4529ec255598ed Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Tue, 7 Jan 2025 11:06:24 
-0600 Subject: [PATCH 04/25] refactor: more code cleaning --- ecc/bls12-377/fr/sis/sis.go | 34 +++++------ field/babybear/sis/sis.go | 34 +++++------ .../internal/templates/sis/sis.go.tmpl | 60 +++++++------------ field/goldilocks/sis/sis.go | 34 +++++------ field/koalabear/sis/sis.go | 34 +++++------ 5 files changed, 83 insertions(+), 113 deletions(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 6b5e147449..4a8367672f 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -19,10 +19,6 @@ import ( "golang.org/x/crypto/blake2b" ) -var ( - ErrNotAPowerOfTwo = errors.New("d must be a power of 2") -) - // Ring-SIS instance type RSis struct { @@ -215,21 +211,6 @@ func (r *RSis) Hash(v, res []fr.Element) error { } } -func deriveRandomElementFromSeed(seed, i, j int64) fr.Element { - var buf [3 + 3*8]byte - copy(buf[:3], "SIS") - binary.BigEndian.PutUint64(buf[3:], uint64(seed)) - binary.BigEndian.PutUint64(buf[11:], uint64(i)) - binary.BigEndian.PutUint64(buf[19:], uint64(j)) - - digest := blake2b.Sum256(buf[:]) - - var res fr.Element - res.SetBytes(digest[:]) - - return res -} - // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. 
// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes // and that they are in evaluation form on √(g) * @@ -419,3 +400,18 @@ func limbDecomposeBytes8_64(buf []byte, m fr.Vector, mValues *bitset.BitSet) { } } } + +func deriveRandomElementFromSeed(seed, i, j int64) fr.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) + + digest := blake2b.Sum256(buf[:]) + + var res fr.Element + res.SetBytes(digest[:]) + + return res +} diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index e040a890f3..6fbf5f8b54 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -19,10 +19,6 @@ import ( "golang.org/x/crypto/blake2b" ) -var ( - ErrNotAPowerOfTwo = errors.New("d must be a power of 2") -) - // Ring-SIS instance type RSis struct { @@ -215,21 +211,6 @@ func (r *RSis) Hash(v, res []babybear.Element) error { } } -func deriveRandomElementFromSeed(seed, i, j int64) babybear.Element { - var buf [3 + 3*8]byte - copy(buf[:3], "SIS") - binary.BigEndian.PutUint64(buf[3:], uint64(seed)) - binary.BigEndian.PutUint64(buf[11:], uint64(i)) - binary.BigEndian.PutUint64(buf[19:], uint64(j)) - - digest := blake2b.Sum256(buf[:]) - - var res babybear.Element - res.SetBytes(digest[:]) - - return res -} - // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. 
// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes // and that they are in evaluation form on √(g) * @@ -419,3 +400,18 @@ func limbDecomposeBytes8_64(buf []byte, m babybear.Vector, mValues *bitset.BitSe } } } + +func deriveRandomElementFromSeed(seed, i, j int64) babybear.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) + + digest := blake2b.Sum256(buf[:]) + + var res babybear.Element + res.SetBytes(digest[:]) + + return res +} diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index b17890cb2a..68c7e6f948 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -12,16 +12,8 @@ import ( "golang.org/x/crypto/blake2b" ) -var ( - ErrNotAPowerOfTwo = errors.New("d must be a power of 2") -) - -// Ring-SIS instance +// RSis is the Ring-SIS instance type RSis struct { - - // buffer storing the data to hash - buffer bytes.Buffer - // Vectors in ℤ_{p}/Xⁿ+1 // A[i] is the i-th polynomial. // Ag the evaluation form of the polynomials in A on the coset √(g) * @@ -33,18 +25,18 @@ type RSis struct { // cf https://hackmd.io/7OODKWQZRRW9RxM5BaXtIw , B >= 3. LogTwoBound int + // d, the degree of X^{d}+1 + Degree int + // domain for the polynomial multiplication Domain *fft.Domain twiddleCosets []{{ .FF }}.Element // see FFT64 and precomputeTwiddlesCoset - // d, the degree of X^{d}+1 - Degree int - - // in bytes, represents the maximum number of bytes the .Write(...) 
will handle; - // ( maximum number of bytes to sum ) - capacity int maxNbElementsToHash int + // buffer storing the data to hash + buffer bytes.Buffer + // allocate memory once per instance (used in Sum()) bufM {{ .FF }}.Vector bufMValues *bitset.BitSet @@ -66,14 +58,11 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R } degree := 1 << logTwoDegree - capacity := maxNbElementsToHash * {{ .FF }}.Bytes // n: number of polynomials in A // len(m) == degree * n // with each element in m being logTwoBounds bits from the instance buffer. // that is, to fill m, we need [degree * n * logTwoBound] bits of data - // capacity == [degree * n * logTwoBound] / 8 - // n == (capacity*8)/(degree*logTwoBound) // First n <- #limbs to represent a single field element n := ({{ .FF }}.Bytes * 8) / logTwoBound @@ -100,7 +89,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R r := &RSis{ LogTwoBound: logTwoBound, - capacity: capacity, Degree: degree, Domain: fft.NewDomain(uint64(degree), fft.WithShift(shift)), A: make([][]{{ .FF }}.Element, n), @@ -167,9 +155,6 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { // previous Sum() buf := r.buffer.Bytes() - if len(buf) > r.capacity { - panic("buffer too large") - } fastPath := r.LogTwoBound == 8 && r.Degree == 64 @@ -209,21 +194,6 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { } } -func deriveRandomElementFromSeed(seed, i, j int64) {{ .FF }}.Element { - var buf [3 + 3*8]byte - copy(buf[:3], "SIS") - binary.BigEndian.PutUint64(buf[3:], uint64(seed)) - binary.BigEndian.PutUint64(buf[11:], uint64(i)) - binary.BigEndian.PutUint64(buf[19:], uint64(j)) - - digest := blake2b.Sum256(buf[:]) - - var res {{ .FF }}.Element - res.SetBytes(digest[:]) - - return res -} - // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. 
// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes // and that they are in evaluation form on √(g) * @@ -424,3 +394,19 @@ func limbDecomposeBytes8_64(buf []byte, m {{ .FF }}.Vector, mValues *bitset.BitS } } } + + +func deriveRandomElementFromSeed(seed, i, j int64) {{ .FF }}.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) + + digest := blake2b.Sum256(buf[:]) + + var res {{ .FF }}.Element + res.SetBytes(digest[:]) + + return res +} \ No newline at end of file diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index 87a7af91a8..ddd7960752 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -19,10 +19,6 @@ import ( "golang.org/x/crypto/blake2b" ) -var ( - ErrNotAPowerOfTwo = errors.New("d must be a power of 2") -) - // Ring-SIS instance type RSis struct { @@ -215,21 +211,6 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { } } -func deriveRandomElementFromSeed(seed, i, j int64) goldilocks.Element { - var buf [3 + 3*8]byte - copy(buf[:3], "SIS") - binary.BigEndian.PutUint64(buf[3:], uint64(seed)) - binary.BigEndian.PutUint64(buf[11:], uint64(i)) - binary.BigEndian.PutUint64(buf[19:], uint64(j)) - - digest := blake2b.Sum256(buf[:]) - - var res goldilocks.Element - res.SetBytes(digest[:]) - - return res -} - // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. 
// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes // and that they are in evaluation form on √(g) * @@ -419,3 +400,18 @@ func limbDecomposeBytes8_64(buf []byte, m goldilocks.Vector, mValues *bitset.Bit } } } + +func deriveRandomElementFromSeed(seed, i, j int64) goldilocks.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) + + digest := blake2b.Sum256(buf[:]) + + var res goldilocks.Element + res.SetBytes(digest[:]) + + return res +} diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index 625a18706f..eacfc32c05 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -19,10 +19,6 @@ import ( "golang.org/x/crypto/blake2b" ) -var ( - ErrNotAPowerOfTwo = errors.New("d must be a power of 2") -) - // Ring-SIS instance type RSis struct { @@ -215,21 +211,6 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { } } -func deriveRandomElementFromSeed(seed, i, j int64) koalabear.Element { - var buf [3 + 3*8]byte - copy(buf[:3], "SIS") - binary.BigEndian.PutUint64(buf[3:], uint64(seed)) - binary.BigEndian.PutUint64(buf[11:], uint64(i)) - binary.BigEndian.PutUint64(buf[19:], uint64(j)) - - digest := blake2b.Sum256(buf[:]) - - var res koalabear.Element - res.SetBytes(digest[:]) - - return res -} - // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. 
// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes // and that they are in evaluation form on √(g) * @@ -419,3 +400,18 @@ func limbDecomposeBytes8_64(buf []byte, m koalabear.Vector, mValues *bitset.BitS } } } + +func deriveRandomElementFromSeed(seed, i, j int64) koalabear.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) + + digest := blake2b.Sum256(buf[:]) + + var res koalabear.Element + res.SetBytes(digest[:]) + + return res +} From e38ee6fe29bc7d928f02a429c3ed99715aba2440 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Tue, 7 Jan 2025 16:00:42 -0600 Subject: [PATCH 05/25] refactor: limit log2bound to be mod 8, simplify stuff in SIS --- ecc/bls12-377/fr/sis/sis.go | 344 +++++------------ ecc/bls12-377/fr/sis/sis_test.go | 150 +------- field/babybear/sis/sis.go | 332 +++++------------ field/babybear/sis/sis_test.go | 152 +------- .../internal/templates/element/conv.go | 3 - .../internal/templates/sis/sis.go.tmpl | 351 ++++++------------ .../internal/templates/sis/sis.test.go.tmpl | 158 +------- field/goldilocks/sis/sis.go | 344 +++++------------ field/goldilocks/sis/sis_test.go | 150 +------- field/koalabear/sis/sis.go | 332 +++++------------ field/koalabear/sis/sis_test.go | 152 +------- 11 files changed, 575 insertions(+), 1893 deletions(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 4a8367672f..569323ebc1 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -6,25 +6,19 @@ package sis import ( - "bytes" "encoding/binary" "errors" "fmt" "math/bits" - "github.com/bits-and-blooms/bitset" "github.com/consensys/gnark-crypto/ecc/bls12-377/fr" "github.com/consensys/gnark-crypto/ecc/bls12-377/fr/fft" "github.com/consensys/gnark-crypto/internal/parallel" "golang.org/x/crypto/blake2b" ) -// Ring-SIS instance +// RSis is the 
Ring-SIS instance type RSis struct { - - // buffer storing the data to hash - buffer bytes.Buffer - // Vectors in ℤ_{p}/Xⁿ+1 // A[i] is the i-th polynomial. // Ag the evaluation form of the polynomials in A on the coset √(g) * @@ -36,21 +30,14 @@ type RSis struct { // cf https://hackmd.io/7OODKWQZRRW9RxM5BaXtIw , B >= 3. LogTwoBound int + // d, the degree of X^{d}+1 + Degree int + // domain for the polynomial multiplication Domain *fft.Domain twiddleCosets []fr.Element // see FFT64 and precomputeTwiddlesCoset - // d, the degree of X^{d}+1 - Degree int - - // in bytes, represents the maximum number of bytes the .Write(...) will handle; - // ( maximum number of bytes to sum ) - capacity int maxNbElementsToHash int - - // allocate memory once per instance (used in Sum()) - bufM fr.Vector - bufMValues *bitset.BitSet } // NewRSis creates an instance of RSis. @@ -61,22 +48,22 @@ type RSis struct { // used to derived n, the number of polynomials in A, and max size of instance's internal buffer. func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*RSis, error) { - if logTwoBound > 64 { + if logTwoBound > 64 || logTwoBound > fr.Bits { return nil, errors.New("logTwoBound too large") } + if logTwoBound%8 != 0 { + panic("logTwoBound must be a multiple of 8") + } if bits.UintSize == 32 { return nil, errors.New("unsupported architecture; need 64bit target") } degree := 1 << logTwoDegree - capacity := maxNbElementsToHash * fr.Bytes // n: number of polynomials in A // len(m) == degree * n // with each element in m being logTwoBounds bits from the instance buffer. 
// that is, to fill m, we need [degree * n * logTwoBound] bits of data - // capacity == [degree * n * logTwoBound] / 8 - // n == (capacity*8)/(degree*logTwoBound) // First n <- #limbs to represent a single field element n := (fr.Bytes * 8) / logTwoBound @@ -103,13 +90,10 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R r := &RSis{ LogTwoBound: logTwoBound, - capacity: capacity, Degree: degree, Domain: fft.NewDomain(uint64(degree), fft.WithShift(shift)), A: make([][]fr.Element, n), Ag: make([][]fr.Element, n), - bufM: make(fr.Vector, degree*n), - bufMValues: bitset.New(uint(n)), maxNbElementsToHash: maxNbElementsToHash, } if r.LogTwoBound == 8 && r.Degree == 64 { @@ -148,67 +132,46 @@ func (r *RSis) Hash(v, res []fr.Element) error { if len(res) != r.Degree { return fmt.Errorf("output vector must have length %d", r.Degree) } - // TODO @gbotrel check that this is needed. + for i := 0; i < len(res); i++ { + // TODO @gbotrel ensure that this is needed. res[i].SetZero() } if len(v) > r.maxNbElementsToHash { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - // reset the buffer - r.buffer.Reset() + fastPath := r.LogTwoBound == 8 && r.Degree == 64 - // write the elements to the buffer - // TODO @gbotrel for now we use a buffer, we will kill it later in the refactoring. - for _, e := range v { - r.buffer.Write(e.Marshal()) - } + reader := NewVectorLimbReader(v, r.LogTwoBound/8) - { - // previous Sum() + kz := make([]fr.Element, r.Degree) + k := make([]fr.Element, r.Degree) + for i := 0; i < len(r.Ag); i++ { + copy(k, kz) - buf := r.buffer.Bytes() - if len(buf) > r.capacity { - panic("buffer too large") + zero := uint64(0) + for j := 0; j < r.Degree; j++ { + l := reader.NextLimb() + zero |= l + k[j][0] = l } - - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - - // clear the buffers of the instance. 
- defer r.cleanupBuffers() - - m := r.bufM - mValues := r.bufMValues - - if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { - limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else if r.LogTwoBound >= 8 && (fr.Bytes*8)%r.LogTwoBound == 0 { - limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else { - limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + if zero == 0 { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + continue } - - // method 1: fft - for i := 0; i < len(r.Ag); i++ { - if !mValues.Test(uint(i)) { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue - } - k := m[i*r.Degree : (i+1)*r.Degree] - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - mulModAcc(res, r.Ag[i], k) + if fastPath { + // fast path. + FFT64(k, r.twiddleCosets) + } else { + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 - - return nil + mulModAcc(res, r.Ag[i], k) } + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 + + return nil } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -225,193 +188,92 @@ func mulModAcc(res []fr.Element, pLagrangeCosetBitReversed, qLagrangeCosetBitRev } } -// Returns a clone of the RSis parameters with a fresh and empty buffer. Does not -// mutate the current instance. The keys and the public parameters of the SIS -// instance are not deep-copied. It is useful when we want to hash in parallel. -// Otherwise, we would have to generate an entire RSis for each thread. 
-func (r *RSis) CopyWithFreshBuffer() RSis { - res := *r - res.buffer = bytes.Buffer{} - res.bufM = make(fr.Vector, len(r.bufM)) - res.bufMValues = bitset.New(r.bufMValues.Len()) - return res -} +func deriveRandomElementFromSeed(seed, i, j int64) fr.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) -// Cleanup the buffers of the RSis instance -func (r *RSis) cleanupBuffers() { - r.bufMValues.ClearAll() - for i := 0; i < len(r.bufM); i++ { - r.bufM[i].SetZero() - } -} + digest := blake2b.Sum256(buf[:]) -// Split an slice of bytes representing an array of serialized field element in -// big-endian form into an array of limbs representing the same field elements -// in little-endian form. Namely, if our field is represented with 64 bits and we -// have the following field element 0x0123456789abcdef (0 being the most significant -// character and and f being the least significant one) and our log norm bound is -// 16 (so 1 hex character = 1 limb). The function assigns the values of m to [f, e, -// d, c, b, a, ..., 3, 2, 1, 0]. m should be preallocated and zeroized. Additionally, -// we have the guarantee that 2 bits contributing to different field elements cannot -// be part of the same limb. -func LimbDecomposeBytes(buf []byte, m fr.Vector, logTwoBound int) { - limbDecomposeBytes(buf, m, logTwoBound, 0, nil) -} + var res fr.Element + res.SetBytes(digest[:]) -// decomposes m as by taking chunks of logTwoBound bits at a time. The buffer is interpreted like this: -// [0xa, .. , 0x1 | 0xa ... ] -// -// <- #bytes in a field element -> -// <-0xa is the MSB, 0x1 the LSB-> -// <-we read this chunk from right -// to left -> -// -// This function is called when logTwoBound divides the number of bits used to represent a -// fr element. -// From a slice of field elements m:=[a_0, a_1, ...] 
-// Doing h.Sum(h.Write([Marshal[a_i] for i in len(m)])) is the same than -// writing the a_i in little endian, and then taking logTwoBound bits at a time. -// -// ex: m := [0x1, 0x3] -// in the hash buffer, it is interpreted like that as a stream of bits: -// [100...0 110...0] corresponding to [0x1, 0x3] in little endian, so first bit = LSbit -// then the stream of bits is splitted in chunks of logTwoBound bits. -// -// This function is called when logTwoBound divides 8. -func limbDecomposeBytesSmallBound(buf []byte, m fr.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - mask := byte((1 << logTwoBound) - 1) - nbChunksPerBytes := 8 / logTwoBound - nbFieldsElmts := len(buf) / fr.Bytes - for i := 0; i < nbFieldsElmts; i++ { - for j := fr.Bytes - 1; j >= 0; j-- { - curByte := buf[i*fr.Bytes+j] - curPos := i*fr.Bytes*nbChunksPerBytes + (fr.Bytes-1-j)*nbChunksPerBytes - for k := 0; k < nbChunksPerBytes; k++ { - - m[curPos+k][0] = uint64((curByte >> (k * logTwoBound)) & mask) - - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[curPos+k][0] != 0 && mValues != nil { - mValues.Set(uint((curPos + k) / degree)) - } - } - } - } + return res } -// limbDecomposeBytesMiddleBound same function than limbDecomposeBytesSmallBound, but logTwoBound is -// a multiple of 8, and divides the number of bits of the fields. -func limbDecomposeBytesMiddleBound(buf []byte, m fr.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - nbFieldsElmts := len(buf) / fr.Bytes - nbChunksPerElements := fr.Bytes * 8 / logTwoBound - nbBytesInChunk := logTwoBound / 8 - curElmt := 0 - for i := 0; i < nbFieldsElmts; i++ { - for j := nbChunksPerElements; j > 0; j-- { - curPos := i*fr.Bytes + j*nbBytesInChunk - for k := 1; k <= nbBytesInChunk; k++ { +// VectorLimbReader reads a vector of field element, limb by limb. +// The elements are interpreted in little endian. +// The limb is also interpreted in little endian. 
+type VectorLimbReader struct { + v fr.Vector + buf [fr.Bytes]byte - m[curElmt][0] |= (uint64(buf[curPos-k]) << ((k - 1) * 8)) + i int // position in vector + j int // position in buf - } - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[curElmt][0] != 0 && mValues != nil { - mValues.Set(uint((curElmt) / degree)) - } - curElmt += 1 - } - } + next func(buf []byte, pos *int) uint64 } -// Split an slice of bytes representing an array of serialized field element in -// big-endian form into an array of limbs representing the same field elements -// in little-endian form. Namely, if our field is represented with 64 bits and we -// have the following field element 0x0123456789abcdef (0 being the most significant -// character and and f being the least significant one) and our log norm bound is -// 16 (so 1 hex character = 1 limb). The function assigns the values of m to [f, e, -// d, c, b, a, ..., 3, 2, 1, 0]. m should be preallocated and zeroized. mValues is -// an optional bitSet. If provided, it must be empty. The function will set bit "i" -// to indicate the that i-th SIS input polynomial should be non-zero. Recall, that a -// SIS polynomial corresponds to a chunk of limbs of size `degree`. Additionally, -// we have the guarantee that 2 bits contributing to different field elements cannot -// be part of the same limb. -func limbDecomposeBytes(buf []byte, m fr.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - - // bitwise decomposition of the buffer, in order to build m (the vector to hash) - // as a list of polynomials, whose coefficients are less than r.B bits long. - // Say buf=[0xbe,0x0f]. As a stream of bits it is interpreted like this: - // 10111110 00001111. getIthBit(0)=1 (=leftmost bit), getIthBit(1)=0 (=second leftmost bit), etc. 
- nbBits := len(buf) * 8 - getIthBit := func(i int) uint8 { - k := i / 8 - if k >= len(buf) { - return 0 - } - b := buf[k] - j := i % 8 - return b >> (7 - j) & 1 +// NewVectorLimbReader creates a new VectorLimbReader +// v: the vector to read +// limbSize: the size of the limb in bytes (1, 2, 4 or 8) +func NewVectorLimbReader(v fr.Vector, limbSize int) *VectorLimbReader { + var next func(buf []byte, pos *int) uint64 + switch limbSize { + case 1: + next = nextUint8 + case 2: + next = nextUint16 + + case 4: + next = nextUint32 + case 8: + next = nextUint64 + default: + panic("unsupported limb size") } - - // we process the input buffer by blocks of r.LogTwoBound bits - // each of these block (<< 64bits) are interpreted as a coefficient - mPos := 0 - for fieldStart := 0; fieldStart < nbBits; { - for bitInField := 0; bitInField < fr.Bytes*8; { - - j := bitInField % logTwoBound - - // r.LogTwoBound < 64; we just use the first word of our element here, - // and set the bits from LSB to MSB. - at := fieldStart + fr.Bytes*8 - bitInField - 1 - - m[mPos][0] |= uint64(getIthBit(at) << j) - - bitInField++ - - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[mPos][0] != 0 && mValues != nil { - mValues.Set(uint(mPos / degree)) - } - - if j == logTwoBound-1 || bitInField == fr.Bytes*8 { - mPos++ - } - } - fieldStart += fr.Bytes * 8 + return &VectorLimbReader{ + v: v, + j: fr.Bytes, + next: next, } } -// see limbDecomposeBytes; this function is optimized for the case where -// logTwoBound == 8 and degree == 64 -func limbDecomposeBytes8_64(buf []byte, m fr.Vector, mValues *bitset.BitSet) { - // with logTwoBound == 8, we can actually advance byte per byte. - const degree = 64 - j := 0 - - for startPos := fr.Bytes - 1; startPos < len(buf); startPos += fr.Bytes { - for i := startPos; i >= startPos-fr.Bytes+1; i-- { - - m[j][0] = uint64(buf[i]) - - if m[j][0] != 0 { - mValues.Set(uint(j / degree)) - } - j++ - } +// NextLimb returns the next limb of the vector. 
+// This does not perform any bound check, may trigger an out of bound panic. +// If underlying vector is "out of limb" +func (vr *VectorLimbReader) NextLimb() uint64 { + if vr.j == fr.Bytes { + vr.j = 0 + // TODO @gbotrel we could return 0 in the case vr.i == len(vr.v) + fr.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) + vr.i++ } + return vr.next(vr.buf[:], &vr.j) } -func deriveRandomElementFromSeed(seed, i, j int64) fr.Element { - var buf [3 + 3*8]byte - copy(buf[:3], "SIS") - binary.BigEndian.PutUint64(buf[3:], uint64(seed)) - binary.BigEndian.PutUint64(buf[11:], uint64(i)) - binary.BigEndian.PutUint64(buf[19:], uint64(j)) +func nextUint8(buf []byte, pos *int) uint64 { + r := uint64(buf[*pos]) + *pos++ + return r +} - digest := blake2b.Sum256(buf[:]) +func nextUint16(buf []byte, pos *int) uint64 { + r := uint64(binary.LittleEndian.Uint16(buf[*pos:])) + *pos += 2 + return r +} - var res fr.Element - res.SetBytes(digest[:]) +func nextUint32(buf []byte, pos *int) uint64 { + r := uint64(binary.LittleEndian.Uint32(buf[*pos:])) + *pos += 4 + return r +} - return res +func nextUint64(buf []byte, pos *int) uint64 { + r := uint64(binary.LittleEndian.Uint64(buf[*pos:])) + *pos += 8 + return r } diff --git a/ecc/bls12-377/fr/sis/sis_test.go b/ecc/bls12-377/fr/sis/sis_test.go index cd5b261536..e6e6cdf608 100644 --- a/ecc/bls12-377/fr/sis/sis_test.go +++ b/ecc/bls12-377/fr/sis/sis_test.go @@ -6,8 +6,6 @@ package sis import ( - "bytes" - "crypto/rand" "encoding/json" "fmt" "math/big" @@ -16,7 +14,6 @@ import ( "testing" "time" - "github.com/bits-and-blooms/bitset" "github.com/consensys/gnark-crypto/ecc/bls12-377/fr" "github.com/consensys/gnark-crypto/ecc/bls12-377/fr/fft" "github.com/stretchr/testify/require" @@ -27,11 +24,7 @@ type sisParams struct { } var params128Bits []sisParams = []sisParams{ - {logTwoBound: 2, logTwoDegree: 3}, - {logTwoBound: 4, logTwoDegree: 4}, - {logTwoBound: 6, logTwoDegree: 5}, {logTwoBound: 8, logTwoDegree: 6}, - {logTwoBound: 10, logTwoDegree: 
6}, {logTwoBound: 16, logTwoDegree: 7}, {logTwoBound: 32, logTwoDegree: 8}, } @@ -65,6 +58,15 @@ func TestReference(t *testing.T) { inputs := testCases.Inputs for testCaseID, testCase := range testCases.Entries { + if testCase.Params.LogTwoBound%8 != 0 { + t.Logf("skipping test case %d, logTwoBound is not a multiple of 8", testCaseID) + continue + } + if testCase.Params.LogTwoBound > fr.Bits { + t.Logf("skipping test case %d, logTwoBound %d is greater than field bit size (%d)", testCaseID, testCase.Params.LogTwoBound, fr.Bits) + continue + } + t.Logf("logTwoBound = %d, logTwoDegree = %d", testCase.Params.LogTwoBound, testCase.Params.LogTwoDegree) // create the SIS instance sis, err := NewRSis(testCase.Params.Seed, testCase.Params.LogTwoDegree, testCase.Params.LogTwoBound, testCase.Params.MaxNbElementsToHash) @@ -88,7 +90,7 @@ func TestReference(t *testing.T) { } -func TestLimbDecomposeBytesMiddleBound(t *testing.T) { +func TestLimbDecomposeBytes(t *testing.T) { var montConstant fr.Element var bMontConstant big.Int @@ -99,71 +101,22 @@ func TestLimbDecomposeBytesMiddleBound(t *testing.T) { nbElmts := 10 a := make([]fr.Element, nbElmts) for i := 0; i < nbElmts; i++ { - a[i].SetUint64(33) - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) + a[i].SetRandom() } logTwoBound := 8 for cc := 0; cc < 3; cc++ { + vr := NewVectorLimbReader(a, logTwoBound/8) m := make(fr.Vector, nbElmts*fr.Bytes*8/logTwoBound) - limbDecomposeBytesMiddleBound(buf.Bytes(), m, logTwoBound, 4, nil) - for i := 0; i < len(m); i++ { - m[i].Mul(&m[i], &montConstant) - } - - var x fr.Element - x.SetUint64(1 << logTwoBound) - - coeffsPerFieldsElmt := fr.Bytes * 8 / logTwoBound - for i := 0; i < nbElmts; i++ { - r := eval(m[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytes failed") - } + m[i][0] = vr.NextLimb() } - logTwoBound *= 2 - } - -} - -func TestLimbDecomposeBytesSmallBound(t *testing.T) { - - var 
montConstant fr.Element - var bMontConstant big.Int - bMontConstant.SetUint64(1) - bMontConstant.Lsh(&bMontConstant, fr.Bytes*8) - montConstant.SetBigInt(&bMontConstant) - - nbElmts := 10 - a := make([]fr.Element, nbElmts) - for i := 0; i < nbElmts; i++ { - a[i].SetRandom() - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) - } - - logTwoBound := 2 - - for cc := 0; cc < 3; cc++ { - - m := make(fr.Vector, nbElmts*fr.Bytes*8/logTwoBound) - m2 := make(fr.Vector, nbElmts*fr.Bytes*8/logTwoBound) - - // the limbs are set as is, they are NOT converted in Montgomery form - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - limbDecomposeBytesSmallBound(buf.Bytes(), m2, logTwoBound, 4, nil) for i := 0; i < len(m); i++ { m[i].Mul(&m[i], &montConstant) - m2[i].Mul(&m2[i], &montConstant) } + var x fr.Element x.SetUint64(1 << logTwoBound) @@ -173,10 +126,6 @@ func TestLimbDecomposeBytesSmallBound(t *testing.T) { if !r.Equal(&a[i]) { t.Fatal("limbDecomposeBytes failed") } - r = eval(m2[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytesSmallBound failed") - } } logTwoBound *= 2 } @@ -249,51 +198,6 @@ func estimateSisTheory(p sisParams) float64 { return float64(r) } -func BenchmarkDecomposition(b *testing.B) { - - nbElmts := 1000 - a := make([]fr.Element, nbElmts) - for i := 0; i < nbElmts; i++ { - a[i].SetRandom() - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) - } - logTwoBound := 4 - m := make(fr.Vector, nbElmts*fr.Bytes*8/logTwoBound) - - b.Run(fmt.Sprintf("limbDecomposeBytes logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - b.Run(fmt.Sprintf("limbDecomposeByteSmallBound logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytesSmallBound(buf.Bytes(), m, 
logTwoBound, 4, nil) - } - }) - - logTwoBound = 16 - b.Run(fmt.Sprintf("limbDecomposeBytes logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - b.Run(fmt.Sprintf("limbDecomposeByteSmallBound logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytesMiddleBound(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - -} - func BenchmarkSIS(b *testing.B) { // max nb field elements to hash @@ -360,32 +264,6 @@ func benchmarkSIS(b *testing.B, input []fr.Element, sparse bool, logTwoBound, lo }) } -func TestLimbDecompositionFastPath(t *testing.T) { - assert := require.New(t) - - for size := fr.Bytes; size < 5*fr.Bytes; size += fr.Bytes { - // Test the fast path of limbDecomposeBytes8_64 - buf := make([]byte, size) - m := make([]fr.Element, size) - mValues := bitset.New(uint(size)) - n := make([]fr.Element, size) - nValues := bitset.New(uint(size)) - - // Generate a random buffer - _, err := rand.Read(buf) - assert.NoError(err) - - limbDecomposeBytes8_64(buf, m, mValues) - limbDecomposeBytes(buf, n, 8, 64, nValues) - - for i := 0; i < size; i++ { - assert.Equal(mValues.Test(uint(i)), nValues.Test(uint(i))) - assert.True(m[i].Equal(&n[i])) - } - } - -} - func TestUnrolledFFT(t *testing.T) { var shift fr.Element diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index 6fbf5f8b54..eeba906849 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -6,25 +6,19 @@ package sis import ( - "bytes" "encoding/binary" "errors" "fmt" "math/bits" - "github.com/bits-and-blooms/bitset" "github.com/consensys/gnark-crypto/field/babybear" "github.com/consensys/gnark-crypto/field/babybear/fft" "github.com/consensys/gnark-crypto/internal/parallel" "golang.org/x/crypto/blake2b" ) -// Ring-SIS instance +// RSis is the Ring-SIS instance type RSis struct { - - // buffer storing the data to hash - 
buffer bytes.Buffer - // Vectors in ℤ_{p}/Xⁿ+1 // A[i] is the i-th polynomial. // Ag the evaluation form of the polynomials in A on the coset √(g) * @@ -36,21 +30,14 @@ type RSis struct { // cf https://hackmd.io/7OODKWQZRRW9RxM5BaXtIw , B >= 3. LogTwoBound int + // d, the degree of X^{d}+1 + Degree int + // domain for the polynomial multiplication Domain *fft.Domain twiddleCosets []babybear.Element // see FFT64 and precomputeTwiddlesCoset - // d, the degree of X^{d}+1 - Degree int - - // in bytes, represents the maximum number of bytes the .Write(...) will handle; - // ( maximum number of bytes to sum ) - capacity int maxNbElementsToHash int - - // allocate memory once per instance (used in Sum()) - bufM babybear.Vector - bufMValues *bitset.BitSet } // NewRSis creates an instance of RSis. @@ -61,22 +48,22 @@ type RSis struct { // used to derived n, the number of polynomials in A, and max size of instance's internal buffer. func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*RSis, error) { - if logTwoBound > 64 { + if logTwoBound > 64 || logTwoBound > babybear.Bits { return nil, errors.New("logTwoBound too large") } + if logTwoBound%8 != 0 { + panic("logTwoBound must be a multiple of 8") + } if bits.UintSize == 32 { return nil, errors.New("unsupported architecture; need 64bit target") } degree := 1 << logTwoDegree - capacity := maxNbElementsToHash * babybear.Bytes // n: number of polynomials in A // len(m) == degree * n // with each element in m being logTwoBounds bits from the instance buffer. 
// that is, to fill m, we need [degree * n * logTwoBound] bits of data - // capacity == [degree * n * logTwoBound] / 8 - // n == (capacity*8)/(degree*logTwoBound) // First n <- #limbs to represent a single field element n := (babybear.Bytes * 8) / logTwoBound @@ -103,13 +90,10 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R r := &RSis{ LogTwoBound: logTwoBound, - capacity: capacity, Degree: degree, Domain: fft.NewDomain(uint64(degree), fft.WithShift(shift)), A: make([][]babybear.Element, n), Ag: make([][]babybear.Element, n), - bufM: make(babybear.Vector, degree*n), - bufMValues: bitset.New(uint(n)), maxNbElementsToHash: maxNbElementsToHash, } if r.LogTwoBound == 8 && r.Degree == 64 { @@ -148,67 +132,46 @@ func (r *RSis) Hash(v, res []babybear.Element) error { if len(res) != r.Degree { return fmt.Errorf("output vector must have length %d", r.Degree) } - // TODO @gbotrel check that this is needed. + for i := 0; i < len(res); i++ { + // TODO @gbotrel ensure that this is needed. res[i].SetZero() } if len(v) > r.maxNbElementsToHash { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - // reset the buffer - r.buffer.Reset() + fastPath := r.LogTwoBound == 8 && r.Degree == 64 - // write the elements to the buffer - // TODO @gbotrel for now we use a buffer, we will kill it later in the refactoring. - for _, e := range v { - r.buffer.Write(e.Marshal()) - } + reader := NewVectorLimbReader(v, r.LogTwoBound/8) - { - // previous Sum() + kz := make([]babybear.Element, r.Degree) + k := make([]babybear.Element, r.Degree) + for i := 0; i < len(r.Ag); i++ { + copy(k, kz) - buf := r.buffer.Bytes() - if len(buf) > r.capacity { - panic("buffer too large") + zero := uint32(0) + for j := 0; j < r.Degree; j++ { + l := reader.NextLimb() + zero |= l + k[j][0] = l } - - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - - // clear the buffers of the instance. 
- defer r.cleanupBuffers() - - m := r.bufM - mValues := r.bufMValues - - if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { - limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else if r.LogTwoBound >= 8 && (babybear.Bytes*8)%r.LogTwoBound == 0 { - limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else { - limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + if zero == 0 { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + continue } - - // method 1: fft - for i := 0; i < len(r.Ag); i++ { - if !mValues.Test(uint(i)) { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue - } - k := m[i*r.Degree : (i+1)*r.Degree] - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - mulModAcc(res, r.Ag[i], k) + if fastPath { + // fast path. + FFT64(k, r.twiddleCosets) + } else { + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 - - return nil + mulModAcc(res, r.Ag[i], k) } + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 + + return nil } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -225,193 +188,76 @@ func mulModAcc(res []babybear.Element, pLagrangeCosetBitReversed, qLagrangeCoset } } -// Returns a clone of the RSis parameters with a fresh and empty buffer. Does not -// mutate the current instance. The keys and the public parameters of the SIS -// instance are not deep-copied. It is useful when we want to hash in parallel. -// Otherwise, we would have to generate an entire RSis for each thread. 
-func (r *RSis) CopyWithFreshBuffer() RSis { - res := *r - res.buffer = bytes.Buffer{} - res.bufM = make(babybear.Vector, len(r.bufM)) - res.bufMValues = bitset.New(r.bufMValues.Len()) - return res -} +func deriveRandomElementFromSeed(seed, i, j int64) babybear.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) -// Cleanup the buffers of the RSis instance -func (r *RSis) cleanupBuffers() { - r.bufMValues.ClearAll() - for i := 0; i < len(r.bufM); i++ { - r.bufM[i].SetZero() - } -} + digest := blake2b.Sum256(buf[:]) -// Split an slice of bytes representing an array of serialized field element in -// big-endian form into an array of limbs representing the same field elements -// in little-endian form. Namely, if our field is represented with 64 bits and we -// have the following field element 0x0123456789abcdef (0 being the most significant -// character and and f being the least significant one) and our log norm bound is -// 16 (so 1 hex character = 1 limb). The function assigns the values of m to [f, e, -// d, c, b, a, ..., 3, 2, 1, 0]. m should be preallocated and zeroized. Additionally, -// we have the guarantee that 2 bits contributing to different field elements cannot -// be part of the same limb. -func LimbDecomposeBytes(buf []byte, m babybear.Vector, logTwoBound int) { - limbDecomposeBytes(buf, m, logTwoBound, 0, nil) -} + var res babybear.Element + res.SetBytes(digest[:]) -// decomposes m as by taking chunks of logTwoBound bits at a time. The buffer is interpreted like this: -// [0xa, .. , 0x1 | 0xa ... ] -// -// <- #bytes in a field element -> -// <-0xa is the MSB, 0x1 the LSB-> -// <-we read this chunk from right -// to left -> -// -// This function is called when logTwoBound divides the number of bits used to represent a -// babybear element. -// From a slice of field elements m:=[a_0, a_1, ...] 
-// Doing h.Sum(h.Write([Marshal[a_i] for i in len(m)])) is the same than -// writing the a_i in little endian, and then taking logTwoBound bits at a time. -// -// ex: m := [0x1, 0x3] -// in the hash buffer, it is interpreted like that as a stream of bits: -// [100...0 110...0] corresponding to [0x1, 0x3] in little endian, so first bit = LSbit -// then the stream of bits is splitted in chunks of logTwoBound bits. -// -// This function is called when logTwoBound divides 8. -func limbDecomposeBytesSmallBound(buf []byte, m babybear.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - mask := byte((1 << logTwoBound) - 1) - nbChunksPerBytes := 8 / logTwoBound - nbFieldsElmts := len(buf) / babybear.Bytes - for i := 0; i < nbFieldsElmts; i++ { - for j := babybear.Bytes - 1; j >= 0; j-- { - curByte := buf[i*babybear.Bytes+j] - curPos := i*babybear.Bytes*nbChunksPerBytes + (babybear.Bytes-1-j)*nbChunksPerBytes - for k := 0; k < nbChunksPerBytes; k++ { - - m[curPos+k][0] = uint32((curByte >> (k * logTwoBound)) & mask) - - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[curPos+k][0] != 0 && mValues != nil { - mValues.Set(uint((curPos + k) / degree)) - } - } - } - } + return res } -// limbDecomposeBytesMiddleBound same function than limbDecomposeBytesSmallBound, but logTwoBound is -// a multiple of 8, and divides the number of bits of the fields. -func limbDecomposeBytesMiddleBound(buf []byte, m babybear.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - nbFieldsElmts := len(buf) / babybear.Bytes - nbChunksPerElements := babybear.Bytes * 8 / logTwoBound - nbBytesInChunk := logTwoBound / 8 - curElmt := 0 - for i := 0; i < nbFieldsElmts; i++ { - for j := nbChunksPerElements; j > 0; j-- { - curPos := i*babybear.Bytes + j*nbBytesInChunk - for k := 1; k <= nbBytesInChunk; k++ { +// VectorLimbReader reads a vector of field element, limb by limb. +// The elements are interpreted in little endian. 
+// The limb is also interpreted in little endian. +type VectorLimbReader struct { + v babybear.Vector + buf [babybear.Bytes]byte - m[curElmt][0] |= (uint32(buf[curPos-k]) << ((k - 1) * 8)) + i int // position in vector + j int // position in buf - } - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[curElmt][0] != 0 && mValues != nil { - mValues.Set(uint((curElmt) / degree)) - } - curElmt += 1 - } - } + next func(buf []byte, pos *int) uint32 } -// Split an slice of bytes representing an array of serialized field element in -// big-endian form into an array of limbs representing the same field elements -// in little-endian form. Namely, if our field is represented with 64 bits and we -// have the following field element 0x0123456789abcdef (0 being the most significant -// character and and f being the least significant one) and our log norm bound is -// 16 (so 1 hex character = 1 limb). The function assigns the values of m to [f, e, -// d, c, b, a, ..., 3, 2, 1, 0]. m should be preallocated and zeroized. mValues is -// an optional bitSet. If provided, it must be empty. The function will set bit "i" -// to indicate the that i-th SIS input polynomial should be non-zero. Recall, that a -// SIS polynomial corresponds to a chunk of limbs of size `degree`. Additionally, -// we have the guarantee that 2 bits contributing to different field elements cannot -// be part of the same limb. -func limbDecomposeBytes(buf []byte, m babybear.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - - // bitwise decomposition of the buffer, in order to build m (the vector to hash) - // as a list of polynomials, whose coefficients are less than r.B bits long. - // Say buf=[0xbe,0x0f]. As a stream of bits it is interpreted like this: - // 10111110 00001111. getIthBit(0)=1 (=leftmost bit), getIthBit(1)=0 (=second leftmost bit), etc. 
- nbBits := len(buf) * 8 - getIthBit := func(i int) uint8 { - k := i / 8 - if k >= len(buf) { - return 0 - } - b := buf[k] - j := i % 8 - return b >> (7 - j) & 1 +// NewVectorLimbReader creates a new VectorLimbReader +// v: the vector to read +// limbSize: the size of the limb in bytes (1, 2, 4 or 8) +func NewVectorLimbReader(v babybear.Vector, limbSize int) *VectorLimbReader { + var next func(buf []byte, pos *int) uint32 + switch limbSize { + case 1: + next = nextUint8 + case 2: + next = nextUint16 + + default: + panic("unsupported limb size") } - - // we process the input buffer by blocks of r.LogTwoBound bits - // each of these block (<< 64bits) are interpreted as a coefficient - mPos := 0 - for fieldStart := 0; fieldStart < nbBits; { - for bitInField := 0; bitInField < babybear.Bytes*8; { - - j := bitInField % logTwoBound - - // r.LogTwoBound < 64; we just use the first word of our element here, - // and set the bits from LSB to MSB. - at := fieldStart + babybear.Bytes*8 - bitInField - 1 - - m[mPos][0] |= uint32(getIthBit(at) << j) - - bitInField++ - - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[mPos][0] != 0 && mValues != nil { - mValues.Set(uint(mPos / degree)) - } - - if j == logTwoBound-1 || bitInField == babybear.Bytes*8 { - mPos++ - } - } - fieldStart += babybear.Bytes * 8 + return &VectorLimbReader{ + v: v, + j: babybear.Bytes, + next: next, } } -// see limbDecomposeBytes; this function is optimized for the case where -// logTwoBound == 8 and degree == 64 -func limbDecomposeBytes8_64(buf []byte, m babybear.Vector, mValues *bitset.BitSet) { - // with logTwoBound == 8, we can actually advance byte per byte. - const degree = 64 - j := 0 - - for startPos := babybear.Bytes - 1; startPos < len(buf); startPos += babybear.Bytes { - for i := startPos; i >= startPos-babybear.Bytes+1; i-- { - - m[j][0] = uint32(buf[i]) - - if m[j][0] != 0 { - mValues.Set(uint(j / degree)) - } - j++ - } +// NextLimb returns the next limb of the vector. 
+// This does not perform any bound check, may trigger an out of bound panic. +// If underlying vector is "out of limb" +func (vr *VectorLimbReader) NextLimb() uint32 { + if vr.j == babybear.Bytes { + vr.j = 0 + // TODO @gbotrel we could return 0 in the case vr.i == len(vr.v) + babybear.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) + vr.i++ } + return vr.next(vr.buf[:], &vr.j) } -func deriveRandomElementFromSeed(seed, i, j int64) babybear.Element { - var buf [3 + 3*8]byte - copy(buf[:3], "SIS") - binary.BigEndian.PutUint64(buf[3:], uint64(seed)) - binary.BigEndian.PutUint64(buf[11:], uint64(i)) - binary.BigEndian.PutUint64(buf[19:], uint64(j)) - - digest := blake2b.Sum256(buf[:]) - - var res babybear.Element - res.SetBytes(digest[:]) +func nextUint8(buf []byte, pos *int) uint32 { + r := uint32(buf[*pos]) + *pos++ + return r +} - return res +func nextUint16(buf []byte, pos *int) uint32 { + r := uint32(binary.LittleEndian.Uint16(buf[*pos:])) + *pos += 2 + return r } diff --git a/field/babybear/sis/sis_test.go b/field/babybear/sis/sis_test.go index 8e97db534e..9d973961fd 100644 --- a/field/babybear/sis/sis_test.go +++ b/field/babybear/sis/sis_test.go @@ -6,8 +6,6 @@ package sis import ( - "bytes" - "crypto/rand" "encoding/json" "fmt" "math/big" @@ -16,7 +14,6 @@ import ( "testing" "time" - "github.com/bits-and-blooms/bitset" "github.com/consensys/gnark-crypto/field/babybear" "github.com/consensys/gnark-crypto/field/babybear/fft" "github.com/stretchr/testify/require" @@ -27,11 +24,7 @@ type sisParams struct { } var params128Bits []sisParams = []sisParams{ - {logTwoBound: 2, logTwoDegree: 3}, - {logTwoBound: 4, logTwoDegree: 4}, - {logTwoBound: 6, logTwoDegree: 5}, {logTwoBound: 8, logTwoDegree: 6}, - {logTwoBound: 10, logTwoDegree: 6}, {logTwoBound: 16, logTwoDegree: 7}, {logTwoBound: 32, logTwoDegree: 8}, } @@ -65,6 +58,15 @@ func TestReference(t *testing.T) { inputs := testCases.Inputs for testCaseID, testCase := range testCases.Entries { + if 
testCase.Params.LogTwoBound%8 != 0 { + t.Logf("skipping test case %d, logTwoBound is not a multiple of 8", testCaseID) + continue + } + if testCase.Params.LogTwoBound > babybear.Bits { + t.Logf("skipping test case %d, logTwoBound %d is greater than field bit size (%d)", testCaseID, testCase.Params.LogTwoBound, babybear.Bits) + continue + } + t.Logf("logTwoBound = %d, logTwoDegree = %d", testCase.Params.LogTwoBound, testCase.Params.LogTwoDegree) // create the SIS instance sis, err := NewRSis(testCase.Params.Seed, testCase.Params.LogTwoDegree, testCase.Params.LogTwoBound, testCase.Params.MaxNbElementsToHash) @@ -88,7 +90,7 @@ func TestReference(t *testing.T) { } -func TestLimbDecomposeBytesMiddleBound(t *testing.T) { +func TestLimbDecomposeBytes(t *testing.T) { var montConstant babybear.Element var bMontConstant big.Int @@ -99,71 +101,22 @@ func TestLimbDecomposeBytesMiddleBound(t *testing.T) { nbElmts := 10 a := make([]babybear.Element, nbElmts) for i := 0; i < nbElmts; i++ { - a[i].SetUint64(33) - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) + a[i].SetRandom() } logTwoBound := 8 - for cc := 0; cc < 3; cc++ { + for cc := 0; cc < 1; cc++ { + vr := NewVectorLimbReader(a, logTwoBound/8) m := make(babybear.Vector, nbElmts*babybear.Bytes*8/logTwoBound) - limbDecomposeBytesMiddleBound(buf.Bytes(), m, logTwoBound, 4, nil) - for i := 0; i < len(m); i++ { - m[i].Mul(&m[i], &montConstant) - } - - var x babybear.Element - x.SetUint64(1 << logTwoBound) - - coeffsPerFieldsElmt := babybear.Bytes * 8 / logTwoBound - for i := 0; i < nbElmts; i++ { - r := eval(m[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytes failed") - } + m[i][0] = vr.NextLimb() } - logTwoBound *= 2 - } - -} - -func TestLimbDecomposeBytesSmallBound(t *testing.T) { - - var montConstant babybear.Element - var bMontConstant big.Int - bMontConstant.SetUint64(1) - bMontConstant.Lsh(&bMontConstant, babybear.Bytes*8) - 
montConstant.SetBigInt(&bMontConstant) - - nbElmts := 10 - a := make([]babybear.Element, nbElmts) - for i := 0; i < nbElmts; i++ { - a[i].SetRandom() - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) - } - - logTwoBound := 2 - - for cc := 0; cc < 3; cc++ { - - m := make(babybear.Vector, nbElmts*babybear.Bytes*8/logTwoBound) - m2 := make(babybear.Vector, nbElmts*babybear.Bytes*8/logTwoBound) - - // the limbs are set as is, they are NOT converted in Montgomery form - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - limbDecomposeBytesSmallBound(buf.Bytes(), m2, logTwoBound, 4, nil) for i := 0; i < len(m); i++ { m[i].Mul(&m[i], &montConstant) - m2[i].Mul(&m2[i], &montConstant) } + var x babybear.Element x.SetUint64(1 << logTwoBound) @@ -173,10 +126,6 @@ func TestLimbDecomposeBytesSmallBound(t *testing.T) { if !r.Equal(&a[i]) { t.Fatal("limbDecomposeBytes failed") } - r = eval(m2[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytesSmallBound failed") - } } logTwoBound *= 2 } @@ -249,51 +198,6 @@ func estimateSisTheory(p sisParams) float64 { return float64(r) } -func BenchmarkDecomposition(b *testing.B) { - - nbElmts := 1000 - a := make([]babybear.Element, nbElmts) - for i := 0; i < nbElmts; i++ { - a[i].SetRandom() - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) - } - logTwoBound := 4 - m := make(babybear.Vector, nbElmts*babybear.Bytes*8/logTwoBound) - - b.Run(fmt.Sprintf("limbDecomposeBytes logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - b.Run(fmt.Sprintf("limbDecomposeByteSmallBound logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytesSmallBound(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - logTwoBound = 16 - 
b.Run(fmt.Sprintf("limbDecomposeBytes logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - b.Run(fmt.Sprintf("limbDecomposeByteSmallBound logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytesMiddleBound(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - -} - func BenchmarkSIS(b *testing.B) { // max nb field elements to hash @@ -360,32 +264,6 @@ func benchmarkSIS(b *testing.B, input []babybear.Element, sparse bool, logTwoBou }) } -func TestLimbDecompositionFastPath(t *testing.T) { - assert := require.New(t) - - for size := babybear.Bytes; size < 5*babybear.Bytes; size += babybear.Bytes { - // Test the fast path of limbDecomposeBytes8_64 - buf := make([]byte, size) - m := make([]babybear.Element, size) - mValues := bitset.New(uint(size)) - n := make([]babybear.Element, size) - nValues := bitset.New(uint(size)) - - // Generate a random buffer - _, err := rand.Read(buf) - assert.NoError(err) - - limbDecomposeBytes8_64(buf, m, mValues) - limbDecomposeBytes(buf, n, 8, 64, nValues) - - for i := 0; i < size; i++ { - assert.Equal(mValues.Test(uint(i)), nValues.Test(uint(i))) - assert.True(m[i].Equal(&n[i])) - } - } - -} - func TestUnrolledFFT(t *testing.T) { var shift babybear.Element diff --git a/field/generator/internal/templates/element/conv.go b/field/generator/internal/templates/element/conv.go index 70bffcf631..f9d3039acd 100644 --- a/field/generator/internal/templates/element/conv.go +++ b/field/generator/internal/templates/element/conv.go @@ -401,7 +401,4 @@ func (littleEndian) PutElement(b *[Bytes]byte, e {{.ElementName}}) { func (littleEndian) String() string { return "LittleEndian" } - - - ` diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index 68c7e6f948..837772abcb 100644 --- 
a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -1,17 +1,22 @@ import ( - "bytes" "encoding/binary" "errors" "math/bits" "fmt" - "github.com/bits-and-blooms/bitset" "{{ .FieldPackagePath }}" "{{ .FieldPackagePath }}/fft" "github.com/consensys/gnark-crypto/internal/parallel" "golang.org/x/crypto/blake2b" ) +{{- $f31 := or (eq .FF "babybear") (eq .FF "koalabear")}} +{{$tReturn := "uint64"}} +{{- if $f31 -}} +{{$tReturn = "uint32"}} +{{- end -}} + + // RSis is the Ring-SIS instance type RSis struct { // Vectors in ℤ_{p}/Xⁿ+1 @@ -34,12 +39,6 @@ type RSis struct { maxNbElementsToHash int - // buffer storing the data to hash - buffer bytes.Buffer - - // allocate memory once per instance (used in Sum()) - bufM {{ .FF }}.Vector - bufMValues *bitset.BitSet } // NewRSis creates an instance of RSis. @@ -50,9 +49,12 @@ type RSis struct { // used to derived n, the number of polynomials in A, and max size of instance's internal buffer. func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*RSis, error) { - if logTwoBound > 64 { + if logTwoBound > 64 || logTwoBound > {{ .FF }}.Bits { return nil, errors.New("logTwoBound too large") } + if logTwoBound % 8 != 0 { + panic("logTwoBound must be a multiple of 8") + } if bits.UintSize == 32 { return nil, errors.New("unsupported architecture; need 64bit target") } @@ -93,8 +95,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R Domain: fft.NewDomain(uint64(degree), fft.WithShift(shift)), A: make([][]{{ .FF }}.Element, n), Ag: make([][]{{ .FF }}.Element, n), - bufM: make({{ .FF }}.Vector, degree*n), - bufMValues: bitset.New(uint(n)), maxNbElementsToHash: maxNbElementsToHash, } if r.LogTwoBound == 8 && r.Degree == 64 { @@ -134,64 +134,46 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { if len(res) != r.Degree { return fmt.Errorf("output vector must have length %d", r.Degree) } - // TODO @gbotrel check that 
this is needed. + for i := 0; i < len(res); i++ { + // TODO @gbotrel ensure that this is needed. res[i].SetZero() } if len(v) > r.maxNbElementsToHash { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - // reset the buffer - r.buffer.Reset() - - // write the elements to the buffer - // TODO @gbotrel for now we use a buffer, we will kill it later in the refactoring. - for _, e := range v { - r.buffer.Write(e.Marshal()) - } - - { - // previous Sum() - - buf := r.buffer.Bytes() - - fastPath := r.LogTwoBound == 8 && r.Degree == 64 + fastPath := r.LogTwoBound == 8 && r.Degree == 64 - // clear the buffers of the instance. - defer r.cleanupBuffers() + reader := NewVectorLimbReader(v, r.LogTwoBound/8) - m := r.bufM - mValues := r.bufMValues + kz := make([]{{ .FF }}.Element, r.Degree) + k := make([]{{ .FF }}.Element, r.Degree) + for i := 0; i < len(r.Ag); i++ { + copy(k, kz) - if r.LogTwoBound<8 && (8%r.LogTwoBound==0) { - limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else if r.LogTwoBound>=8 && ({{ .FF }}.Bytes*8)%r.LogTwoBound == 0 { - limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else { - limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + zero := {{$tReturn}}(0) + for j := 0; j < r.Degree; j++ { + l := reader.NextLimb() + zero |= l + k[j][0] = l } - - // method 1: fft - for i := 0; i < len(r.Ag); i++ { - if !mValues.Test(uint(i)) { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue - } - k := m[i*r.Degree : (i+1)*r.Degree] - if fastPath { - // fast path. 
- FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - mulModAcc(res, r.Ag[i], k) + if zero == 0 { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + continue } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 - - return nil + if fastPath { + // fast path. + FFT64(k, r.twiddleCosets) + } else { + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + } + mulModAcc(res, r.Ag[i], k) } + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 + + return nil } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -208,205 +190,98 @@ func mulModAcc(res []{{ .FF }}.Element, pLagrangeCosetBitReversed, qLagrangeCose } } -// Returns a clone of the RSis parameters with a fresh and empty buffer. Does not -// mutate the current instance. The keys and the public parameters of the SIS -// instance are not deep-copied. It is useful when we want to hash in parallel. -// Otherwise, we would have to generate an entire RSis for each thread. -func (r *RSis) CopyWithFreshBuffer() RSis { - res := *r - res.buffer = bytes.Buffer{} - res.bufM = make({{ .FF }}.Vector, len(r.bufM)) - res.bufMValues = bitset.New(r.bufMValues.Len()) - return res -} -// Cleanup the buffers of the RSis instance -func (r *RSis) cleanupBuffers() { - r.bufMValues.ClearAll() - for i := 0; i < len(r.bufM); i++ { - r.bufM[i].SetZero() - } -} +func deriveRandomElementFromSeed(seed, i, j int64) {{ .FF }}.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) -// Split an slice of bytes representing an array of serialized field element in -// big-endian form into an array of limbs representing the same field elements -// in little-endian form. 
Namely, if our field is represented with 64 bits and we -// have the following field element 0x0123456789abcdef (0 being the most significant -// character and and f being the least significant one) and our log norm bound is -// 16 (so 1 hex character = 1 limb). The function assigns the values of m to [f, e, -// d, c, b, a, ..., 3, 2, 1, 0]. m should be preallocated and zeroized. Additionally, -// we have the guarantee that 2 bits contributing to different field elements cannot -// be part of the same limb. -func LimbDecomposeBytes(buf []byte, m {{ .FF }}.Vector, logTwoBound int) { - limbDecomposeBytes(buf, m, logTwoBound, 0, nil) -} + digest := blake2b.Sum256(buf[:]) -// decomposes m as by taking chunks of logTwoBound bits at a time. The buffer is interpreted like this: -// [0xa, .. , 0x1 | 0xa ... ] -// <- #bytes in a field element -> -// <-0xa is the MSB, 0x1 the LSB-> -// <-we read this chunk from right -// to left -> -// -// -// This function is called when logTwoBound divides the number of bits used to represent a -// {{ .FF }} element. -// From a slice of field elements m:=[a_0, a_1, ...] -// Doing h.Sum(h.Write([Marshal[a_i] for i in len(m)])) is the same than -// writing the a_i in little endian, and then taking logTwoBound bits at a time. -// -// ex: m := [0x1, 0x3] -// in the hash buffer, it is interpreted like that as a stream of bits: -// [100...0 110...0] corresponding to [0x1, 0x3] in little endian, so first bit = LSbit -// then the stream of bits is splitted in chunks of logTwoBound bits. -// -// This function is called when logTwoBound divides 8. 
-func limbDecomposeBytesSmallBound(buf []byte, m {{ .FF }}.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - mask := byte((1 << logTwoBound) - 1) - nbChunksPerBytes := 8 / logTwoBound - nbFieldsElmts := len(buf) / {{ .FF }}.Bytes - for i := 0; i < nbFieldsElmts; i++ { - for j := {{ .FF }}.Bytes - 1; j >= 0; j-- { - curByte := buf[i*{{ .FF }}.Bytes+j] - curPos := i*{{ .FF }}.Bytes*nbChunksPerBytes + ({{ .FF }}.Bytes-1-j)*nbChunksPerBytes - for k := 0; k < nbChunksPerBytes; k++ { - - {{ if (or (eq .FF "babybear") (eq .FF "koalabear" ) ) }} - m[curPos+k][0] = uint32((curByte >> (k * logTwoBound)) & mask) - {{ else }} - m[curPos+k][0] = uint64((curByte >> (k * logTwoBound)) & mask) - {{ end }} - - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[curPos+k][0] != 0 && mValues != nil { - mValues.Set(uint((curPos + k) / degree)) - } - } - } - } -} + var res {{ .FF }}.Element + res.SetBytes(digest[:]) -// limbDecomposeBytesMiddleBound same function than limbDecomposeBytesSmallBound, but logTwoBound is -// a multiple of 8, and divides the number of bits of the fields. 
-func limbDecomposeBytesMiddleBound(buf []byte, m {{ .FF }}.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - nbFieldsElmts := len(buf) / {{ .FF }}.Bytes - nbChunksPerElements := {{ .FF }}.Bytes * 8 / logTwoBound - nbBytesInChunk := logTwoBound / 8 - curElmt := 0 - for i := 0; i < nbFieldsElmts; i++ { - for j := nbChunksPerElements; j > 0; j-- { - curPos := i*{{ .FF }}.Bytes + j*nbBytesInChunk - for k := 1; k <= nbBytesInChunk; k++ { - {{ if (or (eq .FF "babybear") (eq .FF "koalabear" ) ) }} - m[curElmt][0] |= (uint32(buf[curPos-k]) << ((k - 1) * 8)) - {{ else }} - m[curElmt][0] |= (uint64(buf[curPos-k]) << ((k - 1) * 8)) - {{ end }} - } - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[curElmt][0] != 0 && mValues != nil { - mValues.Set(uint((curElmt) / degree)) - } - curElmt += 1 - } - } + return res } -// Split an slice of bytes representing an array of serialized field element in -// big-endian form into an array of limbs representing the same field elements -// in little-endian form. Namely, if our field is represented with 64 bits and we -// have the following field element 0x0123456789abcdef (0 being the most significant -// character and and f being the least significant one) and our log norm bound is -// 16 (so 1 hex character = 1 limb). The function assigns the values of m to [f, e, -// d, c, b, a, ..., 3, 2, 1, 0]. m should be preallocated and zeroized. mValues is -// an optional bitSet. If provided, it must be empty. The function will set bit "i" -// to indicate the that i-th SIS input polynomial should be non-zero. Recall, that a -// SIS polynomial corresponds to a chunk of limbs of size `degree`. Additionally, -// we have the guarantee that 2 bits contributing to different field elements cannot -// be part of the same limb. 
-func limbDecomposeBytes(buf []byte, m {{ .FF }}.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - - // bitwise decomposition of the buffer, in order to build m (the vector to hash) - // as a list of polynomials, whose coefficients are less than r.B bits long. - // Say buf=[0xbe,0x0f]. As a stream of bits it is interpreted like this: - // 10111110 00001111. getIthBit(0)=1 (=leftmost bit), getIthBit(1)=0 (=second leftmost bit), etc. - nbBits := len(buf) * 8 - getIthBit := func(i int) uint8 { - k := i / 8 - if k >= len(buf) { - return 0 - } - b := buf[k] - j := i % 8 - return b >> (7 - j) & 1 - } - - // we process the input buffer by blocks of r.LogTwoBound bits - // each of these block (<< 64bits) are interpreted as a coefficient - mPos := 0 - for fieldStart := 0; fieldStart < nbBits; { - for bitInField := 0; bitInField < {{ .FF }}.Bytes*8; { - - j := bitInField % logTwoBound - // r.LogTwoBound < 64; we just use the first word of our element here, - // and set the bits from LSB to MSB. - at := fieldStart + {{ .FF }}.Bytes*8 - bitInField - 1 +// VectorLimbReader reads a vector of field element, limb by limb. +// The elements are interpreted in little endian. +// The limb is also interpreted in little endian. 
+type VectorLimbReader struct { + v {{ .FF }}.Vector + buf [{{ .FF }}.Bytes]byte - {{ if (or (eq .FF "babybear") (eq .FF "koalabear" ) ) }} - m[mPos][0] |= uint32(getIthBit(at) << j) - {{ else }} - m[mPos][0] |= uint64(getIthBit(at) << j) - {{ end }} - bitInField++ + i int // position in vector + j int // position in buf - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[mPos][0] != 0 && mValues != nil { - mValues.Set(uint(mPos / degree)) - } + next func(buf []byte, pos *int) {{$tReturn}} +} - if j == logTwoBound-1 || bitInField == {{ .FF }}.Bytes*8 { - mPos++ - } - } - fieldStart += {{ .FF }}.Bytes * 8 +// NewVectorLimbReader creates a new VectorLimbReader +// v: the vector to read +// limbSize: the size of the limb in bytes (1, 2, 4 or 8) +func NewVectorLimbReader(v {{ .FF }}.Vector, limbSize int) *VectorLimbReader { + var next func(buf []byte, pos *int) {{$tReturn}} + switch limbSize { + case 1: + next = nextUint8 + case 2: + next = nextUint16 + {{if not $f31 }} + case 4: + next = nextUint32 + case 8: + next = nextUint64 + {{- end}} + default: + panic("unsupported limb size") + } + return &VectorLimbReader{ + v: v, + j: {{ .FF }}.Bytes, + next: next, } } -// see limbDecomposeBytes; this function is optimized for the case where -// logTwoBound == 8 and degree == 64 -func limbDecomposeBytes8_64(buf []byte, m {{ .FF }}.Vector, mValues *bitset.BitSet) { - // with logTwoBound == 8, we can actually advance byte per byte. - const degree = 64 - j := 0 - - for startPos := {{ .FF }}.Bytes - 1; startPos < len(buf); startPos += {{ .FF }}.Bytes { - for i := startPos; i >= startPos-{{ .FF }}.Bytes+1; i-- { - {{ if (or (eq .FF "babybear") (eq .FF "koalabear") ) }} - m[j][0] = uint32(buf[i]) - {{ else }} - m[j][0] = uint64(buf[i]) - {{ end }} - if m[j][0] != 0 { - mValues.Set(uint(j / degree)) - } - j++ - } +// NextLimb returns the next limb of the vector. +// This does not perform any bound check, may trigger an out of bound panic. 
+// If underlying vector is "out of limb" +func (vr *VectorLimbReader) NextLimb() {{$tReturn}} { + if vr.j == {{ .FF }}.Bytes { + vr.j = 0 + // TODO @gbotrel we could return 0 in the case vr.i == len(vr.v) + {{.FF}}.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) + vr.i++ } + return vr.next(vr.buf[:], &vr.j) } -func deriveRandomElementFromSeed(seed, i, j int64) {{ .FF }}.Element { - var buf [3 + 3*8]byte - copy(buf[:3], "SIS") - binary.BigEndian.PutUint64(buf[3:], uint64(seed)) - binary.BigEndian.PutUint64(buf[11:], uint64(i)) - binary.BigEndian.PutUint64(buf[19:], uint64(j)) - digest := blake2b.Sum256(buf[:]) +func nextUint8(buf []byte, pos *int) {{$tReturn}} { + r := {{$tReturn}} (buf[*pos]) + *pos++ + return r +} - var res {{ .FF }}.Element - res.SetBytes(digest[:]) +func nextUint16(buf []byte, pos *int) {{$tReturn}} { + r := {{$tReturn}} (binary.LittleEndian.Uint16(buf[*pos:])) + *pos += 2 + return r +} +{{ if not $f31 }} +func nextUint32(buf []byte, pos *int) {{$tReturn}} { + r := {{$tReturn}} (binary.LittleEndian.Uint32(buf[*pos:])) + *pos += 4 + return r +} - return res -} \ No newline at end of file +func nextUint64(buf []byte, pos *int) {{$tReturn}} { + r := {{$tReturn}} (binary.LittleEndian.Uint64(buf[*pos:])) + *pos += 8 + return r +} +{{- end}} \ No newline at end of file diff --git a/field/generator/internal/templates/sis/sis.test.go.tmpl b/field/generator/internal/templates/sis/sis.test.go.tmpl index 777fd33cb2..ce4bd51257 100644 --- a/field/generator/internal/templates/sis/sis.test.go.tmpl +++ b/field/generator/internal/templates/sis/sis.test.go.tmpl @@ -1,6 +1,4 @@ import ( - "bytes" - "crypto/rand" "encoding/json" "fmt" "math/bits" @@ -9,7 +7,6 @@ import ( "time" "math/big" - "github.com/bits-and-blooms/bitset" "{{ .FieldPackagePath }}" "{{ .FieldPackagePath }}/fft" "github.com/stretchr/testify/require" @@ -20,11 +17,7 @@ type sisParams struct { } var params128Bits []sisParams = []sisParams{ - {logTwoBound: 2, logTwoDegree: 3}, - {logTwoBound: 4, 
logTwoDegree: 4}, - {logTwoBound: 6, logTwoDegree: 5}, {logTwoBound: 8, logTwoDegree: 6}, - {logTwoBound: 10, logTwoDegree: 6}, {logTwoBound: 16, logTwoDegree: 7}, {logTwoBound: 32, logTwoDegree: 8}, } @@ -58,11 +51,22 @@ func TestReference(t *testing.T) { inputs := testCases.Inputs for testCaseID, testCase := range testCases.Entries { + if testCase.Params.LogTwoBound % 8 != 0 { + t.Logf("skipping test case %d, logTwoBound is not a multiple of 8", testCaseID) + continue + } + if testCase.Params.LogTwoBound > {{.FF}}.Bits { + t.Logf("skipping test case %d, logTwoBound %d is greater than field bit size (%d)", testCaseID, testCase.Params.LogTwoBound, {{.FF}}.Bits) + continue + } + t.Logf("logTwoBound = %d, logTwoDegree = %d", testCase.Params.LogTwoBound, testCase.Params.LogTwoDegree) + // create the SIS instance sis, err := NewRSis(testCase.Params.Seed, testCase.Params.LogTwoDegree, testCase.Params.LogTwoBound, testCase.Params.MaxNbElementsToHash) assert.NoError(err) + // key generation same than in sage makeKeyDeterministic(t, sis, testCase.Params.Seed) @@ -81,7 +85,7 @@ func TestReference(t *testing.T) { } -func TestLimbDecomposeBytesMiddleBound(t *testing.T) { +func TestLimbDecomposeBytes(t *testing.T) { var montConstant {{ .FF }}.Element var bMontConstant big.Int @@ -92,18 +96,19 @@ func TestLimbDecomposeBytesMiddleBound(t *testing.T) { nbElmts := 10 a := make([]{{ .FF }}.Element, nbElmts) for i := 0; i < nbElmts; i++ { - a[i].SetUint64(33) - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) + a[i].SetRandom() } + {{- $f31 := or (eq .FF "babybear") (eq .FF "koalabear")}} + logTwoBound := 8 - for cc:=0;cc<3; cc++ { + for cc:=0;cc<{{- if $f31 }}1{{- else }}3{{- end}}; cc++ { + vr := NewVectorLimbReader(a, logTwoBound/8) m := make({{ .FF }}.Vector, nbElmts*{{ .FF }}.Bytes*8/logTwoBound) - limbDecomposeBytesMiddleBound(buf.Bytes(), m, logTwoBound, 4, nil) + for i := 0; i < len(m); i++ { + m[i][0] = vr.NextLimb() + } for i := 0; i < 
len(m); i++ { m[i].Mul(&m[i], &montConstant) @@ -124,58 +129,6 @@ func TestLimbDecomposeBytesMiddleBound(t *testing.T) { } -func TestLimbDecomposeBytesSmallBound(t *testing.T) { - - var montConstant {{ .FF }}.Element - var bMontConstant big.Int - bMontConstant.SetUint64(1) - bMontConstant.Lsh(&bMontConstant, {{ .FF }}.Bytes*8) - montConstant.SetBigInt(&bMontConstant) - - nbElmts := 10 - a := make([]{{ .FF }}.Element, nbElmts) - for i := 0; i= 0; i-- { @@ -242,51 +195,6 @@ func estimateSisTheory(p sisParams) float64 { return float64(r) } -func BenchmarkDecomposition(b *testing.B) { - - nbElmts := 1000 - a := make([]{{ .FF }}.Element, nbElmts) - for i := 0; i < nbElmts; i++ { - a[i].SetRandom() - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) - } - logTwoBound := 4 - m := make({{ .FF }}.Vector, nbElmts*{{ .FF }}.Bytes*8/logTwoBound) - - b.Run(fmt.Sprintf("limbDecomposeBytes logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - b.Run(fmt.Sprintf("limbDecomposeByteSmallBound logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytesSmallBound(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - logTwoBound = 16 - b.Run(fmt.Sprintf("limbDecomposeBytes logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - b.Run(fmt.Sprintf("limbDecomposeByteSmallBound logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytesMiddleBound(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - -} - func BenchmarkSIS(b *testing.B) { // max nb field elements to hash @@ -353,32 +261,6 @@ func benchmarkSIS(b *testing.B, input []{{ .FF }}.Element, sparse bool, logTwoBo }) } -func TestLimbDecompositionFastPath(t *testing.T) 
{ - assert := require.New(t) - - for size := {{ .FF }}.Bytes; size < 5*{{ .FF }}.Bytes; size += {{ .FF }}.Bytes { - // Test the fast path of limbDecomposeBytes8_64 - buf := make([]byte, size) - m := make([]{{ .FF }}.Element, size) - mValues := bitset.New(uint(size)) - n := make([]{{ .FF }}.Element, size) - nValues := bitset.New(uint(size)) - - // Generate a random buffer - _, err := rand.Read(buf) - assert.NoError(err) - - limbDecomposeBytes8_64(buf, m, mValues) - limbDecomposeBytes(buf, n, 8, 64, nValues) - - for i := 0; i < size; i++ { - assert.Equal(mValues.Test(uint(i)), nValues.Test(uint(i))) - assert.True(m[i].Equal(&n[i])) - } - } - -} - func TestUnrolledFFT(t *testing.T) { var shift {{ .FF }}.Element diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index ddd7960752..0435af6983 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -6,25 +6,19 @@ package sis import ( - "bytes" "encoding/binary" "errors" "fmt" "math/bits" - "github.com/bits-and-blooms/bitset" "github.com/consensys/gnark-crypto/field/goldilocks" "github.com/consensys/gnark-crypto/field/goldilocks/fft" "github.com/consensys/gnark-crypto/internal/parallel" "golang.org/x/crypto/blake2b" ) -// Ring-SIS instance +// RSis is the Ring-SIS instance type RSis struct { - - // buffer storing the data to hash - buffer bytes.Buffer - // Vectors in ℤ_{p}/Xⁿ+1 // A[i] is the i-th polynomial. // Ag the evaluation form of the polynomials in A on the coset √(g) * @@ -36,21 +30,14 @@ type RSis struct { // cf https://hackmd.io/7OODKWQZRRW9RxM5BaXtIw , B >= 3. LogTwoBound int + // d, the degree of X^{d}+1 + Degree int + // domain for the polynomial multiplication Domain *fft.Domain twiddleCosets []goldilocks.Element // see FFT64 and precomputeTwiddlesCoset - // d, the degree of X^{d}+1 - Degree int - - // in bytes, represents the maximum number of bytes the .Write(...) 
will handle; - // ( maximum number of bytes to sum ) - capacity int maxNbElementsToHash int - - // allocate memory once per instance (used in Sum()) - bufM goldilocks.Vector - bufMValues *bitset.BitSet } // NewRSis creates an instance of RSis. @@ -61,22 +48,22 @@ type RSis struct { // used to derived n, the number of polynomials in A, and max size of instance's internal buffer. func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*RSis, error) { - if logTwoBound > 64 { + if logTwoBound > 64 || logTwoBound > goldilocks.Bits { return nil, errors.New("logTwoBound too large") } + if logTwoBound%8 != 0 { + panic("logTwoBound must be a multiple of 8") + } if bits.UintSize == 32 { return nil, errors.New("unsupported architecture; need 64bit target") } degree := 1 << logTwoDegree - capacity := maxNbElementsToHash * goldilocks.Bytes // n: number of polynomials in A // len(m) == degree * n // with each element in m being logTwoBounds bits from the instance buffer. // that is, to fill m, we need [degree * n * logTwoBound] bits of data - // capacity == [degree * n * logTwoBound] / 8 - // n == (capacity*8)/(degree*logTwoBound) // First n <- #limbs to represent a single field element n := (goldilocks.Bytes * 8) / logTwoBound @@ -103,13 +90,10 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R r := &RSis{ LogTwoBound: logTwoBound, - capacity: capacity, Degree: degree, Domain: fft.NewDomain(uint64(degree), fft.WithShift(shift)), A: make([][]goldilocks.Element, n), Ag: make([][]goldilocks.Element, n), - bufM: make(goldilocks.Vector, degree*n), - bufMValues: bitset.New(uint(n)), maxNbElementsToHash: maxNbElementsToHash, } if r.LogTwoBound == 8 && r.Degree == 64 { @@ -148,67 +132,46 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { if len(res) != r.Degree { return fmt.Errorf("output vector must have length %d", r.Degree) } - // TODO @gbotrel check that this is needed. 
+ for i := 0; i < len(res); i++ { + // TODO @gbotrel ensure that this is needed. res[i].SetZero() } if len(v) > r.maxNbElementsToHash { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - // reset the buffer - r.buffer.Reset() + fastPath := r.LogTwoBound == 8 && r.Degree == 64 - // write the elements to the buffer - // TODO @gbotrel for now we use a buffer, we will kill it later in the refactoring. - for _, e := range v { - r.buffer.Write(e.Marshal()) - } + reader := NewVectorLimbReader(v, r.LogTwoBound/8) - { - // previous Sum() + kz := make([]goldilocks.Element, r.Degree) + k := make([]goldilocks.Element, r.Degree) + for i := 0; i < len(r.Ag); i++ { + copy(k, kz) - buf := r.buffer.Bytes() - if len(buf) > r.capacity { - panic("buffer too large") + zero := uint64(0) + for j := 0; j < r.Degree; j++ { + l := reader.NextLimb() + zero |= l + k[j][0] = l } - - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - - // clear the buffers of the instance. - defer r.cleanupBuffers() - - m := r.bufM - mValues := r.bufMValues - - if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { - limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else if r.LogTwoBound >= 8 && (goldilocks.Bytes*8)%r.LogTwoBound == 0 { - limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else { - limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + if zero == 0 { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + continue } - - // method 1: fft - for i := 0; i < len(r.Ag); i++ { - if !mValues.Test(uint(i)) { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue - } - k := m[i*r.Degree : (i+1)*r.Degree] - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - mulModAcc(res, r.Ag[i], k) + if fastPath { + // fast path. 
+ FFT64(k, r.twiddleCosets) + } else { + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 - - return nil + mulModAcc(res, r.Ag[i], k) } + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 + + return nil } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -225,193 +188,92 @@ func mulModAcc(res []goldilocks.Element, pLagrangeCosetBitReversed, qLagrangeCos } } -// Returns a clone of the RSis parameters with a fresh and empty buffer. Does not -// mutate the current instance. The keys and the public parameters of the SIS -// instance are not deep-copied. It is useful when we want to hash in parallel. -// Otherwise, we would have to generate an entire RSis for each thread. -func (r *RSis) CopyWithFreshBuffer() RSis { - res := *r - res.buffer = bytes.Buffer{} - res.bufM = make(goldilocks.Vector, len(r.bufM)) - res.bufMValues = bitset.New(r.bufMValues.Len()) - return res -} +func deriveRandomElementFromSeed(seed, i, j int64) goldilocks.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) -// Cleanup the buffers of the RSis instance -func (r *RSis) cleanupBuffers() { - r.bufMValues.ClearAll() - for i := 0; i < len(r.bufM); i++ { - r.bufM[i].SetZero() - } -} + digest := blake2b.Sum256(buf[:]) -// Split an slice of bytes representing an array of serialized field element in -// big-endian form into an array of limbs representing the same field elements -// in little-endian form. Namely, if our field is represented with 64 bits and we -// have the following field element 0x0123456789abcdef (0 being the most significant -// character and and f being the least significant one) and our log norm bound is -// 16 (so 1 hex character = 1 limb). 
The function assigns the values of m to [f, e, -// d, c, b, a, ..., 3, 2, 1, 0]. m should be preallocated and zeroized. Additionally, -// we have the guarantee that 2 bits contributing to different field elements cannot -// be part of the same limb. -func LimbDecomposeBytes(buf []byte, m goldilocks.Vector, logTwoBound int) { - limbDecomposeBytes(buf, m, logTwoBound, 0, nil) -} + var res goldilocks.Element + res.SetBytes(digest[:]) -// decomposes m as by taking chunks of logTwoBound bits at a time. The buffer is interpreted like this: -// [0xa, .. , 0x1 | 0xa ... ] -// -// <- #bytes in a field element -> -// <-0xa is the MSB, 0x1 the LSB-> -// <-we read this chunk from right -// to left -> -// -// This function is called when logTwoBound divides the number of bits used to represent a -// goldilocks element. -// From a slice of field elements m:=[a_0, a_1, ...] -// Doing h.Sum(h.Write([Marshal[a_i] for i in len(m)])) is the same than -// writing the a_i in little endian, and then taking logTwoBound bits at a time. -// -// ex: m := [0x1, 0x3] -// in the hash buffer, it is interpreted like that as a stream of bits: -// [100...0 110...0] corresponding to [0x1, 0x3] in little endian, so first bit = LSbit -// then the stream of bits is splitted in chunks of logTwoBound bits. -// -// This function is called when logTwoBound divides 8. 
-func limbDecomposeBytesSmallBound(buf []byte, m goldilocks.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - mask := byte((1 << logTwoBound) - 1) - nbChunksPerBytes := 8 / logTwoBound - nbFieldsElmts := len(buf) / goldilocks.Bytes - for i := 0; i < nbFieldsElmts; i++ { - for j := goldilocks.Bytes - 1; j >= 0; j-- { - curByte := buf[i*goldilocks.Bytes+j] - curPos := i*goldilocks.Bytes*nbChunksPerBytes + (goldilocks.Bytes-1-j)*nbChunksPerBytes - for k := 0; k < nbChunksPerBytes; k++ { - - m[curPos+k][0] = uint64((curByte >> (k * logTwoBound)) & mask) - - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[curPos+k][0] != 0 && mValues != nil { - mValues.Set(uint((curPos + k) / degree)) - } - } - } - } + return res } -// limbDecomposeBytesMiddleBound same function than limbDecomposeBytesSmallBound, but logTwoBound is -// a multiple of 8, and divides the number of bits of the fields. -func limbDecomposeBytesMiddleBound(buf []byte, m goldilocks.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - nbFieldsElmts := len(buf) / goldilocks.Bytes - nbChunksPerElements := goldilocks.Bytes * 8 / logTwoBound - nbBytesInChunk := logTwoBound / 8 - curElmt := 0 - for i := 0; i < nbFieldsElmts; i++ { - for j := nbChunksPerElements; j > 0; j-- { - curPos := i*goldilocks.Bytes + j*nbBytesInChunk - for k := 1; k <= nbBytesInChunk; k++ { +// VectorLimbReader reads a vector of field element, limb by limb. +// The elements are interpreted in little endian. +// The limb is also interpreted in little endian. 
+type VectorLimbReader struct { + v goldilocks.Vector + buf [goldilocks.Bytes]byte - m[curElmt][0] |= (uint64(buf[curPos-k]) << ((k - 1) * 8)) + i int // position in vector + j int // position in buf - } - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[curElmt][0] != 0 && mValues != nil { - mValues.Set(uint((curElmt) / degree)) - } - curElmt += 1 - } - } + next func(buf []byte, pos *int) uint64 } -// Split an slice of bytes representing an array of serialized field element in -// big-endian form into an array of limbs representing the same field elements -// in little-endian form. Namely, if our field is represented with 64 bits and we -// have the following field element 0x0123456789abcdef (0 being the most significant -// character and and f being the least significant one) and our log norm bound is -// 16 (so 1 hex character = 1 limb). The function assigns the values of m to [f, e, -// d, c, b, a, ..., 3, 2, 1, 0]. m should be preallocated and zeroized. mValues is -// an optional bitSet. If provided, it must be empty. The function will set bit "i" -// to indicate the that i-th SIS input polynomial should be non-zero. Recall, that a -// SIS polynomial corresponds to a chunk of limbs of size `degree`. Additionally, -// we have the guarantee that 2 bits contributing to different field elements cannot -// be part of the same limb. -func limbDecomposeBytes(buf []byte, m goldilocks.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - - // bitwise decomposition of the buffer, in order to build m (the vector to hash) - // as a list of polynomials, whose coefficients are less than r.B bits long. - // Say buf=[0xbe,0x0f]. As a stream of bits it is interpreted like this: - // 10111110 00001111. getIthBit(0)=1 (=leftmost bit), getIthBit(1)=0 (=second leftmost bit), etc. 
- nbBits := len(buf) * 8 - getIthBit := func(i int) uint8 { - k := i / 8 - if k >= len(buf) { - return 0 - } - b := buf[k] - j := i % 8 - return b >> (7 - j) & 1 +// NewVectorLimbReader creates a new VectorLimbReader +// v: the vector to read +// limbSize: the size of the limb in bytes (1, 2, 4 or 8) +func NewVectorLimbReader(v goldilocks.Vector, limbSize int) *VectorLimbReader { + var next func(buf []byte, pos *int) uint64 + switch limbSize { + case 1: + next = nextUint8 + case 2: + next = nextUint16 + + case 4: + next = nextUint32 + case 8: + next = nextUint64 + default: + panic("unsupported limb size") } - - // we process the input buffer by blocks of r.LogTwoBound bits - // each of these block (<< 64bits) are interpreted as a coefficient - mPos := 0 - for fieldStart := 0; fieldStart < nbBits; { - for bitInField := 0; bitInField < goldilocks.Bytes*8; { - - j := bitInField % logTwoBound - - // r.LogTwoBound < 64; we just use the first word of our element here, - // and set the bits from LSB to MSB. - at := fieldStart + goldilocks.Bytes*8 - bitInField - 1 - - m[mPos][0] |= uint64(getIthBit(at) << j) - - bitInField++ - - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[mPos][0] != 0 && mValues != nil { - mValues.Set(uint(mPos / degree)) - } - - if j == logTwoBound-1 || bitInField == goldilocks.Bytes*8 { - mPos++ - } - } - fieldStart += goldilocks.Bytes * 8 + return &VectorLimbReader{ + v: v, + j: goldilocks.Bytes, + next: next, } } -// see limbDecomposeBytes; this function is optimized for the case where -// logTwoBound == 8 and degree == 64 -func limbDecomposeBytes8_64(buf []byte, m goldilocks.Vector, mValues *bitset.BitSet) { - // with logTwoBound == 8, we can actually advance byte per byte. 
- const degree = 64 - j := 0 - - for startPos := goldilocks.Bytes - 1; startPos < len(buf); startPos += goldilocks.Bytes { - for i := startPos; i >= startPos-goldilocks.Bytes+1; i-- { - - m[j][0] = uint64(buf[i]) - - if m[j][0] != 0 { - mValues.Set(uint(j / degree)) - } - j++ - } +// NextLimb returns the next limb of the vector. +// This does not perform any bound check, may trigger an out of bound panic. +// If underlying vector is "out of limb" +func (vr *VectorLimbReader) NextLimb() uint64 { + if vr.j == goldilocks.Bytes { + vr.j = 0 + // TODO @gbotrel we could return 0 in the case vr.i == len(vr.v) + goldilocks.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) + vr.i++ } + return vr.next(vr.buf[:], &vr.j) } -func deriveRandomElementFromSeed(seed, i, j int64) goldilocks.Element { - var buf [3 + 3*8]byte - copy(buf[:3], "SIS") - binary.BigEndian.PutUint64(buf[3:], uint64(seed)) - binary.BigEndian.PutUint64(buf[11:], uint64(i)) - binary.BigEndian.PutUint64(buf[19:], uint64(j)) +func nextUint8(buf []byte, pos *int) uint64 { + r := uint64(buf[*pos]) + *pos++ + return r +} - digest := blake2b.Sum256(buf[:]) +func nextUint16(buf []byte, pos *int) uint64 { + r := uint64(binary.LittleEndian.Uint16(buf[*pos:])) + *pos += 2 + return r +} - var res goldilocks.Element - res.SetBytes(digest[:]) +func nextUint32(buf []byte, pos *int) uint64 { + r := uint64(binary.LittleEndian.Uint32(buf[*pos:])) + *pos += 4 + return r +} - return res +func nextUint64(buf []byte, pos *int) uint64 { + r := uint64(binary.LittleEndian.Uint64(buf[*pos:])) + *pos += 8 + return r } diff --git a/field/goldilocks/sis/sis_test.go b/field/goldilocks/sis/sis_test.go index 8c931677d5..fd8f056b00 100644 --- a/field/goldilocks/sis/sis_test.go +++ b/field/goldilocks/sis/sis_test.go @@ -6,8 +6,6 @@ package sis import ( - "bytes" - "crypto/rand" "encoding/json" "fmt" "math/big" @@ -16,7 +14,6 @@ import ( "testing" "time" - "github.com/bits-and-blooms/bitset" "github.com/consensys/gnark-crypto/field/goldilocks" 
"github.com/consensys/gnark-crypto/field/goldilocks/fft" "github.com/stretchr/testify/require" @@ -27,11 +24,7 @@ type sisParams struct { } var params128Bits []sisParams = []sisParams{ - {logTwoBound: 2, logTwoDegree: 3}, - {logTwoBound: 4, logTwoDegree: 4}, - {logTwoBound: 6, logTwoDegree: 5}, {logTwoBound: 8, logTwoDegree: 6}, - {logTwoBound: 10, logTwoDegree: 6}, {logTwoBound: 16, logTwoDegree: 7}, {logTwoBound: 32, logTwoDegree: 8}, } @@ -65,6 +58,15 @@ func TestReference(t *testing.T) { inputs := testCases.Inputs for testCaseID, testCase := range testCases.Entries { + if testCase.Params.LogTwoBound%8 != 0 { + t.Logf("skipping test case %d, logTwoBound is not a multiple of 8", testCaseID) + continue + } + if testCase.Params.LogTwoBound > goldilocks.Bits { + t.Logf("skipping test case %d, logTwoBound %d is greater than field bit size (%d)", testCaseID, testCase.Params.LogTwoBound, goldilocks.Bits) + continue + } + t.Logf("logTwoBound = %d, logTwoDegree = %d", testCase.Params.LogTwoBound, testCase.Params.LogTwoDegree) // create the SIS instance sis, err := NewRSis(testCase.Params.Seed, testCase.Params.LogTwoDegree, testCase.Params.LogTwoBound, testCase.Params.MaxNbElementsToHash) @@ -88,7 +90,7 @@ func TestReference(t *testing.T) { } -func TestLimbDecomposeBytesMiddleBound(t *testing.T) { +func TestLimbDecomposeBytes(t *testing.T) { var montConstant goldilocks.Element var bMontConstant big.Int @@ -99,71 +101,22 @@ func TestLimbDecomposeBytesMiddleBound(t *testing.T) { nbElmts := 10 a := make([]goldilocks.Element, nbElmts) for i := 0; i < nbElmts; i++ { - a[i].SetUint64(33) - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) + a[i].SetRandom() } logTwoBound := 8 for cc := 0; cc < 3; cc++ { + vr := NewVectorLimbReader(a, logTwoBound/8) m := make(goldilocks.Vector, nbElmts*goldilocks.Bytes*8/logTwoBound) - limbDecomposeBytesMiddleBound(buf.Bytes(), m, logTwoBound, 4, nil) - for i := 0; i < len(m); i++ { - m[i].Mul(&m[i], 
&montConstant) - } - - var x goldilocks.Element - x.SetUint64(1 << logTwoBound) - - coeffsPerFieldsElmt := goldilocks.Bytes * 8 / logTwoBound - for i := 0; i < nbElmts; i++ { - r := eval(m[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytes failed") - } + m[i][0] = vr.NextLimb() } - logTwoBound *= 2 - } - -} - -func TestLimbDecomposeBytesSmallBound(t *testing.T) { - - var montConstant goldilocks.Element - var bMontConstant big.Int - bMontConstant.SetUint64(1) - bMontConstant.Lsh(&bMontConstant, goldilocks.Bytes*8) - montConstant.SetBigInt(&bMontConstant) - - nbElmts := 10 - a := make([]goldilocks.Element, nbElmts) - for i := 0; i < nbElmts; i++ { - a[i].SetRandom() - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) - } - - logTwoBound := 2 - - for cc := 0; cc < 3; cc++ { - - m := make(goldilocks.Vector, nbElmts*goldilocks.Bytes*8/logTwoBound) - m2 := make(goldilocks.Vector, nbElmts*goldilocks.Bytes*8/logTwoBound) - - // the limbs are set as is, they are NOT converted in Montgomery form - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - limbDecomposeBytesSmallBound(buf.Bytes(), m2, logTwoBound, 4, nil) for i := 0; i < len(m); i++ { m[i].Mul(&m[i], &montConstant) - m2[i].Mul(&m2[i], &montConstant) } + var x goldilocks.Element x.SetUint64(1 << logTwoBound) @@ -173,10 +126,6 @@ func TestLimbDecomposeBytesSmallBound(t *testing.T) { if !r.Equal(&a[i]) { t.Fatal("limbDecomposeBytes failed") } - r = eval(m2[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytesSmallBound failed") - } } logTwoBound *= 2 } @@ -249,51 +198,6 @@ func estimateSisTheory(p sisParams) float64 { return float64(r) } -func BenchmarkDecomposition(b *testing.B) { - - nbElmts := 1000 - a := make([]goldilocks.Element, nbElmts) - for i := 0; i < nbElmts; i++ { - a[i].SetRandom() - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - 
buf.Write(a[i].Marshal()) - } - logTwoBound := 4 - m := make(goldilocks.Vector, nbElmts*goldilocks.Bytes*8/logTwoBound) - - b.Run(fmt.Sprintf("limbDecomposeBytes logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - b.Run(fmt.Sprintf("limbDecomposeByteSmallBound logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytesSmallBound(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - logTwoBound = 16 - b.Run(fmt.Sprintf("limbDecomposeBytes logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - b.Run(fmt.Sprintf("limbDecomposeByteSmallBound logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytesMiddleBound(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - -} - func BenchmarkSIS(b *testing.B) { // max nb field elements to hash @@ -360,32 +264,6 @@ func benchmarkSIS(b *testing.B, input []goldilocks.Element, sparse bool, logTwoB }) } -func TestLimbDecompositionFastPath(t *testing.T) { - assert := require.New(t) - - for size := goldilocks.Bytes; size < 5*goldilocks.Bytes; size += goldilocks.Bytes { - // Test the fast path of limbDecomposeBytes8_64 - buf := make([]byte, size) - m := make([]goldilocks.Element, size) - mValues := bitset.New(uint(size)) - n := make([]goldilocks.Element, size) - nValues := bitset.New(uint(size)) - - // Generate a random buffer - _, err := rand.Read(buf) - assert.NoError(err) - - limbDecomposeBytes8_64(buf, m, mValues) - limbDecomposeBytes(buf, n, 8, 64, nValues) - - for i := 0; i < size; i++ { - assert.Equal(mValues.Test(uint(i)), nValues.Test(uint(i))) - assert.True(m[i].Equal(&n[i])) - } - } - -} - func TestUnrolledFFT(t *testing.T) { var shift goldilocks.Element diff --git 
a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index eacfc32c05..626a4b91a0 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -6,25 +6,19 @@ package sis import ( - "bytes" "encoding/binary" "errors" "fmt" "math/bits" - "github.com/bits-and-blooms/bitset" "github.com/consensys/gnark-crypto/field/koalabear" "github.com/consensys/gnark-crypto/field/koalabear/fft" "github.com/consensys/gnark-crypto/internal/parallel" "golang.org/x/crypto/blake2b" ) -// Ring-SIS instance +// RSis is the Ring-SIS instance type RSis struct { - - // buffer storing the data to hash - buffer bytes.Buffer - // Vectors in ℤ_{p}/Xⁿ+1 // A[i] is the i-th polynomial. // Ag the evaluation form of the polynomials in A on the coset √(g) * @@ -36,21 +30,14 @@ type RSis struct { // cf https://hackmd.io/7OODKWQZRRW9RxM5BaXtIw , B >= 3. LogTwoBound int + // d, the degree of X^{d}+1 + Degree int + // domain for the polynomial multiplication Domain *fft.Domain twiddleCosets []koalabear.Element // see FFT64 and precomputeTwiddlesCoset - // d, the degree of X^{d}+1 - Degree int - - // in bytes, represents the maximum number of bytes the .Write(...) will handle; - // ( maximum number of bytes to sum ) - capacity int maxNbElementsToHash int - - // allocate memory once per instance (used in Sum()) - bufM koalabear.Vector - bufMValues *bitset.BitSet } // NewRSis creates an instance of RSis. @@ -61,22 +48,22 @@ type RSis struct { // used to derived n, the number of polynomials in A, and max size of instance's internal buffer. 
func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*RSis, error) { - if logTwoBound > 64 { + if logTwoBound > 64 || logTwoBound > koalabear.Bits { return nil, errors.New("logTwoBound too large") } + if logTwoBound%8 != 0 { + panic("logTwoBound must be a multiple of 8") + } if bits.UintSize == 32 { return nil, errors.New("unsupported architecture; need 64bit target") } degree := 1 << logTwoDegree - capacity := maxNbElementsToHash * koalabear.Bytes // n: number of polynomials in A // len(m) == degree * n // with each element in m being logTwoBounds bits from the instance buffer. // that is, to fill m, we need [degree * n * logTwoBound] bits of data - // capacity == [degree * n * logTwoBound] / 8 - // n == (capacity*8)/(degree*logTwoBound) // First n <- #limbs to represent a single field element n := (koalabear.Bytes * 8) / logTwoBound @@ -103,13 +90,10 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R r := &RSis{ LogTwoBound: logTwoBound, - capacity: capacity, Degree: degree, Domain: fft.NewDomain(uint64(degree), fft.WithShift(shift)), A: make([][]koalabear.Element, n), Ag: make([][]koalabear.Element, n), - bufM: make(koalabear.Vector, degree*n), - bufMValues: bitset.New(uint(n)), maxNbElementsToHash: maxNbElementsToHash, } if r.LogTwoBound == 8 && r.Degree == 64 { @@ -148,67 +132,46 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { if len(res) != r.Degree { return fmt.Errorf("output vector must have length %d", r.Degree) } - // TODO @gbotrel check that this is needed. + for i := 0; i < len(res); i++ { + // TODO @gbotrel ensure that this is needed. 
res[i].SetZero() } if len(v) > r.maxNbElementsToHash { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - // reset the buffer - r.buffer.Reset() + fastPath := r.LogTwoBound == 8 && r.Degree == 64 - // write the elements to the buffer - // TODO @gbotrel for now we use a buffer, we will kill it later in the refactoring. - for _, e := range v { - r.buffer.Write(e.Marshal()) - } + reader := NewVectorLimbReader(v, r.LogTwoBound/8) - { - // previous Sum() + kz := make([]koalabear.Element, r.Degree) + k := make([]koalabear.Element, r.Degree) + for i := 0; i < len(r.Ag); i++ { + copy(k, kz) - buf := r.buffer.Bytes() - if len(buf) > r.capacity { - panic("buffer too large") + zero := uint32(0) + for j := 0; j < r.Degree; j++ { + l := reader.NextLimb() + zero |= l + k[j][0] = l } - - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - - // clear the buffers of the instance. - defer r.cleanupBuffers() - - m := r.bufM - mValues := r.bufMValues - - if r.LogTwoBound < 8 && (8%r.LogTwoBound == 0) { - limbDecomposeBytesSmallBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else if r.LogTwoBound >= 8 && (koalabear.Bytes*8)%r.LogTwoBound == 0 { - limbDecomposeBytesMiddleBound(buf, m, r.LogTwoBound, r.Degree, mValues) - } else { - limbDecomposeBytes(buf, m, r.LogTwoBound, r.Degree, mValues) + if zero == 0 { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + continue } - - // method 1: fft - for i := 0; i < len(r.Ag); i++ { - if !mValues.Test(uint(i)) { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue - } - k := m[i*r.Degree : (i+1)*r.Degree] - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - mulModAcc(res, r.Ag[i], k) + if fastPath { + // fast path. 
+ FFT64(k, r.twiddleCosets) + } else { + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 - - return nil + mulModAcc(res, r.Ag[i], k) } + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 + + return nil } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -225,193 +188,76 @@ func mulModAcc(res []koalabear.Element, pLagrangeCosetBitReversed, qLagrangeCose } } -// Returns a clone of the RSis parameters with a fresh and empty buffer. Does not -// mutate the current instance. The keys and the public parameters of the SIS -// instance are not deep-copied. It is useful when we want to hash in parallel. -// Otherwise, we would have to generate an entire RSis for each thread. -func (r *RSis) CopyWithFreshBuffer() RSis { - res := *r - res.buffer = bytes.Buffer{} - res.bufM = make(koalabear.Vector, len(r.bufM)) - res.bufMValues = bitset.New(r.bufMValues.Len()) - return res -} +func deriveRandomElementFromSeed(seed, i, j int64) koalabear.Element { + var buf [3 + 3*8]byte + copy(buf[:3], "SIS") + binary.BigEndian.PutUint64(buf[3:], uint64(seed)) + binary.BigEndian.PutUint64(buf[11:], uint64(i)) + binary.BigEndian.PutUint64(buf[19:], uint64(j)) -// Cleanup the buffers of the RSis instance -func (r *RSis) cleanupBuffers() { - r.bufMValues.ClearAll() - for i := 0; i < len(r.bufM); i++ { - r.bufM[i].SetZero() - } -} + digest := blake2b.Sum256(buf[:]) -// Split an slice of bytes representing an array of serialized field element in -// big-endian form into an array of limbs representing the same field elements -// in little-endian form. Namely, if our field is represented with 64 bits and we -// have the following field element 0x0123456789abcdef (0 being the most significant -// character and and f being the least significant one) and our log norm bound is -// 16 (so 1 hex character = 1 limb). 
The function assigns the values of m to [f, e, -// d, c, b, a, ..., 3, 2, 1, 0]. m should be preallocated and zeroized. Additionally, -// we have the guarantee that 2 bits contributing to different field elements cannot -// be part of the same limb. -func LimbDecomposeBytes(buf []byte, m koalabear.Vector, logTwoBound int) { - limbDecomposeBytes(buf, m, logTwoBound, 0, nil) -} + var res koalabear.Element + res.SetBytes(digest[:]) -// decomposes m as by taking chunks of logTwoBound bits at a time. The buffer is interpreted like this: -// [0xa, .. , 0x1 | 0xa ... ] -// -// <- #bytes in a field element -> -// <-0xa is the MSB, 0x1 the LSB-> -// <-we read this chunk from right -// to left -> -// -// This function is called when logTwoBound divides the number of bits used to represent a -// koalabear element. -// From a slice of field elements m:=[a_0, a_1, ...] -// Doing h.Sum(h.Write([Marshal[a_i] for i in len(m)])) is the same than -// writing the a_i in little endian, and then taking logTwoBound bits at a time. -// -// ex: m := [0x1, 0x3] -// in the hash buffer, it is interpreted like that as a stream of bits: -// [100...0 110...0] corresponding to [0x1, 0x3] in little endian, so first bit = LSbit -// then the stream of bits is splitted in chunks of logTwoBound bits. -// -// This function is called when logTwoBound divides 8. 
-func limbDecomposeBytesSmallBound(buf []byte, m koalabear.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - mask := byte((1 << logTwoBound) - 1) - nbChunksPerBytes := 8 / logTwoBound - nbFieldsElmts := len(buf) / koalabear.Bytes - for i := 0; i < nbFieldsElmts; i++ { - for j := koalabear.Bytes - 1; j >= 0; j-- { - curByte := buf[i*koalabear.Bytes+j] - curPos := i*koalabear.Bytes*nbChunksPerBytes + (koalabear.Bytes-1-j)*nbChunksPerBytes - for k := 0; k < nbChunksPerBytes; k++ { - - m[curPos+k][0] = uint32((curByte >> (k * logTwoBound)) & mask) - - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[curPos+k][0] != 0 && mValues != nil { - mValues.Set(uint((curPos + k) / degree)) - } - } - } - } + return res } -// limbDecomposeBytesMiddleBound same function than limbDecomposeBytesSmallBound, but logTwoBound is -// a multiple of 8, and divides the number of bits of the fields. -func limbDecomposeBytesMiddleBound(buf []byte, m koalabear.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - nbFieldsElmts := len(buf) / koalabear.Bytes - nbChunksPerElements := koalabear.Bytes * 8 / logTwoBound - nbBytesInChunk := logTwoBound / 8 - curElmt := 0 - for i := 0; i < nbFieldsElmts; i++ { - for j := nbChunksPerElements; j > 0; j-- { - curPos := i*koalabear.Bytes + j*nbBytesInChunk - for k := 1; k <= nbBytesInChunk; k++ { +// VectorLimbReader reads a vector of field element, limb by limb. +// The elements are interpreted in little endian. +// The limb is also interpreted in little endian. 
+type VectorLimbReader struct { + v koalabear.Vector + buf [koalabear.Bytes]byte - m[curElmt][0] |= (uint32(buf[curPos-k]) << ((k - 1) * 8)) + i int // position in vector + j int // position in buf - } - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[curElmt][0] != 0 && mValues != nil { - mValues.Set(uint((curElmt) / degree)) - } - curElmt += 1 - } - } + next func(buf []byte, pos *int) uint32 } -// Split an slice of bytes representing an array of serialized field element in -// big-endian form into an array of limbs representing the same field elements -// in little-endian form. Namely, if our field is represented with 64 bits and we -// have the following field element 0x0123456789abcdef (0 being the most significant -// character and and f being the least significant one) and our log norm bound is -// 16 (so 1 hex character = 1 limb). The function assigns the values of m to [f, e, -// d, c, b, a, ..., 3, 2, 1, 0]. m should be preallocated and zeroized. mValues is -// an optional bitSet. If provided, it must be empty. The function will set bit "i" -// to indicate the that i-th SIS input polynomial should be non-zero. Recall, that a -// SIS polynomial corresponds to a chunk of limbs of size `degree`. Additionally, -// we have the guarantee that 2 bits contributing to different field elements cannot -// be part of the same limb. -func limbDecomposeBytes(buf []byte, m koalabear.Vector, logTwoBound, degree int, mValues *bitset.BitSet) { - - // bitwise decomposition of the buffer, in order to build m (the vector to hash) - // as a list of polynomials, whose coefficients are less than r.B bits long. - // Say buf=[0xbe,0x0f]. As a stream of bits it is interpreted like this: - // 10111110 00001111. getIthBit(0)=1 (=leftmost bit), getIthBit(1)=0 (=second leftmost bit), etc. 
- nbBits := len(buf) * 8 - getIthBit := func(i int) uint8 { - k := i / 8 - if k >= len(buf) { - return 0 - } - b := buf[k] - j := i % 8 - return b >> (7 - j) & 1 +// NewVectorLimbReader creates a new VectorLimbReader +// v: the vector to read +// limbSize: the size of the limb in bytes (1, 2, 4 or 8) +func NewVectorLimbReader(v koalabear.Vector, limbSize int) *VectorLimbReader { + var next func(buf []byte, pos *int) uint32 + switch limbSize { + case 1: + next = nextUint8 + case 2: + next = nextUint16 + + default: + panic("unsupported limb size") } - - // we process the input buffer by blocks of r.LogTwoBound bits - // each of these block (<< 64bits) are interpreted as a coefficient - mPos := 0 - for fieldStart := 0; fieldStart < nbBits; { - for bitInField := 0; bitInField < koalabear.Bytes*8; { - - j := bitInField % logTwoBound - - // r.LogTwoBound < 64; we just use the first word of our element here, - // and set the bits from LSB to MSB. - at := fieldStart + koalabear.Bytes*8 - bitInField - 1 - - m[mPos][0] |= uint32(getIthBit(at) << j) - - bitInField++ - - // Check if mPos is zero and mark as non-zero in the bitset if not - if m[mPos][0] != 0 && mValues != nil { - mValues.Set(uint(mPos / degree)) - } - - if j == logTwoBound-1 || bitInField == koalabear.Bytes*8 { - mPos++ - } - } - fieldStart += koalabear.Bytes * 8 + return &VectorLimbReader{ + v: v, + j: koalabear.Bytes, + next: next, } } -// see limbDecomposeBytes; this function is optimized for the case where -// logTwoBound == 8 and degree == 64 -func limbDecomposeBytes8_64(buf []byte, m koalabear.Vector, mValues *bitset.BitSet) { - // with logTwoBound == 8, we can actually advance byte per byte. 
- const degree = 64 - j := 0 - - for startPos := koalabear.Bytes - 1; startPos < len(buf); startPos += koalabear.Bytes { - for i := startPos; i >= startPos-koalabear.Bytes+1; i-- { - - m[j][0] = uint32(buf[i]) - - if m[j][0] != 0 { - mValues.Set(uint(j / degree)) - } - j++ - } +// NextLimb returns the next limb of the vector. +// This does not perform any bound check, may trigger an out of bound panic. +// If underlying vector is "out of limb" +func (vr *VectorLimbReader) NextLimb() uint32 { + if vr.j == koalabear.Bytes { + vr.j = 0 + // TODO @gbotrel we could return 0 in the case vr.i == len(vr.v) + koalabear.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) + vr.i++ } + return vr.next(vr.buf[:], &vr.j) } -func deriveRandomElementFromSeed(seed, i, j int64) koalabear.Element { - var buf [3 + 3*8]byte - copy(buf[:3], "SIS") - binary.BigEndian.PutUint64(buf[3:], uint64(seed)) - binary.BigEndian.PutUint64(buf[11:], uint64(i)) - binary.BigEndian.PutUint64(buf[19:], uint64(j)) - - digest := blake2b.Sum256(buf[:]) - - var res koalabear.Element - res.SetBytes(digest[:]) +func nextUint8(buf []byte, pos *int) uint32 { + r := uint32(buf[*pos]) + *pos++ + return r +} - return res +func nextUint16(buf []byte, pos *int) uint32 { + r := uint32(binary.LittleEndian.Uint16(buf[*pos:])) + *pos += 2 + return r } diff --git a/field/koalabear/sis/sis_test.go b/field/koalabear/sis/sis_test.go index 6d4e135eb4..bc802fc8df 100644 --- a/field/koalabear/sis/sis_test.go +++ b/field/koalabear/sis/sis_test.go @@ -6,8 +6,6 @@ package sis import ( - "bytes" - "crypto/rand" "encoding/json" "fmt" "math/big" @@ -16,7 +14,6 @@ import ( "testing" "time" - "github.com/bits-and-blooms/bitset" "github.com/consensys/gnark-crypto/field/koalabear" "github.com/consensys/gnark-crypto/field/koalabear/fft" "github.com/stretchr/testify/require" @@ -27,11 +24,7 @@ type sisParams struct { } var params128Bits []sisParams = []sisParams{ - {logTwoBound: 2, logTwoDegree: 3}, - {logTwoBound: 4, logTwoDegree: 4}, - 
{logTwoBound: 6, logTwoDegree: 5}, {logTwoBound: 8, logTwoDegree: 6}, - {logTwoBound: 10, logTwoDegree: 6}, {logTwoBound: 16, logTwoDegree: 7}, {logTwoBound: 32, logTwoDegree: 8}, } @@ -65,6 +58,15 @@ func TestReference(t *testing.T) { inputs := testCases.Inputs for testCaseID, testCase := range testCases.Entries { + if testCase.Params.LogTwoBound%8 != 0 { + t.Logf("skipping test case %d, logTwoBound is not a multiple of 8", testCaseID) + continue + } + if testCase.Params.LogTwoBound > koalabear.Bits { + t.Logf("skipping test case %d, logTwoBound %d is greater than field bit size (%d)", testCaseID, testCase.Params.LogTwoBound, koalabear.Bits) + continue + } + t.Logf("logTwoBound = %d, logTwoDegree = %d", testCase.Params.LogTwoBound, testCase.Params.LogTwoDegree) // create the SIS instance sis, err := NewRSis(testCase.Params.Seed, testCase.Params.LogTwoDegree, testCase.Params.LogTwoBound, testCase.Params.MaxNbElementsToHash) @@ -88,7 +90,7 @@ func TestReference(t *testing.T) { } -func TestLimbDecomposeBytesMiddleBound(t *testing.T) { +func TestLimbDecomposeBytes(t *testing.T) { var montConstant koalabear.Element var bMontConstant big.Int @@ -99,71 +101,22 @@ func TestLimbDecomposeBytesMiddleBound(t *testing.T) { nbElmts := 10 a := make([]koalabear.Element, nbElmts) for i := 0; i < nbElmts; i++ { - a[i].SetUint64(33) - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) + a[i].SetRandom() } logTwoBound := 8 - for cc := 0; cc < 3; cc++ { + for cc := 0; cc < 1; cc++ { + vr := NewVectorLimbReader(a, logTwoBound/8) m := make(koalabear.Vector, nbElmts*koalabear.Bytes*8/logTwoBound) - limbDecomposeBytesMiddleBound(buf.Bytes(), m, logTwoBound, 4, nil) - for i := 0; i < len(m); i++ { - m[i].Mul(&m[i], &montConstant) - } - - var x koalabear.Element - x.SetUint64(1 << logTwoBound) - - coeffsPerFieldsElmt := koalabear.Bytes * 8 / logTwoBound - for i := 0; i < nbElmts; i++ { - r := eval(m[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if 
!r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytes failed") - } + m[i][0] = vr.NextLimb() } - logTwoBound *= 2 - } - -} - -func TestLimbDecomposeBytesSmallBound(t *testing.T) { - - var montConstant koalabear.Element - var bMontConstant big.Int - bMontConstant.SetUint64(1) - bMontConstant.Lsh(&bMontConstant, koalabear.Bytes*8) - montConstant.SetBigInt(&bMontConstant) - - nbElmts := 10 - a := make([]koalabear.Element, nbElmts) - for i := 0; i < nbElmts; i++ { - a[i].SetRandom() - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) - } - - logTwoBound := 2 - - for cc := 0; cc < 3; cc++ { - - m := make(koalabear.Vector, nbElmts*koalabear.Bytes*8/logTwoBound) - m2 := make(koalabear.Vector, nbElmts*koalabear.Bytes*8/logTwoBound) - - // the limbs are set as is, they are NOT converted in Montgomery form - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - limbDecomposeBytesSmallBound(buf.Bytes(), m2, logTwoBound, 4, nil) for i := 0; i < len(m); i++ { m[i].Mul(&m[i], &montConstant) - m2[i].Mul(&m2[i], &montConstant) } + var x koalabear.Element x.SetUint64(1 << logTwoBound) @@ -173,10 +126,6 @@ func TestLimbDecomposeBytesSmallBound(t *testing.T) { if !r.Equal(&a[i]) { t.Fatal("limbDecomposeBytes failed") } - r = eval(m2[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytesSmallBound failed") - } } logTwoBound *= 2 } @@ -249,51 +198,6 @@ func estimateSisTheory(p sisParams) float64 { return float64(r) } -func BenchmarkDecomposition(b *testing.B) { - - nbElmts := 1000 - a := make([]koalabear.Element, nbElmts) - for i := 0; i < nbElmts; i++ { - a[i].SetRandom() - } - var buf bytes.Buffer - for i := 0; i < nbElmts; i++ { - buf.Write(a[i].Marshal()) - } - logTwoBound := 4 - m := make(koalabear.Vector, nbElmts*koalabear.Bytes*8/logTwoBound) - - b.Run(fmt.Sprintf("limbDecomposeBytes logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - 
limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - b.Run(fmt.Sprintf("limbDecomposeByteSmallBound logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytesSmallBound(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - logTwoBound = 16 - b.Run(fmt.Sprintf("limbDecomposeBytes logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytes(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - - b.Run(fmt.Sprintf("limbDecomposeByteSmallBound logTwoBound=%d", logTwoBound), func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - limbDecomposeBytesMiddleBound(buf.Bytes(), m, logTwoBound, 4, nil) - } - }) - -} - func BenchmarkSIS(b *testing.B) { // max nb field elements to hash @@ -360,32 +264,6 @@ func benchmarkSIS(b *testing.B, input []koalabear.Element, sparse bool, logTwoBo }) } -func TestLimbDecompositionFastPath(t *testing.T) { - assert := require.New(t) - - for size := koalabear.Bytes; size < 5*koalabear.Bytes; size += koalabear.Bytes { - // Test the fast path of limbDecomposeBytes8_64 - buf := make([]byte, size) - m := make([]koalabear.Element, size) - mValues := bitset.New(uint(size)) - n := make([]koalabear.Element, size) - nValues := bitset.New(uint(size)) - - // Generate a random buffer - _, err := rand.Read(buf) - assert.NoError(err) - - limbDecomposeBytes8_64(buf, m, mValues) - limbDecomposeBytes(buf, n, 8, 64, nValues) - - for i := 0; i < size; i++ { - assert.Equal(mValues.Test(uint(i)), nValues.Test(uint(i))) - assert.True(m[i].Equal(&n[i])) - } - } - -} - func TestUnrolledFFT(t *testing.T) { var shift koalabear.Element From d18b49726d038eff6089f43aa02efd8813481b12 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Tue, 7 Jan 2025 16:25:54 -0600 Subject: [PATCH 06/25] style: code cleaning --- ecc/bls12-377/fr/sis/sis.go | 3 --- field/babybear/sis/sis.go | 3 --- field/generator/internal/templates/sis/sis.go.tmpl | 3 --- 
field/goldilocks/sis/sis.go | 3 --- field/koalabear/sis/sis.go | 3 --- 5 files changed, 15 deletions(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 569323ebc1..65118357c9 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -125,9 +125,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // Hash interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, // and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 -// -// It is equivalent to calling r.Write(element.Marshal()); outBytes = r.Sum(nil); -// ! note @gbotrel: this is a place holder, may not make sense func (r *RSis) Hash(v, res []fr.Element) error { if len(res) != r.Degree { return fmt.Errorf("output vector must have length %d", r.Degree) diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index eeba906849..ea1f7394c8 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -125,9 +125,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // Hash interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, // and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 -// -// It is equivalent to calling r.Write(element.Marshal()); outBytes = r.Sum(nil); -// ! 
note @gbotrel: this is a place holder, may not make sense func (r *RSis) Hash(v, res []babybear.Element) error { if len(res) != r.Degree { return fmt.Errorf("output vector must have length %d", r.Degree) diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index 837772abcb..7ac18bf9fd 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -127,9 +127,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // Hash interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, // and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 -// -// It is equivalent to calling r.Write(element.Marshal()); outBytes = r.Sum(nil); -// ! note @gbotrel: this is a place holder, may not make sense func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { if len(res) != r.Degree { return fmt.Errorf("output vector must have length %d", r.Degree) diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index 0435af6983..5a24c0a8f9 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -125,9 +125,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // Hash interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, // and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 -// -// It is equivalent to calling r.Write(element.Marshal()); outBytes = r.Sum(nil); -// ! 
note @gbotrel: this is a place holder, may not make sense func (r *RSis) Hash(v, res []goldilocks.Element) error { if len(res) != r.Degree { return fmt.Errorf("output vector must have length %d", r.Degree) diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index 626a4b91a0..72260010ee 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -125,9 +125,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // Hash interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, // and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 -// -// It is equivalent to calling r.Write(element.Marshal()); outBytes = r.Sum(nil); -// ! note @gbotrel: this is a place holder, may not make sense func (r *RSis) Hash(v, res []koalabear.Element) error { if len(res) != r.Degree { return fmt.Errorf("output vector must have length %d", r.Degree) From 81fe8916c1d156b5c3872dbaf3c7b9ede0fdc128 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Tue, 7 Jan 2025 23:28:32 +0000 Subject: [PATCH 07/25] refactor: get rid of fft gen in SIS, fft does it better --- ecc/bls12-377/fr/fft/fft.go | 67 ++- ecc/bls12-377/fr/sis/sis.go | 18 +- ecc/bls12-377/fr/sis/sis_fft.go | 556 ------------------ ecc/bls12-377/fr/sis/sis_test.go | 34 +- ecc/bls12-381/fr/fft/fft.go | 67 ++- ecc/bls24-315/fr/fft/fft.go | 67 ++- ecc/bls24-317/fr/fft/fft.go | 67 ++- ecc/bn254/fr/fft/fft.go | 67 ++- ecc/bw6-633/fr/fft/fft.go | 67 ++- ecc/bw6-761/fr/fft/fft.go | 67 ++- field/babybear/fft/fft.go | 67 ++- field/babybear/sis/sis.go | 18 +- field/babybear/sis/sis_fft.go | 556 ------------------ field/babybear/sis/sis_test.go | 34 +- field/generator/generator_sis.go | 4 +- .../internal/templates/fft/fft.go.tmpl | 56 +- .../internal/templates/sis/fft.go.tmpl | 82 --- .../internal/templates/sis/sis.go.tmpl | 16 +- .../internal/templates/sis/sis.test.go.tmpl | 34 +- field/goldilocks/fft/fft.go | 67 ++- 
field/goldilocks/sis/sis.go | 18 +- field/goldilocks/sis/sis_fft.go | 556 ------------------ field/goldilocks/sis/sis_test.go | 34 +- field/koalabear/fft/fft.go | 67 ++- field/koalabear/sis/sis.go | 18 +- field/koalabear/sis/sis_fft.go | 556 ------------------ field/koalabear/sis/sis_test.go | 34 +- internal/generator/main.go | 5 - internal/generator/sis/generate.go | 37 -- internal/generator/sis/template/fft.go.tmpl | 82 --- 30 files changed, 651 insertions(+), 2767 deletions(-) delete mode 100644 ecc/bls12-377/fr/sis/sis_fft.go delete mode 100644 field/babybear/sis/sis_fft.go delete mode 100644 field/generator/internal/templates/sis/fft.go.tmpl delete mode 100644 field/goldilocks/sis/sis_fft.go delete mode 100644 field/koalabear/sis/sis_fft.go delete mode 100644 internal/generator/sis/generate.go delete mode 100644 internal/generator/sis/template/fft.go.tmpl diff --git a/ecc/bls12-377/fr/fft/fft.go b/ecc/bls12-377/fr/fft/fft.go index e6d228ded7..3c76d4f66c 100644 --- a/ecc/bls12-377/fr/fft/fft.go +++ b/ecc/bls12-377/fr/fft/fft.go @@ -60,9 +60,9 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) } else { if domain.withPrecompute { parallel.Execute(len(a), func(start, end int) { - for i := start; i < end; i++ { - a[i].Mul(&a[i], &domain.cosetTable[i]) - } + v1 := fr.Vector(a[start:end]) + v2 := fr.Vector(domain.cosetTable[start:end]) + v1.Mul(v1, v2) }, opt.nbTasks) } else { c := domain.FrMultiplicativeGen @@ -199,9 +199,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -281,10 +287,17 @@ func ditFFT(a []fr.Element, w 
fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } + m := n >> 1 nextStage := stage + 1 @@ -408,3 +421,39 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} diff --git a/ecc/bls12-377/fr/sis/sis.go 
b/ecc/bls12-377/fr/sis/sis.go index 65118357c9..27cc2e466b 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -34,8 +34,7 @@ type RSis struct { Degree int // domain for the polynomial multiplication - Domain *fft.Domain - twiddleCosets []fr.Element // see FFT64 and precomputeTwiddlesCoset + Domain *fft.Domain maxNbElementsToHash int } @@ -96,10 +95,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R Ag: make([][]fr.Element, n), maxNbElementsToHash: maxNbElementsToHash, } - if r.LogTwoBound == 8 && r.Degree == 64 { - // TODO @gbotrel fixme, that's dirty. - r.twiddleCosets = PrecomputeTwiddlesCoset(r.Domain.Generator, r.Domain.FrMultiplicativeGen) - } // filling A a := make([]fr.Element, n*r.Degree) @@ -138,8 +133,6 @@ func (r *RSis) Hash(v, res []fr.Element) error { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - reader := NewVectorLimbReader(v, r.LogTwoBound/8) kz := make([]fr.Element, r.Degree) @@ -158,12 +151,9 @@ func (r *RSis) Hash(v, res []fr.Element) error { // we can skip this, FFT(0) = 0 continue } - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } + + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + mulModAcc(res, r.Ag[i], k) } r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 diff --git a/ecc/bls12-377/fr/sis/sis_fft.go b/ecc/bls12-377/fr/sis/sis_fft.go deleted file mode 100644 index f4f4db3abb..0000000000 --- a/ecc/bls12-377/fr/sis/sis_fft.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2020-2025 Consensys Software Inc. -// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. 
- -// Code generated by consensys/gnark-crypto DO NOT EDIT - -package sis - -import ( - "github.com/consensys/gnark-crypto/ecc/bls12-377/fr" - "math/big" -) - -// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements -// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset -func FFT64(a []fr.Element, twiddlesCoset []fr.Element) { - - a[32].Mul(&a[32], &twiddlesCoset[0]) - a[33].Mul(&a[33], &twiddlesCoset[0]) - a[34].Mul(&a[34], &twiddlesCoset[0]) - a[35].Mul(&a[35], &twiddlesCoset[0]) - a[36].Mul(&a[36], &twiddlesCoset[0]) - a[37].Mul(&a[37], &twiddlesCoset[0]) - a[38].Mul(&a[38], &twiddlesCoset[0]) - a[39].Mul(&a[39], &twiddlesCoset[0]) - a[40].Mul(&a[40], &twiddlesCoset[0]) - a[41].Mul(&a[41], &twiddlesCoset[0]) - a[42].Mul(&a[42], &twiddlesCoset[0]) - a[43].Mul(&a[43], &twiddlesCoset[0]) - a[44].Mul(&a[44], &twiddlesCoset[0]) - a[45].Mul(&a[45], &twiddlesCoset[0]) - a[46].Mul(&a[46], &twiddlesCoset[0]) - a[47].Mul(&a[47], &twiddlesCoset[0]) - a[48].Mul(&a[48], &twiddlesCoset[0]) - a[49].Mul(&a[49], &twiddlesCoset[0]) - a[50].Mul(&a[50], &twiddlesCoset[0]) - a[51].Mul(&a[51], &twiddlesCoset[0]) - a[52].Mul(&a[52], &twiddlesCoset[0]) - a[53].Mul(&a[53], &twiddlesCoset[0]) - a[54].Mul(&a[54], &twiddlesCoset[0]) - a[55].Mul(&a[55], &twiddlesCoset[0]) - a[56].Mul(&a[56], &twiddlesCoset[0]) - a[57].Mul(&a[57], &twiddlesCoset[0]) - a[58].Mul(&a[58], &twiddlesCoset[0]) - a[59].Mul(&a[59], &twiddlesCoset[0]) - a[60].Mul(&a[60], &twiddlesCoset[0]) - a[61].Mul(&a[61], &twiddlesCoset[0]) - a[62].Mul(&a[62], &twiddlesCoset[0]) - a[63].Mul(&a[63], &twiddlesCoset[0]) - fr.Butterfly(&a[0], &a[32]) - fr.Butterfly(&a[1], &a[33]) - fr.Butterfly(&a[2], &a[34]) - fr.Butterfly(&a[3], &a[35]) - fr.Butterfly(&a[4], &a[36]) - fr.Butterfly(&a[5], &a[37]) - fr.Butterfly(&a[6], &a[38]) - fr.Butterfly(&a[7], &a[39]) - 
fr.Butterfly(&a[8], &a[40]) - fr.Butterfly(&a[9], &a[41]) - fr.Butterfly(&a[10], &a[42]) - fr.Butterfly(&a[11], &a[43]) - fr.Butterfly(&a[12], &a[44]) - fr.Butterfly(&a[13], &a[45]) - fr.Butterfly(&a[14], &a[46]) - fr.Butterfly(&a[15], &a[47]) - fr.Butterfly(&a[16], &a[48]) - fr.Butterfly(&a[17], &a[49]) - fr.Butterfly(&a[18], &a[50]) - fr.Butterfly(&a[19], &a[51]) - fr.Butterfly(&a[20], &a[52]) - fr.Butterfly(&a[21], &a[53]) - fr.Butterfly(&a[22], &a[54]) - fr.Butterfly(&a[23], &a[55]) - fr.Butterfly(&a[24], &a[56]) - fr.Butterfly(&a[25], &a[57]) - fr.Butterfly(&a[26], &a[58]) - fr.Butterfly(&a[27], &a[59]) - fr.Butterfly(&a[28], &a[60]) - fr.Butterfly(&a[29], &a[61]) - fr.Butterfly(&a[30], &a[62]) - fr.Butterfly(&a[31], &a[63]) - a[16].Mul(&a[16], &twiddlesCoset[1]) - a[17].Mul(&a[17], &twiddlesCoset[1]) - a[18].Mul(&a[18], &twiddlesCoset[1]) - a[19].Mul(&a[19], &twiddlesCoset[1]) - a[20].Mul(&a[20], &twiddlesCoset[1]) - a[21].Mul(&a[21], &twiddlesCoset[1]) - a[22].Mul(&a[22], &twiddlesCoset[1]) - a[23].Mul(&a[23], &twiddlesCoset[1]) - a[24].Mul(&a[24], &twiddlesCoset[1]) - a[25].Mul(&a[25], &twiddlesCoset[1]) - a[26].Mul(&a[26], &twiddlesCoset[1]) - a[27].Mul(&a[27], &twiddlesCoset[1]) - a[28].Mul(&a[28], &twiddlesCoset[1]) - a[29].Mul(&a[29], &twiddlesCoset[1]) - a[30].Mul(&a[30], &twiddlesCoset[1]) - a[31].Mul(&a[31], &twiddlesCoset[1]) - a[48].Mul(&a[48], &twiddlesCoset[2]) - a[49].Mul(&a[49], &twiddlesCoset[2]) - a[50].Mul(&a[50], &twiddlesCoset[2]) - a[51].Mul(&a[51], &twiddlesCoset[2]) - a[52].Mul(&a[52], &twiddlesCoset[2]) - a[53].Mul(&a[53], &twiddlesCoset[2]) - a[54].Mul(&a[54], &twiddlesCoset[2]) - a[55].Mul(&a[55], &twiddlesCoset[2]) - a[56].Mul(&a[56], &twiddlesCoset[2]) - a[57].Mul(&a[57], &twiddlesCoset[2]) - a[58].Mul(&a[58], &twiddlesCoset[2]) - a[59].Mul(&a[59], &twiddlesCoset[2]) - a[60].Mul(&a[60], &twiddlesCoset[2]) - a[61].Mul(&a[61], &twiddlesCoset[2]) - a[62].Mul(&a[62], &twiddlesCoset[2]) - a[63].Mul(&a[63], &twiddlesCoset[2]) - 
fr.Butterfly(&a[0], &a[16]) - fr.Butterfly(&a[1], &a[17]) - fr.Butterfly(&a[2], &a[18]) - fr.Butterfly(&a[3], &a[19]) - fr.Butterfly(&a[4], &a[20]) - fr.Butterfly(&a[5], &a[21]) - fr.Butterfly(&a[6], &a[22]) - fr.Butterfly(&a[7], &a[23]) - fr.Butterfly(&a[8], &a[24]) - fr.Butterfly(&a[9], &a[25]) - fr.Butterfly(&a[10], &a[26]) - fr.Butterfly(&a[11], &a[27]) - fr.Butterfly(&a[12], &a[28]) - fr.Butterfly(&a[13], &a[29]) - fr.Butterfly(&a[14], &a[30]) - fr.Butterfly(&a[15], &a[31]) - fr.Butterfly(&a[32], &a[48]) - fr.Butterfly(&a[33], &a[49]) - fr.Butterfly(&a[34], &a[50]) - fr.Butterfly(&a[35], &a[51]) - fr.Butterfly(&a[36], &a[52]) - fr.Butterfly(&a[37], &a[53]) - fr.Butterfly(&a[38], &a[54]) - fr.Butterfly(&a[39], &a[55]) - fr.Butterfly(&a[40], &a[56]) - fr.Butterfly(&a[41], &a[57]) - fr.Butterfly(&a[42], &a[58]) - fr.Butterfly(&a[43], &a[59]) - fr.Butterfly(&a[44], &a[60]) - fr.Butterfly(&a[45], &a[61]) - fr.Butterfly(&a[46], &a[62]) - fr.Butterfly(&a[47], &a[63]) - a[8].Mul(&a[8], &twiddlesCoset[3]) - a[9].Mul(&a[9], &twiddlesCoset[3]) - a[10].Mul(&a[10], &twiddlesCoset[3]) - a[11].Mul(&a[11], &twiddlesCoset[3]) - a[12].Mul(&a[12], &twiddlesCoset[3]) - a[13].Mul(&a[13], &twiddlesCoset[3]) - a[14].Mul(&a[14], &twiddlesCoset[3]) - a[15].Mul(&a[15], &twiddlesCoset[3]) - a[24].Mul(&a[24], &twiddlesCoset[4]) - a[25].Mul(&a[25], &twiddlesCoset[4]) - a[26].Mul(&a[26], &twiddlesCoset[4]) - a[27].Mul(&a[27], &twiddlesCoset[4]) - a[28].Mul(&a[28], &twiddlesCoset[4]) - a[29].Mul(&a[29], &twiddlesCoset[4]) - a[30].Mul(&a[30], &twiddlesCoset[4]) - a[31].Mul(&a[31], &twiddlesCoset[4]) - a[40].Mul(&a[40], &twiddlesCoset[5]) - a[41].Mul(&a[41], &twiddlesCoset[5]) - a[42].Mul(&a[42], &twiddlesCoset[5]) - a[43].Mul(&a[43], &twiddlesCoset[5]) - a[44].Mul(&a[44], &twiddlesCoset[5]) - a[45].Mul(&a[45], &twiddlesCoset[5]) - a[46].Mul(&a[46], &twiddlesCoset[5]) - a[47].Mul(&a[47], &twiddlesCoset[5]) - a[56].Mul(&a[56], &twiddlesCoset[6]) - a[57].Mul(&a[57], &twiddlesCoset[6]) - 
a[58].Mul(&a[58], &twiddlesCoset[6]) - a[59].Mul(&a[59], &twiddlesCoset[6]) - a[60].Mul(&a[60], &twiddlesCoset[6]) - a[61].Mul(&a[61], &twiddlesCoset[6]) - a[62].Mul(&a[62], &twiddlesCoset[6]) - a[63].Mul(&a[63], &twiddlesCoset[6]) - fr.Butterfly(&a[0], &a[8]) - fr.Butterfly(&a[1], &a[9]) - fr.Butterfly(&a[2], &a[10]) - fr.Butterfly(&a[3], &a[11]) - fr.Butterfly(&a[4], &a[12]) - fr.Butterfly(&a[5], &a[13]) - fr.Butterfly(&a[6], &a[14]) - fr.Butterfly(&a[7], &a[15]) - fr.Butterfly(&a[16], &a[24]) - fr.Butterfly(&a[17], &a[25]) - fr.Butterfly(&a[18], &a[26]) - fr.Butterfly(&a[19], &a[27]) - fr.Butterfly(&a[20], &a[28]) - fr.Butterfly(&a[21], &a[29]) - fr.Butterfly(&a[22], &a[30]) - fr.Butterfly(&a[23], &a[31]) - fr.Butterfly(&a[32], &a[40]) - fr.Butterfly(&a[33], &a[41]) - fr.Butterfly(&a[34], &a[42]) - fr.Butterfly(&a[35], &a[43]) - fr.Butterfly(&a[36], &a[44]) - fr.Butterfly(&a[37], &a[45]) - fr.Butterfly(&a[38], &a[46]) - fr.Butterfly(&a[39], &a[47]) - fr.Butterfly(&a[48], &a[56]) - fr.Butterfly(&a[49], &a[57]) - fr.Butterfly(&a[50], &a[58]) - fr.Butterfly(&a[51], &a[59]) - fr.Butterfly(&a[52], &a[60]) - fr.Butterfly(&a[53], &a[61]) - fr.Butterfly(&a[54], &a[62]) - fr.Butterfly(&a[55], &a[63]) - a[4].Mul(&a[4], &twiddlesCoset[7]) - a[5].Mul(&a[5], &twiddlesCoset[7]) - a[6].Mul(&a[6], &twiddlesCoset[7]) - a[7].Mul(&a[7], &twiddlesCoset[7]) - a[12].Mul(&a[12], &twiddlesCoset[8]) - a[13].Mul(&a[13], &twiddlesCoset[8]) - a[14].Mul(&a[14], &twiddlesCoset[8]) - a[15].Mul(&a[15], &twiddlesCoset[8]) - a[20].Mul(&a[20], &twiddlesCoset[9]) - a[21].Mul(&a[21], &twiddlesCoset[9]) - a[22].Mul(&a[22], &twiddlesCoset[9]) - a[23].Mul(&a[23], &twiddlesCoset[9]) - a[28].Mul(&a[28], &twiddlesCoset[10]) - a[29].Mul(&a[29], &twiddlesCoset[10]) - a[30].Mul(&a[30], &twiddlesCoset[10]) - a[31].Mul(&a[31], &twiddlesCoset[10]) - a[36].Mul(&a[36], &twiddlesCoset[11]) - a[37].Mul(&a[37], &twiddlesCoset[11]) - a[38].Mul(&a[38], &twiddlesCoset[11]) - a[39].Mul(&a[39], &twiddlesCoset[11]) - 
a[44].Mul(&a[44], &twiddlesCoset[12]) - a[45].Mul(&a[45], &twiddlesCoset[12]) - a[46].Mul(&a[46], &twiddlesCoset[12]) - a[47].Mul(&a[47], &twiddlesCoset[12]) - a[52].Mul(&a[52], &twiddlesCoset[13]) - a[53].Mul(&a[53], &twiddlesCoset[13]) - a[54].Mul(&a[54], &twiddlesCoset[13]) - a[55].Mul(&a[55], &twiddlesCoset[13]) - a[60].Mul(&a[60], &twiddlesCoset[14]) - a[61].Mul(&a[61], &twiddlesCoset[14]) - a[62].Mul(&a[62], &twiddlesCoset[14]) - a[63].Mul(&a[63], &twiddlesCoset[14]) - fr.Butterfly(&a[0], &a[4]) - fr.Butterfly(&a[1], &a[5]) - fr.Butterfly(&a[2], &a[6]) - fr.Butterfly(&a[3], &a[7]) - fr.Butterfly(&a[8], &a[12]) - fr.Butterfly(&a[9], &a[13]) - fr.Butterfly(&a[10], &a[14]) - fr.Butterfly(&a[11], &a[15]) - fr.Butterfly(&a[16], &a[20]) - fr.Butterfly(&a[17], &a[21]) - fr.Butterfly(&a[18], &a[22]) - fr.Butterfly(&a[19], &a[23]) - fr.Butterfly(&a[24], &a[28]) - fr.Butterfly(&a[25], &a[29]) - fr.Butterfly(&a[26], &a[30]) - fr.Butterfly(&a[27], &a[31]) - fr.Butterfly(&a[32], &a[36]) - fr.Butterfly(&a[33], &a[37]) - fr.Butterfly(&a[34], &a[38]) - fr.Butterfly(&a[35], &a[39]) - fr.Butterfly(&a[40], &a[44]) - fr.Butterfly(&a[41], &a[45]) - fr.Butterfly(&a[42], &a[46]) - fr.Butterfly(&a[43], &a[47]) - fr.Butterfly(&a[48], &a[52]) - fr.Butterfly(&a[49], &a[53]) - fr.Butterfly(&a[50], &a[54]) - fr.Butterfly(&a[51], &a[55]) - fr.Butterfly(&a[56], &a[60]) - fr.Butterfly(&a[57], &a[61]) - fr.Butterfly(&a[58], &a[62]) - fr.Butterfly(&a[59], &a[63]) - a[2].Mul(&a[2], &twiddlesCoset[15]) - a[3].Mul(&a[3], &twiddlesCoset[15]) - a[6].Mul(&a[6], &twiddlesCoset[16]) - a[7].Mul(&a[7], &twiddlesCoset[16]) - a[10].Mul(&a[10], &twiddlesCoset[17]) - a[11].Mul(&a[11], &twiddlesCoset[17]) - a[14].Mul(&a[14], &twiddlesCoset[18]) - a[15].Mul(&a[15], &twiddlesCoset[18]) - a[18].Mul(&a[18], &twiddlesCoset[19]) - a[19].Mul(&a[19], &twiddlesCoset[19]) - a[22].Mul(&a[22], &twiddlesCoset[20]) - a[23].Mul(&a[23], &twiddlesCoset[20]) - a[26].Mul(&a[26], &twiddlesCoset[21]) - a[27].Mul(&a[27], 
&twiddlesCoset[21]) - a[30].Mul(&a[30], &twiddlesCoset[22]) - a[31].Mul(&a[31], &twiddlesCoset[22]) - a[34].Mul(&a[34], &twiddlesCoset[23]) - a[35].Mul(&a[35], &twiddlesCoset[23]) - a[38].Mul(&a[38], &twiddlesCoset[24]) - a[39].Mul(&a[39], &twiddlesCoset[24]) - a[42].Mul(&a[42], &twiddlesCoset[25]) - a[43].Mul(&a[43], &twiddlesCoset[25]) - a[46].Mul(&a[46], &twiddlesCoset[26]) - a[47].Mul(&a[47], &twiddlesCoset[26]) - a[50].Mul(&a[50], &twiddlesCoset[27]) - a[51].Mul(&a[51], &twiddlesCoset[27]) - a[54].Mul(&a[54], &twiddlesCoset[28]) - a[55].Mul(&a[55], &twiddlesCoset[28]) - a[58].Mul(&a[58], &twiddlesCoset[29]) - a[59].Mul(&a[59], &twiddlesCoset[29]) - a[62].Mul(&a[62], &twiddlesCoset[30]) - a[63].Mul(&a[63], &twiddlesCoset[30]) - fr.Butterfly(&a[0], &a[2]) - fr.Butterfly(&a[1], &a[3]) - fr.Butterfly(&a[4], &a[6]) - fr.Butterfly(&a[5], &a[7]) - fr.Butterfly(&a[8], &a[10]) - fr.Butterfly(&a[9], &a[11]) - fr.Butterfly(&a[12], &a[14]) - fr.Butterfly(&a[13], &a[15]) - fr.Butterfly(&a[16], &a[18]) - fr.Butterfly(&a[17], &a[19]) - fr.Butterfly(&a[20], &a[22]) - fr.Butterfly(&a[21], &a[23]) - fr.Butterfly(&a[24], &a[26]) - fr.Butterfly(&a[25], &a[27]) - fr.Butterfly(&a[28], &a[30]) - fr.Butterfly(&a[29], &a[31]) - fr.Butterfly(&a[32], &a[34]) - fr.Butterfly(&a[33], &a[35]) - fr.Butterfly(&a[36], &a[38]) - fr.Butterfly(&a[37], &a[39]) - fr.Butterfly(&a[40], &a[42]) - fr.Butterfly(&a[41], &a[43]) - fr.Butterfly(&a[44], &a[46]) - fr.Butterfly(&a[45], &a[47]) - fr.Butterfly(&a[48], &a[50]) - fr.Butterfly(&a[49], &a[51]) - fr.Butterfly(&a[52], &a[54]) - fr.Butterfly(&a[53], &a[55]) - fr.Butterfly(&a[56], &a[58]) - fr.Butterfly(&a[57], &a[59]) - fr.Butterfly(&a[60], &a[62]) - fr.Butterfly(&a[61], &a[63]) - a[1].Mul(&a[1], &twiddlesCoset[31]) - a[3].Mul(&a[3], &twiddlesCoset[32]) - a[5].Mul(&a[5], &twiddlesCoset[33]) - a[7].Mul(&a[7], &twiddlesCoset[34]) - a[9].Mul(&a[9], &twiddlesCoset[35]) - a[11].Mul(&a[11], &twiddlesCoset[36]) - a[13].Mul(&a[13], &twiddlesCoset[37]) - 
a[15].Mul(&a[15], &twiddlesCoset[38]) - a[17].Mul(&a[17], &twiddlesCoset[39]) - a[19].Mul(&a[19], &twiddlesCoset[40]) - a[21].Mul(&a[21], &twiddlesCoset[41]) - a[23].Mul(&a[23], &twiddlesCoset[42]) - a[25].Mul(&a[25], &twiddlesCoset[43]) - a[27].Mul(&a[27], &twiddlesCoset[44]) - a[29].Mul(&a[29], &twiddlesCoset[45]) - a[31].Mul(&a[31], &twiddlesCoset[46]) - a[33].Mul(&a[33], &twiddlesCoset[47]) - a[35].Mul(&a[35], &twiddlesCoset[48]) - a[37].Mul(&a[37], &twiddlesCoset[49]) - a[39].Mul(&a[39], &twiddlesCoset[50]) - a[41].Mul(&a[41], &twiddlesCoset[51]) - a[43].Mul(&a[43], &twiddlesCoset[52]) - a[45].Mul(&a[45], &twiddlesCoset[53]) - a[47].Mul(&a[47], &twiddlesCoset[54]) - a[49].Mul(&a[49], &twiddlesCoset[55]) - a[51].Mul(&a[51], &twiddlesCoset[56]) - a[53].Mul(&a[53], &twiddlesCoset[57]) - a[55].Mul(&a[55], &twiddlesCoset[58]) - a[57].Mul(&a[57], &twiddlesCoset[59]) - a[59].Mul(&a[59], &twiddlesCoset[60]) - a[61].Mul(&a[61], &twiddlesCoset[61]) - a[63].Mul(&a[63], &twiddlesCoset[62]) - fr.Butterfly(&a[0], &a[1]) - fr.Butterfly(&a[2], &a[3]) - fr.Butterfly(&a[4], &a[5]) - fr.Butterfly(&a[6], &a[7]) - fr.Butterfly(&a[8], &a[9]) - fr.Butterfly(&a[10], &a[11]) - fr.Butterfly(&a[12], &a[13]) - fr.Butterfly(&a[14], &a[15]) - fr.Butterfly(&a[16], &a[17]) - fr.Butterfly(&a[18], &a[19]) - fr.Butterfly(&a[20], &a[21]) - fr.Butterfly(&a[22], &a[23]) - fr.Butterfly(&a[24], &a[25]) - fr.Butterfly(&a[26], &a[27]) - fr.Butterfly(&a[28], &a[29]) - fr.Butterfly(&a[30], &a[31]) - fr.Butterfly(&a[32], &a[33]) - fr.Butterfly(&a[34], &a[35]) - fr.Butterfly(&a[36], &a[37]) - fr.Butterfly(&a[38], &a[39]) - fr.Butterfly(&a[40], &a[41]) - fr.Butterfly(&a[42], &a[43]) - fr.Butterfly(&a[44], &a[45]) - fr.Butterfly(&a[46], &a[47]) - fr.Butterfly(&a[48], &a[49]) - fr.Butterfly(&a[50], &a[51]) - fr.Butterfly(&a[52], &a[53]) - fr.Butterfly(&a[54], &a[55]) - fr.Butterfly(&a[56], &a[57]) - fr.Butterfly(&a[58], &a[59]) - fr.Butterfly(&a[60], &a[61]) - fr.Butterfly(&a[62], &a[63]) -} - -// 
PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table -// it then return all elements in the correct order for the unrolled FFT. -func PrecomputeTwiddlesCoset(generator, shifter fr.Element) []fr.Element { - toReturn := make([]fr.Element, 63) - var r, s fr.Element - e := new(big.Int) - - s = shifter - for k := 0; k < 5; k++ { - s.Square(&s) - } - toReturn[0] = s - s = shifter - for k := 0; k < 4; k++ { - s.Square(&s) - } - toReturn[1] = s - r.Exp(generator, e.SetUint64(uint64(1<<4*1))) - toReturn[2].Mul(&r, &s) - s = shifter - for k := 0; k < 3; k++ { - s.Square(&s) - } - toReturn[3] = s - r.Exp(generator, e.SetUint64(uint64(1<<3*2))) - toReturn[4].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*1))) - toReturn[5].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*3))) - toReturn[6].Mul(&r, &s) - s = shifter - for k := 0; k < 2; k++ { - s.Square(&s) - } - toReturn[7] = s - r.Exp(generator, e.SetUint64(uint64(1<<2*4))) - toReturn[8].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*2))) - toReturn[9].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*6))) - toReturn[10].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*1))) - toReturn[11].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*5))) - toReturn[12].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*3))) - toReturn[13].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*7))) - toReturn[14].Mul(&r, &s) - s = shifter - for k := 0; k < 1; k++ { - s.Square(&s) - } - toReturn[15] = s - r.Exp(generator, e.SetUint64(uint64(1<<1*8))) - toReturn[16].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*4))) - toReturn[17].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*12))) - toReturn[18].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*2))) - toReturn[19].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*10))) - toReturn[20].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*6))) - toReturn[21].Mul(&r, &s) - r.Exp(generator, 
e.SetUint64(uint64(1<<1*14))) - toReturn[22].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*1))) - toReturn[23].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*9))) - toReturn[24].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*5))) - toReturn[25].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*13))) - toReturn[26].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*3))) - toReturn[27].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*11))) - toReturn[28].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*7))) - toReturn[29].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*15))) - toReturn[30].Mul(&r, &s) - s = shifter - for k := 0; k < 0; k++ { - s.Square(&s) - } - toReturn[31] = s - r.Exp(generator, e.SetUint64(uint64(1<<0*16))) - toReturn[32].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*8))) - toReturn[33].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*24))) - toReturn[34].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*4))) - toReturn[35].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*20))) - toReturn[36].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*12))) - toReturn[37].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*28))) - toReturn[38].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*2))) - toReturn[39].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*18))) - toReturn[40].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*10))) - toReturn[41].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*26))) - toReturn[42].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*6))) - toReturn[43].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*22))) - toReturn[44].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*14))) - toReturn[45].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*30))) - toReturn[46].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*1))) - toReturn[47].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*17))) 
- toReturn[48].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*9))) - toReturn[49].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*25))) - toReturn[50].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*5))) - toReturn[51].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*21))) - toReturn[52].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*13))) - toReturn[53].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*29))) - toReturn[54].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*3))) - toReturn[55].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*19))) - toReturn[56].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*11))) - toReturn[57].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*27))) - toReturn[58].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*7))) - toReturn[59].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*23))) - toReturn[60].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*15))) - toReturn[61].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*31))) - toReturn[62].Mul(&r, &s) - return toReturn -} diff --git a/ecc/bls12-377/fr/sis/sis_test.go b/ecc/bls12-377/fr/sis/sis_test.go index e6e6cdf608..b94a779836 100644 --- a/ecc/bls12-377/fr/sis/sis_test.go +++ b/ecc/bls12-377/fr/sis/sis_test.go @@ -25,8 +25,8 @@ type sisParams struct { var params128Bits []sisParams = []sisParams{ {logTwoBound: 8, logTwoDegree: 6}, - {logTwoBound: 16, logTwoDegree: 7}, - {logTwoBound: 32, logTwoDegree: 8}, + // {logTwoBound: 16, logTwoDegree: 7}, + // {logTwoBound: 32, logTwoDegree: 8}, } type TestCases struct { @@ -263,33 +263,3 @@ func benchmarkSIS(b *testing.B, input []fr.Element, sparse bool, logTwoBound, lo }) } - -func TestUnrolledFFT(t *testing.T) { - - var shift fr.Element - shift.SetRandom() - - const size = 64 - assert := require.New(t) - domain := fft.NewDomain(size, fft.WithShift(shift)) - - k1 := make([]fr.Element, size) - for i := 0; i < size; i++ { - k1[i].SetRandom() - } - k2 := 
make([]fr.Element, size) - copy(k2, k1) - - // default FFT - domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - - // unrolled FFT - twiddlesCoset := PrecomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) - FFT64(k2, twiddlesCoset) - - // compare results - for i := 0; i < size; i++ { - // fmt.Printf("i = %d, k1 = %v, k2 = %v\n", i, k1[i].String(), k2[i].String()) - assert.True(k1[i].Equal(&k2[i]), "i = %d", i) - } -} diff --git a/ecc/bls12-381/fr/fft/fft.go b/ecc/bls12-381/fr/fft/fft.go index 0627d85ce4..e7808933c6 100644 --- a/ecc/bls12-381/fr/fft/fft.go +++ b/ecc/bls12-381/fr/fft/fft.go @@ -60,9 +60,9 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) } else { if domain.withPrecompute { parallel.Execute(len(a), func(start, end int) { - for i := start; i < end; i++ { - a[i].Mul(&a[i], &domain.cosetTable[i]) - } + v1 := fr.Vector(a[start:end]) + v2 := fr.Vector(domain.cosetTable[start:end]) + v1.Mul(v1, v2) }, opt.nbTasks) } else { c := domain.FrMultiplicativeGen @@ -199,9 +199,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -281,10 +287,17 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } + m := n >> 1 nextStage 
:= stage + 1 @@ -408,3 +421,39 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} diff --git a/ecc/bls24-315/fr/fft/fft.go b/ecc/bls24-315/fr/fft/fft.go index c88c924889..0a4904eb74 100644 --- a/ecc/bls24-315/fr/fft/fft.go +++ b/ecc/bls24-315/fr/fft/fft.go @@ -60,9 +60,9 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) } else { if domain.withPrecompute { parallel.Execute(len(a), func(start, end int) { - for i := start; i < end; i++ { - a[i].Mul(&a[i], &domain.cosetTable[i]) - } + v1 := fr.Vector(a[start:end]) + v2 := 
fr.Vector(domain.cosetTable[start:end]) + v1.Mul(v1, v2) }, opt.nbTasks) } else { c := domain.FrMultiplicativeGen @@ -199,9 +199,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -281,10 +287,17 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } + m := n >> 1 nextStage := stage + 1 @@ -408,3 +421,39 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + 
// code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} diff --git a/ecc/bls24-317/fr/fft/fft.go b/ecc/bls24-317/fr/fft/fft.go index 1479c3a078..6590c1be9a 100644 --- a/ecc/bls24-317/fr/fft/fft.go +++ b/ecc/bls24-317/fr/fft/fft.go @@ -60,9 +60,9 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) } else { if domain.withPrecompute { parallel.Execute(len(a), func(start, end int) { - for i := start; i < end; i++ { - a[i].Mul(&a[i], &domain.cosetTable[i]) - } + v1 := fr.Vector(a[start:end]) + v2 := fr.Vector(domain.cosetTable[start:end]) + v1.Mul(v1, v2) }, opt.nbTasks) } else { c := domain.FrMultiplicativeGen @@ -199,9 +199,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -281,10 +287,17 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, 
stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } + m := n >> 1 nextStage := stage + 1 @@ -408,3 +421,39 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} diff --git a/ecc/bn254/fr/fft/fft.go b/ecc/bn254/fr/fft/fft.go index 2d75394897..9aa097620d 100644 --- a/ecc/bn254/fr/fft/fft.go +++ b/ecc/bn254/fr/fft/fft.go @@ -60,9 +60,9 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) } else { if domain.withPrecompute { parallel.Execute(len(a), 
func(start, end int) { - for i := start; i < end; i++ { - a[i].Mul(&a[i], &domain.cosetTable[i]) - } + v1 := fr.Vector(a[start:end]) + v2 := fr.Vector(domain.cosetTable[start:end]) + v1.Mul(v1, v2) }, opt.nbTasks) } else { c := domain.FrMultiplicativeGen @@ -199,9 +199,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -281,10 +287,17 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } + m := n >> 1 nextStage := stage + 1 @@ -408,3 +421,39 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 
32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} diff --git a/ecc/bw6-633/fr/fft/fft.go b/ecc/bw6-633/fr/fft/fft.go index 591372477d..1725b7886e 100644 --- a/ecc/bw6-633/fr/fft/fft.go +++ b/ecc/bw6-633/fr/fft/fft.go @@ -60,9 +60,9 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) } else { if domain.withPrecompute { parallel.Execute(len(a), func(start, end int) { - for i := start; i < end; i++ { - a[i].Mul(&a[i], &domain.cosetTable[i]) - } + v1 := fr.Vector(a[start:end]) + v2 := fr.Vector(domain.cosetTable[start:end]) + v1.Mul(v1, v2) }, opt.nbTasks) } else { c := domain.FrMultiplicativeGen @@ -199,9 +199,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -281,10 +287,17 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - 
kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } + m := n >> 1 nextStage := stage + 1 @@ -408,3 +421,39 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} diff --git a/ecc/bw6-761/fr/fft/fft.go b/ecc/bw6-761/fr/fft/fft.go index 196949dd57..597251ff1c 100644 --- a/ecc/bw6-761/fr/fft/fft.go +++ b/ecc/bw6-761/fr/fft/fft.go @@ -60,9 +60,9 
@@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) } else { if domain.withPrecompute { parallel.Execute(len(a), func(start, end int) { - for i := start; i < end; i++ { - a[i].Mul(&a[i], &domain.cosetTable[i]) - } + v1 := fr.Vector(a[start:end]) + v2 := fr.Vector(domain.cosetTable[start:end]) + v1.Mul(v1, v2) }, opt.nbTasks) } else { c := domain.FrMultiplicativeGen @@ -199,9 +199,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -281,10 +287,17 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } + m := n >> 1 nextStage := stage + 1 @@ -408,3 +421,39 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for 
offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} diff --git a/field/babybear/fft/fft.go b/field/babybear/fft/fft.go index 3eb5d70427..ffc722fad3 100644 --- a/field/babybear/fft/fft.go +++ b/field/babybear/fft/fft.go @@ -60,9 +60,9 @@ func (domain *Domain) FFT(a []babybear.Element, decimation Decimation, opts ...O } else { if domain.withPrecompute { parallel.Execute(len(a), func(start, end int) { - for i := start; i < end; i++ { - a[i].Mul(&a[i], &domain.cosetTable[i]) - } + v1 := babybear.Vector(a[start:end]) + v2 := babybear.Vector(domain.cosetTable[start:end]) + v1.Mul(v1, v2) }, opt.nbTasks) } else { c := domain.FrMultiplicativeGen @@ -199,9 +199,15 @@ func difFFT(a []babybear.Element, w babybear.Element, twiddles [][]babybear.Elem n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -281,10 +287,17 @@ func ditFFT(a 
[]babybear.Element, w babybear.Element, twiddles [][]babybear.Elem n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } + m := n >> 1 nextStage := stage + 1 @@ -408,3 +421,39 @@ func kerDITNP_256(a []babybear.Element, twiddles [][]babybear.Element, stage int } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []babybear.Element, twiddles [][]babybear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + babybear.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []babybear.Element, twiddles [][]babybear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + babybear.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], 
twiddles[stage+0], 0, 16, 16) +} diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index ea1f7394c8..0e65303564 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -34,8 +34,7 @@ type RSis struct { Degree int // domain for the polynomial multiplication - Domain *fft.Domain - twiddleCosets []babybear.Element // see FFT64 and precomputeTwiddlesCoset + Domain *fft.Domain maxNbElementsToHash int } @@ -96,10 +95,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R Ag: make([][]babybear.Element, n), maxNbElementsToHash: maxNbElementsToHash, } - if r.LogTwoBound == 8 && r.Degree == 64 { - // TODO @gbotrel fixme, that's dirty. - r.twiddleCosets = PrecomputeTwiddlesCoset(r.Domain.Generator, r.Domain.FrMultiplicativeGen) - } // filling A a := make([]babybear.Element, n*r.Degree) @@ -138,8 +133,6 @@ func (r *RSis) Hash(v, res []babybear.Element) error { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - reader := NewVectorLimbReader(v, r.LogTwoBound/8) kz := make([]babybear.Element, r.Degree) @@ -158,12 +151,9 @@ func (r *RSis) Hash(v, res []babybear.Element) error { // we can skip this, FFT(0) = 0 continue } - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } + + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + mulModAcc(res, r.Ag[i], k) } r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 diff --git a/field/babybear/sis/sis_fft.go b/field/babybear/sis/sis_fft.go deleted file mode 100644 index 0b30e84b20..0000000000 --- a/field/babybear/sis/sis_fft.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2020-2025 Consensys Software Inc. -// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. 
- -// Code generated by consensys/gnark-crypto DO NOT EDIT - -package sis - -import ( - "github.com/consensys/gnark-crypto/field/babybear" - "math/big" -) - -// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements -// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset -func FFT64(a []babybear.Element, twiddlesCoset []babybear.Element) { - - a[32].Mul(&a[32], &twiddlesCoset[0]) - a[33].Mul(&a[33], &twiddlesCoset[0]) - a[34].Mul(&a[34], &twiddlesCoset[0]) - a[35].Mul(&a[35], &twiddlesCoset[0]) - a[36].Mul(&a[36], &twiddlesCoset[0]) - a[37].Mul(&a[37], &twiddlesCoset[0]) - a[38].Mul(&a[38], &twiddlesCoset[0]) - a[39].Mul(&a[39], &twiddlesCoset[0]) - a[40].Mul(&a[40], &twiddlesCoset[0]) - a[41].Mul(&a[41], &twiddlesCoset[0]) - a[42].Mul(&a[42], &twiddlesCoset[0]) - a[43].Mul(&a[43], &twiddlesCoset[0]) - a[44].Mul(&a[44], &twiddlesCoset[0]) - a[45].Mul(&a[45], &twiddlesCoset[0]) - a[46].Mul(&a[46], &twiddlesCoset[0]) - a[47].Mul(&a[47], &twiddlesCoset[0]) - a[48].Mul(&a[48], &twiddlesCoset[0]) - a[49].Mul(&a[49], &twiddlesCoset[0]) - a[50].Mul(&a[50], &twiddlesCoset[0]) - a[51].Mul(&a[51], &twiddlesCoset[0]) - a[52].Mul(&a[52], &twiddlesCoset[0]) - a[53].Mul(&a[53], &twiddlesCoset[0]) - a[54].Mul(&a[54], &twiddlesCoset[0]) - a[55].Mul(&a[55], &twiddlesCoset[0]) - a[56].Mul(&a[56], &twiddlesCoset[0]) - a[57].Mul(&a[57], &twiddlesCoset[0]) - a[58].Mul(&a[58], &twiddlesCoset[0]) - a[59].Mul(&a[59], &twiddlesCoset[0]) - a[60].Mul(&a[60], &twiddlesCoset[0]) - a[61].Mul(&a[61], &twiddlesCoset[0]) - a[62].Mul(&a[62], &twiddlesCoset[0]) - a[63].Mul(&a[63], &twiddlesCoset[0]) - babybear.Butterfly(&a[0], &a[32]) - babybear.Butterfly(&a[1], &a[33]) - babybear.Butterfly(&a[2], &a[34]) - babybear.Butterfly(&a[3], &a[35]) - babybear.Butterfly(&a[4], &a[36]) - babybear.Butterfly(&a[5], &a[37]) - babybear.Butterfly(&a[6], 
&a[38]) - babybear.Butterfly(&a[7], &a[39]) - babybear.Butterfly(&a[8], &a[40]) - babybear.Butterfly(&a[9], &a[41]) - babybear.Butterfly(&a[10], &a[42]) - babybear.Butterfly(&a[11], &a[43]) - babybear.Butterfly(&a[12], &a[44]) - babybear.Butterfly(&a[13], &a[45]) - babybear.Butterfly(&a[14], &a[46]) - babybear.Butterfly(&a[15], &a[47]) - babybear.Butterfly(&a[16], &a[48]) - babybear.Butterfly(&a[17], &a[49]) - babybear.Butterfly(&a[18], &a[50]) - babybear.Butterfly(&a[19], &a[51]) - babybear.Butterfly(&a[20], &a[52]) - babybear.Butterfly(&a[21], &a[53]) - babybear.Butterfly(&a[22], &a[54]) - babybear.Butterfly(&a[23], &a[55]) - babybear.Butterfly(&a[24], &a[56]) - babybear.Butterfly(&a[25], &a[57]) - babybear.Butterfly(&a[26], &a[58]) - babybear.Butterfly(&a[27], &a[59]) - babybear.Butterfly(&a[28], &a[60]) - babybear.Butterfly(&a[29], &a[61]) - babybear.Butterfly(&a[30], &a[62]) - babybear.Butterfly(&a[31], &a[63]) - a[16].Mul(&a[16], &twiddlesCoset[1]) - a[17].Mul(&a[17], &twiddlesCoset[1]) - a[18].Mul(&a[18], &twiddlesCoset[1]) - a[19].Mul(&a[19], &twiddlesCoset[1]) - a[20].Mul(&a[20], &twiddlesCoset[1]) - a[21].Mul(&a[21], &twiddlesCoset[1]) - a[22].Mul(&a[22], &twiddlesCoset[1]) - a[23].Mul(&a[23], &twiddlesCoset[1]) - a[24].Mul(&a[24], &twiddlesCoset[1]) - a[25].Mul(&a[25], &twiddlesCoset[1]) - a[26].Mul(&a[26], &twiddlesCoset[1]) - a[27].Mul(&a[27], &twiddlesCoset[1]) - a[28].Mul(&a[28], &twiddlesCoset[1]) - a[29].Mul(&a[29], &twiddlesCoset[1]) - a[30].Mul(&a[30], &twiddlesCoset[1]) - a[31].Mul(&a[31], &twiddlesCoset[1]) - a[48].Mul(&a[48], &twiddlesCoset[2]) - a[49].Mul(&a[49], &twiddlesCoset[2]) - a[50].Mul(&a[50], &twiddlesCoset[2]) - a[51].Mul(&a[51], &twiddlesCoset[2]) - a[52].Mul(&a[52], &twiddlesCoset[2]) - a[53].Mul(&a[53], &twiddlesCoset[2]) - a[54].Mul(&a[54], &twiddlesCoset[2]) - a[55].Mul(&a[55], &twiddlesCoset[2]) - a[56].Mul(&a[56], &twiddlesCoset[2]) - a[57].Mul(&a[57], &twiddlesCoset[2]) - a[58].Mul(&a[58], &twiddlesCoset[2]) - 
a[59].Mul(&a[59], &twiddlesCoset[2]) - a[60].Mul(&a[60], &twiddlesCoset[2]) - a[61].Mul(&a[61], &twiddlesCoset[2]) - a[62].Mul(&a[62], &twiddlesCoset[2]) - a[63].Mul(&a[63], &twiddlesCoset[2]) - babybear.Butterfly(&a[0], &a[16]) - babybear.Butterfly(&a[1], &a[17]) - babybear.Butterfly(&a[2], &a[18]) - babybear.Butterfly(&a[3], &a[19]) - babybear.Butterfly(&a[4], &a[20]) - babybear.Butterfly(&a[5], &a[21]) - babybear.Butterfly(&a[6], &a[22]) - babybear.Butterfly(&a[7], &a[23]) - babybear.Butterfly(&a[8], &a[24]) - babybear.Butterfly(&a[9], &a[25]) - babybear.Butterfly(&a[10], &a[26]) - babybear.Butterfly(&a[11], &a[27]) - babybear.Butterfly(&a[12], &a[28]) - babybear.Butterfly(&a[13], &a[29]) - babybear.Butterfly(&a[14], &a[30]) - babybear.Butterfly(&a[15], &a[31]) - babybear.Butterfly(&a[32], &a[48]) - babybear.Butterfly(&a[33], &a[49]) - babybear.Butterfly(&a[34], &a[50]) - babybear.Butterfly(&a[35], &a[51]) - babybear.Butterfly(&a[36], &a[52]) - babybear.Butterfly(&a[37], &a[53]) - babybear.Butterfly(&a[38], &a[54]) - babybear.Butterfly(&a[39], &a[55]) - babybear.Butterfly(&a[40], &a[56]) - babybear.Butterfly(&a[41], &a[57]) - babybear.Butterfly(&a[42], &a[58]) - babybear.Butterfly(&a[43], &a[59]) - babybear.Butterfly(&a[44], &a[60]) - babybear.Butterfly(&a[45], &a[61]) - babybear.Butterfly(&a[46], &a[62]) - babybear.Butterfly(&a[47], &a[63]) - a[8].Mul(&a[8], &twiddlesCoset[3]) - a[9].Mul(&a[9], &twiddlesCoset[3]) - a[10].Mul(&a[10], &twiddlesCoset[3]) - a[11].Mul(&a[11], &twiddlesCoset[3]) - a[12].Mul(&a[12], &twiddlesCoset[3]) - a[13].Mul(&a[13], &twiddlesCoset[3]) - a[14].Mul(&a[14], &twiddlesCoset[3]) - a[15].Mul(&a[15], &twiddlesCoset[3]) - a[24].Mul(&a[24], &twiddlesCoset[4]) - a[25].Mul(&a[25], &twiddlesCoset[4]) - a[26].Mul(&a[26], &twiddlesCoset[4]) - a[27].Mul(&a[27], &twiddlesCoset[4]) - a[28].Mul(&a[28], &twiddlesCoset[4]) - a[29].Mul(&a[29], &twiddlesCoset[4]) - a[30].Mul(&a[30], &twiddlesCoset[4]) - a[31].Mul(&a[31], &twiddlesCoset[4]) - 
a[40].Mul(&a[40], &twiddlesCoset[5]) - a[41].Mul(&a[41], &twiddlesCoset[5]) - a[42].Mul(&a[42], &twiddlesCoset[5]) - a[43].Mul(&a[43], &twiddlesCoset[5]) - a[44].Mul(&a[44], &twiddlesCoset[5]) - a[45].Mul(&a[45], &twiddlesCoset[5]) - a[46].Mul(&a[46], &twiddlesCoset[5]) - a[47].Mul(&a[47], &twiddlesCoset[5]) - a[56].Mul(&a[56], &twiddlesCoset[6]) - a[57].Mul(&a[57], &twiddlesCoset[6]) - a[58].Mul(&a[58], &twiddlesCoset[6]) - a[59].Mul(&a[59], &twiddlesCoset[6]) - a[60].Mul(&a[60], &twiddlesCoset[6]) - a[61].Mul(&a[61], &twiddlesCoset[6]) - a[62].Mul(&a[62], &twiddlesCoset[6]) - a[63].Mul(&a[63], &twiddlesCoset[6]) - babybear.Butterfly(&a[0], &a[8]) - babybear.Butterfly(&a[1], &a[9]) - babybear.Butterfly(&a[2], &a[10]) - babybear.Butterfly(&a[3], &a[11]) - babybear.Butterfly(&a[4], &a[12]) - babybear.Butterfly(&a[5], &a[13]) - babybear.Butterfly(&a[6], &a[14]) - babybear.Butterfly(&a[7], &a[15]) - babybear.Butterfly(&a[16], &a[24]) - babybear.Butterfly(&a[17], &a[25]) - babybear.Butterfly(&a[18], &a[26]) - babybear.Butterfly(&a[19], &a[27]) - babybear.Butterfly(&a[20], &a[28]) - babybear.Butterfly(&a[21], &a[29]) - babybear.Butterfly(&a[22], &a[30]) - babybear.Butterfly(&a[23], &a[31]) - babybear.Butterfly(&a[32], &a[40]) - babybear.Butterfly(&a[33], &a[41]) - babybear.Butterfly(&a[34], &a[42]) - babybear.Butterfly(&a[35], &a[43]) - babybear.Butterfly(&a[36], &a[44]) - babybear.Butterfly(&a[37], &a[45]) - babybear.Butterfly(&a[38], &a[46]) - babybear.Butterfly(&a[39], &a[47]) - babybear.Butterfly(&a[48], &a[56]) - babybear.Butterfly(&a[49], &a[57]) - babybear.Butterfly(&a[50], &a[58]) - babybear.Butterfly(&a[51], &a[59]) - babybear.Butterfly(&a[52], &a[60]) - babybear.Butterfly(&a[53], &a[61]) - babybear.Butterfly(&a[54], &a[62]) - babybear.Butterfly(&a[55], &a[63]) - a[4].Mul(&a[4], &twiddlesCoset[7]) - a[5].Mul(&a[5], &twiddlesCoset[7]) - a[6].Mul(&a[6], &twiddlesCoset[7]) - a[7].Mul(&a[7], &twiddlesCoset[7]) - a[12].Mul(&a[12], &twiddlesCoset[8]) - 
a[13].Mul(&a[13], &twiddlesCoset[8]) - a[14].Mul(&a[14], &twiddlesCoset[8]) - a[15].Mul(&a[15], &twiddlesCoset[8]) - a[20].Mul(&a[20], &twiddlesCoset[9]) - a[21].Mul(&a[21], &twiddlesCoset[9]) - a[22].Mul(&a[22], &twiddlesCoset[9]) - a[23].Mul(&a[23], &twiddlesCoset[9]) - a[28].Mul(&a[28], &twiddlesCoset[10]) - a[29].Mul(&a[29], &twiddlesCoset[10]) - a[30].Mul(&a[30], &twiddlesCoset[10]) - a[31].Mul(&a[31], &twiddlesCoset[10]) - a[36].Mul(&a[36], &twiddlesCoset[11]) - a[37].Mul(&a[37], &twiddlesCoset[11]) - a[38].Mul(&a[38], &twiddlesCoset[11]) - a[39].Mul(&a[39], &twiddlesCoset[11]) - a[44].Mul(&a[44], &twiddlesCoset[12]) - a[45].Mul(&a[45], &twiddlesCoset[12]) - a[46].Mul(&a[46], &twiddlesCoset[12]) - a[47].Mul(&a[47], &twiddlesCoset[12]) - a[52].Mul(&a[52], &twiddlesCoset[13]) - a[53].Mul(&a[53], &twiddlesCoset[13]) - a[54].Mul(&a[54], &twiddlesCoset[13]) - a[55].Mul(&a[55], &twiddlesCoset[13]) - a[60].Mul(&a[60], &twiddlesCoset[14]) - a[61].Mul(&a[61], &twiddlesCoset[14]) - a[62].Mul(&a[62], &twiddlesCoset[14]) - a[63].Mul(&a[63], &twiddlesCoset[14]) - babybear.Butterfly(&a[0], &a[4]) - babybear.Butterfly(&a[1], &a[5]) - babybear.Butterfly(&a[2], &a[6]) - babybear.Butterfly(&a[3], &a[7]) - babybear.Butterfly(&a[8], &a[12]) - babybear.Butterfly(&a[9], &a[13]) - babybear.Butterfly(&a[10], &a[14]) - babybear.Butterfly(&a[11], &a[15]) - babybear.Butterfly(&a[16], &a[20]) - babybear.Butterfly(&a[17], &a[21]) - babybear.Butterfly(&a[18], &a[22]) - babybear.Butterfly(&a[19], &a[23]) - babybear.Butterfly(&a[24], &a[28]) - babybear.Butterfly(&a[25], &a[29]) - babybear.Butterfly(&a[26], &a[30]) - babybear.Butterfly(&a[27], &a[31]) - babybear.Butterfly(&a[32], &a[36]) - babybear.Butterfly(&a[33], &a[37]) - babybear.Butterfly(&a[34], &a[38]) - babybear.Butterfly(&a[35], &a[39]) - babybear.Butterfly(&a[40], &a[44]) - babybear.Butterfly(&a[41], &a[45]) - babybear.Butterfly(&a[42], &a[46]) - babybear.Butterfly(&a[43], &a[47]) - babybear.Butterfly(&a[48], &a[52]) - 
babybear.Butterfly(&a[49], &a[53]) - babybear.Butterfly(&a[50], &a[54]) - babybear.Butterfly(&a[51], &a[55]) - babybear.Butterfly(&a[56], &a[60]) - babybear.Butterfly(&a[57], &a[61]) - babybear.Butterfly(&a[58], &a[62]) - babybear.Butterfly(&a[59], &a[63]) - a[2].Mul(&a[2], &twiddlesCoset[15]) - a[3].Mul(&a[3], &twiddlesCoset[15]) - a[6].Mul(&a[6], &twiddlesCoset[16]) - a[7].Mul(&a[7], &twiddlesCoset[16]) - a[10].Mul(&a[10], &twiddlesCoset[17]) - a[11].Mul(&a[11], &twiddlesCoset[17]) - a[14].Mul(&a[14], &twiddlesCoset[18]) - a[15].Mul(&a[15], &twiddlesCoset[18]) - a[18].Mul(&a[18], &twiddlesCoset[19]) - a[19].Mul(&a[19], &twiddlesCoset[19]) - a[22].Mul(&a[22], &twiddlesCoset[20]) - a[23].Mul(&a[23], &twiddlesCoset[20]) - a[26].Mul(&a[26], &twiddlesCoset[21]) - a[27].Mul(&a[27], &twiddlesCoset[21]) - a[30].Mul(&a[30], &twiddlesCoset[22]) - a[31].Mul(&a[31], &twiddlesCoset[22]) - a[34].Mul(&a[34], &twiddlesCoset[23]) - a[35].Mul(&a[35], &twiddlesCoset[23]) - a[38].Mul(&a[38], &twiddlesCoset[24]) - a[39].Mul(&a[39], &twiddlesCoset[24]) - a[42].Mul(&a[42], &twiddlesCoset[25]) - a[43].Mul(&a[43], &twiddlesCoset[25]) - a[46].Mul(&a[46], &twiddlesCoset[26]) - a[47].Mul(&a[47], &twiddlesCoset[26]) - a[50].Mul(&a[50], &twiddlesCoset[27]) - a[51].Mul(&a[51], &twiddlesCoset[27]) - a[54].Mul(&a[54], &twiddlesCoset[28]) - a[55].Mul(&a[55], &twiddlesCoset[28]) - a[58].Mul(&a[58], &twiddlesCoset[29]) - a[59].Mul(&a[59], &twiddlesCoset[29]) - a[62].Mul(&a[62], &twiddlesCoset[30]) - a[63].Mul(&a[63], &twiddlesCoset[30]) - babybear.Butterfly(&a[0], &a[2]) - babybear.Butterfly(&a[1], &a[3]) - babybear.Butterfly(&a[4], &a[6]) - babybear.Butterfly(&a[5], &a[7]) - babybear.Butterfly(&a[8], &a[10]) - babybear.Butterfly(&a[9], &a[11]) - babybear.Butterfly(&a[12], &a[14]) - babybear.Butterfly(&a[13], &a[15]) - babybear.Butterfly(&a[16], &a[18]) - babybear.Butterfly(&a[17], &a[19]) - babybear.Butterfly(&a[20], &a[22]) - babybear.Butterfly(&a[21], &a[23]) - babybear.Butterfly(&a[24], &a[26]) 
- babybear.Butterfly(&a[25], &a[27]) - babybear.Butterfly(&a[28], &a[30]) - babybear.Butterfly(&a[29], &a[31]) - babybear.Butterfly(&a[32], &a[34]) - babybear.Butterfly(&a[33], &a[35]) - babybear.Butterfly(&a[36], &a[38]) - babybear.Butterfly(&a[37], &a[39]) - babybear.Butterfly(&a[40], &a[42]) - babybear.Butterfly(&a[41], &a[43]) - babybear.Butterfly(&a[44], &a[46]) - babybear.Butterfly(&a[45], &a[47]) - babybear.Butterfly(&a[48], &a[50]) - babybear.Butterfly(&a[49], &a[51]) - babybear.Butterfly(&a[52], &a[54]) - babybear.Butterfly(&a[53], &a[55]) - babybear.Butterfly(&a[56], &a[58]) - babybear.Butterfly(&a[57], &a[59]) - babybear.Butterfly(&a[60], &a[62]) - babybear.Butterfly(&a[61], &a[63]) - a[1].Mul(&a[1], &twiddlesCoset[31]) - a[3].Mul(&a[3], &twiddlesCoset[32]) - a[5].Mul(&a[5], &twiddlesCoset[33]) - a[7].Mul(&a[7], &twiddlesCoset[34]) - a[9].Mul(&a[9], &twiddlesCoset[35]) - a[11].Mul(&a[11], &twiddlesCoset[36]) - a[13].Mul(&a[13], &twiddlesCoset[37]) - a[15].Mul(&a[15], &twiddlesCoset[38]) - a[17].Mul(&a[17], &twiddlesCoset[39]) - a[19].Mul(&a[19], &twiddlesCoset[40]) - a[21].Mul(&a[21], &twiddlesCoset[41]) - a[23].Mul(&a[23], &twiddlesCoset[42]) - a[25].Mul(&a[25], &twiddlesCoset[43]) - a[27].Mul(&a[27], &twiddlesCoset[44]) - a[29].Mul(&a[29], &twiddlesCoset[45]) - a[31].Mul(&a[31], &twiddlesCoset[46]) - a[33].Mul(&a[33], &twiddlesCoset[47]) - a[35].Mul(&a[35], &twiddlesCoset[48]) - a[37].Mul(&a[37], &twiddlesCoset[49]) - a[39].Mul(&a[39], &twiddlesCoset[50]) - a[41].Mul(&a[41], &twiddlesCoset[51]) - a[43].Mul(&a[43], &twiddlesCoset[52]) - a[45].Mul(&a[45], &twiddlesCoset[53]) - a[47].Mul(&a[47], &twiddlesCoset[54]) - a[49].Mul(&a[49], &twiddlesCoset[55]) - a[51].Mul(&a[51], &twiddlesCoset[56]) - a[53].Mul(&a[53], &twiddlesCoset[57]) - a[55].Mul(&a[55], &twiddlesCoset[58]) - a[57].Mul(&a[57], &twiddlesCoset[59]) - a[59].Mul(&a[59], &twiddlesCoset[60]) - a[61].Mul(&a[61], &twiddlesCoset[61]) - a[63].Mul(&a[63], &twiddlesCoset[62]) - 
babybear.Butterfly(&a[0], &a[1]) - babybear.Butterfly(&a[2], &a[3]) - babybear.Butterfly(&a[4], &a[5]) - babybear.Butterfly(&a[6], &a[7]) - babybear.Butterfly(&a[8], &a[9]) - babybear.Butterfly(&a[10], &a[11]) - babybear.Butterfly(&a[12], &a[13]) - babybear.Butterfly(&a[14], &a[15]) - babybear.Butterfly(&a[16], &a[17]) - babybear.Butterfly(&a[18], &a[19]) - babybear.Butterfly(&a[20], &a[21]) - babybear.Butterfly(&a[22], &a[23]) - babybear.Butterfly(&a[24], &a[25]) - babybear.Butterfly(&a[26], &a[27]) - babybear.Butterfly(&a[28], &a[29]) - babybear.Butterfly(&a[30], &a[31]) - babybear.Butterfly(&a[32], &a[33]) - babybear.Butterfly(&a[34], &a[35]) - babybear.Butterfly(&a[36], &a[37]) - babybear.Butterfly(&a[38], &a[39]) - babybear.Butterfly(&a[40], &a[41]) - babybear.Butterfly(&a[42], &a[43]) - babybear.Butterfly(&a[44], &a[45]) - babybear.Butterfly(&a[46], &a[47]) - babybear.Butterfly(&a[48], &a[49]) - babybear.Butterfly(&a[50], &a[51]) - babybear.Butterfly(&a[52], &a[53]) - babybear.Butterfly(&a[54], &a[55]) - babybear.Butterfly(&a[56], &a[57]) - babybear.Butterfly(&a[58], &a[59]) - babybear.Butterfly(&a[60], &a[61]) - babybear.Butterfly(&a[62], &a[63]) -} - -// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table -// it then return all elements in the correct order for the unrolled FFT. 
-func PrecomputeTwiddlesCoset(generator, shifter babybear.Element) []babybear.Element { - toReturn := make([]babybear.Element, 63) - var r, s babybear.Element - e := new(big.Int) - - s = shifter - for k := 0; k < 5; k++ { - s.Square(&s) - } - toReturn[0] = s - s = shifter - for k := 0; k < 4; k++ { - s.Square(&s) - } - toReturn[1] = s - r.Exp(generator, e.SetUint64(uint64(1<<4*1))) - toReturn[2].Mul(&r, &s) - s = shifter - for k := 0; k < 3; k++ { - s.Square(&s) - } - toReturn[3] = s - r.Exp(generator, e.SetUint64(uint64(1<<3*2))) - toReturn[4].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*1))) - toReturn[5].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*3))) - toReturn[6].Mul(&r, &s) - s = shifter - for k := 0; k < 2; k++ { - s.Square(&s) - } - toReturn[7] = s - r.Exp(generator, e.SetUint64(uint64(1<<2*4))) - toReturn[8].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*2))) - toReturn[9].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*6))) - toReturn[10].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*1))) - toReturn[11].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*5))) - toReturn[12].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*3))) - toReturn[13].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*7))) - toReturn[14].Mul(&r, &s) - s = shifter - for k := 0; k < 1; k++ { - s.Square(&s) - } - toReturn[15] = s - r.Exp(generator, e.SetUint64(uint64(1<<1*8))) - toReturn[16].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*4))) - toReturn[17].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*12))) - toReturn[18].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*2))) - toReturn[19].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*10))) - toReturn[20].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*6))) - toReturn[21].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*14))) - toReturn[22].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*1))) - toReturn[23].Mul(&r, &s) - 
r.Exp(generator, e.SetUint64(uint64(1<<1*9))) - toReturn[24].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*5))) - toReturn[25].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*13))) - toReturn[26].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*3))) - toReturn[27].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*11))) - toReturn[28].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*7))) - toReturn[29].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*15))) - toReturn[30].Mul(&r, &s) - s = shifter - for k := 0; k < 0; k++ { - s.Square(&s) - } - toReturn[31] = s - r.Exp(generator, e.SetUint64(uint64(1<<0*16))) - toReturn[32].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*8))) - toReturn[33].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*24))) - toReturn[34].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*4))) - toReturn[35].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*20))) - toReturn[36].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*12))) - toReturn[37].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*28))) - toReturn[38].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*2))) - toReturn[39].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*18))) - toReturn[40].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*10))) - toReturn[41].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*26))) - toReturn[42].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*6))) - toReturn[43].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*22))) - toReturn[44].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*14))) - toReturn[45].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*30))) - toReturn[46].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*1))) - toReturn[47].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*17))) - toReturn[48].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*9))) - toReturn[49].Mul(&r, &s) - r.Exp(generator, 
e.SetUint64(uint64(1<<0*25))) - toReturn[50].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*5))) - toReturn[51].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*21))) - toReturn[52].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*13))) - toReturn[53].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*29))) - toReturn[54].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*3))) - toReturn[55].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*19))) - toReturn[56].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*11))) - toReturn[57].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*27))) - toReturn[58].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*7))) - toReturn[59].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*23))) - toReturn[60].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*15))) - toReturn[61].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*31))) - toReturn[62].Mul(&r, &s) - return toReturn -} diff --git a/field/babybear/sis/sis_test.go b/field/babybear/sis/sis_test.go index 9d973961fd..54e5ac4d5b 100644 --- a/field/babybear/sis/sis_test.go +++ b/field/babybear/sis/sis_test.go @@ -25,8 +25,8 @@ type sisParams struct { var params128Bits []sisParams = []sisParams{ {logTwoBound: 8, logTwoDegree: 6}, - {logTwoBound: 16, logTwoDegree: 7}, - {logTwoBound: 32, logTwoDegree: 8}, + // {logTwoBound: 16, logTwoDegree: 7}, + // {logTwoBound: 32, logTwoDegree: 8}, } type TestCases struct { @@ -263,33 +263,3 @@ func benchmarkSIS(b *testing.B, input []babybear.Element, sparse bool, logTwoBou }) } - -func TestUnrolledFFT(t *testing.T) { - - var shift babybear.Element - shift.SetRandom() - - const size = 64 - assert := require.New(t) - domain := fft.NewDomain(size, fft.WithShift(shift)) - - k1 := make([]babybear.Element, size) - for i := 0; i < size; i++ { - k1[i].SetRandom() - } - k2 := make([]babybear.Element, size) - copy(k2, k1) - - // default FFT - domain.FFT(k1, fft.DIF, fft.OnCoset(), 
fft.WithNbTasks(1)) - - // unrolled FFT - twiddlesCoset := PrecomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) - FFT64(k2, twiddlesCoset) - - // compare results - for i := 0; i < size; i++ { - // fmt.Printf("i = %d, k1 = %v, k2 = %v\n", i, k1[i].String(), k2[i].String()) - assert.True(k1[i].Equal(&k2[i]), "i = %d", i) - } -} diff --git a/field/generator/generator_sis.go b/field/generator/generator_sis.go index f8f456b29c..d4f251f712 100644 --- a/field/generator/generator_sis.go +++ b/field/generator/generator_sis.go @@ -1,6 +1,7 @@ package generator import ( + "os" "path/filepath" "github.com/consensys/bavard" @@ -17,11 +18,12 @@ func generateSIS(F *config.Field, outputDir string) error { outputDir = filepath.Join(outputDir, "sis") entries := []bavard.Entry{ - {File: filepath.Join(outputDir, "sis_fft.go"), Templates: []string{"fft.go.tmpl"}}, {File: filepath.Join(outputDir, "sis.go"), Templates: []string{"sis.go.tmpl"}}, {File: filepath.Join(outputDir, "sis_test.go"), Templates: []string{"sis.test.go.tmpl"}}, } + os.Remove(filepath.Join(outputDir, "sis_fft.go")) + funcs := make(map[string]interface{}) funcs["bitReverse"] = bitReverse diff --git a/field/generator/internal/templates/fft/fft.go.tmpl b/field/generator/internal/templates/fft/fft.go.tmpl index b143e0c969..58a6062fa5 100644 --- a/field/generator/internal/templates/fft/fft.go.tmpl +++ b/field/generator/internal/templates/fft/fft.go.tmpl @@ -11,6 +11,9 @@ import ( {{ $sizeKernelLog2 := 8}} {{ $sizeKernel := shl 1 $sizeKernelLog2}} +{{ $sizeKernel2Log2 := 5}} +{{ $sizeKernel2 := shl 1 $sizeKernel2Log2}} + // Decimation is used in the FFT call to select decimation in time or in frequency type Decimation uint8 @@ -57,9 +60,9 @@ func (domain *Domain) FFT(a []{{ .FF }}.Element, decimation Decimation, opts ... 
} else { if domain.withPrecompute { parallel.Execute(len(a), func(start, end int) { - for i := start; i < end; i++ { - a[i].Mul(&a[i], &domain.cosetTable[i]) - } + v1 := {{.FF}}.Vector(a[start:end]) + v2 := {{.FF}}.Vector(domain.cosetTable[start:end]) + v1.Mul(v1, v2) }, opt.nbTasks) } else { c := domain.FrMultiplicativeGen @@ -199,9 +202,15 @@ func difFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E n := len(a) if n == 1 { return - } else if n == {{$sizeKernel}} && stage >= twiddlesStartStage { - kerDIFNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == {{$sizeKernel}} { + kerDIFNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) + return + } else if n == {{$sizeKernel2}} { + kerDIFNP_{{$sizeKernel2}}(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -283,10 +292,17 @@ func ditFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E n := len(a) if n == 1 { return - } else if n == {{$sizeKernel}} && stage >= twiddlesStartStage { - kerDITNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == {{$sizeKernel2}} { + kerDITNP_{{$sizeKernel2}}(a, twiddles, stage-twiddlesStartStage) + return + } else if n == {{$sizeKernel}} { + kerDITNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) + return + } + } + m := n >> 1 nextStage := stage + 1 @@ -358,15 +374,17 @@ func innerDITWithoutTwiddles(a []{{ .FF }}.Element, at, w {{ .FF }}.Element, sta } } +{{genKernel $.FF $sizeKernel $sizeKernelLog2}} +{{genKernel $.FF $sizeKernel2 $sizeKernel2Log2}} +{{define "genKernel FF sizeKernel sizeKernelLog2"}} -func kerDIFNP_{{$sizeKernel}}(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Element, stage int) { +func kerDIFNP_{{.sizeKernel}}(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Element, stage int) { // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - {{ $n := 
shl 1 $sizeKernelLog2}} + {{ $n := shl 1 .sizeKernelLog2}} {{ $m := div $n 2}} {{ $split := 1}} - {{- range $step := iterate 0 $sizeKernelLog2}} + {{- range $step := iterate 0 .sizeKernelLog2}} {{- $offset := 0}} {{- $bound := mul $split $n}} @@ -389,13 +407,12 @@ func kerDIFNP_{{$sizeKernel}}(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Elem } -func kerDITNP_{{$sizeKernel}}(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Element, stage int) { +func kerDITNP_{{.sizeKernel}}(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Element, stage int) { // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - {{ $n := 2}} {{ $m := div $n 2}} - {{ $split := div (shl 1 $sizeKernelLog2) 2}} - {{- range $step := reverse (iterate 0 $sizeKernelLog2)}} + {{ $split := div (shl 1 .sizeKernelLog2) 2}} + {{- range $step := reverse (iterate 0 .sizeKernelLog2)}} {{- $offset := 0}} {{- $bound := mul $split $n}} @@ -416,3 +433,8 @@ func kerDITNP_{{$sizeKernel}}(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Elem {{- $split = div $split 2}} {{- end}} } + +{{end}} + + + diff --git a/field/generator/internal/templates/sis/fft.go.tmpl b/field/generator/internal/templates/sis/fft.go.tmpl deleted file mode 100644 index 777eb8610a..0000000000 --- a/field/generator/internal/templates/sis/fft.go.tmpl +++ /dev/null @@ -1,82 +0,0 @@ -import ( - "{{ .FieldPackagePath }}" - "math/big" -) - -// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements -// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset -func FFT64(a []{{ .FF }}.Element, twiddlesCoset []{{ .FF }}.Element) { - - {{- /* notes: - this function can be updated with larger n - nbSteps must be updated too such as 1 << nbSteps == n - butterflies and multiplication are separated for size n = 8, must check perf for larger n - */}} - {{$tIndex := 0}} - {{ $n := 64}} - {{ 
$m := div $n 2}} - {{ $split := 1}} - {{ $split = div $split 1}} - {{- range $step := reverse (iterate 0 6)}} - - {{- $offset := 0}} - {{- range $s := iterate 0 $split}} - {{- range $i := iterate 0 $m}} - {{- $j := add $i $offset}} - {{- $k := add $j $m}} - a[{{$k}}].Mul(&a[{{$k}}], &twiddlesCoset[{{$tIndex}}]) - {{- end}} - {{- $offset = add $offset $n}} - {{- $tIndex = add $tIndex 1}} - {{- end}} - - {{- $offset := 0}} - {{- range $s := iterate 0 $split}} - {{- range $i := iterate 0 $m}} - {{- $j := add $i $offset}} - {{- $k := add $j $m}} - {{ $.FF }}.Butterfly(&a[{{$j}}], &a[{{$k}}]) - {{- end}} - {{- $offset = add $offset $n}} - {{- end}} - - {{- $n = div $n 2}} - {{- $m = div $n 2}} - {{- $split = mul $split 2}} - {{- end}} -} - -// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table -// it then return all elements in the correct order for the unrolled FFT. -func PrecomputeTwiddlesCoset(generator, shifter {{ .FF }}.Element) []{{ .FF }}.Element { - toReturn := make([]{{ .FF }}.Element, 63) - var r, s {{ .FF }}.Element - e := new(big.Int) - {{ $n := 64}} - {{ $m := div $n 2}} - {{ $split := 1}} - {{ $split = div $split 1}} - {{ $j := 0}} - {{- range $step := reverse (iterate 0 6)}} - s = shifter - for k:=0; k <{{$step}};k++ { - s.Square(&s) - } - - {{- $offset := 0}} - {{- range $s := iterate 0 $split}} - {{- $exp := bitReverse $split $s}} - {{- if eq $exp 0}} - toReturn[{{$j}}] = s - {{- else}} - r.Exp(generator, e.SetUint64(uint64(1<<{{$step}} * {{$exp}}))) - toReturn[{{$j}}].Mul(&r, &s) - {{- end}} - {{- $j = add $j 1}} - {{- end}} - - {{- $split = mul $split 2}} - {{- end}} - return toReturn -} \ No newline at end of file diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index 7ac18bf9fd..674624219e 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -35,10 +35,8 @@ type RSis struct { // 
domain for the polynomial multiplication Domain *fft.Domain - twiddleCosets []{{ .FF }}.Element // see FFT64 and precomputeTwiddlesCoset maxNbElementsToHash int - } // NewRSis creates an instance of RSis. @@ -97,10 +95,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R Ag: make([][]{{ .FF }}.Element, n), maxNbElementsToHash: maxNbElementsToHash, } - if r.LogTwoBound == 8 && r.Degree == 64 { - // TODO @gbotrel fixme, that's dirty. - r.twiddleCosets = PrecomputeTwiddlesCoset(r.Domain.Generator, r.Domain.FrMultiplicativeGen) - } // filling A a := make([]{{ .FF }}.Element, n*r.Degree) @@ -140,7 +134,6 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - fastPath := r.LogTwoBound == 8 && r.Degree == 64 reader := NewVectorLimbReader(v, r.LogTwoBound/8) @@ -160,12 +153,9 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { // we can skip this, FFT(0) = 0 continue } - if fastPath { - // fast path. 
- FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } + + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + mulModAcc(res, r.Ag[i], k) } r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 diff --git a/field/generator/internal/templates/sis/sis.test.go.tmpl b/field/generator/internal/templates/sis/sis.test.go.tmpl index ce4bd51257..44073b0e94 100644 --- a/field/generator/internal/templates/sis/sis.test.go.tmpl +++ b/field/generator/internal/templates/sis/sis.test.go.tmpl @@ -18,8 +18,8 @@ type sisParams struct { var params128Bits []sisParams = []sisParams{ {logTwoBound: 8, logTwoDegree: 6}, - {logTwoBound: 16, logTwoDegree: 7}, - {logTwoBound: 32, logTwoDegree: 8}, + // {logTwoBound: 16, logTwoDegree: 7}, + // {logTwoBound: 32, logTwoDegree: 8}, } type TestCases struct { @@ -260,33 +260,3 @@ func benchmarkSIS(b *testing.B, input []{{ .FF }}.Element, sparse bool, logTwoBo }) } - -func TestUnrolledFFT(t *testing.T) { - - var shift {{ .FF }}.Element - shift.SetRandom() - - const size = 64 - assert := require.New(t) - domain := fft.NewDomain(size, fft.WithShift(shift)) - - k1 := make([]{{ .FF }}.Element, size) - for i := 0; i < size; i++ { - k1[i].SetRandom() - } - k2 := make([]{{ .FF }}.Element, size) - copy(k2, k1) - - // default FFT - domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - - // unrolled FFT - twiddlesCoset := PrecomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) - FFT64(k2, twiddlesCoset) - - // compare results - for i := 0; i < size; i++ { - // fmt.Printf("i = %d, k1 = %v, k2 = %v\n", i, k1[i].String(), k2[i].String()) - assert.True(k1[i].Equal(&k2[i]), "i = %d", i) - } -} diff --git a/field/goldilocks/fft/fft.go b/field/goldilocks/fft/fft.go index 966f03457a..35c77f53de 100644 --- a/field/goldilocks/fft/fft.go +++ b/field/goldilocks/fft/fft.go @@ -60,9 +60,9 @@ func (domain *Domain) FFT(a []goldilocks.Element, decimation 
Decimation, opts .. } else { if domain.withPrecompute { parallel.Execute(len(a), func(start, end int) { - for i := start; i < end; i++ { - a[i].Mul(&a[i], &domain.cosetTable[i]) - } + v1 := goldilocks.Vector(a[start:end]) + v2 := goldilocks.Vector(domain.cosetTable[start:end]) + v1.Mul(v1, v2) }, opt.nbTasks) } else { c := domain.FrMultiplicativeGen @@ -199,9 +199,15 @@ func difFFT(a []goldilocks.Element, w goldilocks.Element, twiddles [][]goldilock n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -281,10 +287,17 @@ func ditFFT(a []goldilocks.Element, w goldilocks.Element, twiddles [][]goldilock n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } + m := n >> 1 nextStage := stage + 1 @@ -408,3 +421,39 @@ func kerDITNP_256(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 
32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + goldilocks.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + goldilocks.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index 5a24c0a8f9..d461ae711e 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -34,8 +34,7 @@ type RSis struct { Degree int // domain for the polynomial multiplication - Domain *fft.Domain - twiddleCosets []goldilocks.Element // see FFT64 and precomputeTwiddlesCoset + Domain *fft.Domain maxNbElementsToHash int } @@ -96,10 +95,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R Ag: make([][]goldilocks.Element, n), maxNbElementsToHash: maxNbElementsToHash, } - if r.LogTwoBound == 8 && r.Degree == 64 { - // TODO @gbotrel fixme, that's dirty. 
- r.twiddleCosets = PrecomputeTwiddlesCoset(r.Domain.Generator, r.Domain.FrMultiplicativeGen) - } // filling A a := make([]goldilocks.Element, n*r.Degree) @@ -138,8 +133,6 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - reader := NewVectorLimbReader(v, r.LogTwoBound/8) kz := make([]goldilocks.Element, r.Degree) @@ -158,12 +151,9 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { // we can skip this, FFT(0) = 0 continue } - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } + + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + mulModAcc(res, r.Ag[i], k) } r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 diff --git a/field/goldilocks/sis/sis_fft.go b/field/goldilocks/sis/sis_fft.go deleted file mode 100644 index 321a84e905..0000000000 --- a/field/goldilocks/sis/sis_fft.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2020-2025 Consensys Software Inc. -// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. 
- -// Code generated by consensys/gnark-crypto DO NOT EDIT - -package sis - -import ( - "github.com/consensys/gnark-crypto/field/goldilocks" - "math/big" -) - -// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements -// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset -func FFT64(a []goldilocks.Element, twiddlesCoset []goldilocks.Element) { - - a[32].Mul(&a[32], &twiddlesCoset[0]) - a[33].Mul(&a[33], &twiddlesCoset[0]) - a[34].Mul(&a[34], &twiddlesCoset[0]) - a[35].Mul(&a[35], &twiddlesCoset[0]) - a[36].Mul(&a[36], &twiddlesCoset[0]) - a[37].Mul(&a[37], &twiddlesCoset[0]) - a[38].Mul(&a[38], &twiddlesCoset[0]) - a[39].Mul(&a[39], &twiddlesCoset[0]) - a[40].Mul(&a[40], &twiddlesCoset[0]) - a[41].Mul(&a[41], &twiddlesCoset[0]) - a[42].Mul(&a[42], &twiddlesCoset[0]) - a[43].Mul(&a[43], &twiddlesCoset[0]) - a[44].Mul(&a[44], &twiddlesCoset[0]) - a[45].Mul(&a[45], &twiddlesCoset[0]) - a[46].Mul(&a[46], &twiddlesCoset[0]) - a[47].Mul(&a[47], &twiddlesCoset[0]) - a[48].Mul(&a[48], &twiddlesCoset[0]) - a[49].Mul(&a[49], &twiddlesCoset[0]) - a[50].Mul(&a[50], &twiddlesCoset[0]) - a[51].Mul(&a[51], &twiddlesCoset[0]) - a[52].Mul(&a[52], &twiddlesCoset[0]) - a[53].Mul(&a[53], &twiddlesCoset[0]) - a[54].Mul(&a[54], &twiddlesCoset[0]) - a[55].Mul(&a[55], &twiddlesCoset[0]) - a[56].Mul(&a[56], &twiddlesCoset[0]) - a[57].Mul(&a[57], &twiddlesCoset[0]) - a[58].Mul(&a[58], &twiddlesCoset[0]) - a[59].Mul(&a[59], &twiddlesCoset[0]) - a[60].Mul(&a[60], &twiddlesCoset[0]) - a[61].Mul(&a[61], &twiddlesCoset[0]) - a[62].Mul(&a[62], &twiddlesCoset[0]) - a[63].Mul(&a[63], &twiddlesCoset[0]) - goldilocks.Butterfly(&a[0], &a[32]) - goldilocks.Butterfly(&a[1], &a[33]) - goldilocks.Butterfly(&a[2], &a[34]) - goldilocks.Butterfly(&a[3], &a[35]) - goldilocks.Butterfly(&a[4], &a[36]) - goldilocks.Butterfly(&a[5], &a[37]) - 
goldilocks.Butterfly(&a[6], &a[38]) - goldilocks.Butterfly(&a[7], &a[39]) - goldilocks.Butterfly(&a[8], &a[40]) - goldilocks.Butterfly(&a[9], &a[41]) - goldilocks.Butterfly(&a[10], &a[42]) - goldilocks.Butterfly(&a[11], &a[43]) - goldilocks.Butterfly(&a[12], &a[44]) - goldilocks.Butterfly(&a[13], &a[45]) - goldilocks.Butterfly(&a[14], &a[46]) - goldilocks.Butterfly(&a[15], &a[47]) - goldilocks.Butterfly(&a[16], &a[48]) - goldilocks.Butterfly(&a[17], &a[49]) - goldilocks.Butterfly(&a[18], &a[50]) - goldilocks.Butterfly(&a[19], &a[51]) - goldilocks.Butterfly(&a[20], &a[52]) - goldilocks.Butterfly(&a[21], &a[53]) - goldilocks.Butterfly(&a[22], &a[54]) - goldilocks.Butterfly(&a[23], &a[55]) - goldilocks.Butterfly(&a[24], &a[56]) - goldilocks.Butterfly(&a[25], &a[57]) - goldilocks.Butterfly(&a[26], &a[58]) - goldilocks.Butterfly(&a[27], &a[59]) - goldilocks.Butterfly(&a[28], &a[60]) - goldilocks.Butterfly(&a[29], &a[61]) - goldilocks.Butterfly(&a[30], &a[62]) - goldilocks.Butterfly(&a[31], &a[63]) - a[16].Mul(&a[16], &twiddlesCoset[1]) - a[17].Mul(&a[17], &twiddlesCoset[1]) - a[18].Mul(&a[18], &twiddlesCoset[1]) - a[19].Mul(&a[19], &twiddlesCoset[1]) - a[20].Mul(&a[20], &twiddlesCoset[1]) - a[21].Mul(&a[21], &twiddlesCoset[1]) - a[22].Mul(&a[22], &twiddlesCoset[1]) - a[23].Mul(&a[23], &twiddlesCoset[1]) - a[24].Mul(&a[24], &twiddlesCoset[1]) - a[25].Mul(&a[25], &twiddlesCoset[1]) - a[26].Mul(&a[26], &twiddlesCoset[1]) - a[27].Mul(&a[27], &twiddlesCoset[1]) - a[28].Mul(&a[28], &twiddlesCoset[1]) - a[29].Mul(&a[29], &twiddlesCoset[1]) - a[30].Mul(&a[30], &twiddlesCoset[1]) - a[31].Mul(&a[31], &twiddlesCoset[1]) - a[48].Mul(&a[48], &twiddlesCoset[2]) - a[49].Mul(&a[49], &twiddlesCoset[2]) - a[50].Mul(&a[50], &twiddlesCoset[2]) - a[51].Mul(&a[51], &twiddlesCoset[2]) - a[52].Mul(&a[52], &twiddlesCoset[2]) - a[53].Mul(&a[53], &twiddlesCoset[2]) - a[54].Mul(&a[54], &twiddlesCoset[2]) - a[55].Mul(&a[55], &twiddlesCoset[2]) - a[56].Mul(&a[56], &twiddlesCoset[2]) - 
a[57].Mul(&a[57], &twiddlesCoset[2]) - a[58].Mul(&a[58], &twiddlesCoset[2]) - a[59].Mul(&a[59], &twiddlesCoset[2]) - a[60].Mul(&a[60], &twiddlesCoset[2]) - a[61].Mul(&a[61], &twiddlesCoset[2]) - a[62].Mul(&a[62], &twiddlesCoset[2]) - a[63].Mul(&a[63], &twiddlesCoset[2]) - goldilocks.Butterfly(&a[0], &a[16]) - goldilocks.Butterfly(&a[1], &a[17]) - goldilocks.Butterfly(&a[2], &a[18]) - goldilocks.Butterfly(&a[3], &a[19]) - goldilocks.Butterfly(&a[4], &a[20]) - goldilocks.Butterfly(&a[5], &a[21]) - goldilocks.Butterfly(&a[6], &a[22]) - goldilocks.Butterfly(&a[7], &a[23]) - goldilocks.Butterfly(&a[8], &a[24]) - goldilocks.Butterfly(&a[9], &a[25]) - goldilocks.Butterfly(&a[10], &a[26]) - goldilocks.Butterfly(&a[11], &a[27]) - goldilocks.Butterfly(&a[12], &a[28]) - goldilocks.Butterfly(&a[13], &a[29]) - goldilocks.Butterfly(&a[14], &a[30]) - goldilocks.Butterfly(&a[15], &a[31]) - goldilocks.Butterfly(&a[32], &a[48]) - goldilocks.Butterfly(&a[33], &a[49]) - goldilocks.Butterfly(&a[34], &a[50]) - goldilocks.Butterfly(&a[35], &a[51]) - goldilocks.Butterfly(&a[36], &a[52]) - goldilocks.Butterfly(&a[37], &a[53]) - goldilocks.Butterfly(&a[38], &a[54]) - goldilocks.Butterfly(&a[39], &a[55]) - goldilocks.Butterfly(&a[40], &a[56]) - goldilocks.Butterfly(&a[41], &a[57]) - goldilocks.Butterfly(&a[42], &a[58]) - goldilocks.Butterfly(&a[43], &a[59]) - goldilocks.Butterfly(&a[44], &a[60]) - goldilocks.Butterfly(&a[45], &a[61]) - goldilocks.Butterfly(&a[46], &a[62]) - goldilocks.Butterfly(&a[47], &a[63]) - a[8].Mul(&a[8], &twiddlesCoset[3]) - a[9].Mul(&a[9], &twiddlesCoset[3]) - a[10].Mul(&a[10], &twiddlesCoset[3]) - a[11].Mul(&a[11], &twiddlesCoset[3]) - a[12].Mul(&a[12], &twiddlesCoset[3]) - a[13].Mul(&a[13], &twiddlesCoset[3]) - a[14].Mul(&a[14], &twiddlesCoset[3]) - a[15].Mul(&a[15], &twiddlesCoset[3]) - a[24].Mul(&a[24], &twiddlesCoset[4]) - a[25].Mul(&a[25], &twiddlesCoset[4]) - a[26].Mul(&a[26], &twiddlesCoset[4]) - a[27].Mul(&a[27], &twiddlesCoset[4]) - a[28].Mul(&a[28], 
&twiddlesCoset[4]) - a[29].Mul(&a[29], &twiddlesCoset[4]) - a[30].Mul(&a[30], &twiddlesCoset[4]) - a[31].Mul(&a[31], &twiddlesCoset[4]) - a[40].Mul(&a[40], &twiddlesCoset[5]) - a[41].Mul(&a[41], &twiddlesCoset[5]) - a[42].Mul(&a[42], &twiddlesCoset[5]) - a[43].Mul(&a[43], &twiddlesCoset[5]) - a[44].Mul(&a[44], &twiddlesCoset[5]) - a[45].Mul(&a[45], &twiddlesCoset[5]) - a[46].Mul(&a[46], &twiddlesCoset[5]) - a[47].Mul(&a[47], &twiddlesCoset[5]) - a[56].Mul(&a[56], &twiddlesCoset[6]) - a[57].Mul(&a[57], &twiddlesCoset[6]) - a[58].Mul(&a[58], &twiddlesCoset[6]) - a[59].Mul(&a[59], &twiddlesCoset[6]) - a[60].Mul(&a[60], &twiddlesCoset[6]) - a[61].Mul(&a[61], &twiddlesCoset[6]) - a[62].Mul(&a[62], &twiddlesCoset[6]) - a[63].Mul(&a[63], &twiddlesCoset[6]) - goldilocks.Butterfly(&a[0], &a[8]) - goldilocks.Butterfly(&a[1], &a[9]) - goldilocks.Butterfly(&a[2], &a[10]) - goldilocks.Butterfly(&a[3], &a[11]) - goldilocks.Butterfly(&a[4], &a[12]) - goldilocks.Butterfly(&a[5], &a[13]) - goldilocks.Butterfly(&a[6], &a[14]) - goldilocks.Butterfly(&a[7], &a[15]) - goldilocks.Butterfly(&a[16], &a[24]) - goldilocks.Butterfly(&a[17], &a[25]) - goldilocks.Butterfly(&a[18], &a[26]) - goldilocks.Butterfly(&a[19], &a[27]) - goldilocks.Butterfly(&a[20], &a[28]) - goldilocks.Butterfly(&a[21], &a[29]) - goldilocks.Butterfly(&a[22], &a[30]) - goldilocks.Butterfly(&a[23], &a[31]) - goldilocks.Butterfly(&a[32], &a[40]) - goldilocks.Butterfly(&a[33], &a[41]) - goldilocks.Butterfly(&a[34], &a[42]) - goldilocks.Butterfly(&a[35], &a[43]) - goldilocks.Butterfly(&a[36], &a[44]) - goldilocks.Butterfly(&a[37], &a[45]) - goldilocks.Butterfly(&a[38], &a[46]) - goldilocks.Butterfly(&a[39], &a[47]) - goldilocks.Butterfly(&a[48], &a[56]) - goldilocks.Butterfly(&a[49], &a[57]) - goldilocks.Butterfly(&a[50], &a[58]) - goldilocks.Butterfly(&a[51], &a[59]) - goldilocks.Butterfly(&a[52], &a[60]) - goldilocks.Butterfly(&a[53], &a[61]) - goldilocks.Butterfly(&a[54], &a[62]) - goldilocks.Butterfly(&a[55], &a[63]) - 
a[4].Mul(&a[4], &twiddlesCoset[7]) - a[5].Mul(&a[5], &twiddlesCoset[7]) - a[6].Mul(&a[6], &twiddlesCoset[7]) - a[7].Mul(&a[7], &twiddlesCoset[7]) - a[12].Mul(&a[12], &twiddlesCoset[8]) - a[13].Mul(&a[13], &twiddlesCoset[8]) - a[14].Mul(&a[14], &twiddlesCoset[8]) - a[15].Mul(&a[15], &twiddlesCoset[8]) - a[20].Mul(&a[20], &twiddlesCoset[9]) - a[21].Mul(&a[21], &twiddlesCoset[9]) - a[22].Mul(&a[22], &twiddlesCoset[9]) - a[23].Mul(&a[23], &twiddlesCoset[9]) - a[28].Mul(&a[28], &twiddlesCoset[10]) - a[29].Mul(&a[29], &twiddlesCoset[10]) - a[30].Mul(&a[30], &twiddlesCoset[10]) - a[31].Mul(&a[31], &twiddlesCoset[10]) - a[36].Mul(&a[36], &twiddlesCoset[11]) - a[37].Mul(&a[37], &twiddlesCoset[11]) - a[38].Mul(&a[38], &twiddlesCoset[11]) - a[39].Mul(&a[39], &twiddlesCoset[11]) - a[44].Mul(&a[44], &twiddlesCoset[12]) - a[45].Mul(&a[45], &twiddlesCoset[12]) - a[46].Mul(&a[46], &twiddlesCoset[12]) - a[47].Mul(&a[47], &twiddlesCoset[12]) - a[52].Mul(&a[52], &twiddlesCoset[13]) - a[53].Mul(&a[53], &twiddlesCoset[13]) - a[54].Mul(&a[54], &twiddlesCoset[13]) - a[55].Mul(&a[55], &twiddlesCoset[13]) - a[60].Mul(&a[60], &twiddlesCoset[14]) - a[61].Mul(&a[61], &twiddlesCoset[14]) - a[62].Mul(&a[62], &twiddlesCoset[14]) - a[63].Mul(&a[63], &twiddlesCoset[14]) - goldilocks.Butterfly(&a[0], &a[4]) - goldilocks.Butterfly(&a[1], &a[5]) - goldilocks.Butterfly(&a[2], &a[6]) - goldilocks.Butterfly(&a[3], &a[7]) - goldilocks.Butterfly(&a[8], &a[12]) - goldilocks.Butterfly(&a[9], &a[13]) - goldilocks.Butterfly(&a[10], &a[14]) - goldilocks.Butterfly(&a[11], &a[15]) - goldilocks.Butterfly(&a[16], &a[20]) - goldilocks.Butterfly(&a[17], &a[21]) - goldilocks.Butterfly(&a[18], &a[22]) - goldilocks.Butterfly(&a[19], &a[23]) - goldilocks.Butterfly(&a[24], &a[28]) - goldilocks.Butterfly(&a[25], &a[29]) - goldilocks.Butterfly(&a[26], &a[30]) - goldilocks.Butterfly(&a[27], &a[31]) - goldilocks.Butterfly(&a[32], &a[36]) - goldilocks.Butterfly(&a[33], &a[37]) - goldilocks.Butterfly(&a[34], &a[38]) - 
goldilocks.Butterfly(&a[35], &a[39]) - goldilocks.Butterfly(&a[40], &a[44]) - goldilocks.Butterfly(&a[41], &a[45]) - goldilocks.Butterfly(&a[42], &a[46]) - goldilocks.Butterfly(&a[43], &a[47]) - goldilocks.Butterfly(&a[48], &a[52]) - goldilocks.Butterfly(&a[49], &a[53]) - goldilocks.Butterfly(&a[50], &a[54]) - goldilocks.Butterfly(&a[51], &a[55]) - goldilocks.Butterfly(&a[56], &a[60]) - goldilocks.Butterfly(&a[57], &a[61]) - goldilocks.Butterfly(&a[58], &a[62]) - goldilocks.Butterfly(&a[59], &a[63]) - a[2].Mul(&a[2], &twiddlesCoset[15]) - a[3].Mul(&a[3], &twiddlesCoset[15]) - a[6].Mul(&a[6], &twiddlesCoset[16]) - a[7].Mul(&a[7], &twiddlesCoset[16]) - a[10].Mul(&a[10], &twiddlesCoset[17]) - a[11].Mul(&a[11], &twiddlesCoset[17]) - a[14].Mul(&a[14], &twiddlesCoset[18]) - a[15].Mul(&a[15], &twiddlesCoset[18]) - a[18].Mul(&a[18], &twiddlesCoset[19]) - a[19].Mul(&a[19], &twiddlesCoset[19]) - a[22].Mul(&a[22], &twiddlesCoset[20]) - a[23].Mul(&a[23], &twiddlesCoset[20]) - a[26].Mul(&a[26], &twiddlesCoset[21]) - a[27].Mul(&a[27], &twiddlesCoset[21]) - a[30].Mul(&a[30], &twiddlesCoset[22]) - a[31].Mul(&a[31], &twiddlesCoset[22]) - a[34].Mul(&a[34], &twiddlesCoset[23]) - a[35].Mul(&a[35], &twiddlesCoset[23]) - a[38].Mul(&a[38], &twiddlesCoset[24]) - a[39].Mul(&a[39], &twiddlesCoset[24]) - a[42].Mul(&a[42], &twiddlesCoset[25]) - a[43].Mul(&a[43], &twiddlesCoset[25]) - a[46].Mul(&a[46], &twiddlesCoset[26]) - a[47].Mul(&a[47], &twiddlesCoset[26]) - a[50].Mul(&a[50], &twiddlesCoset[27]) - a[51].Mul(&a[51], &twiddlesCoset[27]) - a[54].Mul(&a[54], &twiddlesCoset[28]) - a[55].Mul(&a[55], &twiddlesCoset[28]) - a[58].Mul(&a[58], &twiddlesCoset[29]) - a[59].Mul(&a[59], &twiddlesCoset[29]) - a[62].Mul(&a[62], &twiddlesCoset[30]) - a[63].Mul(&a[63], &twiddlesCoset[30]) - goldilocks.Butterfly(&a[0], &a[2]) - goldilocks.Butterfly(&a[1], &a[3]) - goldilocks.Butterfly(&a[4], &a[6]) - goldilocks.Butterfly(&a[5], &a[7]) - goldilocks.Butterfly(&a[8], &a[10]) - goldilocks.Butterfly(&a[9], 
&a[11]) - goldilocks.Butterfly(&a[12], &a[14]) - goldilocks.Butterfly(&a[13], &a[15]) - goldilocks.Butterfly(&a[16], &a[18]) - goldilocks.Butterfly(&a[17], &a[19]) - goldilocks.Butterfly(&a[20], &a[22]) - goldilocks.Butterfly(&a[21], &a[23]) - goldilocks.Butterfly(&a[24], &a[26]) - goldilocks.Butterfly(&a[25], &a[27]) - goldilocks.Butterfly(&a[28], &a[30]) - goldilocks.Butterfly(&a[29], &a[31]) - goldilocks.Butterfly(&a[32], &a[34]) - goldilocks.Butterfly(&a[33], &a[35]) - goldilocks.Butterfly(&a[36], &a[38]) - goldilocks.Butterfly(&a[37], &a[39]) - goldilocks.Butterfly(&a[40], &a[42]) - goldilocks.Butterfly(&a[41], &a[43]) - goldilocks.Butterfly(&a[44], &a[46]) - goldilocks.Butterfly(&a[45], &a[47]) - goldilocks.Butterfly(&a[48], &a[50]) - goldilocks.Butterfly(&a[49], &a[51]) - goldilocks.Butterfly(&a[52], &a[54]) - goldilocks.Butterfly(&a[53], &a[55]) - goldilocks.Butterfly(&a[56], &a[58]) - goldilocks.Butterfly(&a[57], &a[59]) - goldilocks.Butterfly(&a[60], &a[62]) - goldilocks.Butterfly(&a[61], &a[63]) - a[1].Mul(&a[1], &twiddlesCoset[31]) - a[3].Mul(&a[3], &twiddlesCoset[32]) - a[5].Mul(&a[5], &twiddlesCoset[33]) - a[7].Mul(&a[7], &twiddlesCoset[34]) - a[9].Mul(&a[9], &twiddlesCoset[35]) - a[11].Mul(&a[11], &twiddlesCoset[36]) - a[13].Mul(&a[13], &twiddlesCoset[37]) - a[15].Mul(&a[15], &twiddlesCoset[38]) - a[17].Mul(&a[17], &twiddlesCoset[39]) - a[19].Mul(&a[19], &twiddlesCoset[40]) - a[21].Mul(&a[21], &twiddlesCoset[41]) - a[23].Mul(&a[23], &twiddlesCoset[42]) - a[25].Mul(&a[25], &twiddlesCoset[43]) - a[27].Mul(&a[27], &twiddlesCoset[44]) - a[29].Mul(&a[29], &twiddlesCoset[45]) - a[31].Mul(&a[31], &twiddlesCoset[46]) - a[33].Mul(&a[33], &twiddlesCoset[47]) - a[35].Mul(&a[35], &twiddlesCoset[48]) - a[37].Mul(&a[37], &twiddlesCoset[49]) - a[39].Mul(&a[39], &twiddlesCoset[50]) - a[41].Mul(&a[41], &twiddlesCoset[51]) - a[43].Mul(&a[43], &twiddlesCoset[52]) - a[45].Mul(&a[45], &twiddlesCoset[53]) - a[47].Mul(&a[47], &twiddlesCoset[54]) - a[49].Mul(&a[49], 
&twiddlesCoset[55]) - a[51].Mul(&a[51], &twiddlesCoset[56]) - a[53].Mul(&a[53], &twiddlesCoset[57]) - a[55].Mul(&a[55], &twiddlesCoset[58]) - a[57].Mul(&a[57], &twiddlesCoset[59]) - a[59].Mul(&a[59], &twiddlesCoset[60]) - a[61].Mul(&a[61], &twiddlesCoset[61]) - a[63].Mul(&a[63], &twiddlesCoset[62]) - goldilocks.Butterfly(&a[0], &a[1]) - goldilocks.Butterfly(&a[2], &a[3]) - goldilocks.Butterfly(&a[4], &a[5]) - goldilocks.Butterfly(&a[6], &a[7]) - goldilocks.Butterfly(&a[8], &a[9]) - goldilocks.Butterfly(&a[10], &a[11]) - goldilocks.Butterfly(&a[12], &a[13]) - goldilocks.Butterfly(&a[14], &a[15]) - goldilocks.Butterfly(&a[16], &a[17]) - goldilocks.Butterfly(&a[18], &a[19]) - goldilocks.Butterfly(&a[20], &a[21]) - goldilocks.Butterfly(&a[22], &a[23]) - goldilocks.Butterfly(&a[24], &a[25]) - goldilocks.Butterfly(&a[26], &a[27]) - goldilocks.Butterfly(&a[28], &a[29]) - goldilocks.Butterfly(&a[30], &a[31]) - goldilocks.Butterfly(&a[32], &a[33]) - goldilocks.Butterfly(&a[34], &a[35]) - goldilocks.Butterfly(&a[36], &a[37]) - goldilocks.Butterfly(&a[38], &a[39]) - goldilocks.Butterfly(&a[40], &a[41]) - goldilocks.Butterfly(&a[42], &a[43]) - goldilocks.Butterfly(&a[44], &a[45]) - goldilocks.Butterfly(&a[46], &a[47]) - goldilocks.Butterfly(&a[48], &a[49]) - goldilocks.Butterfly(&a[50], &a[51]) - goldilocks.Butterfly(&a[52], &a[53]) - goldilocks.Butterfly(&a[54], &a[55]) - goldilocks.Butterfly(&a[56], &a[57]) - goldilocks.Butterfly(&a[58], &a[59]) - goldilocks.Butterfly(&a[60], &a[61]) - goldilocks.Butterfly(&a[62], &a[63]) -} - -// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table -// it then return all elements in the correct order for the unrolled FFT. 
-func PrecomputeTwiddlesCoset(generator, shifter goldilocks.Element) []goldilocks.Element { - toReturn := make([]goldilocks.Element, 63) - var r, s goldilocks.Element - e := new(big.Int) - - s = shifter - for k := 0; k < 5; k++ { - s.Square(&s) - } - toReturn[0] = s - s = shifter - for k := 0; k < 4; k++ { - s.Square(&s) - } - toReturn[1] = s - r.Exp(generator, e.SetUint64(uint64(1<<4*1))) - toReturn[2].Mul(&r, &s) - s = shifter - for k := 0; k < 3; k++ { - s.Square(&s) - } - toReturn[3] = s - r.Exp(generator, e.SetUint64(uint64(1<<3*2))) - toReturn[4].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*1))) - toReturn[5].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*3))) - toReturn[6].Mul(&r, &s) - s = shifter - for k := 0; k < 2; k++ { - s.Square(&s) - } - toReturn[7] = s - r.Exp(generator, e.SetUint64(uint64(1<<2*4))) - toReturn[8].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*2))) - toReturn[9].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*6))) - toReturn[10].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*1))) - toReturn[11].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*5))) - toReturn[12].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*3))) - toReturn[13].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*7))) - toReturn[14].Mul(&r, &s) - s = shifter - for k := 0; k < 1; k++ { - s.Square(&s) - } - toReturn[15] = s - r.Exp(generator, e.SetUint64(uint64(1<<1*8))) - toReturn[16].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*4))) - toReturn[17].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*12))) - toReturn[18].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*2))) - toReturn[19].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*10))) - toReturn[20].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*6))) - toReturn[21].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*14))) - toReturn[22].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*1))) - toReturn[23].Mul(&r, 
&s) - r.Exp(generator, e.SetUint64(uint64(1<<1*9))) - toReturn[24].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*5))) - toReturn[25].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*13))) - toReturn[26].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*3))) - toReturn[27].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*11))) - toReturn[28].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*7))) - toReturn[29].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*15))) - toReturn[30].Mul(&r, &s) - s = shifter - for k := 0; k < 0; k++ { - s.Square(&s) - } - toReturn[31] = s - r.Exp(generator, e.SetUint64(uint64(1<<0*16))) - toReturn[32].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*8))) - toReturn[33].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*24))) - toReturn[34].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*4))) - toReturn[35].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*20))) - toReturn[36].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*12))) - toReturn[37].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*28))) - toReturn[38].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*2))) - toReturn[39].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*18))) - toReturn[40].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*10))) - toReturn[41].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*26))) - toReturn[42].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*6))) - toReturn[43].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*22))) - toReturn[44].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*14))) - toReturn[45].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*30))) - toReturn[46].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*1))) - toReturn[47].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*17))) - toReturn[48].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*9))) - toReturn[49].Mul(&r, &s) - r.Exp(generator, 
e.SetUint64(uint64(1<<0*25))) - toReturn[50].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*5))) - toReturn[51].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*21))) - toReturn[52].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*13))) - toReturn[53].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*29))) - toReturn[54].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*3))) - toReturn[55].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*19))) - toReturn[56].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*11))) - toReturn[57].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*27))) - toReturn[58].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*7))) - toReturn[59].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*23))) - toReturn[60].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*15))) - toReturn[61].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*31))) - toReturn[62].Mul(&r, &s) - return toReturn -} diff --git a/field/goldilocks/sis/sis_test.go b/field/goldilocks/sis/sis_test.go index fd8f056b00..66ef08a61b 100644 --- a/field/goldilocks/sis/sis_test.go +++ b/field/goldilocks/sis/sis_test.go @@ -25,8 +25,8 @@ type sisParams struct { var params128Bits []sisParams = []sisParams{ {logTwoBound: 8, logTwoDegree: 6}, - {logTwoBound: 16, logTwoDegree: 7}, - {logTwoBound: 32, logTwoDegree: 8}, + // {logTwoBound: 16, logTwoDegree: 7}, + // {logTwoBound: 32, logTwoDegree: 8}, } type TestCases struct { @@ -263,33 +263,3 @@ func benchmarkSIS(b *testing.B, input []goldilocks.Element, sparse bool, logTwoB }) } - -func TestUnrolledFFT(t *testing.T) { - - var shift goldilocks.Element - shift.SetRandom() - - const size = 64 - assert := require.New(t) - domain := fft.NewDomain(size, fft.WithShift(shift)) - - k1 := make([]goldilocks.Element, size) - for i := 0; i < size; i++ { - k1[i].SetRandom() - } - k2 := make([]goldilocks.Element, size) - copy(k2, k1) - - // default FFT - domain.FFT(k1, fft.DIF, 
fft.OnCoset(), fft.WithNbTasks(1)) - - // unrolled FFT - twiddlesCoset := PrecomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) - FFT64(k2, twiddlesCoset) - - // compare results - for i := 0; i < size; i++ { - // fmt.Printf("i = %d, k1 = %v, k2 = %v\n", i, k1[i].String(), k2[i].String()) - assert.True(k1[i].Equal(&k2[i]), "i = %d", i) - } -} diff --git a/field/koalabear/fft/fft.go b/field/koalabear/fft/fft.go index 157d9e3872..391bec73f0 100644 --- a/field/koalabear/fft/fft.go +++ b/field/koalabear/fft/fft.go @@ -60,9 +60,9 @@ func (domain *Domain) FFT(a []koalabear.Element, decimation Decimation, opts ... } else { if domain.withPrecompute { parallel.Execute(len(a), func(start, end int) { - for i := start; i < end; i++ { - a[i].Mul(&a[i], &domain.cosetTable[i]) - } + v1 := koalabear.Vector(a[start:end]) + v2 := koalabear.Vector(domain.cosetTable[start:end]) + v1.Mul(v1, v2) }, opt.nbTasks) } else { c := domain.FrMultiplicativeGen @@ -199,9 +199,15 @@ func difFFT(a []koalabear.Element, w koalabear.Element, twiddles [][]koalabear.E n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -281,10 +287,17 @@ func ditFFT(a []koalabear.Element, w koalabear.Element, twiddles [][]koalabear.E n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } + m := n >> 1 nextStage := stage + 1 @@ -408,3 +421,39 @@ func kerDITNP_256(a []koalabear.Element, 
twiddles [][]koalabear.Element, stage i } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + koalabear.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + koalabear.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index 72260010ee..ff18823efd 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -34,8 +34,7 @@ type RSis struct { Degree int // domain for the polynomial multiplication - Domain *fft.Domain - twiddleCosets []koalabear.Element // see FFT64 and precomputeTwiddlesCoset + Domain *fft.Domain maxNbElementsToHash int } @@ -96,10 +95,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) 
(*R Ag: make([][]koalabear.Element, n), maxNbElementsToHash: maxNbElementsToHash, } - if r.LogTwoBound == 8 && r.Degree == 64 { - // TODO @gbotrel fixme, that's dirty. - r.twiddleCosets = PrecomputeTwiddlesCoset(r.Domain.Generator, r.Domain.FrMultiplicativeGen) - } // filling A a := make([]koalabear.Element, n*r.Degree) @@ -138,8 +133,6 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } - fastPath := r.LogTwoBound == 8 && r.Degree == 64 - reader := NewVectorLimbReader(v, r.LogTwoBound/8) kz := make([]koalabear.Element, r.Degree) @@ -158,12 +151,9 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { // we can skip this, FFT(0) = 0 continue } - if fastPath { - // fast path. - FFT64(k, r.twiddleCosets) - } else { - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } + + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + mulModAcc(res, r.Ag[i], k) } r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 diff --git a/field/koalabear/sis/sis_fft.go b/field/koalabear/sis/sis_fft.go deleted file mode 100644 index 7706135c9c..0000000000 --- a/field/koalabear/sis/sis_fft.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2020-2025 Consensys Software Inc. -// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. 
- -// Code generated by consensys/gnark-crypto DO NOT EDIT - -package sis - -import ( - "github.com/consensys/gnark-crypto/field/koalabear" - "math/big" -) - -// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements -// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset -func FFT64(a []koalabear.Element, twiddlesCoset []koalabear.Element) { - - a[32].Mul(&a[32], &twiddlesCoset[0]) - a[33].Mul(&a[33], &twiddlesCoset[0]) - a[34].Mul(&a[34], &twiddlesCoset[0]) - a[35].Mul(&a[35], &twiddlesCoset[0]) - a[36].Mul(&a[36], &twiddlesCoset[0]) - a[37].Mul(&a[37], &twiddlesCoset[0]) - a[38].Mul(&a[38], &twiddlesCoset[0]) - a[39].Mul(&a[39], &twiddlesCoset[0]) - a[40].Mul(&a[40], &twiddlesCoset[0]) - a[41].Mul(&a[41], &twiddlesCoset[0]) - a[42].Mul(&a[42], &twiddlesCoset[0]) - a[43].Mul(&a[43], &twiddlesCoset[0]) - a[44].Mul(&a[44], &twiddlesCoset[0]) - a[45].Mul(&a[45], &twiddlesCoset[0]) - a[46].Mul(&a[46], &twiddlesCoset[0]) - a[47].Mul(&a[47], &twiddlesCoset[0]) - a[48].Mul(&a[48], &twiddlesCoset[0]) - a[49].Mul(&a[49], &twiddlesCoset[0]) - a[50].Mul(&a[50], &twiddlesCoset[0]) - a[51].Mul(&a[51], &twiddlesCoset[0]) - a[52].Mul(&a[52], &twiddlesCoset[0]) - a[53].Mul(&a[53], &twiddlesCoset[0]) - a[54].Mul(&a[54], &twiddlesCoset[0]) - a[55].Mul(&a[55], &twiddlesCoset[0]) - a[56].Mul(&a[56], &twiddlesCoset[0]) - a[57].Mul(&a[57], &twiddlesCoset[0]) - a[58].Mul(&a[58], &twiddlesCoset[0]) - a[59].Mul(&a[59], &twiddlesCoset[0]) - a[60].Mul(&a[60], &twiddlesCoset[0]) - a[61].Mul(&a[61], &twiddlesCoset[0]) - a[62].Mul(&a[62], &twiddlesCoset[0]) - a[63].Mul(&a[63], &twiddlesCoset[0]) - koalabear.Butterfly(&a[0], &a[32]) - koalabear.Butterfly(&a[1], &a[33]) - koalabear.Butterfly(&a[2], &a[34]) - koalabear.Butterfly(&a[3], &a[35]) - koalabear.Butterfly(&a[4], &a[36]) - koalabear.Butterfly(&a[5], &a[37]) - 
koalabear.Butterfly(&a[6], &a[38]) - koalabear.Butterfly(&a[7], &a[39]) - koalabear.Butterfly(&a[8], &a[40]) - koalabear.Butterfly(&a[9], &a[41]) - koalabear.Butterfly(&a[10], &a[42]) - koalabear.Butterfly(&a[11], &a[43]) - koalabear.Butterfly(&a[12], &a[44]) - koalabear.Butterfly(&a[13], &a[45]) - koalabear.Butterfly(&a[14], &a[46]) - koalabear.Butterfly(&a[15], &a[47]) - koalabear.Butterfly(&a[16], &a[48]) - koalabear.Butterfly(&a[17], &a[49]) - koalabear.Butterfly(&a[18], &a[50]) - koalabear.Butterfly(&a[19], &a[51]) - koalabear.Butterfly(&a[20], &a[52]) - koalabear.Butterfly(&a[21], &a[53]) - koalabear.Butterfly(&a[22], &a[54]) - koalabear.Butterfly(&a[23], &a[55]) - koalabear.Butterfly(&a[24], &a[56]) - koalabear.Butterfly(&a[25], &a[57]) - koalabear.Butterfly(&a[26], &a[58]) - koalabear.Butterfly(&a[27], &a[59]) - koalabear.Butterfly(&a[28], &a[60]) - koalabear.Butterfly(&a[29], &a[61]) - koalabear.Butterfly(&a[30], &a[62]) - koalabear.Butterfly(&a[31], &a[63]) - a[16].Mul(&a[16], &twiddlesCoset[1]) - a[17].Mul(&a[17], &twiddlesCoset[1]) - a[18].Mul(&a[18], &twiddlesCoset[1]) - a[19].Mul(&a[19], &twiddlesCoset[1]) - a[20].Mul(&a[20], &twiddlesCoset[1]) - a[21].Mul(&a[21], &twiddlesCoset[1]) - a[22].Mul(&a[22], &twiddlesCoset[1]) - a[23].Mul(&a[23], &twiddlesCoset[1]) - a[24].Mul(&a[24], &twiddlesCoset[1]) - a[25].Mul(&a[25], &twiddlesCoset[1]) - a[26].Mul(&a[26], &twiddlesCoset[1]) - a[27].Mul(&a[27], &twiddlesCoset[1]) - a[28].Mul(&a[28], &twiddlesCoset[1]) - a[29].Mul(&a[29], &twiddlesCoset[1]) - a[30].Mul(&a[30], &twiddlesCoset[1]) - a[31].Mul(&a[31], &twiddlesCoset[1]) - a[48].Mul(&a[48], &twiddlesCoset[2]) - a[49].Mul(&a[49], &twiddlesCoset[2]) - a[50].Mul(&a[50], &twiddlesCoset[2]) - a[51].Mul(&a[51], &twiddlesCoset[2]) - a[52].Mul(&a[52], &twiddlesCoset[2]) - a[53].Mul(&a[53], &twiddlesCoset[2]) - a[54].Mul(&a[54], &twiddlesCoset[2]) - a[55].Mul(&a[55], &twiddlesCoset[2]) - a[56].Mul(&a[56], &twiddlesCoset[2]) - a[57].Mul(&a[57], &twiddlesCoset[2]) - 
a[58].Mul(&a[58], &twiddlesCoset[2]) - a[59].Mul(&a[59], &twiddlesCoset[2]) - a[60].Mul(&a[60], &twiddlesCoset[2]) - a[61].Mul(&a[61], &twiddlesCoset[2]) - a[62].Mul(&a[62], &twiddlesCoset[2]) - a[63].Mul(&a[63], &twiddlesCoset[2]) - koalabear.Butterfly(&a[0], &a[16]) - koalabear.Butterfly(&a[1], &a[17]) - koalabear.Butterfly(&a[2], &a[18]) - koalabear.Butterfly(&a[3], &a[19]) - koalabear.Butterfly(&a[4], &a[20]) - koalabear.Butterfly(&a[5], &a[21]) - koalabear.Butterfly(&a[6], &a[22]) - koalabear.Butterfly(&a[7], &a[23]) - koalabear.Butterfly(&a[8], &a[24]) - koalabear.Butterfly(&a[9], &a[25]) - koalabear.Butterfly(&a[10], &a[26]) - koalabear.Butterfly(&a[11], &a[27]) - koalabear.Butterfly(&a[12], &a[28]) - koalabear.Butterfly(&a[13], &a[29]) - koalabear.Butterfly(&a[14], &a[30]) - koalabear.Butterfly(&a[15], &a[31]) - koalabear.Butterfly(&a[32], &a[48]) - koalabear.Butterfly(&a[33], &a[49]) - koalabear.Butterfly(&a[34], &a[50]) - koalabear.Butterfly(&a[35], &a[51]) - koalabear.Butterfly(&a[36], &a[52]) - koalabear.Butterfly(&a[37], &a[53]) - koalabear.Butterfly(&a[38], &a[54]) - koalabear.Butterfly(&a[39], &a[55]) - koalabear.Butterfly(&a[40], &a[56]) - koalabear.Butterfly(&a[41], &a[57]) - koalabear.Butterfly(&a[42], &a[58]) - koalabear.Butterfly(&a[43], &a[59]) - koalabear.Butterfly(&a[44], &a[60]) - koalabear.Butterfly(&a[45], &a[61]) - koalabear.Butterfly(&a[46], &a[62]) - koalabear.Butterfly(&a[47], &a[63]) - a[8].Mul(&a[8], &twiddlesCoset[3]) - a[9].Mul(&a[9], &twiddlesCoset[3]) - a[10].Mul(&a[10], &twiddlesCoset[3]) - a[11].Mul(&a[11], &twiddlesCoset[3]) - a[12].Mul(&a[12], &twiddlesCoset[3]) - a[13].Mul(&a[13], &twiddlesCoset[3]) - a[14].Mul(&a[14], &twiddlesCoset[3]) - a[15].Mul(&a[15], &twiddlesCoset[3]) - a[24].Mul(&a[24], &twiddlesCoset[4]) - a[25].Mul(&a[25], &twiddlesCoset[4]) - a[26].Mul(&a[26], &twiddlesCoset[4]) - a[27].Mul(&a[27], &twiddlesCoset[4]) - a[28].Mul(&a[28], &twiddlesCoset[4]) - a[29].Mul(&a[29], &twiddlesCoset[4]) - a[30].Mul(&a[30], 
&twiddlesCoset[4]) - a[31].Mul(&a[31], &twiddlesCoset[4]) - a[40].Mul(&a[40], &twiddlesCoset[5]) - a[41].Mul(&a[41], &twiddlesCoset[5]) - a[42].Mul(&a[42], &twiddlesCoset[5]) - a[43].Mul(&a[43], &twiddlesCoset[5]) - a[44].Mul(&a[44], &twiddlesCoset[5]) - a[45].Mul(&a[45], &twiddlesCoset[5]) - a[46].Mul(&a[46], &twiddlesCoset[5]) - a[47].Mul(&a[47], &twiddlesCoset[5]) - a[56].Mul(&a[56], &twiddlesCoset[6]) - a[57].Mul(&a[57], &twiddlesCoset[6]) - a[58].Mul(&a[58], &twiddlesCoset[6]) - a[59].Mul(&a[59], &twiddlesCoset[6]) - a[60].Mul(&a[60], &twiddlesCoset[6]) - a[61].Mul(&a[61], &twiddlesCoset[6]) - a[62].Mul(&a[62], &twiddlesCoset[6]) - a[63].Mul(&a[63], &twiddlesCoset[6]) - koalabear.Butterfly(&a[0], &a[8]) - koalabear.Butterfly(&a[1], &a[9]) - koalabear.Butterfly(&a[2], &a[10]) - koalabear.Butterfly(&a[3], &a[11]) - koalabear.Butterfly(&a[4], &a[12]) - koalabear.Butterfly(&a[5], &a[13]) - koalabear.Butterfly(&a[6], &a[14]) - koalabear.Butterfly(&a[7], &a[15]) - koalabear.Butterfly(&a[16], &a[24]) - koalabear.Butterfly(&a[17], &a[25]) - koalabear.Butterfly(&a[18], &a[26]) - koalabear.Butterfly(&a[19], &a[27]) - koalabear.Butterfly(&a[20], &a[28]) - koalabear.Butterfly(&a[21], &a[29]) - koalabear.Butterfly(&a[22], &a[30]) - koalabear.Butterfly(&a[23], &a[31]) - koalabear.Butterfly(&a[32], &a[40]) - koalabear.Butterfly(&a[33], &a[41]) - koalabear.Butterfly(&a[34], &a[42]) - koalabear.Butterfly(&a[35], &a[43]) - koalabear.Butterfly(&a[36], &a[44]) - koalabear.Butterfly(&a[37], &a[45]) - koalabear.Butterfly(&a[38], &a[46]) - koalabear.Butterfly(&a[39], &a[47]) - koalabear.Butterfly(&a[48], &a[56]) - koalabear.Butterfly(&a[49], &a[57]) - koalabear.Butterfly(&a[50], &a[58]) - koalabear.Butterfly(&a[51], &a[59]) - koalabear.Butterfly(&a[52], &a[60]) - koalabear.Butterfly(&a[53], &a[61]) - koalabear.Butterfly(&a[54], &a[62]) - koalabear.Butterfly(&a[55], &a[63]) - a[4].Mul(&a[4], &twiddlesCoset[7]) - a[5].Mul(&a[5], &twiddlesCoset[7]) - a[6].Mul(&a[6], &twiddlesCoset[7]) 
- a[7].Mul(&a[7], &twiddlesCoset[7]) - a[12].Mul(&a[12], &twiddlesCoset[8]) - a[13].Mul(&a[13], &twiddlesCoset[8]) - a[14].Mul(&a[14], &twiddlesCoset[8]) - a[15].Mul(&a[15], &twiddlesCoset[8]) - a[20].Mul(&a[20], &twiddlesCoset[9]) - a[21].Mul(&a[21], &twiddlesCoset[9]) - a[22].Mul(&a[22], &twiddlesCoset[9]) - a[23].Mul(&a[23], &twiddlesCoset[9]) - a[28].Mul(&a[28], &twiddlesCoset[10]) - a[29].Mul(&a[29], &twiddlesCoset[10]) - a[30].Mul(&a[30], &twiddlesCoset[10]) - a[31].Mul(&a[31], &twiddlesCoset[10]) - a[36].Mul(&a[36], &twiddlesCoset[11]) - a[37].Mul(&a[37], &twiddlesCoset[11]) - a[38].Mul(&a[38], &twiddlesCoset[11]) - a[39].Mul(&a[39], &twiddlesCoset[11]) - a[44].Mul(&a[44], &twiddlesCoset[12]) - a[45].Mul(&a[45], &twiddlesCoset[12]) - a[46].Mul(&a[46], &twiddlesCoset[12]) - a[47].Mul(&a[47], &twiddlesCoset[12]) - a[52].Mul(&a[52], &twiddlesCoset[13]) - a[53].Mul(&a[53], &twiddlesCoset[13]) - a[54].Mul(&a[54], &twiddlesCoset[13]) - a[55].Mul(&a[55], &twiddlesCoset[13]) - a[60].Mul(&a[60], &twiddlesCoset[14]) - a[61].Mul(&a[61], &twiddlesCoset[14]) - a[62].Mul(&a[62], &twiddlesCoset[14]) - a[63].Mul(&a[63], &twiddlesCoset[14]) - koalabear.Butterfly(&a[0], &a[4]) - koalabear.Butterfly(&a[1], &a[5]) - koalabear.Butterfly(&a[2], &a[6]) - koalabear.Butterfly(&a[3], &a[7]) - koalabear.Butterfly(&a[8], &a[12]) - koalabear.Butterfly(&a[9], &a[13]) - koalabear.Butterfly(&a[10], &a[14]) - koalabear.Butterfly(&a[11], &a[15]) - koalabear.Butterfly(&a[16], &a[20]) - koalabear.Butterfly(&a[17], &a[21]) - koalabear.Butterfly(&a[18], &a[22]) - koalabear.Butterfly(&a[19], &a[23]) - koalabear.Butterfly(&a[24], &a[28]) - koalabear.Butterfly(&a[25], &a[29]) - koalabear.Butterfly(&a[26], &a[30]) - koalabear.Butterfly(&a[27], &a[31]) - koalabear.Butterfly(&a[32], &a[36]) - koalabear.Butterfly(&a[33], &a[37]) - koalabear.Butterfly(&a[34], &a[38]) - koalabear.Butterfly(&a[35], &a[39]) - koalabear.Butterfly(&a[40], &a[44]) - koalabear.Butterfly(&a[41], &a[45]) - 
koalabear.Butterfly(&a[42], &a[46]) - koalabear.Butterfly(&a[43], &a[47]) - koalabear.Butterfly(&a[48], &a[52]) - koalabear.Butterfly(&a[49], &a[53]) - koalabear.Butterfly(&a[50], &a[54]) - koalabear.Butterfly(&a[51], &a[55]) - koalabear.Butterfly(&a[56], &a[60]) - koalabear.Butterfly(&a[57], &a[61]) - koalabear.Butterfly(&a[58], &a[62]) - koalabear.Butterfly(&a[59], &a[63]) - a[2].Mul(&a[2], &twiddlesCoset[15]) - a[3].Mul(&a[3], &twiddlesCoset[15]) - a[6].Mul(&a[6], &twiddlesCoset[16]) - a[7].Mul(&a[7], &twiddlesCoset[16]) - a[10].Mul(&a[10], &twiddlesCoset[17]) - a[11].Mul(&a[11], &twiddlesCoset[17]) - a[14].Mul(&a[14], &twiddlesCoset[18]) - a[15].Mul(&a[15], &twiddlesCoset[18]) - a[18].Mul(&a[18], &twiddlesCoset[19]) - a[19].Mul(&a[19], &twiddlesCoset[19]) - a[22].Mul(&a[22], &twiddlesCoset[20]) - a[23].Mul(&a[23], &twiddlesCoset[20]) - a[26].Mul(&a[26], &twiddlesCoset[21]) - a[27].Mul(&a[27], &twiddlesCoset[21]) - a[30].Mul(&a[30], &twiddlesCoset[22]) - a[31].Mul(&a[31], &twiddlesCoset[22]) - a[34].Mul(&a[34], &twiddlesCoset[23]) - a[35].Mul(&a[35], &twiddlesCoset[23]) - a[38].Mul(&a[38], &twiddlesCoset[24]) - a[39].Mul(&a[39], &twiddlesCoset[24]) - a[42].Mul(&a[42], &twiddlesCoset[25]) - a[43].Mul(&a[43], &twiddlesCoset[25]) - a[46].Mul(&a[46], &twiddlesCoset[26]) - a[47].Mul(&a[47], &twiddlesCoset[26]) - a[50].Mul(&a[50], &twiddlesCoset[27]) - a[51].Mul(&a[51], &twiddlesCoset[27]) - a[54].Mul(&a[54], &twiddlesCoset[28]) - a[55].Mul(&a[55], &twiddlesCoset[28]) - a[58].Mul(&a[58], &twiddlesCoset[29]) - a[59].Mul(&a[59], &twiddlesCoset[29]) - a[62].Mul(&a[62], &twiddlesCoset[30]) - a[63].Mul(&a[63], &twiddlesCoset[30]) - koalabear.Butterfly(&a[0], &a[2]) - koalabear.Butterfly(&a[1], &a[3]) - koalabear.Butterfly(&a[4], &a[6]) - koalabear.Butterfly(&a[5], &a[7]) - koalabear.Butterfly(&a[8], &a[10]) - koalabear.Butterfly(&a[9], &a[11]) - koalabear.Butterfly(&a[12], &a[14]) - koalabear.Butterfly(&a[13], &a[15]) - koalabear.Butterfly(&a[16], &a[18]) - 
koalabear.Butterfly(&a[17], &a[19]) - koalabear.Butterfly(&a[20], &a[22]) - koalabear.Butterfly(&a[21], &a[23]) - koalabear.Butterfly(&a[24], &a[26]) - koalabear.Butterfly(&a[25], &a[27]) - koalabear.Butterfly(&a[28], &a[30]) - koalabear.Butterfly(&a[29], &a[31]) - koalabear.Butterfly(&a[32], &a[34]) - koalabear.Butterfly(&a[33], &a[35]) - koalabear.Butterfly(&a[36], &a[38]) - koalabear.Butterfly(&a[37], &a[39]) - koalabear.Butterfly(&a[40], &a[42]) - koalabear.Butterfly(&a[41], &a[43]) - koalabear.Butterfly(&a[44], &a[46]) - koalabear.Butterfly(&a[45], &a[47]) - koalabear.Butterfly(&a[48], &a[50]) - koalabear.Butterfly(&a[49], &a[51]) - koalabear.Butterfly(&a[52], &a[54]) - koalabear.Butterfly(&a[53], &a[55]) - koalabear.Butterfly(&a[56], &a[58]) - koalabear.Butterfly(&a[57], &a[59]) - koalabear.Butterfly(&a[60], &a[62]) - koalabear.Butterfly(&a[61], &a[63]) - a[1].Mul(&a[1], &twiddlesCoset[31]) - a[3].Mul(&a[3], &twiddlesCoset[32]) - a[5].Mul(&a[5], &twiddlesCoset[33]) - a[7].Mul(&a[7], &twiddlesCoset[34]) - a[9].Mul(&a[9], &twiddlesCoset[35]) - a[11].Mul(&a[11], &twiddlesCoset[36]) - a[13].Mul(&a[13], &twiddlesCoset[37]) - a[15].Mul(&a[15], &twiddlesCoset[38]) - a[17].Mul(&a[17], &twiddlesCoset[39]) - a[19].Mul(&a[19], &twiddlesCoset[40]) - a[21].Mul(&a[21], &twiddlesCoset[41]) - a[23].Mul(&a[23], &twiddlesCoset[42]) - a[25].Mul(&a[25], &twiddlesCoset[43]) - a[27].Mul(&a[27], &twiddlesCoset[44]) - a[29].Mul(&a[29], &twiddlesCoset[45]) - a[31].Mul(&a[31], &twiddlesCoset[46]) - a[33].Mul(&a[33], &twiddlesCoset[47]) - a[35].Mul(&a[35], &twiddlesCoset[48]) - a[37].Mul(&a[37], &twiddlesCoset[49]) - a[39].Mul(&a[39], &twiddlesCoset[50]) - a[41].Mul(&a[41], &twiddlesCoset[51]) - a[43].Mul(&a[43], &twiddlesCoset[52]) - a[45].Mul(&a[45], &twiddlesCoset[53]) - a[47].Mul(&a[47], &twiddlesCoset[54]) - a[49].Mul(&a[49], &twiddlesCoset[55]) - a[51].Mul(&a[51], &twiddlesCoset[56]) - a[53].Mul(&a[53], &twiddlesCoset[57]) - a[55].Mul(&a[55], &twiddlesCoset[58]) - 
a[57].Mul(&a[57], &twiddlesCoset[59]) - a[59].Mul(&a[59], &twiddlesCoset[60]) - a[61].Mul(&a[61], &twiddlesCoset[61]) - a[63].Mul(&a[63], &twiddlesCoset[62]) - koalabear.Butterfly(&a[0], &a[1]) - koalabear.Butterfly(&a[2], &a[3]) - koalabear.Butterfly(&a[4], &a[5]) - koalabear.Butterfly(&a[6], &a[7]) - koalabear.Butterfly(&a[8], &a[9]) - koalabear.Butterfly(&a[10], &a[11]) - koalabear.Butterfly(&a[12], &a[13]) - koalabear.Butterfly(&a[14], &a[15]) - koalabear.Butterfly(&a[16], &a[17]) - koalabear.Butterfly(&a[18], &a[19]) - koalabear.Butterfly(&a[20], &a[21]) - koalabear.Butterfly(&a[22], &a[23]) - koalabear.Butterfly(&a[24], &a[25]) - koalabear.Butterfly(&a[26], &a[27]) - koalabear.Butterfly(&a[28], &a[29]) - koalabear.Butterfly(&a[30], &a[31]) - koalabear.Butterfly(&a[32], &a[33]) - koalabear.Butterfly(&a[34], &a[35]) - koalabear.Butterfly(&a[36], &a[37]) - koalabear.Butterfly(&a[38], &a[39]) - koalabear.Butterfly(&a[40], &a[41]) - koalabear.Butterfly(&a[42], &a[43]) - koalabear.Butterfly(&a[44], &a[45]) - koalabear.Butterfly(&a[46], &a[47]) - koalabear.Butterfly(&a[48], &a[49]) - koalabear.Butterfly(&a[50], &a[51]) - koalabear.Butterfly(&a[52], &a[53]) - koalabear.Butterfly(&a[54], &a[55]) - koalabear.Butterfly(&a[56], &a[57]) - koalabear.Butterfly(&a[58], &a[59]) - koalabear.Butterfly(&a[60], &a[61]) - koalabear.Butterfly(&a[62], &a[63]) -} - -// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table -// it then return all elements in the correct order for the unrolled FFT. 
-func PrecomputeTwiddlesCoset(generator, shifter koalabear.Element) []koalabear.Element { - toReturn := make([]koalabear.Element, 63) - var r, s koalabear.Element - e := new(big.Int) - - s = shifter - for k := 0; k < 5; k++ { - s.Square(&s) - } - toReturn[0] = s - s = shifter - for k := 0; k < 4; k++ { - s.Square(&s) - } - toReturn[1] = s - r.Exp(generator, e.SetUint64(uint64(1<<4*1))) - toReturn[2].Mul(&r, &s) - s = shifter - for k := 0; k < 3; k++ { - s.Square(&s) - } - toReturn[3] = s - r.Exp(generator, e.SetUint64(uint64(1<<3*2))) - toReturn[4].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*1))) - toReturn[5].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*3))) - toReturn[6].Mul(&r, &s) - s = shifter - for k := 0; k < 2; k++ { - s.Square(&s) - } - toReturn[7] = s - r.Exp(generator, e.SetUint64(uint64(1<<2*4))) - toReturn[8].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*2))) - toReturn[9].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*6))) - toReturn[10].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*1))) - toReturn[11].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*5))) - toReturn[12].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*3))) - toReturn[13].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*7))) - toReturn[14].Mul(&r, &s) - s = shifter - for k := 0; k < 1; k++ { - s.Square(&s) - } - toReturn[15] = s - r.Exp(generator, e.SetUint64(uint64(1<<1*8))) - toReturn[16].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*4))) - toReturn[17].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*12))) - toReturn[18].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*2))) - toReturn[19].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*10))) - toReturn[20].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*6))) - toReturn[21].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*14))) - toReturn[22].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*1))) - toReturn[23].Mul(&r, &s) - 
r.Exp(generator, e.SetUint64(uint64(1<<1*9))) - toReturn[24].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*5))) - toReturn[25].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*13))) - toReturn[26].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*3))) - toReturn[27].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*11))) - toReturn[28].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*7))) - toReturn[29].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*15))) - toReturn[30].Mul(&r, &s) - s = shifter - for k := 0; k < 0; k++ { - s.Square(&s) - } - toReturn[31] = s - r.Exp(generator, e.SetUint64(uint64(1<<0*16))) - toReturn[32].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*8))) - toReturn[33].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*24))) - toReturn[34].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*4))) - toReturn[35].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*20))) - toReturn[36].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*12))) - toReturn[37].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*28))) - toReturn[38].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*2))) - toReturn[39].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*18))) - toReturn[40].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*10))) - toReturn[41].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*26))) - toReturn[42].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*6))) - toReturn[43].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*22))) - toReturn[44].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*14))) - toReturn[45].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*30))) - toReturn[46].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*1))) - toReturn[47].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*17))) - toReturn[48].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*9))) - toReturn[49].Mul(&r, &s) - r.Exp(generator, 
e.SetUint64(uint64(1<<0*25))) - toReturn[50].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*5))) - toReturn[51].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*21))) - toReturn[52].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*13))) - toReturn[53].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*29))) - toReturn[54].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*3))) - toReturn[55].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*19))) - toReturn[56].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*11))) - toReturn[57].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*27))) - toReturn[58].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*7))) - toReturn[59].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*23))) - toReturn[60].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*15))) - toReturn[61].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*31))) - toReturn[62].Mul(&r, &s) - return toReturn -} diff --git a/field/koalabear/sis/sis_test.go b/field/koalabear/sis/sis_test.go index bc802fc8df..729a836605 100644 --- a/field/koalabear/sis/sis_test.go +++ b/field/koalabear/sis/sis_test.go @@ -25,8 +25,8 @@ type sisParams struct { var params128Bits []sisParams = []sisParams{ {logTwoBound: 8, logTwoDegree: 6}, - {logTwoBound: 16, logTwoDegree: 7}, - {logTwoBound: 32, logTwoDegree: 8}, + // {logTwoBound: 16, logTwoDegree: 7}, + // {logTwoBound: 32, logTwoDegree: 8}, } type TestCases struct { @@ -263,33 +263,3 @@ func benchmarkSIS(b *testing.B, input []koalabear.Element, sparse bool, logTwoBo }) } - -func TestUnrolledFFT(t *testing.T) { - - var shift koalabear.Element - shift.SetRandom() - - const size = 64 - assert := require.New(t) - domain := fft.NewDomain(size, fft.WithShift(shift)) - - k1 := make([]koalabear.Element, size) - for i := 0; i < size; i++ { - k1[i].SetRandom() - } - k2 := make([]koalabear.Element, size) - copy(k2, k1) - - // default FFT - domain.FFT(k1, fft.DIF, fft.OnCoset(), 
fft.WithNbTasks(1)) - - // unrolled FFT - twiddlesCoset := PrecomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) - FFT64(k2, twiddlesCoset) - - // compare results - for i := 0; i < size; i++ { - // fmt.Printf("i = %d, k1 = %v, k2 = %v\n", i, k1[i].String(), k2[i].String()) - assert.True(k1[i].Equal(&k2[i]), "i = %d", i) - } -} diff --git a/internal/generator/main.go b/internal/generator/main.go index d6b45d629a..8dcd881eba 100644 --- a/internal/generator/main.go +++ b/internal/generator/main.go @@ -29,7 +29,6 @@ import ( "github.com/consensys/gnark-crypto/internal/generator/plookup" "github.com/consensys/gnark-crypto/internal/generator/polynomial" "github.com/consensys/gnark-crypto/internal/generator/shplonk" - "github.com/consensys/gnark-crypto/internal/generator/sis" "github.com/consensys/gnark-crypto/internal/generator/sumcheck" "github.com/consensys/gnark-crypto/internal/generator/test_vector_utils" "github.com/consensys/gnark-crypto/internal/generator/tower" @@ -108,10 +107,6 @@ func main() { // generate fri on fr assertNoError(fri.Generate(conf, filepath.Join(curveDir, "fr", "fri"), bgen)) - if conf.Equal(config.BN254) || conf.Equal(config.BLS12_377) { - assertNoError(sis.Generate(conf, filepath.Join(curveDir, "fr", "sis"), bgen)) - } - // generate kzg on fr assertNoError(kzg.Generate(conf, filepath.Join(curveDir, "kzg"), bgen)) diff --git a/internal/generator/sis/generate.go b/internal/generator/sis/generate.go deleted file mode 100644 index 8eec8aef67..0000000000 --- a/internal/generator/sis/generate.go +++ /dev/null @@ -1,37 +0,0 @@ -package sis - -import ( - "math/bits" - "path/filepath" - - "github.com/consensys/bavard" - "github.com/consensys/gnark-crypto/internal/generator/config" -) - -func Generate(conf config.Curve, baseDir string, bgen *bavard.BatchGenerator) error { - - conf.Package = "sis" - entries := []bavard.Entry{ - {File: filepath.Join(baseDir, "sis_fft.go"), Templates: []string{"fft.go.tmpl"}}, - } - - funcs := 
make(map[string]interface{}) - funcs["bitReverse"] = func(n, i int64) uint64 { - nn := uint64(64 - bits.TrailingZeros64(uint64(n))) - r := make([]uint64, n) - for i := 0; i < len(r); i++ { - r[i] = uint64(i) - } - for i := 0; i < len(r); i++ { - irev := bits.Reverse64(r[i]) >> nn - if irev > uint64(i) { - r[i], r[irev] = r[irev], r[i] - } - } - return r[i] - } - - bavardOpts := []func(*bavard.Bavard) error{bavard.Funcs(funcs)} - - return bgen.GenerateWithOptions(conf, conf.Package, "./sis/template/", bavardOpts, entries...) -} diff --git a/internal/generator/sis/template/fft.go.tmpl b/internal/generator/sis/template/fft.go.tmpl deleted file mode 100644 index fefbae06df..0000000000 --- a/internal/generator/sis/template/fft.go.tmpl +++ /dev/null @@ -1,82 +0,0 @@ -import ( - "github.com/consensys/gnark-crypto/ecc/{{ .Name }}/fr" - "math/big" -) - -// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements -// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset -func FFT64(a []fr.Element, twiddlesCoset []fr.Element) { - - {{- /* notes: - this function can be updated with larger n - nbSteps must be updated too such as 1 << nbSteps == n - butterflies and multiplication are separated for size n = 8, must check perf for larger n - */}} - {{$tIndex := 0}} - {{ $n := 64}} - {{ $m := div $n 2}} - {{ $split := 1}} - {{ $split = div $split 1}} - {{- range $step := reverse (iterate 0 6)}} - - {{- $offset := 0}} - {{- range $s := iterate 0 $split}} - {{- range $i := iterate 0 $m}} - {{- $j := add $i $offset}} - {{- $k := add $j $m}} - a[{{$k}}].Mul(&a[{{$k}}], &twiddlesCoset[{{$tIndex}}]) - {{- end}} - {{- $offset = add $offset $n}} - {{- $tIndex = add $tIndex 1}} - {{- end}} - - {{- $offset := 0}} - {{- range $s := iterate 0 $split}} - {{- range $i := iterate 0 $m}} - {{- $j := add $i $offset}} - {{- $k := add $j $m}} - 
fr.Butterfly(&a[{{$j}}], &a[{{$k}}]) - {{- end}} - {{- $offset = add $offset $n}} - {{- end}} - - {{- $n = div $n 2}} - {{- $m = div $n 2}} - {{- $split = mul $split 2}} - {{- end}} -} - -// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table -// it then return all elements in the correct order for the unrolled FFT. -func PrecomputeTwiddlesCoset(generator, shifter fr.Element) []fr.Element { - toReturn := make([]fr.Element, 63) - var r, s fr.Element - e := new(big.Int) - {{ $n := 64}} - {{ $m := div $n 2}} - {{ $split := 1}} - {{ $split = div $split 1}} - {{ $j := 0}} - {{- range $step := reverse (iterate 0 6)}} - s = shifter - for k:=0; k <{{$step}};k++ { - s.Square(&s) - } - - {{- $offset := 0}} - {{- range $s := iterate 0 $split}} - {{- $exp := bitReverse $split $s}} - {{- if eq $exp 0}} - toReturn[{{$j}}] = s - {{- else}} - r.Exp(generator, e.SetUint64(uint64(1<<{{$step}} * {{$exp}}))) - toReturn[{{$j}}].Mul(&r, &s) - {{- end}} - {{- $j = add $j 1}} - {{- end}} - - {{- $split = mul $split 2}} - {{- end}} - return toReturn -} \ No newline at end of file From 7c1889f69ce6b0bcc39aa243f26041cbce7af8d7 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Tue, 7 Jan 2025 20:38:51 -0600 Subject: [PATCH 08/25] style: minor comment --- field/generator/internal/templates/sis/sis.go.tmpl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index 674624219e..cb4b8853a3 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -51,7 +51,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, errors.New("logTwoBound too large") } if logTwoBound % 8 != 0 { - panic("logTwoBound must be a multiple of 8") + return nil, errors.New("logTwoBound must be a multiple of 8") } if bits.UintSize == 32 { return nil, 
errors.New("unsupported architecture; need 64bit target") @@ -111,7 +111,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset()) + r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } }) @@ -239,7 +239,6 @@ func NewVectorLimbReader(v {{ .FF }}.Vector, limbSize int) *VectorLimbReader { func (vr *VectorLimbReader) NextLimb() {{$tReturn}} { if vr.j == {{ .FF }}.Bytes { vr.j = 0 - // TODO @gbotrel we could return 0 in the case vr.i == len(vr.v) {{.FF}}.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) vr.i++ } From 830bab3d8d2a6f6058ff5448a0d7664483212621 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Wed, 8 Jan 2025 03:02:25 +0000 Subject: [PATCH 09/25] perf: more perf stuff in sis, benefits from AVX152 --- ecc/bls12-377/fr/fft/fft.go | 9 ++++++-- ecc/bls12-377/fr/sis/sis.go | 22 +++++++++---------- ecc/bls12-381/fr/fft/fft.go | 9 ++++++-- ecc/bls24-315/fr/fft/fft.go | 9 ++++++-- ecc/bls24-317/fr/fft/fft.go | 9 ++++++-- ecc/bn254/fr/fft/fft.go | 9 ++++++-- ecc/bw6-633/fr/fft/fft.go | 9 ++++++-- ecc/bw6-761/fr/fft/fft.go | 9 ++++++-- field/babybear/fft/fft.go | 9 ++++++-- field/babybear/sis/sis.go | 22 +++++++++---------- .../internal/templates/fft/fft.go.tmpl | 10 ++++++--- .../internal/templates/sis/sis.go.tmpl | 17 +++++++------- field/goldilocks/fft/fft.go | 9 ++++++-- field/goldilocks/sis/sis.go | 22 +++++++++---------- field/koalabear/fft/fft.go | 9 ++++++-- field/koalabear/sis/sis.go | 22 +++++++++---------- 16 files changed, 125 insertions(+), 80 deletions(-) diff --git a/ecc/bls12-377/fr/fft/fft.go b/ecc/bls12-377/fr/fft/fft.go index 3c76d4f66c..ff9623cfb9 100644 --- a/ecc/bls12-377/fr/fft/fft.go +++ b/ecc/bls12-377/fr/fft/fft.go @@ -264,8 +264,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { 
fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -350,8 +353,10 @@ func innerDITWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i fr.Butterfly(&a[0], &a[m]) start++ } + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) for i := start; i < end; i++ { - a[i+m].Mul(&a[i+m], &twiddles[i]) fr.Butterfly(&a[i], &a[i+m]) } } diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 27cc2e466b..b8aa21c1fa 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -51,7 +51,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, errors.New("logTwoBound too large") } if logTwoBound%8 != 0 { - panic("logTwoBound must be a multiple of 8") + return nil, errors.New("logTwoBound must be a multiple of 8") } if bits.UintSize == 32 { return nil, errors.New("unsupported architecture; need 64bit target") @@ -111,7 +111,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset()) + r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } }) @@ -167,12 +167,11 @@ func (r *RSis) Hash(v, res []fr.Element) error { // The result is not FFTinversed. The fft inverse is done once every // multiplications are done. // then accumulates the mulMod result in res. 
-func mulModAcc(res []fr.Element, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []fr.Element) { - var t fr.Element - for i := 0; i < len(pLagrangeCosetBitReversed); i++ { - t.Mul(&pLagrangeCosetBitReversed[i], &qLagrangeCosetBitReversed[i]) - res[i].Add(&res[i], &t) - } +// qLagrangeCosetBitReversed and res are mutated. +// pLagrangeCosetBitReversed is not mutated. +func mulModAcc(res, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed fr.Vector) { + qLagrangeCosetBitReversed.Mul(qLagrangeCosetBitReversed, pLagrangeCosetBitReversed) + res.Add(res, qLagrangeCosetBitReversed) } func deriveRandomElementFromSeed(seed, i, j int64) fr.Element { @@ -190,9 +189,7 @@ func deriveRandomElementFromSeed(seed, i, j int64) fr.Element { return res } -// VectorLimbReader reads a vector of field element, limb by limb. -// The elements are interpreted in little endian. -// The limb is also interpreted in little endian. +// VectorLimbReader iterates over a vector of field element, limb by limb. type VectorLimbReader struct { v fr.Vector buf [fr.Bytes]byte @@ -206,6 +203,8 @@ type VectorLimbReader struct { // NewVectorLimbReader creates a new VectorLimbReader // v: the vector to read // limbSize: the size of the limb in bytes (1, 2, 4 or 8) +// The elements are interpreted in little endian. +// The limb is also in little endian. 
func NewVectorLimbReader(v fr.Vector, limbSize int) *VectorLimbReader { var next func(buf []byte, pos *int) uint64 switch limbSize { @@ -234,7 +233,6 @@ func NewVectorLimbReader(v fr.Vector, limbSize int) *VectorLimbReader { func (vr *VectorLimbReader) NextLimb() uint64 { if vr.j == fr.Bytes { vr.j = 0 - // TODO @gbotrel we could return 0 in the case vr.i == len(vr.v) fr.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) vr.i++ } diff --git a/ecc/bls12-381/fr/fft/fft.go b/ecc/bls12-381/fr/fft/fft.go index e7808933c6..3b2e29739c 100644 --- a/ecc/bls12-381/fr/fft/fft.go +++ b/ecc/bls12-381/fr/fft/fft.go @@ -264,8 +264,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -350,8 +353,10 @@ func innerDITWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i fr.Butterfly(&a[0], &a[m]) start++ } + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) for i := start; i < end; i++ { - a[i+m].Mul(&a[i+m], &twiddles[i]) fr.Butterfly(&a[i], &a[i+m]) } } diff --git a/ecc/bls24-315/fr/fft/fft.go b/ecc/bls24-315/fr/fft/fft.go index 0a4904eb74..506f12fb4d 100644 --- a/ecc/bls24-315/fr/fft/fft.go +++ b/ecc/bls24-315/fr/fft/fft.go @@ -264,8 +264,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func 
innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -350,8 +353,10 @@ func innerDITWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i fr.Butterfly(&a[0], &a[m]) start++ } + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) for i := start; i < end; i++ { - a[i+m].Mul(&a[i+m], &twiddles[i]) fr.Butterfly(&a[i], &a[i+m]) } } diff --git a/ecc/bls24-317/fr/fft/fft.go b/ecc/bls24-317/fr/fft/fft.go index 6590c1be9a..4a418f87fb 100644 --- a/ecc/bls24-317/fr/fft/fft.go +++ b/ecc/bls24-317/fr/fft/fft.go @@ -264,8 +264,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -350,8 +353,10 @@ func innerDITWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i fr.Butterfly(&a[0], &a[m]) start++ } + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) for i := start; i < end; i++ { - a[i+m].Mul(&a[i+m], &twiddles[i]) fr.Butterfly(&a[i], &a[i+m]) } } diff --git a/ecc/bn254/fr/fft/fft.go b/ecc/bn254/fr/fft/fft.go index 9aa097620d..463faffb6d 100644 --- a/ecc/bn254/fr/fft/fft.go +++ b/ecc/bn254/fr/fft/fft.go @@ -264,8 +264,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w 
fr.Element, start, end, m int) { @@ -350,8 +353,10 @@ func innerDITWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i fr.Butterfly(&a[0], &a[m]) start++ } + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) for i := start; i < end; i++ { - a[i+m].Mul(&a[i+m], &twiddles[i]) fr.Butterfly(&a[i], &a[i+m]) } } diff --git a/ecc/bw6-633/fr/fft/fft.go b/ecc/bw6-633/fr/fft/fft.go index 1725b7886e..8fd7d57e79 100644 --- a/ecc/bw6-633/fr/fft/fft.go +++ b/ecc/bw6-633/fr/fft/fft.go @@ -264,8 +264,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -350,8 +353,10 @@ func innerDITWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i fr.Butterfly(&a[0], &a[m]) start++ } + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) for i := start; i < end; i++ { - a[i+m].Mul(&a[i+m], &twiddles[i]) fr.Butterfly(&a[i], &a[i+m]) } } diff --git a/ecc/bw6-761/fr/fft/fft.go b/ecc/bw6-761/fr/fft/fft.go index 597251ff1c..76d6bb7a76 100644 --- a/ecc/bw6-761/fr/fft/fft.go +++ b/ecc/bw6-761/fr/fft/fft.go @@ -264,8 +264,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -350,8 
+353,10 @@ func innerDITWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i fr.Butterfly(&a[0], &a[m]) start++ } + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) for i := start; i < end; i++ { - a[i+m].Mul(&a[i+m], &twiddles[i]) fr.Butterfly(&a[i], &a[i+m]) } } diff --git a/field/babybear/fft/fft.go b/field/babybear/fft/fft.go index ffc722fad3..a623c24a97 100644 --- a/field/babybear/fft/fft.go +++ b/field/babybear/fft/fft.go @@ -264,8 +264,11 @@ func innerDIFWithTwiddles(a []babybear.Element, twiddles []babybear.Element, sta } for i := start; i < end; i++ { babybear.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := babybear.Vector(a[start+m : end+m]) + v2 := babybear.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []babybear.Element, at, w babybear.Element, start, end, m int) { @@ -350,8 +353,10 @@ func innerDITWithTwiddles(a []babybear.Element, twiddles []babybear.Element, sta babybear.Butterfly(&a[0], &a[m]) start++ } + v1 := babybear.Vector(a[start+m : end+m]) + v2 := babybear.Vector(twiddles[start:end]) + v1.Mul(v1, v2) for i := start; i < end; i++ { - a[i+m].Mul(&a[i+m], &twiddles[i]) babybear.Butterfly(&a[i], &a[i+m]) } } diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index 0e65303564..2fa855d55f 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -51,7 +51,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, errors.New("logTwoBound too large") } if logTwoBound%8 != 0 { - panic("logTwoBound must be a multiple of 8") + return nil, errors.New("logTwoBound must be a multiple of 8") } if bits.UintSize == 32 { return nil, errors.New("unsupported architecture; need 64bit target") @@ -111,7 +111,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) 
(*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset()) + r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } }) @@ -167,12 +167,11 @@ func (r *RSis) Hash(v, res []babybear.Element) error { // The result is not FFTinversed. The fft inverse is done once every // multiplications are done. // then accumulates the mulMod result in res. -func mulModAcc(res []babybear.Element, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []babybear.Element) { - var t babybear.Element - for i := 0; i < len(pLagrangeCosetBitReversed); i++ { - t.Mul(&pLagrangeCosetBitReversed[i], &qLagrangeCosetBitReversed[i]) - res[i].Add(&res[i], &t) - } +// qLagrangeCosetBitReversed and res are mutated. +// pLagrangeCosetBitReversed is not mutated. +func mulModAcc(res, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed babybear.Vector) { + qLagrangeCosetBitReversed.Mul(qLagrangeCosetBitReversed, pLagrangeCosetBitReversed) + res.Add(res, qLagrangeCosetBitReversed) } func deriveRandomElementFromSeed(seed, i, j int64) babybear.Element { @@ -190,9 +189,7 @@ func deriveRandomElementFromSeed(seed, i, j int64) babybear.Element { return res } -// VectorLimbReader reads a vector of field element, limb by limb. -// The elements are interpreted in little endian. -// The limb is also interpreted in little endian. +// VectorLimbReader iterates over a vector of field element, limb by limb. type VectorLimbReader struct { v babybear.Vector buf [babybear.Bytes]byte @@ -206,6 +203,8 @@ type VectorLimbReader struct { // NewVectorLimbReader creates a new VectorLimbReader // v: the vector to read // limbSize: the size of the limb in bytes (1, 2, 4 or 8) +// The elements are interpreted in little endian. +// The limb is also in little endian. 
func NewVectorLimbReader(v babybear.Vector, limbSize int) *VectorLimbReader { var next func(buf []byte, pos *int) uint32 switch limbSize { @@ -230,7 +229,6 @@ func NewVectorLimbReader(v babybear.Vector, limbSize int) *VectorLimbReader { func (vr *VectorLimbReader) NextLimb() uint32 { if vr.j == babybear.Bytes { vr.j = 0 - // TODO @gbotrel we could return 0 in the case vr.i == len(vr.v) babybear.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) vr.i++ } diff --git a/field/generator/internal/templates/fft/fft.go.tmpl b/field/generator/internal/templates/fft/fft.go.tmpl index 58a6062fa5..a20785ba28 100644 --- a/field/generator/internal/templates/fft/fft.go.tmpl +++ b/field/generator/internal/templates/fft/fft.go.tmpl @@ -260,7 +260,6 @@ func difFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E } - func innerDIFWithTwiddles(a []{{ .FF }}.Element, twiddles []{{ .FF }}.Element, start, end, m int) { if start == 0 { {{ .FF }}.Butterfly(&a[0], &a[m]) @@ -268,8 +267,11 @@ func innerDIFWithTwiddles(a []{{ .FF }}.Element, twiddles []{{ .FF }}.Element, s } for i := start; i < end; i++ { {{ .FF }}.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := {{ .FF }}.Vector(a[start+m:end+m]) + v2 := {{ .FF }}.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []{{ .FF }}.Element, at, w {{ .FF }}.Element, start, end, m int) { @@ -356,8 +358,10 @@ func innerDITWithTwiddles(a []{{ .FF }}.Element, twiddles []{{ .FF }}.Element, s {{ .FF }}.Butterfly(&a[0], &a[m]) start++ } + v1 := {{ .FF }}.Vector(a[start+m:end+m]) + v2 := {{ .FF }}.Vector(twiddles[start:end]) + v1.Mul(v1, v2) for i := start; i < end; i++ { - a[i+m].Mul(&a[i+m], &twiddles[i]) {{ .FF }}.Butterfly(&a[i], &a[i+m]) } } diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index cb4b8853a3..c4425624a6 100644 --- 
a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -169,12 +169,11 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { // The result is not FFTinversed. The fft inverse is done once every // multiplications are done. // then accumulates the mulMod result in res. -func mulModAcc(res []{{ .FF }}.Element, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []{{ .FF }}.Element) { - var t {{ .FF }}.Element - for i := 0; i < len(pLagrangeCosetBitReversed); i++ { - t.Mul(&pLagrangeCosetBitReversed[i], &qLagrangeCosetBitReversed[i]) - res[i].Add(&res[i], &t) - } +// qLagrangeCosetBitReversed and res are mutated. +// pLagrangeCosetBitReversed is not mutated. +func mulModAcc(res, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed {{.FF}}.Vector) { + qLagrangeCosetBitReversed.Mul(qLagrangeCosetBitReversed, pLagrangeCosetBitReversed) + res.Add(res, qLagrangeCosetBitReversed) } @@ -194,9 +193,7 @@ func deriveRandomElementFromSeed(seed, i, j int64) {{ .FF }}.Element { } -// VectorLimbReader reads a vector of field element, limb by limb. -// The elements are interpreted in little endian. -// The limb is also interpreted in little endian. +// VectorLimbReader iterates over a vector of field element, limb by limb. type VectorLimbReader struct { v {{ .FF }}.Vector buf [{{ .FF }}.Bytes]byte @@ -210,6 +207,8 @@ type VectorLimbReader struct { // NewVectorLimbReader creates a new VectorLimbReader // v: the vector to read // limbSize: the size of the limb in bytes (1, 2, 4 or 8) +// The elements are interpreted in little endian. +// The limb is also in little endian. 
func NewVectorLimbReader(v {{ .FF }}.Vector, limbSize int) *VectorLimbReader { var next func(buf []byte, pos *int) {{$tReturn}} switch limbSize { diff --git a/field/goldilocks/fft/fft.go b/field/goldilocks/fft/fft.go index 35c77f53de..20c5e57c55 100644 --- a/field/goldilocks/fft/fft.go +++ b/field/goldilocks/fft/fft.go @@ -264,8 +264,11 @@ func innerDIFWithTwiddles(a []goldilocks.Element, twiddles []goldilocks.Element, } for i := start; i < end; i++ { goldilocks.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := goldilocks.Vector(a[start+m : end+m]) + v2 := goldilocks.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []goldilocks.Element, at, w goldilocks.Element, start, end, m int) { @@ -350,8 +353,10 @@ func innerDITWithTwiddles(a []goldilocks.Element, twiddles []goldilocks.Element, goldilocks.Butterfly(&a[0], &a[m]) start++ } + v1 := goldilocks.Vector(a[start+m : end+m]) + v2 := goldilocks.Vector(twiddles[start:end]) + v1.Mul(v1, v2) for i := start; i < end; i++ { - a[i+m].Mul(&a[i+m], &twiddles[i]) goldilocks.Butterfly(&a[i], &a[i+m]) } } diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index d461ae711e..9de0ed2c7d 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -51,7 +51,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, errors.New("logTwoBound too large") } if logTwoBound%8 != 0 { - panic("logTwoBound must be a multiple of 8") + return nil, errors.New("logTwoBound must be a multiple of 8") } if bits.UintSize == 32 { return nil, errors.New("unsupported architecture; need 64bit target") @@ -111,7 +111,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - r.Domain.FFT(r.Ag[i], fft.DIF, 
fft.OnCoset()) + r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } }) @@ -167,12 +167,11 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { // The result is not FFTinversed. The fft inverse is done once every // multiplications are done. // then accumulates the mulMod result in res. -func mulModAcc(res []goldilocks.Element, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []goldilocks.Element) { - var t goldilocks.Element - for i := 0; i < len(pLagrangeCosetBitReversed); i++ { - t.Mul(&pLagrangeCosetBitReversed[i], &qLagrangeCosetBitReversed[i]) - res[i].Add(&res[i], &t) - } +// qLagrangeCosetBitReversed and res are mutated. +// pLagrangeCosetBitReversed is not mutated. +func mulModAcc(res, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed goldilocks.Vector) { + qLagrangeCosetBitReversed.Mul(qLagrangeCosetBitReversed, pLagrangeCosetBitReversed) + res.Add(res, qLagrangeCosetBitReversed) } func deriveRandomElementFromSeed(seed, i, j int64) goldilocks.Element { @@ -190,9 +189,7 @@ func deriveRandomElementFromSeed(seed, i, j int64) goldilocks.Element { return res } -// VectorLimbReader reads a vector of field element, limb by limb. -// The elements are interpreted in little endian. -// The limb is also interpreted in little endian. +// VectorLimbReader iterates over a vector of field element, limb by limb. type VectorLimbReader struct { v goldilocks.Vector buf [goldilocks.Bytes]byte @@ -206,6 +203,8 @@ type VectorLimbReader struct { // NewVectorLimbReader creates a new VectorLimbReader // v: the vector to read // limbSize: the size of the limb in bytes (1, 2, 4 or 8) +// The elements are interpreted in little endian. +// The limb is also in little endian. 
func NewVectorLimbReader(v goldilocks.Vector, limbSize int) *VectorLimbReader { var next func(buf []byte, pos *int) uint64 switch limbSize { @@ -234,7 +233,6 @@ func NewVectorLimbReader(v goldilocks.Vector, limbSize int) *VectorLimbReader { func (vr *VectorLimbReader) NextLimb() uint64 { if vr.j == goldilocks.Bytes { vr.j = 0 - // TODO @gbotrel we could return 0 in the case vr.i == len(vr.v) goldilocks.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) vr.i++ } diff --git a/field/koalabear/fft/fft.go b/field/koalabear/fft/fft.go index 391bec73f0..4c5e4ecfe9 100644 --- a/field/koalabear/fft/fft.go +++ b/field/koalabear/fft/fft.go @@ -264,8 +264,11 @@ func innerDIFWithTwiddles(a []koalabear.Element, twiddles []koalabear.Element, s } for i := start; i < end; i++ { koalabear.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := koalabear.Vector(a[start+m : end+m]) + v2 := koalabear.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []koalabear.Element, at, w koalabear.Element, start, end, m int) { @@ -350,8 +353,10 @@ func innerDITWithTwiddles(a []koalabear.Element, twiddles []koalabear.Element, s koalabear.Butterfly(&a[0], &a[m]) start++ } + v1 := koalabear.Vector(a[start+m : end+m]) + v2 := koalabear.Vector(twiddles[start:end]) + v1.Mul(v1, v2) for i := start; i < end; i++ { - a[i+m].Mul(&a[i+m], &twiddles[i]) koalabear.Butterfly(&a[i], &a[i+m]) } } diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index ff18823efd..e8179f6ab1 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -51,7 +51,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, errors.New("logTwoBound too large") } if logTwoBound%8 != 0 { - panic("logTwoBound must be a multiple of 8") + return nil, errors.New("logTwoBound must be a multiple of 8") } if bits.UintSize == 32 { return nil, 
errors.New("unsupported architecture; need 64bit target") @@ -111,7 +111,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset()) + r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } }) @@ -167,12 +167,11 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { // The result is not FFTinversed. The fft inverse is done once every // multiplications are done. // then accumulates the mulMod result in res. -func mulModAcc(res []koalabear.Element, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed []koalabear.Element) { - var t koalabear.Element - for i := 0; i < len(pLagrangeCosetBitReversed); i++ { - t.Mul(&pLagrangeCosetBitReversed[i], &qLagrangeCosetBitReversed[i]) - res[i].Add(&res[i], &t) - } +// qLagrangeCosetBitReversed and res are mutated. +// pLagrangeCosetBitReversed is not mutated. +func mulModAcc(res, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed koalabear.Vector) { + qLagrangeCosetBitReversed.Mul(qLagrangeCosetBitReversed, pLagrangeCosetBitReversed) + res.Add(res, qLagrangeCosetBitReversed) } func deriveRandomElementFromSeed(seed, i, j int64) koalabear.Element { @@ -190,9 +189,7 @@ func deriveRandomElementFromSeed(seed, i, j int64) koalabear.Element { return res } -// VectorLimbReader reads a vector of field element, limb by limb. -// The elements are interpreted in little endian. -// The limb is also interpreted in little endian. +// VectorLimbReader iterates over a vector of field element, limb by limb. type VectorLimbReader struct { v koalabear.Vector buf [koalabear.Bytes]byte @@ -206,6 +203,8 @@ type VectorLimbReader struct { // NewVectorLimbReader creates a new VectorLimbReader // v: the vector to read // limbSize: the size of the limb in bytes (1, 2, 4 or 8) +// The elements are interpreted in little endian. +// The limb is also in little endian. 
func NewVectorLimbReader(v koalabear.Vector, limbSize int) *VectorLimbReader { var next func(buf []byte, pos *int) uint32 switch limbSize { @@ -230,7 +229,6 @@ func NewVectorLimbReader(v koalabear.Vector, limbSize int) *VectorLimbReader { func (vr *VectorLimbReader) NextLimb() uint32 { if vr.j == koalabear.Bytes { vr.j = 0 - // TODO @gbotrel we could return 0 in the case vr.i == len(vr.v) koalabear.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) vr.i++ } From 8855a698757c6b06317ee10a2f47d3b7f029d985 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Wed, 8 Jan 2025 16:58:11 +0000 Subject: [PATCH 10/25] style: code cleaning --- ecc/bls12-377/fr/sis/sis.go | 26 +++++++++++-------- ecc/bls12-377/fr/sis/sis_test.go | 7 ++--- field/babybear/sis/sis.go | 26 +++++++++++-------- field/babybear/sis/sis_test.go | 9 ++++--- .../internal/templates/sis/sis.go.tmpl | 25 ++++++++++-------- .../internal/templates/sis/sis.test.go.tmpl | 9 ++++--- field/goldilocks/sis/sis.go | 26 +++++++++++-------- field/goldilocks/sis/sis_test.go | 7 ++--- field/koalabear/sis/sis.go | 26 +++++++++++-------- field/koalabear/sis/sis_test.go | 9 ++++--- 10 files changed, 97 insertions(+), 73 deletions(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index b8aa21c1fa..4051dcc4d8 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -65,10 +65,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // that is, to fill m, we need [degree * n * logTwoBound] bits of data // First n <- #limbs to represent a single field element - n := (fr.Bytes * 8) / logTwoBound - if n*logTwoBound < fr.Bytes*8 { - n++ - } + n := fr.Bytes / (logTwoBound / 8) // logTwoBound / 8 --> nbBytes per limb // Then multiply by the number of field elements n *= maxNbElementsToHash @@ -125,14 +122,16 @@ func (r *RSis) Hash(v, res []fr.Element) error { return fmt.Errorf("output vector must have length %d", r.Degree) } - for i := 0; i < len(res); i++ { - // TODO 
@gbotrel ensure that this is needed. - res[i].SetZero() - } if len(v) > r.maxNbElementsToHash { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } + // zeroing res + for i := 0; i < len(res); i++ { + res[i].SetZero() + } + + // decompose v limb by limb reader := NewVectorLimbReader(v, r.LogTwoBound/8) kz := make([]fr.Element, r.Degree) @@ -141,10 +140,15 @@ func (r *RSis) Hash(v, res []fr.Element) error { copy(k, kz) zero := uint64(0) - for j := 0; j < r.Degree; j++ { + for j := 0; j < r.Degree; j += 2 { + // read limbs 2 by 2 since degree is a power of 2 (> 1) l := reader.NextLimb() zero |= l k[j][0] = l + + l2 := reader.NextLimb() + zero |= l2 + k[j+1][0] = l2 } if zero == 0 { // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] @@ -153,10 +157,10 @@ func (r *RSis) Hash(v, res []fr.Element) error { } r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - mulModAcc(res, r.Ag[i], k) } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 + // reduces mod Xᵈ+1 + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) return nil } diff --git a/ecc/bls12-377/fr/sis/sis_test.go b/ecc/bls12-377/fr/sis/sis_test.go index b94a779836..c05a194112 100644 --- a/ecc/bls12-377/fr/sis/sis_test.go +++ b/ecc/bls12-377/fr/sis/sis_test.go @@ -24,9 +24,10 @@ type sisParams struct { } var params128Bits []sisParams = []sisParams{ + {logTwoBound: 8, logTwoDegree: 5}, {logTwoBound: 8, logTwoDegree: 6}, - // {logTwoBound: 16, logTwoDegree: 7}, - // {logTwoBound: 32, logTwoDegree: 8}, + {logTwoBound: 16, logTwoDegree: 6}, + {logTwoBound: 16, logTwoDegree: 9}, } type TestCases struct { @@ -166,7 +167,7 @@ func makeKeyDeterministic(t *testing.T, sis *RSis, _seed int64) { } const ( - LATENCY_MUL_FIELD_NS int = 18 + LATENCY_MUL_FIELD_NS int = 15 LATENCY_ADD_FIELD_NS int = 4 ) diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index 2fa855d55f..2763dd5ffb 
100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -65,10 +65,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // that is, to fill m, we need [degree * n * logTwoBound] bits of data // First n <- #limbs to represent a single field element - n := (babybear.Bytes * 8) / logTwoBound - if n*logTwoBound < babybear.Bytes*8 { - n++ - } + n := babybear.Bytes / (logTwoBound / 8) // logTwoBound / 8 --> nbBytes per limb // Then multiply by the number of field elements n *= maxNbElementsToHash @@ -125,14 +122,16 @@ func (r *RSis) Hash(v, res []babybear.Element) error { return fmt.Errorf("output vector must have length %d", r.Degree) } - for i := 0; i < len(res); i++ { - // TODO @gbotrel ensure that this is needed. - res[i].SetZero() - } if len(v) > r.maxNbElementsToHash { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } + // zeroing res + for i := 0; i < len(res); i++ { + res[i].SetZero() + } + + // decompose v limb by limb reader := NewVectorLimbReader(v, r.LogTwoBound/8) kz := make([]babybear.Element, r.Degree) @@ -141,10 +140,15 @@ func (r *RSis) Hash(v, res []babybear.Element) error { copy(k, kz) zero := uint32(0) - for j := 0; j < r.Degree; j++ { + for j := 0; j < r.Degree; j += 2 { + // read limbs 2 by 2 since degree is a power of 2 (> 1) l := reader.NextLimb() zero |= l k[j][0] = l + + l2 := reader.NextLimb() + zero |= l2 + k[j+1][0] = l2 } if zero == 0 { // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] @@ -153,10 +157,10 @@ func (r *RSis) Hash(v, res []babybear.Element) error { } r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - mulModAcc(res, r.Ag[i], k) } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 + // reduces mod Xᵈ+1 + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) return nil } diff --git a/field/babybear/sis/sis_test.go b/field/babybear/sis/sis_test.go 
index 54e5ac4d5b..2561071b9c 100644 --- a/field/babybear/sis/sis_test.go +++ b/field/babybear/sis/sis_test.go @@ -24,9 +24,10 @@ type sisParams struct { } var params128Bits []sisParams = []sisParams{ + {logTwoBound: 8, logTwoDegree: 5}, {logTwoBound: 8, logTwoDegree: 6}, - // {logTwoBound: 16, logTwoDegree: 7}, - // {logTwoBound: 32, logTwoDegree: 8}, + {logTwoBound: 16, logTwoDegree: 6}, + {logTwoBound: 16, logTwoDegree: 9}, } type TestCases struct { @@ -166,8 +167,8 @@ func makeKeyDeterministic(t *testing.T, sis *RSis, _seed int64) { } const ( - LATENCY_MUL_FIELD_NS int = 18 - LATENCY_ADD_FIELD_NS int = 4 + LATENCY_MUL_FIELD_NS int = 4 + LATENCY_ADD_FIELD_NS int = 2 ) // Estimate the theoretical performances that are achievable using ring-SIS diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index c4425624a6..312574e4ee 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -65,10 +65,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // that is, to fill m, we need [degree * n * logTwoBound] bits of data // First n <- #limbs to represent a single field element - n := ({{ .FF }}.Bytes * 8) / logTwoBound - if n*logTwoBound < {{ .FF }}.Bytes*8 { - n++ - } + n := {{ .FF }}.Bytes / (logTwoBound / 8) // logTwoBound / 8 --> nbBytes per limb // Then multiply by the number of field elements n *= maxNbElementsToHash @@ -126,15 +123,16 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { return fmt.Errorf("output vector must have length %d", r.Degree) } - for i := 0; i < len(res); i++ { - // TODO @gbotrel ensure that this is needed. 
- res[i].SetZero() - } if len(v) > r.maxNbElementsToHash { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } + // zeroing res + for i := 0; i < len(res); i++ { + res[i].SetZero() + } + // decompose v limb by limb reader := NewVectorLimbReader(v, r.LogTwoBound/8) kz := make([]{{ .FF }}.Element, r.Degree) @@ -143,10 +141,15 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { copy(k, kz) zero := {{$tReturn}}(0) - for j := 0; j < r.Degree; j++ { + for j := 0; j < r.Degree; j+=2 { + // read limbs 2 by 2 since degree is a power of 2 (> 1) l := reader.NextLimb() zero |= l k[j][0] = l + + l2 := reader.NextLimb() + zero |= l2 + k[j+1][0] = l2 } if zero == 0 { // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] @@ -155,10 +158,10 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { } r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - mulModAcc(res, r.Ag[i], k) } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 + // reduces mod Xᵈ+1 + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) return nil } diff --git a/field/generator/internal/templates/sis/sis.test.go.tmpl b/field/generator/internal/templates/sis/sis.test.go.tmpl index 44073b0e94..d9fcb4e6d2 100644 --- a/field/generator/internal/templates/sis/sis.test.go.tmpl +++ b/field/generator/internal/templates/sis/sis.test.go.tmpl @@ -17,9 +17,10 @@ type sisParams struct { } var params128Bits []sisParams = []sisParams{ + {logTwoBound: 8, logTwoDegree: 5}, {logTwoBound: 8, logTwoDegree: 6}, - // {logTwoBound: 16, logTwoDegree: 7}, - // {logTwoBound: 32, logTwoDegree: 8}, + {logTwoBound: 16, logTwoDegree: 6}, + {logTwoBound: 16, logTwoDegree: 9}, } type TestCases struct { @@ -163,8 +164,8 @@ func makeKeyDeterministic(t *testing.T, sis *RSis, _seed int64) { } const ( - LATENCY_MUL_FIELD_NS int = 18 - LATENCY_ADD_FIELD_NS int = 4 + LATENCY_MUL_FIELD_NS int = {{- if $f31 }}4{{- else 
}}15{{- end}} + LATENCY_ADD_FIELD_NS int = {{- if $f31 }}2{{- else }}4{{- end}} ) // Estimate the theoretical performances that are achievable using ring-SIS diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index 9de0ed2c7d..9d069bbd23 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -65,10 +65,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // that is, to fill m, we need [degree * n * logTwoBound] bits of data // First n <- #limbs to represent a single field element - n := (goldilocks.Bytes * 8) / logTwoBound - if n*logTwoBound < goldilocks.Bytes*8 { - n++ - } + n := goldilocks.Bytes / (logTwoBound / 8) // logTwoBound / 8 --> nbBytes per limb // Then multiply by the number of field elements n *= maxNbElementsToHash @@ -125,14 +122,16 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { return fmt.Errorf("output vector must have length %d", r.Degree) } - for i := 0; i < len(res); i++ { - // TODO @gbotrel ensure that this is needed. 
- res[i].SetZero() - } if len(v) > r.maxNbElementsToHash { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } + // zeroing res + for i := 0; i < len(res); i++ { + res[i].SetZero() + } + + // decompose v limb by limb reader := NewVectorLimbReader(v, r.LogTwoBound/8) kz := make([]goldilocks.Element, r.Degree) @@ -141,10 +140,15 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { copy(k, kz) zero := uint64(0) - for j := 0; j < r.Degree; j++ { + for j := 0; j < r.Degree; j += 2 { + // read limbs 2 by 2 since degree is a power of 2 (> 1) l := reader.NextLimb() zero |= l k[j][0] = l + + l2 := reader.NextLimb() + zero |= l2 + k[j+1][0] = l2 } if zero == 0 { // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] @@ -153,10 +157,10 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { } r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - mulModAcc(res, r.Ag[i], k) } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 + // reduces mod Xᵈ+1 + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) return nil } diff --git a/field/goldilocks/sis/sis_test.go b/field/goldilocks/sis/sis_test.go index 66ef08a61b..9006abd295 100644 --- a/field/goldilocks/sis/sis_test.go +++ b/field/goldilocks/sis/sis_test.go @@ -24,9 +24,10 @@ type sisParams struct { } var params128Bits []sisParams = []sisParams{ + {logTwoBound: 8, logTwoDegree: 5}, {logTwoBound: 8, logTwoDegree: 6}, - // {logTwoBound: 16, logTwoDegree: 7}, - // {logTwoBound: 32, logTwoDegree: 8}, + {logTwoBound: 16, logTwoDegree: 6}, + {logTwoBound: 16, logTwoDegree: 9}, } type TestCases struct { @@ -166,7 +167,7 @@ func makeKeyDeterministic(t *testing.T, sis *RSis, _seed int64) { } const ( - LATENCY_MUL_FIELD_NS int = 18 + LATENCY_MUL_FIELD_NS int = 15 LATENCY_ADD_FIELD_NS int = 4 ) diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index e8179f6ab1..530983ea3a 100644 --- 
a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -65,10 +65,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // that is, to fill m, we need [degree * n * logTwoBound] bits of data // First n <- #limbs to represent a single field element - n := (koalabear.Bytes * 8) / logTwoBound - if n*logTwoBound < koalabear.Bytes*8 { - n++ - } + n := koalabear.Bytes / (logTwoBound / 8) // logTwoBound / 8 --> nbBytes per limb // Then multiply by the number of field elements n *= maxNbElementsToHash @@ -125,14 +122,16 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { return fmt.Errorf("output vector must have length %d", r.Degree) } - for i := 0; i < len(res); i++ { - // TODO @gbotrel ensure that this is needed. - res[i].SetZero() - } if len(v) > r.maxNbElementsToHash { return fmt.Errorf("can't hash more than %d elements with params provided in constructor", r.maxNbElementsToHash) } + // zeroing res + for i := 0; i < len(res); i++ { + res[i].SetZero() + } + + // decompose v limb by limb reader := NewVectorLimbReader(v, r.LogTwoBound/8) kz := make([]koalabear.Element, r.Degree) @@ -141,10 +140,15 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { copy(k, kz) zero := uint32(0) - for j := 0; j < r.Degree; j++ { + for j := 0; j < r.Degree; j += 2 { + // read limbs 2 by 2 since degree is a power of 2 (> 1) l := reader.NextLimb() zero |= l k[j][0] = l + + l2 := reader.NextLimb() + zero |= l2 + k[j+1][0] = l2 } if zero == 0 { // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] @@ -153,10 +157,10 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { } r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - mulModAcc(res, r.Ag[i], k) } - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) // -> reduces mod Xᵈ+1 + // reduces mod Xᵈ+1 + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) return nil } diff --git a/field/koalabear/sis/sis_test.go b/field/koalabear/sis/sis_test.go 
index 729a836605..5928248f34 100644 --- a/field/koalabear/sis/sis_test.go +++ b/field/koalabear/sis/sis_test.go @@ -24,9 +24,10 @@ type sisParams struct { } var params128Bits []sisParams = []sisParams{ + {logTwoBound: 8, logTwoDegree: 5}, {logTwoBound: 8, logTwoDegree: 6}, - // {logTwoBound: 16, logTwoDegree: 7}, - // {logTwoBound: 32, logTwoDegree: 8}, + {logTwoBound: 16, logTwoDegree: 6}, + {logTwoBound: 16, logTwoDegree: 9}, } type TestCases struct { @@ -166,8 +167,8 @@ func makeKeyDeterministic(t *testing.T, sis *RSis, _seed int64) { } const ( - LATENCY_MUL_FIELD_NS int = 18 - LATENCY_ADD_FIELD_NS int = 4 + LATENCY_MUL_FIELD_NS int = 4 + LATENCY_ADD_FIELD_NS int = 2 ) // Estimate the theoretical performances that are achievable using ring-SIS From 455b89a34739b0a5cbc14eeeb36e8c652af7aae9 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Wed, 8 Jan 2025 22:36:28 +0000 Subject: [PATCH 11/25] refactor: prepare terrain for linea integration --- ecc/bls12-377/fr/sis/sis.go | 66 ++++++++++++------ ecc/bls12-377/fr/sis/sis_test.go | 2 +- field/babybear/sis/sis.go | 66 ++++++++++++------ field/babybear/sis/sis_test.go | 2 +- .../internal/templates/sis/sis.go.tmpl | 68 +++++++++++++------ .../internal/templates/sis/sis.test.go.tmpl | 2 +- field/goldilocks/sis/sis.go | 66 ++++++++++++------ field/goldilocks/sis/sis_test.go | 2 +- field/koalabear/sis/sis.go | 66 ++++++++++++------ field/koalabear/sis/sis_test.go | 2 +- 10 files changed, 237 insertions(+), 105 deletions(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 4051dcc4d8..dc56e241af 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -131,16 +131,26 @@ func (r *RSis) Hash(v, res []fr.Element) error { res[i].SetZero() } - // decompose v limb by limb - reader := NewVectorLimbReader(v, r.LogTwoBound/8) - - kz := make([]fr.Element, r.Degree) k := make([]fr.Element, r.Degree) - for i := 0; i < len(r.Ag); i++ { - copy(k, kz) + // inner hash + 
r.InnerHash(&vectorIterator{v: v}, res, k) + + // reduces mod Xᵈ+1 + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) + + return nil +} + +func (r *RSis) InnerHash(it ElementIterator, res, k fr.Vector) { + reader := NewLimbIterator(it, r.LogTwoBound/8) + + for i := 0; i < len(r.Ag); i++ { zero := uint64(0) for j := 0; j < r.Degree; j += 2 { + k[j].SetZero() + k[j+1].SetZero() + // read limbs 2 by 2 since degree is a power of 2 (> 1) l := reader.NextLimb() zero |= l @@ -159,10 +169,7 @@ func (r *RSis) Hash(v, res []fr.Element) error { r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) mulModAcc(res, r.Ag[i], k) } - // reduces mod Xᵈ+1 - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) - return nil } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -193,23 +200,42 @@ func deriveRandomElementFromSeed(seed, i, j int64) fr.Element { return res } -// VectorLimbReader iterates over a vector of field element, limb by limb. -type VectorLimbReader struct { - v fr.Vector +// TODO @gbotrel explore generic perf impact + go 1.23 iterators + +// ElementIterator is an iterator over a stream of field elements. +type ElementIterator interface { + Next() (fr.Element, bool) +} + +type vectorIterator struct { + v fr.Vector + i int +} + +func (vi *vectorIterator) Next() (fr.Element, bool) { + if vi.i == len(vi.v) { + return fr.Element{}, false + } + vi.i++ + return vi.v[vi.i-1], true +} + +// LimbIterator iterates over a vector of field element, limb by limb. +type LimbIterator struct { + it ElementIterator buf [fr.Bytes]byte - i int // position in vector j int // position in buf next func(buf []byte, pos *int) uint64 } -// NewVectorLimbReader creates a new VectorLimbReader +// NewLimbIterator creates a new LimbIterator // v: the vector to read // limbSize: the size of the limb in bytes (1, 2, 4 or 8) // The elements are interpreted in little endian. // The limb is also in little endian. 
-func NewVectorLimbReader(v fr.Vector, limbSize int) *VectorLimbReader { +func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { var next func(buf []byte, pos *int) uint64 switch limbSize { case 1: @@ -224,8 +250,8 @@ func NewVectorLimbReader(v fr.Vector, limbSize int) *VectorLimbReader { default: panic("unsupported limb size") } - return &VectorLimbReader{ - v: v, + return &LimbIterator{ + it: it, j: fr.Bytes, next: next, } @@ -234,11 +260,11 @@ func NewVectorLimbReader(v fr.Vector, limbSize int) *VectorLimbReader { // NextLimb returns the next limb of the vector. // This does not perform any bound check, may trigger an out of bound panic. // If underlying vector is "out of limb" -func (vr *VectorLimbReader) NextLimb() uint64 { +func (vr *LimbIterator) NextLimb() uint64 { if vr.j == fr.Bytes { vr.j = 0 - fr.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) - vr.i++ + next, _ := vr.it.Next() + fr.LittleEndian.PutElement(&vr.buf, next) } return vr.next(vr.buf[:], &vr.j) } diff --git a/ecc/bls12-377/fr/sis/sis_test.go b/ecc/bls12-377/fr/sis/sis_test.go index c05a194112..1765e0b173 100644 --- a/ecc/bls12-377/fr/sis/sis_test.go +++ b/ecc/bls12-377/fr/sis/sis_test.go @@ -108,7 +108,7 @@ func TestLimbDecomposeBytes(t *testing.T) { logTwoBound := 8 for cc := 0; cc < 3; cc++ { - vr := NewVectorLimbReader(a, logTwoBound/8) + vr := NewLimbIterator(&vectorIterator{v: a}, logTwoBound/8) m := make(fr.Vector, nbElmts*fr.Bytes*8/logTwoBound) for i := 0; i < len(m); i++ { m[i][0] = vr.NextLimb() diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index 2763dd5ffb..e4ef0177a1 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -131,16 +131,26 @@ func (r *RSis) Hash(v, res []babybear.Element) error { res[i].SetZero() } - // decompose v limb by limb - reader := NewVectorLimbReader(v, r.LogTwoBound/8) - - kz := make([]babybear.Element, r.Degree) k := make([]babybear.Element, r.Degree) - for i := 0; i < len(r.Ag); i++ { - copy(k, kz) + // 
inner hash + r.InnerHash(&vectorIterator{v: v}, res, k) + + // reduces mod Xᵈ+1 + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) + + return nil +} + +func (r *RSis) InnerHash(it ElementIterator, res, k babybear.Vector) { + reader := NewLimbIterator(it, r.LogTwoBound/8) + + for i := 0; i < len(r.Ag); i++ { zero := uint32(0) for j := 0; j < r.Degree; j += 2 { + k[j].SetZero() + k[j+1].SetZero() + // read limbs 2 by 2 since degree is a power of 2 (> 1) l := reader.NextLimb() zero |= l @@ -159,10 +169,7 @@ func (r *RSis) Hash(v, res []babybear.Element) error { r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) mulModAcc(res, r.Ag[i], k) } - // reduces mod Xᵈ+1 - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) - return nil } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -193,23 +200,42 @@ func deriveRandomElementFromSeed(seed, i, j int64) babybear.Element { return res } -// VectorLimbReader iterates over a vector of field element, limb by limb. -type VectorLimbReader struct { - v babybear.Vector +// TODO @gbotrel explore generic perf impact + go 1.23 iterators + +// ElementIterator is an iterator over a stream of field elements. +type ElementIterator interface { + Next() (babybear.Element, bool) +} + +type vectorIterator struct { + v babybear.Vector + i int +} + +func (vi *vectorIterator) Next() (babybear.Element, bool) { + if vi.i == len(vi.v) { + return babybear.Element{}, false + } + vi.i++ + return vi.v[vi.i-1], true +} + +// LimbIterator iterates over a vector of field element, limb by limb. +type LimbIterator struct { + it ElementIterator buf [babybear.Bytes]byte - i int // position in vector j int // position in buf next func(buf []byte, pos *int) uint32 } -// NewVectorLimbReader creates a new VectorLimbReader +// NewLimbIterator creates a new LimbIterator // v: the vector to read // limbSize: the size of the limb in bytes (1, 2, 4 or 8) // The elements are interpreted in little endian. 
// The limb is also in little endian. -func NewVectorLimbReader(v babybear.Vector, limbSize int) *VectorLimbReader { +func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { var next func(buf []byte, pos *int) uint32 switch limbSize { case 1: @@ -220,8 +246,8 @@ func NewVectorLimbReader(v babybear.Vector, limbSize int) *VectorLimbReader { default: panic("unsupported limb size") } - return &VectorLimbReader{ - v: v, + return &LimbIterator{ + it: it, j: babybear.Bytes, next: next, } @@ -230,11 +256,11 @@ func NewVectorLimbReader(v babybear.Vector, limbSize int) *VectorLimbReader { // NextLimb returns the next limb of the vector. // This does not perform any bound check, may trigger an out of bound panic. // If underlying vector is "out of limb" -func (vr *VectorLimbReader) NextLimb() uint32 { +func (vr *LimbIterator) NextLimb() uint32 { if vr.j == babybear.Bytes { vr.j = 0 - babybear.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) - vr.i++ + next, _ := vr.it.Next() + babybear.LittleEndian.PutElement(&vr.buf, next) } return vr.next(vr.buf[:], &vr.j) } diff --git a/field/babybear/sis/sis_test.go b/field/babybear/sis/sis_test.go index 2561071b9c..9c1e89df24 100644 --- a/field/babybear/sis/sis_test.go +++ b/field/babybear/sis/sis_test.go @@ -108,7 +108,7 @@ func TestLimbDecomposeBytes(t *testing.T) { logTwoBound := 8 for cc := 0; cc < 1; cc++ { - vr := NewVectorLimbReader(a, logTwoBound/8) + vr := NewLimbIterator(&vectorIterator{v: a}, logTwoBound/8) m := make(babybear.Vector, nbElmts*babybear.Bytes*8/logTwoBound) for i := 0; i < len(m); i++ { m[i][0] = vr.NextLimb() diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index 312574e4ee..4b6ea6a30d 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -115,7 +115,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return r, nil } - // Hash 
interprets the input vector as a sequence of coefficients of size r.LogTwoBound bits long, // and return the hash of the polynomial corresponding to the sum sum_i A[i]*m Mod X^{d}+1 func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { @@ -132,16 +131,27 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { res[i].SetZero() } - // decompose v limb by limb - reader := NewVectorLimbReader(v, r.LogTwoBound/8) - - kz := make([]{{ .FF }}.Element, r.Degree) k := make([]{{ .FF }}.Element, r.Degree) - for i := 0; i < len(r.Ag); i++ { - copy(k, kz) + // inner hash + r.InnerHash(&vectorIterator{v: v}, res, k) + + + // reduces mod Xᵈ+1 + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) + + return nil +} + +func (r *RSis) InnerHash(it ElementIterator, res, k {{ .FF }}.Vector) { + reader := NewLimbIterator(it, r.LogTwoBound/8) + + for i := 0; i < len(r.Ag); i++ { zero := {{$tReturn}}(0) for j := 0; j < r.Degree; j+=2 { + k[j].SetZero() + k[j+1].SetZero() + // read limbs 2 by 2 since degree is a power of 2 (> 1) l := reader.NextLimb() zero |= l @@ -160,10 +170,7 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) mulModAcc(res, r.Ag[i], k) } - // reduces mod Xᵈ+1 - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) - return nil } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -196,23 +203,44 @@ func deriveRandomElementFromSeed(seed, i, j int64) {{ .FF }}.Element { } -// VectorLimbReader iterates over a vector of field element, limb by limb. -type VectorLimbReader struct { +// TODO @gbotrel explore generic perf impact + go 1.23 iterators + +// ElementIterator is an iterator over a stream of field elements. 
+type ElementIterator interface { + Next() ({{ .FF }}.Element, bool) +} + +type vectorIterator struct { v {{ .FF }}.Vector + i int +} + +func (vi *vectorIterator) Next() ({{ .FF }}.Element, bool) { + if vi.i == len(vi.v) { + return {{ .FF }}.Element{}, false + } + vi.i++ + return vi.v[vi.i-1], true +} + + + +// LimbIterator iterates over a vector of field element, limb by limb. +type LimbIterator struct { + it ElementIterator buf [{{ .FF }}.Bytes]byte - i int // position in vector j int // position in buf next func(buf []byte, pos *int) {{$tReturn}} } -// NewVectorLimbReader creates a new VectorLimbReader +// NewLimbIterator creates a new LimbIterator // v: the vector to read // limbSize: the size of the limb in bytes (1, 2, 4 or 8) // The elements are interpreted in little endian. // The limb is also in little endian. -func NewVectorLimbReader(v {{ .FF }}.Vector, limbSize int) *VectorLimbReader { +func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { var next func(buf []byte, pos *int) {{$tReturn}} switch limbSize { case 1: @@ -228,8 +256,8 @@ func NewVectorLimbReader(v {{ .FF }}.Vector, limbSize int) *VectorLimbReader { default: panic("unsupported limb size") } - return &VectorLimbReader{ - v: v, + return &LimbIterator{ + it: it, j: {{ .FF }}.Bytes, next: next, } @@ -238,11 +266,11 @@ func NewVectorLimbReader(v {{ .FF }}.Vector, limbSize int) *VectorLimbReader { // NextLimb returns the next limb of the vector. // This does not perform any bound check, may trigger an out of bound panic. 
// If underlying vector is "out of limb" -func (vr *VectorLimbReader) NextLimb() {{$tReturn}} { +func (vr *LimbIterator) NextLimb() {{$tReturn}} { if vr.j == {{ .FF }}.Bytes { vr.j = 0 - {{.FF}}.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) - vr.i++ + next, _ := vr.it.Next() + {{.FF}}.LittleEndian.PutElement(&vr.buf, next) } return vr.next(vr.buf[:], &vr.j) } diff --git a/field/generator/internal/templates/sis/sis.test.go.tmpl b/field/generator/internal/templates/sis/sis.test.go.tmpl index d9fcb4e6d2..44e01f34c4 100644 --- a/field/generator/internal/templates/sis/sis.test.go.tmpl +++ b/field/generator/internal/templates/sis/sis.test.go.tmpl @@ -105,7 +105,7 @@ func TestLimbDecomposeBytes(t *testing.T) { logTwoBound := 8 for cc:=0;cc<{{- if $f31 }}1{{- else }}3{{- end}}; cc++ { - vr := NewVectorLimbReader(a, logTwoBound/8) + vr := NewLimbIterator(&vectorIterator{v:a}, logTwoBound/8) m := make({{ .FF }}.Vector, nbElmts*{{ .FF }}.Bytes*8/logTwoBound) for i := 0; i < len(m); i++ { m[i][0] = vr.NextLimb() diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index 9d069bbd23..3b8f6a20eb 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -131,16 +131,26 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { res[i].SetZero() } - // decompose v limb by limb - reader := NewVectorLimbReader(v, r.LogTwoBound/8) - - kz := make([]goldilocks.Element, r.Degree) k := make([]goldilocks.Element, r.Degree) - for i := 0; i < len(r.Ag); i++ { - copy(k, kz) + // inner hash + r.InnerHash(&vectorIterator{v: v}, res, k) + + // reduces mod Xᵈ+1 + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) + + return nil +} + +func (r *RSis) InnerHash(it ElementIterator, res, k goldilocks.Vector) { + reader := NewLimbIterator(it, r.LogTwoBound/8) + + for i := 0; i < len(r.Ag); i++ { zero := uint64(0) for j := 0; j < r.Degree; j += 2 { + k[j].SetZero() + k[j+1].SetZero() + // read limbs 2 by 2 since degree is a power of 2 (> 1) l := 
reader.NextLimb() zero |= l @@ -159,10 +169,7 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) mulModAcc(res, r.Ag[i], k) } - // reduces mod Xᵈ+1 - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) - return nil } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -193,23 +200,42 @@ func deriveRandomElementFromSeed(seed, i, j int64) goldilocks.Element { return res } -// VectorLimbReader iterates over a vector of field element, limb by limb. -type VectorLimbReader struct { - v goldilocks.Vector +// TODO @gbotrel explore generic perf impact + go 1.23 iterators + +// ElementIterator is an iterator over a stream of field elements. +type ElementIterator interface { + Next() (goldilocks.Element, bool) +} + +type vectorIterator struct { + v goldilocks.Vector + i int +} + +func (vi *vectorIterator) Next() (goldilocks.Element, bool) { + if vi.i == len(vi.v) { + return goldilocks.Element{}, false + } + vi.i++ + return vi.v[vi.i-1], true +} + +// LimbIterator iterates over a vector of field element, limb by limb. +type LimbIterator struct { + it ElementIterator buf [goldilocks.Bytes]byte - i int // position in vector j int // position in buf next func(buf []byte, pos *int) uint64 } -// NewVectorLimbReader creates a new VectorLimbReader +// NewLimbIterator creates a new LimbIterator // v: the vector to read // limbSize: the size of the limb in bytes (1, 2, 4 or 8) // The elements are interpreted in little endian. // The limb is also in little endian. 
-func NewVectorLimbReader(v goldilocks.Vector, limbSize int) *VectorLimbReader { +func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { var next func(buf []byte, pos *int) uint64 switch limbSize { case 1: @@ -224,8 +250,8 @@ func NewVectorLimbReader(v goldilocks.Vector, limbSize int) *VectorLimbReader { default: panic("unsupported limb size") } - return &VectorLimbReader{ - v: v, + return &LimbIterator{ + it: it, j: goldilocks.Bytes, next: next, } @@ -234,11 +260,11 @@ func NewVectorLimbReader(v goldilocks.Vector, limbSize int) *VectorLimbReader { // NextLimb returns the next limb of the vector. // This does not perform any bound check, may trigger an out of bound panic. // If underlying vector is "out of limb" -func (vr *VectorLimbReader) NextLimb() uint64 { +func (vr *LimbIterator) NextLimb() uint64 { if vr.j == goldilocks.Bytes { vr.j = 0 - goldilocks.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) - vr.i++ + next, _ := vr.it.Next() + goldilocks.LittleEndian.PutElement(&vr.buf, next) } return vr.next(vr.buf[:], &vr.j) } diff --git a/field/goldilocks/sis/sis_test.go b/field/goldilocks/sis/sis_test.go index 9006abd295..e14052c23e 100644 --- a/field/goldilocks/sis/sis_test.go +++ b/field/goldilocks/sis/sis_test.go @@ -108,7 +108,7 @@ func TestLimbDecomposeBytes(t *testing.T) { logTwoBound := 8 for cc := 0; cc < 3; cc++ { - vr := NewVectorLimbReader(a, logTwoBound/8) + vr := NewLimbIterator(&vectorIterator{v: a}, logTwoBound/8) m := make(goldilocks.Vector, nbElmts*goldilocks.Bytes*8/logTwoBound) for i := 0; i < len(m); i++ { m[i][0] = vr.NextLimb() diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index 530983ea3a..afbed5dbd1 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -131,16 +131,26 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { res[i].SetZero() } - // decompose v limb by limb - reader := NewVectorLimbReader(v, r.LogTwoBound/8) - - kz := make([]koalabear.Element, r.Degree) k := 
make([]koalabear.Element, r.Degree) - for i := 0; i < len(r.Ag); i++ { - copy(k, kz) + // inner hash + r.InnerHash(&vectorIterator{v: v}, res, k) + + // reduces mod Xᵈ+1 + r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) + + return nil +} + +func (r *RSis) InnerHash(it ElementIterator, res, k koalabear.Vector) { + reader := NewLimbIterator(it, r.LogTwoBound/8) + + for i := 0; i < len(r.Ag); i++ { zero := uint32(0) for j := 0; j < r.Degree; j += 2 { + k[j].SetZero() + k[j+1].SetZero() + // read limbs 2 by 2 since degree is a power of 2 (> 1) l := reader.NextLimb() zero |= l @@ -159,10 +169,7 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) mulModAcc(res, r.Ag[i], k) } - // reduces mod Xᵈ+1 - r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) - return nil } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -193,23 +200,42 @@ func deriveRandomElementFromSeed(seed, i, j int64) koalabear.Element { return res } -// VectorLimbReader iterates over a vector of field element, limb by limb. -type VectorLimbReader struct { - v koalabear.Vector +// TODO @gbotrel explore generic perf impact + go 1.23 iterators + +// ElementIterator is an iterator over a stream of field elements. +type ElementIterator interface { + Next() (koalabear.Element, bool) +} + +type vectorIterator struct { + v koalabear.Vector + i int +} + +func (vi *vectorIterator) Next() (koalabear.Element, bool) { + if vi.i == len(vi.v) { + return koalabear.Element{}, false + } + vi.i++ + return vi.v[vi.i-1], true +} + +// LimbIterator iterates over a vector of field element, limb by limb. 
+type LimbIterator struct { + it ElementIterator buf [koalabear.Bytes]byte - i int // position in vector j int // position in buf next func(buf []byte, pos *int) uint32 } -// NewVectorLimbReader creates a new VectorLimbReader +// NewLimbIterator creates a new LimbIterator // v: the vector to read // limbSize: the size of the limb in bytes (1, 2, 4 or 8) // The elements are interpreted in little endian. // The limb is also in little endian. -func NewVectorLimbReader(v koalabear.Vector, limbSize int) *VectorLimbReader { +func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { var next func(buf []byte, pos *int) uint32 switch limbSize { case 1: @@ -220,8 +246,8 @@ func NewVectorLimbReader(v koalabear.Vector, limbSize int) *VectorLimbReader { default: panic("unsupported limb size") } - return &VectorLimbReader{ - v: v, + return &LimbIterator{ + it: it, j: koalabear.Bytes, next: next, } @@ -230,11 +256,11 @@ func NewVectorLimbReader(v koalabear.Vector, limbSize int) *VectorLimbReader { // NextLimb returns the next limb of the vector. // This does not perform any bound check, may trigger an out of bound panic. 
// If underlying vector is "out of limb" -func (vr *VectorLimbReader) NextLimb() uint32 { +func (vr *LimbIterator) NextLimb() uint32 { if vr.j == koalabear.Bytes { vr.j = 0 - koalabear.LittleEndian.PutElement(&vr.buf, vr.v[vr.i]) - vr.i++ + next, _ := vr.it.Next() + koalabear.LittleEndian.PutElement(&vr.buf, next) } return vr.next(vr.buf[:], &vr.j) } diff --git a/field/koalabear/sis/sis_test.go b/field/koalabear/sis/sis_test.go index 5928248f34..c7615c4074 100644 --- a/field/koalabear/sis/sis_test.go +++ b/field/koalabear/sis/sis_test.go @@ -108,7 +108,7 @@ func TestLimbDecomposeBytes(t *testing.T) { logTwoBound := 8 for cc := 0; cc < 1; cc++ { - vr := NewVectorLimbReader(a, logTwoBound/8) + vr := NewLimbIterator(&vectorIterator{v: a}, logTwoBound/8) m := make(koalabear.Vector, nbElmts*koalabear.Bytes*8/logTwoBound) for i := 0; i < len(m); i++ { m[i][0] = vr.NextLimb() From dcf62e02d65b156f5797fa074b99e12fec03e2b9 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Wed, 8 Jan 2025 19:21:36 -0600 Subject: [PATCH 12/25] feat: expose vector iterator in sis package --- ecc/bls12-377/fr/sis/sis.go | 6 +++--- ecc/bls12-377/fr/sis/sis_test.go | 2 +- field/babybear/sis/sis.go | 6 +++--- field/babybear/sis/sis_test.go | 2 +- field/generator/internal/templates/sis/sis.go.tmpl | 6 +++--- field/generator/internal/templates/sis/sis.test.go.tmpl | 2 +- field/goldilocks/sis/sis.go | 6 +++--- field/goldilocks/sis/sis_test.go | 2 +- field/koalabear/sis/sis.go | 6 +++--- field/koalabear/sis/sis_test.go | 2 +- 10 files changed, 20 insertions(+), 20 deletions(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index dc56e241af..54d5b261fc 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -134,7 +134,7 @@ func (r *RSis) Hash(v, res []fr.Element) error { k := make([]fr.Element, r.Degree) // inner hash - r.InnerHash(&vectorIterator{v: v}, res, k) + r.InnerHash(&VectorIterator{v: v}, res, k) // reduces mod Xᵈ+1 r.Domain.FFTInverse(res, 
fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) @@ -207,12 +207,12 @@ type ElementIterator interface { Next() (fr.Element, bool) } -type vectorIterator struct { +type VectorIterator struct { v fr.Vector i int } -func (vi *vectorIterator) Next() (fr.Element, bool) { +func (vi *VectorIterator) Next() (fr.Element, bool) { if vi.i == len(vi.v) { return fr.Element{}, false } diff --git a/ecc/bls12-377/fr/sis/sis_test.go b/ecc/bls12-377/fr/sis/sis_test.go index 1765e0b173..f90a19bc41 100644 --- a/ecc/bls12-377/fr/sis/sis_test.go +++ b/ecc/bls12-377/fr/sis/sis_test.go @@ -108,7 +108,7 @@ func TestLimbDecomposeBytes(t *testing.T) { logTwoBound := 8 for cc := 0; cc < 3; cc++ { - vr := NewLimbIterator(&vectorIterator{v: a}, logTwoBound/8) + vr := NewLimbIterator(&VectorIterator{v: a}, logTwoBound/8) m := make(fr.Vector, nbElmts*fr.Bytes*8/logTwoBound) for i := 0; i < len(m); i++ { m[i][0] = vr.NextLimb() diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index e4ef0177a1..9666998004 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -134,7 +134,7 @@ func (r *RSis) Hash(v, res []babybear.Element) error { k := make([]babybear.Element, r.Degree) // inner hash - r.InnerHash(&vectorIterator{v: v}, res, k) + r.InnerHash(&VectorIterator{v: v}, res, k) // reduces mod Xᵈ+1 r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) @@ -207,12 +207,12 @@ type ElementIterator interface { Next() (babybear.Element, bool) } -type vectorIterator struct { +type VectorIterator struct { v babybear.Vector i int } -func (vi *vectorIterator) Next() (babybear.Element, bool) { +func (vi *VectorIterator) Next() (babybear.Element, bool) { if vi.i == len(vi.v) { return babybear.Element{}, false } diff --git a/field/babybear/sis/sis_test.go b/field/babybear/sis/sis_test.go index 9c1e89df24..3b00c0d4e6 100644 --- a/field/babybear/sis/sis_test.go +++ b/field/babybear/sis/sis_test.go @@ -108,7 +108,7 @@ func TestLimbDecomposeBytes(t *testing.T) { logTwoBound := 
8 for cc := 0; cc < 1; cc++ { - vr := NewLimbIterator(&vectorIterator{v: a}, logTwoBound/8) + vr := NewLimbIterator(&VectorIterator{v: a}, logTwoBound/8) m := make(babybear.Vector, nbElmts*babybear.Bytes*8/logTwoBound) for i := 0; i < len(m); i++ { m[i][0] = vr.NextLimb() diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index 4b6ea6a30d..bd1e99c96e 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -134,7 +134,7 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { k := make([]{{ .FF }}.Element, r.Degree) // inner hash - r.InnerHash(&vectorIterator{v: v}, res, k) + r.InnerHash(&VectorIterator{v: v}, res, k) // reduces mod Xᵈ+1 @@ -210,12 +210,12 @@ type ElementIterator interface { Next() ({{ .FF }}.Element, bool) } -type vectorIterator struct { +type VectorIterator struct { v {{ .FF }}.Vector i int } -func (vi *vectorIterator) Next() ({{ .FF }}.Element, bool) { +func (vi *VectorIterator) Next() ({{ .FF }}.Element, bool) { if vi.i == len(vi.v) { return {{ .FF }}.Element{}, false } diff --git a/field/generator/internal/templates/sis/sis.test.go.tmpl b/field/generator/internal/templates/sis/sis.test.go.tmpl index 44e01f34c4..9653c84ffc 100644 --- a/field/generator/internal/templates/sis/sis.test.go.tmpl +++ b/field/generator/internal/templates/sis/sis.test.go.tmpl @@ -105,7 +105,7 @@ func TestLimbDecomposeBytes(t *testing.T) { logTwoBound := 8 for cc:=0;cc<{{- if $f31 }}1{{- else }}3{{- end}}; cc++ { - vr := NewLimbIterator(&vectorIterator{v:a}, logTwoBound/8) + vr := NewLimbIterator(&VectorIterator{v:a}, logTwoBound/8) m := make({{ .FF }}.Vector, nbElmts*{{ .FF }}.Bytes*8/logTwoBound) for i := 0; i < len(m); i++ { m[i][0] = vr.NextLimb() diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index 3b8f6a20eb..a77983705e 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -134,7 
+134,7 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { k := make([]goldilocks.Element, r.Degree) // inner hash - r.InnerHash(&vectorIterator{v: v}, res, k) + r.InnerHash(&VectorIterator{v: v}, res, k) // reduces mod Xᵈ+1 r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) @@ -207,12 +207,12 @@ type ElementIterator interface { Next() (goldilocks.Element, bool) } -type vectorIterator struct { +type VectorIterator struct { v goldilocks.Vector i int } -func (vi *vectorIterator) Next() (goldilocks.Element, bool) { +func (vi *VectorIterator) Next() (goldilocks.Element, bool) { if vi.i == len(vi.v) { return goldilocks.Element{}, false } diff --git a/field/goldilocks/sis/sis_test.go b/field/goldilocks/sis/sis_test.go index e14052c23e..28f5e53121 100644 --- a/field/goldilocks/sis/sis_test.go +++ b/field/goldilocks/sis/sis_test.go @@ -108,7 +108,7 @@ func TestLimbDecomposeBytes(t *testing.T) { logTwoBound := 8 for cc := 0; cc < 3; cc++ { - vr := NewLimbIterator(&vectorIterator{v: a}, logTwoBound/8) + vr := NewLimbIterator(&VectorIterator{v: a}, logTwoBound/8) m := make(goldilocks.Vector, nbElmts*goldilocks.Bytes*8/logTwoBound) for i := 0; i < len(m); i++ { m[i][0] = vr.NextLimb() diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index afbed5dbd1..5c773306bf 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -134,7 +134,7 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { k := make([]koalabear.Element, r.Degree) // inner hash - r.InnerHash(&vectorIterator{v: v}, res, k) + r.InnerHash(&VectorIterator{v: v}, res, k) // reduces mod Xᵈ+1 r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) @@ -207,12 +207,12 @@ type ElementIterator interface { Next() (koalabear.Element, bool) } -type vectorIterator struct { +type VectorIterator struct { v koalabear.Vector i int } -func (vi *vectorIterator) Next() (koalabear.Element, bool) { +func (vi *VectorIterator) Next() (koalabear.Element, bool) { 
if vi.i == len(vi.v) { return koalabear.Element{}, false } diff --git a/field/koalabear/sis/sis_test.go b/field/koalabear/sis/sis_test.go index c7615c4074..a14fcadd92 100644 --- a/field/koalabear/sis/sis_test.go +++ b/field/koalabear/sis/sis_test.go @@ -108,7 +108,7 @@ func TestLimbDecomposeBytes(t *testing.T) { logTwoBound := 8 for cc := 0; cc < 1; cc++ { - vr := NewLimbIterator(&vectorIterator{v: a}, logTwoBound/8) + vr := NewLimbIterator(&VectorIterator{v: a}, logTwoBound/8) m := make(koalabear.Vector, nbElmts*koalabear.Bytes*8/logTwoBound) for i := 0; i < len(m); i++ { m[i][0] = vr.NextLimb() From 7a37994e5df88154a4397077f26d0d861d79ea23 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Fri, 10 Jan 2025 21:29:52 -0600 Subject: [PATCH 13/25] refactor: more changes to accomodate users --- ecc/bls12-377/fr/sis/sis.go | 75 ++++++++++-------- ecc/bls12-377/fr/sis/sis_test.go | 9 ++- field/babybear/sis/sis.go | 75 ++++++++++-------- field/babybear/sis/sis_test.go | 9 ++- .../internal/templates/fft/tests/fft.go.tmpl | 1 + .../internal/templates/sis/sis.go.tmpl | 76 +++++++++++-------- .../internal/templates/sis/sis.test.go.tmpl | 9 ++- field/goldilocks/sis/sis.go | 75 ++++++++++-------- field/goldilocks/sis/sis_test.go | 9 ++- field/koalabear/sis/sis.go | 75 ++++++++++-------- field/koalabear/sis/sis_test.go | 9 ++- 11 files changed, 251 insertions(+), 171 deletions(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 54d5b261fc..e15973275e 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -65,7 +65,11 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // that is, to fill m, we need [degree * n * logTwoBound] bits of data // First n <- #limbs to represent a single field element - n := fr.Bytes / (logTwoBound / 8) // logTwoBound / 8 --> nbBytes per limb + nbBytesPerLimb := logTwoBound / 8 + if fr.Bytes%nbBytesPerLimb != 0 { + return nil, errors.New("nbBytesPerLimb must divide field 
size") + } + n := fr.Bytes / nbBytesPerLimb // Then multiply by the number of field elements n *= maxNbElementsToHash @@ -134,7 +138,10 @@ func (r *RSis) Hash(v, res []fr.Element) error { k := make([]fr.Element, r.Degree) // inner hash - r.InnerHash(&VectorIterator{v: v}, res, k) + it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8) + for i := 0; i < len(r.Ag); i++ { + r.InnerHash(it, res, k, i) + } // reduces mod Xᵈ+1 r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) @@ -142,34 +149,30 @@ func (r *RSis) Hash(v, res []fr.Element) error { return nil } -func (r *RSis) InnerHash(it ElementIterator, res, k fr.Vector) { - reader := NewLimbIterator(it, r.LogTwoBound/8) - - for i := 0; i < len(r.Ag); i++ { - zero := uint64(0) - for j := 0; j < r.Degree; j += 2 { - k[j].SetZero() - k[j+1].SetZero() - - // read limbs 2 by 2 since degree is a power of 2 (> 1) - l := reader.NextLimb() - zero |= l - k[j][0] = l - - l2 := reader.NextLimb() - zero |= l2 - k[j+1][0] = l2 - } - if zero == 0 { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue +func (r *RSis) InnerHash(it *LimbIterator, res, k fr.Vector, polId int) { + zero := uint64(0) + for j := 0; j < r.Degree; j++ { + l, ok := it.NextLimb() + if !ok { + // we need to pad; note that we should use a deterministic padding + // other than 0, but it is not an issue for the current use cases. + for m := j; m < r.Degree; m++ { + k[m].SetZero() + } + break } - - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - mulModAcc(res, r.Ag[i], k) + zero |= l + k[j].SetZero() + k[j][0] = l + } + if zero == 0 { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + return } + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + mulModAcc(res, r.Ag[polId], k) } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. 
@@ -212,6 +215,10 @@ type VectorIterator struct { i int } +func NewVectorIterator(v fr.Vector) *VectorIterator { + return &VectorIterator{v: v} +} + func (vi *VectorIterator) Next() (fr.Element, bool) { if vi.i == len(vi.v) { return fr.Element{}, false @@ -260,13 +267,21 @@ func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { // NextLimb returns the next limb of the vector. // This does not perform any bound check, may trigger an out of bound panic. // If underlying vector is "out of limb" -func (vr *LimbIterator) NextLimb() uint64 { +func (vr *LimbIterator) NextLimb() (uint64, bool) { if vr.j == fr.Bytes { + next, ok := vr.it.Next() + if !ok { + return 0, false + } vr.j = 0 - next, _ := vr.it.Next() fr.LittleEndian.PutElement(&vr.buf, next) } - return vr.next(vr.buf[:], &vr.j) + return vr.next(vr.buf[:], &vr.j), true +} + +func (vr *LimbIterator) Reset(it ElementIterator) { + vr.it = it + vr.j = fr.Bytes } func nextUint8(buf []byte, pos *int) uint64 { diff --git a/ecc/bls12-377/fr/sis/sis_test.go b/ecc/bls12-377/fr/sis/sis_test.go index f90a19bc41..88dc2b7276 100644 --- a/ecc/bls12-377/fr/sis/sis_test.go +++ b/ecc/bls12-377/fr/sis/sis_test.go @@ -92,6 +92,7 @@ func TestReference(t *testing.T) { } func TestLimbDecomposeBytes(t *testing.T) { + assert := require.New(t) var montConstant fr.Element var bMontConstant big.Int @@ -110,8 +111,10 @@ func TestLimbDecomposeBytes(t *testing.T) { for cc := 0; cc < 3; cc++ { vr := NewLimbIterator(&VectorIterator{v: a}, logTwoBound/8) m := make(fr.Vector, nbElmts*fr.Bytes*8/logTwoBound) + var ok bool for i := 0; i < len(m); i++ { - m[i][0] = vr.NextLimb() + m[i][0], ok = vr.NextLimb() + assert.True(ok) } for i := 0; i < len(m); i++ { @@ -124,9 +127,7 @@ func TestLimbDecomposeBytes(t *testing.T) { coeffsPerFieldsElmt := fr.Bytes * 8 / logTwoBound for i := 0; i < nbElmts; i++ { r := eval(m[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytes failed") - } + 
assert.True(r.Equal(&a[i]), "limbDecomposeBytes failed") } logTwoBound *= 2 } diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index 9666998004..2b2c0d931b 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -65,7 +65,11 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // that is, to fill m, we need [degree * n * logTwoBound] bits of data // First n <- #limbs to represent a single field element - n := babybear.Bytes / (logTwoBound / 8) // logTwoBound / 8 --> nbBytes per limb + nbBytesPerLimb := logTwoBound / 8 + if babybear.Bytes%nbBytesPerLimb != 0 { + return nil, errors.New("nbBytesPerLimb must divide field size") + } + n := babybear.Bytes / nbBytesPerLimb // Then multiply by the number of field elements n *= maxNbElementsToHash @@ -134,7 +138,10 @@ func (r *RSis) Hash(v, res []babybear.Element) error { k := make([]babybear.Element, r.Degree) // inner hash - r.InnerHash(&VectorIterator{v: v}, res, k) + it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8) + for i := 0; i < len(r.Ag); i++ { + r.InnerHash(it, res, k, i) + } // reduces mod Xᵈ+1 r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) @@ -142,34 +149,30 @@ func (r *RSis) Hash(v, res []babybear.Element) error { return nil } -func (r *RSis) InnerHash(it ElementIterator, res, k babybear.Vector) { - reader := NewLimbIterator(it, r.LogTwoBound/8) - - for i := 0; i < len(r.Ag); i++ { - zero := uint32(0) - for j := 0; j < r.Degree; j += 2 { - k[j].SetZero() - k[j+1].SetZero() - - // read limbs 2 by 2 since degree is a power of 2 (> 1) - l := reader.NextLimb() - zero |= l - k[j][0] = l - - l2 := reader.NextLimb() - zero |= l2 - k[j+1][0] = l2 - } - if zero == 0 { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue +func (r *RSis) InnerHash(it *LimbIterator, res, k babybear.Vector, polId int) { + zero := uint32(0) + for j := 0; j < r.Degree; j++ { + l, ok := 
it.NextLimb() + if !ok { + // we need to pad; note that we should use a deterministic padding + // other than 0, but it is not an issue for the current use cases. + for m := j; m < r.Degree; m++ { + k[m].SetZero() + } + break } - - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - mulModAcc(res, r.Ag[i], k) + zero |= l + k[j].SetZero() + k[j][0] = l + } + if zero == 0 { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + return } + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + mulModAcc(res, r.Ag[polId], k) } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -212,6 +215,10 @@ type VectorIterator struct { i int } +func NewVectorIterator(v babybear.Vector) *VectorIterator { + return &VectorIterator{v: v} +} + func (vi *VectorIterator) Next() (babybear.Element, bool) { if vi.i == len(vi.v) { return babybear.Element{}, false @@ -256,13 +263,21 @@ func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { // NextLimb returns the next limb of the vector. // This does not perform any bound check, may trigger an out of bound panic. 
// If underlying vector is "out of limb" -func (vr *LimbIterator) NextLimb() uint32 { +func (vr *LimbIterator) NextLimb() (uint32, bool) { if vr.j == babybear.Bytes { + next, ok := vr.it.Next() + if !ok { + return 0, false + } vr.j = 0 - next, _ := vr.it.Next() babybear.LittleEndian.PutElement(&vr.buf, next) } - return vr.next(vr.buf[:], &vr.j) + return vr.next(vr.buf[:], &vr.j), true +} + +func (vr *LimbIterator) Reset(it ElementIterator) { + vr.it = it + vr.j = babybear.Bytes } func nextUint8(buf []byte, pos *int) uint32 { diff --git a/field/babybear/sis/sis_test.go b/field/babybear/sis/sis_test.go index 3b00c0d4e6..6d041515e7 100644 --- a/field/babybear/sis/sis_test.go +++ b/field/babybear/sis/sis_test.go @@ -92,6 +92,7 @@ func TestReference(t *testing.T) { } func TestLimbDecomposeBytes(t *testing.T) { + assert := require.New(t) var montConstant babybear.Element var bMontConstant big.Int @@ -110,8 +111,10 @@ func TestLimbDecomposeBytes(t *testing.T) { for cc := 0; cc < 1; cc++ { vr := NewLimbIterator(&VectorIterator{v: a}, logTwoBound/8) m := make(babybear.Vector, nbElmts*babybear.Bytes*8/logTwoBound) + var ok bool for i := 0; i < len(m); i++ { - m[i][0] = vr.NextLimb() + m[i][0], ok = vr.NextLimb() + assert.True(ok) } for i := 0; i < len(m); i++ { @@ -124,9 +127,7 @@ func TestLimbDecomposeBytes(t *testing.T) { coeffsPerFieldsElmt := babybear.Bytes * 8 / logTwoBound for i := 0; i < nbElmts; i++ { r := eval(m[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytes failed") - } + assert.True(r.Equal(&a[i]), "limbDecomposeBytes failed") } logTwoBound *= 2 } diff --git a/field/generator/internal/templates/fft/tests/fft.go.tmpl b/field/generator/internal/templates/fft/tests/fft.go.tmpl index 4500234956..c926779cde 100644 --- a/field/generator/internal/templates/fft/tests/fft.go.tmpl +++ b/field/generator/internal/templates/fft/tests/fft.go.tmpl @@ -303,6 +303,7 @@ func BenchmarkFFTDIFReference(b *testing.B) { } } + 
func evaluatePolynomial(pol []{{ .FF }}.Element, val {{ .FF }}.Element) {{ .FF }}.Element { var acc, res, tmp {{ .FF }}.Element res.Set(&pol[0]) diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index bd1e99c96e..0a2f3c7300 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -65,7 +65,11 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // that is, to fill m, we need [degree * n * logTwoBound] bits of data // First n <- #limbs to represent a single field element - n := {{ .FF }}.Bytes / (logTwoBound / 8) // logTwoBound / 8 --> nbBytes per limb + nbBytesPerLimb := logTwoBound / 8 + if {{ .FF }}.Bytes % nbBytesPerLimb != 0 { + return nil, errors.New("nbBytesPerLimb must divide field size") + } + n := {{ .FF }}.Bytes / nbBytesPerLimb // Then multiply by the number of field elements n *= maxNbElementsToHash @@ -134,43 +138,41 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { k := make([]{{ .FF }}.Element, r.Degree) // inner hash - r.InnerHash(&VectorIterator{v: v}, res, k) + it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8) + for i := 0; i < len(r.Ag); i++ { + r.InnerHash(it, res, k, i) + } - // reduces mod Xᵈ+1 r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) return nil } -func (r *RSis) InnerHash(it ElementIterator, res, k {{ .FF }}.Vector) { - reader := NewLimbIterator(it, r.LogTwoBound/8) - - for i := 0; i < len(r.Ag); i++ { - zero := {{$tReturn}}(0) - for j := 0; j < r.Degree; j+=2 { - k[j].SetZero() - k[j+1].SetZero() - - // read limbs 2 by 2 since degree is a power of 2 (> 1) - l := reader.NextLimb() - zero |= l - k[j][0] = l - - l2 := reader.NextLimb() - zero |= l2 - k[j+1][0] = l2 - } - if zero == 0 { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue +func (r *RSis) InnerHash(it *LimbIterator, res, k {{ 
.FF }}.Vector, polId int) { + zero := {{$tReturn}}(0) + for j := 0; j < r.Degree; j++ { + l, ok := it.NextLimb() + if !ok { + // we need to pad; note that we should use a deterministic padding + // other than 0, but it is not an issue for the current use cases. + for m := j; m < r.Degree; m++ { + k[m].SetZero() + } + break } - - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - mulModAcc(res, r.Ag[i], k) + zero |= l + k[j].SetZero() + k[j][0] = l + } + if zero == 0 { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + return } + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + mulModAcc(res, r.Ag[polId], k) } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -215,6 +217,10 @@ type VectorIterator struct { i int } +func NewVectorIterator(v {{ .FF }}.Vector) *VectorIterator { + return &VectorIterator{v: v} +} + func (vi *VectorIterator) Next() ({{ .FF }}.Element, bool) { if vi.i == len(vi.v) { return {{ .FF }}.Element{}, false @@ -266,13 +272,21 @@ func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { // NextLimb returns the next limb of the vector. // This does not perform any bound check, may trigger an out of bound panic. 
// If underlying vector is "out of limb" -func (vr *LimbIterator) NextLimb() {{$tReturn}} { +func (vr *LimbIterator) NextLimb() ({{$tReturn}}, bool) { if vr.j == {{ .FF }}.Bytes { + next, ok := vr.it.Next() + if !ok { + return 0, false + } vr.j = 0 - next, _ := vr.it.Next() {{.FF}}.LittleEndian.PutElement(&vr.buf, next) } - return vr.next(vr.buf[:], &vr.j) + return vr.next(vr.buf[:], &vr.j), true +} + +func (vr *LimbIterator) Reset(it ElementIterator) { + vr.it = it + vr.j = {{ .FF }}.Bytes } diff --git a/field/generator/internal/templates/sis/sis.test.go.tmpl b/field/generator/internal/templates/sis/sis.test.go.tmpl index 9653c84ffc..9233eaf850 100644 --- a/field/generator/internal/templates/sis/sis.test.go.tmpl +++ b/field/generator/internal/templates/sis/sis.test.go.tmpl @@ -87,6 +87,7 @@ func TestReference(t *testing.T) { } func TestLimbDecomposeBytes(t *testing.T) { + assert := require.New(t) var montConstant {{ .FF }}.Element var bMontConstant big.Int @@ -107,8 +108,10 @@ func TestLimbDecomposeBytes(t *testing.T) { for cc:=0;cc<{{- if $f31 }}1{{- else }}3{{- end}}; cc++ { vr := NewLimbIterator(&VectorIterator{v:a}, logTwoBound/8) m := make({{ .FF }}.Vector, nbElmts*{{ .FF }}.Bytes*8/logTwoBound) + var ok bool for i := 0; i < len(m); i++ { - m[i][0] = vr.NextLimb() + m[i][0], ok = vr.NextLimb() + assert.True(ok) } for i := 0; i < len(m); i++ { @@ -121,9 +124,7 @@ func TestLimbDecomposeBytes(t *testing.T) { coeffsPerFieldsElmt := {{ .FF }}.Bytes * 8 / logTwoBound for i := 0; i < nbElmts; i++ { r := eval(m[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytes failed") - } + assert.True(r.Equal(&a[i]), "limbDecomposeBytes failed") } logTwoBound*=2 } diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index a77983705e..c5d703c1f0 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -65,7 +65,11 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash 
int) (*R // that is, to fill m, we need [degree * n * logTwoBound] bits of data // First n <- #limbs to represent a single field element - n := goldilocks.Bytes / (logTwoBound / 8) // logTwoBound / 8 --> nbBytes per limb + nbBytesPerLimb := logTwoBound / 8 + if goldilocks.Bytes%nbBytesPerLimb != 0 { + return nil, errors.New("nbBytesPerLimb must divide field size") + } + n := goldilocks.Bytes / nbBytesPerLimb // Then multiply by the number of field elements n *= maxNbElementsToHash @@ -134,7 +138,10 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { k := make([]goldilocks.Element, r.Degree) // inner hash - r.InnerHash(&VectorIterator{v: v}, res, k) + it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8) + for i := 0; i < len(r.Ag); i++ { + r.InnerHash(it, res, k, i) + } // reduces mod Xᵈ+1 r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) @@ -142,34 +149,30 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { return nil } -func (r *RSis) InnerHash(it ElementIterator, res, k goldilocks.Vector) { - reader := NewLimbIterator(it, r.LogTwoBound/8) - - for i := 0; i < len(r.Ag); i++ { - zero := uint64(0) - for j := 0; j < r.Degree; j += 2 { - k[j].SetZero() - k[j+1].SetZero() - - // read limbs 2 by 2 since degree is a power of 2 (> 1) - l := reader.NextLimb() - zero |= l - k[j][0] = l - - l2 := reader.NextLimb() - zero |= l2 - k[j+1][0] = l2 - } - if zero == 0 { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue +func (r *RSis) InnerHash(it *LimbIterator, res, k goldilocks.Vector, polId int) { + zero := uint64(0) + for j := 0; j < r.Degree; j++ { + l, ok := it.NextLimb() + if !ok { + // we need to pad; note that we should use a deterministic padding + // other than 0, but it is not an issue for the current use cases. 
+ for m := j; m < r.Degree; m++ { + k[m].SetZero() + } + break } - - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - mulModAcc(res, r.Ag[i], k) + zero |= l + k[j].SetZero() + k[j][0] = l + } + if zero == 0 { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + return } + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + mulModAcc(res, r.Ag[polId], k) } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -212,6 +215,10 @@ type VectorIterator struct { i int } +func NewVectorIterator(v goldilocks.Vector) *VectorIterator { + return &VectorIterator{v: v} +} + func (vi *VectorIterator) Next() (goldilocks.Element, bool) { if vi.i == len(vi.v) { return goldilocks.Element{}, false @@ -260,13 +267,21 @@ func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { // NextLimb returns the next limb of the vector. // This does not perform any bound check, may trigger an out of bound panic. // If underlying vector is "out of limb" -func (vr *LimbIterator) NextLimb() uint64 { +func (vr *LimbIterator) NextLimb() (uint64, bool) { if vr.j == goldilocks.Bytes { + next, ok := vr.it.Next() + if !ok { + return 0, false + } vr.j = 0 - next, _ := vr.it.Next() goldilocks.LittleEndian.PutElement(&vr.buf, next) } - return vr.next(vr.buf[:], &vr.j) + return vr.next(vr.buf[:], &vr.j), true +} + +func (vr *LimbIterator) Reset(it ElementIterator) { + vr.it = it + vr.j = goldilocks.Bytes } func nextUint8(buf []byte, pos *int) uint64 { diff --git a/field/goldilocks/sis/sis_test.go b/field/goldilocks/sis/sis_test.go index 28f5e53121..480cbb8c33 100644 --- a/field/goldilocks/sis/sis_test.go +++ b/field/goldilocks/sis/sis_test.go @@ -92,6 +92,7 @@ func TestReference(t *testing.T) { } func TestLimbDecomposeBytes(t *testing.T) { + assert := require.New(t) var montConstant goldilocks.Element var bMontConstant big.Int @@ -110,8 +111,10 @@ func TestLimbDecomposeBytes(t *testing.T) { for cc := 0; cc < 3; cc++ { vr := 
NewLimbIterator(&VectorIterator{v: a}, logTwoBound/8) m := make(goldilocks.Vector, nbElmts*goldilocks.Bytes*8/logTwoBound) + var ok bool for i := 0; i < len(m); i++ { - m[i][0] = vr.NextLimb() + m[i][0], ok = vr.NextLimb() + assert.True(ok) } for i := 0; i < len(m); i++ { @@ -124,9 +127,7 @@ func TestLimbDecomposeBytes(t *testing.T) { coeffsPerFieldsElmt := goldilocks.Bytes * 8 / logTwoBound for i := 0; i < nbElmts; i++ { r := eval(m[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytes failed") - } + assert.True(r.Equal(&a[i]), "limbDecomposeBytes failed") } logTwoBound *= 2 } diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index 5c773306bf..21224cceed 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -65,7 +65,11 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // that is, to fill m, we need [degree * n * logTwoBound] bits of data // First n <- #limbs to represent a single field element - n := koalabear.Bytes / (logTwoBound / 8) // logTwoBound / 8 --> nbBytes per limb + nbBytesPerLimb := logTwoBound / 8 + if koalabear.Bytes%nbBytesPerLimb != 0 { + return nil, errors.New("nbBytesPerLimb must divide field size") + } + n := koalabear.Bytes / nbBytesPerLimb // Then multiply by the number of field elements n *= maxNbElementsToHash @@ -134,7 +138,10 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { k := make([]koalabear.Element, r.Degree) // inner hash - r.InnerHash(&VectorIterator{v: v}, res, k) + it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8) + for i := 0; i < len(r.Ag); i++ { + r.InnerHash(it, res, k, i) + } // reduces mod Xᵈ+1 r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1)) @@ -142,34 +149,30 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { return nil } -func (r *RSis) InnerHash(it ElementIterator, res, k koalabear.Vector) { - reader := NewLimbIterator(it, r.LogTwoBound/8) - - 
for i := 0; i < len(r.Ag); i++ { - zero := uint32(0) - for j := 0; j < r.Degree; j += 2 { - k[j].SetZero() - k[j+1].SetZero() - - // read limbs 2 by 2 since degree is a power of 2 (> 1) - l := reader.NextLimb() - zero |= l - k[j][0] = l - - l2 := reader.NextLimb() - zero |= l2 - k[j+1][0] = l2 - } - if zero == 0 { - // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] - // we can skip this, FFT(0) = 0 - continue +func (r *RSis) InnerHash(it *LimbIterator, res, k koalabear.Vector, polId int) { + zero := uint32(0) + for j := 0; j < r.Degree; j++ { + l, ok := it.NextLimb() + if !ok { + // we need to pad; note that we should use a deterministic padding + // other than 0, but it is not an issue for the current use cases. + for m := j; m < r.Degree; m++ { + k[m].SetZero() + } + break } - - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - mulModAcc(res, r.Ag[i], k) + zero |= l + k[j].SetZero() + k[j][0] = l + } + if zero == 0 { + // means m[i*r.Degree : (i+1)*r.Degree] == [0...0] + // we can skip this, FFT(0) = 0 + return } + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + mulModAcc(res, r.Ag[polId], k) } // mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. @@ -212,6 +215,10 @@ type VectorIterator struct { i int } +func NewVectorIterator(v koalabear.Vector) *VectorIterator { + return &VectorIterator{v: v} +} + func (vi *VectorIterator) Next() (koalabear.Element, bool) { if vi.i == len(vi.v) { return koalabear.Element{}, false @@ -256,13 +263,21 @@ func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { // NextLimb returns the next limb of the vector. // This does not perform any bound check, may trigger an out of bound panic. 
// If underlying vector is "out of limb" -func (vr *LimbIterator) NextLimb() uint32 { +func (vr *LimbIterator) NextLimb() (uint32, bool) { if vr.j == koalabear.Bytes { + next, ok := vr.it.Next() + if !ok { + return 0, false + } vr.j = 0 - next, _ := vr.it.Next() koalabear.LittleEndian.PutElement(&vr.buf, next) } - return vr.next(vr.buf[:], &vr.j) + return vr.next(vr.buf[:], &vr.j), true +} + +func (vr *LimbIterator) Reset(it ElementIterator) { + vr.it = it + vr.j = koalabear.Bytes } func nextUint8(buf []byte, pos *int) uint32 { diff --git a/field/koalabear/sis/sis_test.go b/field/koalabear/sis/sis_test.go index a14fcadd92..d05364dea6 100644 --- a/field/koalabear/sis/sis_test.go +++ b/field/koalabear/sis/sis_test.go @@ -92,6 +92,7 @@ func TestReference(t *testing.T) { } func TestLimbDecomposeBytes(t *testing.T) { + assert := require.New(t) var montConstant koalabear.Element var bMontConstant big.Int @@ -110,8 +111,10 @@ func TestLimbDecomposeBytes(t *testing.T) { for cc := 0; cc < 1; cc++ { vr := NewLimbIterator(&VectorIterator{v: a}, logTwoBound/8) m := make(koalabear.Vector, nbElmts*koalabear.Bytes*8/logTwoBound) + var ok bool for i := 0; i < len(m); i++ { - m[i][0] = vr.NextLimb() + m[i][0], ok = vr.NextLimb() + assert.True(ok) } for i := 0; i < len(m); i++ { @@ -124,9 +127,7 @@ func TestLimbDecomposeBytes(t *testing.T) { coeffsPerFieldsElmt := koalabear.Bytes * 8 / logTwoBound for i := 0; i < nbElmts; i++ { r := eval(m[i*coeffsPerFieldsElmt:(i+1)*coeffsPerFieldsElmt], x) - if !r.Equal(&a[i]) { - t.Fatal("limbDecomposeBytes failed") - } + assert.True(r.Equal(&a[i]), "limbDecomposeBytes failed") } logTwoBound *= 2 } From 87c409d3533f6ac11e52d1e262cfbee21e602ba1 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Fri, 10 Jan 2025 21:33:24 -0600 Subject: [PATCH 14/25] perf: make SetBytes not allocate errors in non-fast path --- ecc/bls12-377/fp/element.go | 6 ++++-- ecc/bls12-377/fr/element.go | 6 ++++-- ecc/bls12-381/fp/element.go | 6 ++++-- 
ecc/bls12-381/fr/element.go | 6 ++++-- ecc/bls24-315/fp/element.go | 6 ++++-- ecc/bls24-315/fr/element.go | 6 ++++-- ecc/bls24-317/fp/element.go | 6 ++++-- ecc/bls24-317/fr/element.go | 6 ++++-- ecc/bn254/fp/element.go | 6 ++++-- ecc/bn254/fr/element.go | 6 ++++-- ecc/bw6-633/fp/element.go | 6 ++++-- ecc/bw6-633/fr/element.go | 6 ++++-- ecc/bw6-761/fp/element.go | 6 ++++-- ecc/bw6-761/fr/element.go | 6 ++++-- ecc/secp256k1/fp/element.go | 6 ++++-- ecc/secp256k1/fr/element.go | 6 ++++-- ecc/stark-curve/fp/element.go | 6 ++++-- ecc/stark-curve/fr/element.go | 6 ++++-- field/babybear/element.go | 6 ++++-- field/generator/internal/templates/element/conv.go | 6 ++++-- field/goldilocks/element.go | 6 ++++-- field/koalabear/element.go | 6 ++++-- 22 files changed, 88 insertions(+), 44 deletions(-) diff --git a/ecc/bls12-377/fp/element.go b/ecc/bls12-377/fp/element.go index 163bfa737b..d76fa8ac61 100644 --- a/ecc/bls12-377/fp/element.go +++ b/ecc/bls12-377/fp/element.go @@ -1214,6 +1214,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fp.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 
var BigEndian bigEndian @@ -1231,7 +1233,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[5] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1265,7 +1267,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[5] = binary.LittleEndian.Uint64((*b)[40:48]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bls12-377/fr/element.go b/ecc/bls12-377/fr/element.go index 85bd40bb35..712d80226f 100644 --- a/ecc/bls12-377/fr/element.go +++ b/ecc/bls12-377/fr/element.go @@ -1055,6 +1055,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fr.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. var BigEndian bigEndian @@ -1070,7 +1072,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1100,7 +1102,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.LittleEndian.Uint64((*b)[24:32]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bls12-381/fp/element.go b/ecc/bls12-381/fp/element.go index 0a6ab8e21f..2782c81aa3 100644 --- a/ecc/bls12-381/fp/element.go +++ b/ecc/bls12-381/fp/element.go @@ -1214,6 +1214,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fp.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 
var BigEndian bigEndian @@ -1231,7 +1233,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[5] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1265,7 +1267,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[5] = binary.LittleEndian.Uint64((*b)[40:48]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bls12-381/fr/element.go b/ecc/bls12-381/fr/element.go index 7fe789341e..6d22d7372b 100644 --- a/ecc/bls12-381/fr/element.go +++ b/ecc/bls12-381/fr/element.go @@ -1055,6 +1055,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fr.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. var BigEndian bigEndian @@ -1070,7 +1072,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1100,7 +1102,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.LittleEndian.Uint64((*b)[24:32]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bls24-315/fp/element.go b/ecc/bls24-315/fp/element.go index 5c9d2ad40c..9cab40d76a 100644 --- a/ecc/bls24-315/fp/element.go +++ b/ecc/bls24-315/fp/element.go @@ -1130,6 +1130,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fp.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 
var BigEndian bigEndian @@ -1146,7 +1148,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[4] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1178,7 +1180,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[4] = binary.LittleEndian.Uint64((*b)[32:40]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bls24-315/fr/element.go b/ecc/bls24-315/fr/element.go index 518f6908f4..72d4cb627f 100644 --- a/ecc/bls24-315/fr/element.go +++ b/ecc/bls24-315/fr/element.go @@ -1055,6 +1055,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fr.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. var BigEndian bigEndian @@ -1070,7 +1072,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1100,7 +1102,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.LittleEndian.Uint64((*b)[24:32]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bls24-317/fp/element.go b/ecc/bls24-317/fp/element.go index e061a5240c..88b9303564 100644 --- a/ecc/bls24-317/fp/element.go +++ b/ecc/bls24-317/fp/element.go @@ -1130,6 +1130,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fp.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 
var BigEndian bigEndian @@ -1146,7 +1148,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[4] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1178,7 +1180,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[4] = binary.LittleEndian.Uint64((*b)[32:40]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bls24-317/fr/element.go b/ecc/bls24-317/fr/element.go index 01cbdc854f..6663dfff2d 100644 --- a/ecc/bls24-317/fr/element.go +++ b/ecc/bls24-317/fr/element.go @@ -1055,6 +1055,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fr.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. var BigEndian bigEndian @@ -1070,7 +1072,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1100,7 +1102,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.LittleEndian.Uint64((*b)[24:32]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bn254/fp/element.go b/ecc/bn254/fp/element.go index 44aaa929bc..0d9cc8bd43 100644 --- a/ecc/bn254/fp/element.go +++ b/ecc/bn254/fp/element.go @@ -1055,6 +1055,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fp.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 
var BigEndian bigEndian @@ -1070,7 +1072,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1100,7 +1102,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.LittleEndian.Uint64((*b)[24:32]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bn254/fr/element.go b/ecc/bn254/fr/element.go index a8f372c092..105442d6e3 100644 --- a/ecc/bn254/fr/element.go +++ b/ecc/bn254/fr/element.go @@ -1055,6 +1055,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fr.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. var BigEndian bigEndian @@ -1070,7 +1072,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1100,7 +1102,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.LittleEndian.Uint64((*b)[24:32]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bw6-633/fp/element.go b/ecc/bw6-633/fp/element.go index e1cb5a8210..306ba9a694 100644 --- a/ecc/bw6-633/fp/element.go +++ b/ecc/bw6-633/fp/element.go @@ -1610,6 +1610,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fp.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 
var BigEndian bigEndian @@ -1631,7 +1633,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[9] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1673,7 +1675,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[9] = binary.LittleEndian.Uint64((*b)[72:80]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bw6-633/fr/element.go b/ecc/bw6-633/fr/element.go index 286f0cbc97..065bfd5806 100644 --- a/ecc/bw6-633/fr/element.go +++ b/ecc/bw6-633/fr/element.go @@ -1130,6 +1130,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fr.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. var BigEndian bigEndian @@ -1146,7 +1148,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[4] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1178,7 +1180,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[4] = binary.LittleEndian.Uint64((*b)[32:40]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bw6-761/fp/element.go b/ecc/bw6-761/fp/element.go index b42b76e5a0..904b64e3bc 100644 --- a/ecc/bw6-761/fp/element.go +++ b/ecc/bw6-761/fp/element.go @@ -1844,6 +1844,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fp.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 
var BigEndian bigEndian @@ -1867,7 +1869,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[11] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1913,7 +1915,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[11] = binary.LittleEndian.Uint64((*b)[88:96]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/bw6-761/fr/element.go b/ecc/bw6-761/fr/element.go index 71d980d3cc..074517a6db 100644 --- a/ecc/bw6-761/fr/element.go +++ b/ecc/bw6-761/fr/element.go @@ -1214,6 +1214,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fr.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. var BigEndian bigEndian @@ -1231,7 +1233,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[5] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1265,7 +1267,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[5] = binary.LittleEndian.Uint64((*b)[40:48]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/secp256k1/fp/element.go b/ecc/secp256k1/fp/element.go index 9c5f79500b..1829f9b676 100644 --- a/ecc/secp256k1/fp/element.go +++ b/ecc/secp256k1/fp/element.go @@ -1083,6 +1083,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fp.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 
var BigEndian bigEndian @@ -1098,7 +1100,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1128,7 +1130,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.LittleEndian.Uint64((*b)[24:32]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/secp256k1/fr/element.go b/ecc/secp256k1/fr/element.go index 9f20f3b412..619f39e0ac 100644 --- a/ecc/secp256k1/fr/element.go +++ b/ecc/secp256k1/fr/element.go @@ -1083,6 +1083,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fr.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. var BigEndian bigEndian @@ -1098,7 +1100,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1128,7 +1130,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.LittleEndian.Uint64((*b)[24:32]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/stark-curve/fp/element.go b/ecc/stark-curve/fp/element.go index 6e911a9281..6c37af974e 100644 --- a/ecc/stark-curve/fp/element.go +++ b/ecc/stark-curve/fp/element.go @@ -1055,6 +1055,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fp.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 
var BigEndian bigEndian @@ -1070,7 +1072,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1100,7 +1102,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.LittleEndian.Uint64((*b)[24:32]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fp.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/ecc/stark-curve/fr/element.go b/ecc/stark-curve/fr/element.go index c9d96f98b4..ce6fac740a 100644 --- a/ecc/stark-curve/fr/element.go +++ b/ecc/stark-curve/fr/element.go @@ -1055,6 +1055,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid fr.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. var BigEndian bigEndian @@ -1070,7 +1072,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -1100,7 +1102,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[3] = binary.LittleEndian.Uint64((*b)[24:32]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid fr.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/field/babybear/element.go b/field/babybear/element.go index 8fc970bd41..7a8bdb77c2 100644 --- a/field/babybear/element.go +++ b/field/babybear/element.go @@ -768,6 +768,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid babybear.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 
var BigEndian bigEndian @@ -780,7 +782,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[0] = binary.BigEndian.Uint32((*b)[0:4]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid babybear.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -804,7 +806,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[0] = binary.LittleEndian.Uint32((*b)[0:4]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid babybear.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/field/generator/internal/templates/element/conv.go b/field/generator/internal/templates/element/conv.go index 70bffcf631..113b0b1972 100644 --- a/field/generator/internal/templates/element/conv.go +++ b/field/generator/internal/templates/element/conv.go @@ -327,6 +327,8 @@ type ByteOrder interface { } +var errInvalidEncoding = errors.New("invalid {{.PackageName}}.{{.ElementName}} encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 
var BigEndian bigEndian @@ -345,7 +347,7 @@ func (bigEndian) Element(b *[Bytes]byte) ({{.ElementName}}, error) { {{- end}} if !z.smallerThanModulus() { - return {{.ElementName}}{}, errors.New("invalid {{.PackageName}}.{{.ElementName}} encoding") + return {{.ElementName}}{}, errInvalidEncoding } z.toMont() @@ -382,7 +384,7 @@ func (littleEndian) Element(b *[Bytes]byte) ({{.ElementName}}, error) { {{- end}} if !z.smallerThanModulus() { - return {{.ElementName}}{}, errors.New("invalid {{.PackageName}}.{{.ElementName}} encoding") + return {{.ElementName}}{}, errInvalidEncoding } z.toMont() diff --git a/field/goldilocks/element.go b/field/goldilocks/element.go index 349928655a..4e2a293242 100644 --- a/field/goldilocks/element.go +++ b/field/goldilocks/element.go @@ -801,6 +801,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid goldilocks.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. var BigEndian bigEndian @@ -813,7 +815,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[0] = binary.BigEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid goldilocks.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -837,7 +839,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[0] = binary.LittleEndian.Uint64((*b)[0:8]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid goldilocks.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() diff --git a/field/koalabear/element.go b/field/koalabear/element.go index c18ddf14a7..399e7e7515 100644 --- a/field/koalabear/element.go +++ b/field/koalabear/element.go @@ -768,6 +768,8 @@ type ByteOrder interface { String() string } +var errInvalidEncoding = errors.New("invalid koalabear.Element encoding") + // BigEndian is the big-endian implementation of ByteOrder and AppendByteOrder. 
var BigEndian bigEndian @@ -780,7 +782,7 @@ func (bigEndian) Element(b *[Bytes]byte) (Element, error) { z[0] = binary.BigEndian.Uint32((*b)[0:4]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid koalabear.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() @@ -804,7 +806,7 @@ func (littleEndian) Element(b *[Bytes]byte) (Element, error) { z[0] = binary.LittleEndian.Uint32((*b)[0:4]) if !z.smallerThanModulus() { - return Element{}, errors.New("invalid koalabear.Element encoding") + return Element{}, errInvalidEncoding } z.toMont() From bdfe5cc72b0fcb7ac6039dda5921fba5204935e7 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Sat, 11 Jan 2025 09:41:30 -0600 Subject: [PATCH 15/25] refactor: restore sis fft to investigate allocs --- ecc/bls12-377/fr/fft/fft.go | 63 +- ecc/bls12-377/fr/sis/sis.go | 26 +- ecc/bls12-377/fr/sis/sis_fft.go | 556 ++++++++++++++++++ ecc/bls12-381/fr/fft/fft.go | 63 +- ecc/bls24-315/fr/fft/fft.go | 63 +- ecc/bls24-317/fr/fft/fft.go | 63 +- ecc/bn254/fr/fft/fft.go | 63 +- ecc/bw6-633/fr/fft/fft.go | 63 +- ecc/bw6-761/fr/fft/fft.go | 63 +- field/babybear/fft/fft.go | 63 +- field/babybear/sis/sis.go | 26 +- field/babybear/sis/sis_fft.go | 556 ++++++++++++++++++ field/generator/generator_sis.go | 4 +- .../internal/templates/fft/fft.go.tmpl | 33 +- .../internal/templates/sis/fft.go.tmpl | 82 +++ .../internal/templates/sis/sis.go.tmpl | 30 +- field/goldilocks/fft/fft.go | 63 +- field/goldilocks/sis/sis.go | 26 +- field/goldilocks/sis/sis_fft.go | 556 ++++++++++++++++++ field/koalabear/fft/fft.go | 63 +- field/koalabear/sis/sis.go | 26 +- field/koalabear/sis/sis_fft.go | 556 ++++++++++++++++++ 22 files changed, 2523 insertions(+), 584 deletions(-) create mode 100644 ecc/bls12-377/fr/sis/sis_fft.go create mode 100644 field/babybear/sis/sis_fft.go create mode 100644 field/generator/internal/templates/sis/fft.go.tmpl create mode 100644 field/goldilocks/sis/sis_fft.go create mode 100644 
field/koalabear/sis/sis_fft.go diff --git a/ecc/bls12-377/fr/fft/fft.go b/ecc/bls12-377/fr/fft/fft.go index ff9623cfb9..afdb821d34 100644 --- a/ecc/bls12-377/fr/fft/fft.go +++ b/ecc/bls12-377/fr/fft/fft.go @@ -29,7 +29,8 @@ const butterflyThreshold = 16 // if decimation == DIT (decimation in time), the input must be in bit-reversed order // if decimation == DIF (decimation in frequency), the output will be in bit-reversed order func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { - + // perf note; this option pattern actually allocates on the heap and comes at a cost when + // doing many small FFTs! opt := fftOptions(opts...) // find the stage where we should stop spawning go routines in our recursive calls @@ -199,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -290,15 +285,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -426,39 +415,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by 
internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index e15973275e..5be811d821 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -37,6 +37,9 @@ type RSis struct { Domain *fft.Domain maxNbElementsToHash int + + smallFFT func([]fr.Element) + twiddlesCoset []fr.Element // used in conjunction with the smallFFT; } // NewRSis creates an instance of RSis. @@ -97,6 +100,18 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R maxNbElementsToHash: maxNbElementsToHash, } + r.smallFFT = func(p []fr.Element) { + r.Domain.FFT(p, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + } + + // if we have a FFT kernel of the size of the domain cardinality, we use it. 
+ if r.Domain.Cardinality == 64 { + r.twiddlesCoset = PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + r.smallFFT = func(a []fr.Element) { + FFT64(a, r.twiddlesCoset) + } + } + // filling A a := make([]fr.Element, n*r.Degree) ag := make([]fr.Element, n*r.Degree) @@ -171,7 +186,16 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k fr.Vector, polId int) { return } - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // for perf, we use directly what's exposed; + r.smallFFT(k) + // k.Mul(k, fr.Vector(r.cosetTable)) + // if r.Domain.KernelDIF != nil { + // r.Domain.KernelDIF(k) + // } else { + // r.Domain.FFT(k, fft.DIF, fft.WithNbTasks(1)) + // } + mulModAcc(res, r.Ag[polId], k) } diff --git a/ecc/bls12-377/fr/sis/sis_fft.go b/ecc/bls12-377/fr/sis/sis_fft.go new file mode 100644 index 0000000000..f4f4db3abb --- /dev/null +++ b/ecc/bls12-377/fr/sis/sis_fft.go @@ -0,0 +1,556 @@ +// Copyright 2020-2025 Consensys Software Inc. +// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package sis + +import ( + "github.com/consensys/gnark-crypto/ecc/bls12-377/fr" + "math/big" +) + +// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements +// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) +// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset +func FFT64(a []fr.Element, twiddlesCoset []fr.Element) { + + a[32].Mul(&a[32], &twiddlesCoset[0]) + a[33].Mul(&a[33], &twiddlesCoset[0]) + a[34].Mul(&a[34], &twiddlesCoset[0]) + a[35].Mul(&a[35], &twiddlesCoset[0]) + a[36].Mul(&a[36], &twiddlesCoset[0]) + a[37].Mul(&a[37], &twiddlesCoset[0]) + a[38].Mul(&a[38], &twiddlesCoset[0]) + a[39].Mul(&a[39], &twiddlesCoset[0]) + a[40].Mul(&a[40], &twiddlesCoset[0]) + a[41].Mul(&a[41], &twiddlesCoset[0]) + a[42].Mul(&a[42], &twiddlesCoset[0]) + a[43].Mul(&a[43], &twiddlesCoset[0]) + a[44].Mul(&a[44], &twiddlesCoset[0]) + a[45].Mul(&a[45], &twiddlesCoset[0]) + a[46].Mul(&a[46], &twiddlesCoset[0]) + a[47].Mul(&a[47], &twiddlesCoset[0]) + a[48].Mul(&a[48], &twiddlesCoset[0]) + a[49].Mul(&a[49], &twiddlesCoset[0]) + a[50].Mul(&a[50], &twiddlesCoset[0]) + a[51].Mul(&a[51], &twiddlesCoset[0]) + a[52].Mul(&a[52], &twiddlesCoset[0]) + a[53].Mul(&a[53], &twiddlesCoset[0]) + a[54].Mul(&a[54], &twiddlesCoset[0]) + a[55].Mul(&a[55], &twiddlesCoset[0]) + a[56].Mul(&a[56], &twiddlesCoset[0]) + a[57].Mul(&a[57], &twiddlesCoset[0]) + a[58].Mul(&a[58], &twiddlesCoset[0]) + a[59].Mul(&a[59], &twiddlesCoset[0]) + a[60].Mul(&a[60], &twiddlesCoset[0]) + a[61].Mul(&a[61], &twiddlesCoset[0]) + a[62].Mul(&a[62], &twiddlesCoset[0]) + a[63].Mul(&a[63], &twiddlesCoset[0]) + fr.Butterfly(&a[0], &a[32]) + fr.Butterfly(&a[1], &a[33]) + fr.Butterfly(&a[2], &a[34]) + fr.Butterfly(&a[3], &a[35]) + fr.Butterfly(&a[4], &a[36]) + fr.Butterfly(&a[5], &a[37]) + fr.Butterfly(&a[6], &a[38]) + fr.Butterfly(&a[7], &a[39]) + 
fr.Butterfly(&a[8], &a[40]) + fr.Butterfly(&a[9], &a[41]) + fr.Butterfly(&a[10], &a[42]) + fr.Butterfly(&a[11], &a[43]) + fr.Butterfly(&a[12], &a[44]) + fr.Butterfly(&a[13], &a[45]) + fr.Butterfly(&a[14], &a[46]) + fr.Butterfly(&a[15], &a[47]) + fr.Butterfly(&a[16], &a[48]) + fr.Butterfly(&a[17], &a[49]) + fr.Butterfly(&a[18], &a[50]) + fr.Butterfly(&a[19], &a[51]) + fr.Butterfly(&a[20], &a[52]) + fr.Butterfly(&a[21], &a[53]) + fr.Butterfly(&a[22], &a[54]) + fr.Butterfly(&a[23], &a[55]) + fr.Butterfly(&a[24], &a[56]) + fr.Butterfly(&a[25], &a[57]) + fr.Butterfly(&a[26], &a[58]) + fr.Butterfly(&a[27], &a[59]) + fr.Butterfly(&a[28], &a[60]) + fr.Butterfly(&a[29], &a[61]) + fr.Butterfly(&a[30], &a[62]) + fr.Butterfly(&a[31], &a[63]) + a[16].Mul(&a[16], &twiddlesCoset[1]) + a[17].Mul(&a[17], &twiddlesCoset[1]) + a[18].Mul(&a[18], &twiddlesCoset[1]) + a[19].Mul(&a[19], &twiddlesCoset[1]) + a[20].Mul(&a[20], &twiddlesCoset[1]) + a[21].Mul(&a[21], &twiddlesCoset[1]) + a[22].Mul(&a[22], &twiddlesCoset[1]) + a[23].Mul(&a[23], &twiddlesCoset[1]) + a[24].Mul(&a[24], &twiddlesCoset[1]) + a[25].Mul(&a[25], &twiddlesCoset[1]) + a[26].Mul(&a[26], &twiddlesCoset[1]) + a[27].Mul(&a[27], &twiddlesCoset[1]) + a[28].Mul(&a[28], &twiddlesCoset[1]) + a[29].Mul(&a[29], &twiddlesCoset[1]) + a[30].Mul(&a[30], &twiddlesCoset[1]) + a[31].Mul(&a[31], &twiddlesCoset[1]) + a[48].Mul(&a[48], &twiddlesCoset[2]) + a[49].Mul(&a[49], &twiddlesCoset[2]) + a[50].Mul(&a[50], &twiddlesCoset[2]) + a[51].Mul(&a[51], &twiddlesCoset[2]) + a[52].Mul(&a[52], &twiddlesCoset[2]) + a[53].Mul(&a[53], &twiddlesCoset[2]) + a[54].Mul(&a[54], &twiddlesCoset[2]) + a[55].Mul(&a[55], &twiddlesCoset[2]) + a[56].Mul(&a[56], &twiddlesCoset[2]) + a[57].Mul(&a[57], &twiddlesCoset[2]) + a[58].Mul(&a[58], &twiddlesCoset[2]) + a[59].Mul(&a[59], &twiddlesCoset[2]) + a[60].Mul(&a[60], &twiddlesCoset[2]) + a[61].Mul(&a[61], &twiddlesCoset[2]) + a[62].Mul(&a[62], &twiddlesCoset[2]) + a[63].Mul(&a[63], &twiddlesCoset[2]) + 
fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], &a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], &a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], &a[58]) + fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddlesCoset[3]) + a[9].Mul(&a[9], &twiddlesCoset[3]) + a[10].Mul(&a[10], &twiddlesCoset[3]) + a[11].Mul(&a[11], &twiddlesCoset[3]) + a[12].Mul(&a[12], &twiddlesCoset[3]) + a[13].Mul(&a[13], &twiddlesCoset[3]) + a[14].Mul(&a[14], &twiddlesCoset[3]) + a[15].Mul(&a[15], &twiddlesCoset[3]) + a[24].Mul(&a[24], &twiddlesCoset[4]) + a[25].Mul(&a[25], &twiddlesCoset[4]) + a[26].Mul(&a[26], &twiddlesCoset[4]) + a[27].Mul(&a[27], &twiddlesCoset[4]) + a[28].Mul(&a[28], &twiddlesCoset[4]) + a[29].Mul(&a[29], &twiddlesCoset[4]) + a[30].Mul(&a[30], &twiddlesCoset[4]) + a[31].Mul(&a[31], &twiddlesCoset[4]) + a[40].Mul(&a[40], &twiddlesCoset[5]) + a[41].Mul(&a[41], &twiddlesCoset[5]) + a[42].Mul(&a[42], &twiddlesCoset[5]) + a[43].Mul(&a[43], &twiddlesCoset[5]) + a[44].Mul(&a[44], &twiddlesCoset[5]) + a[45].Mul(&a[45], &twiddlesCoset[5]) + a[46].Mul(&a[46], &twiddlesCoset[5]) + a[47].Mul(&a[47], &twiddlesCoset[5]) + a[56].Mul(&a[56], &twiddlesCoset[6]) + a[57].Mul(&a[57], &twiddlesCoset[6]) + 
a[58].Mul(&a[58], &twiddlesCoset[6]) + a[59].Mul(&a[59], &twiddlesCoset[6]) + a[60].Mul(&a[60], &twiddlesCoset[6]) + a[61].Mul(&a[61], &twiddlesCoset[6]) + a[62].Mul(&a[62], &twiddlesCoset[6]) + a[63].Mul(&a[63], &twiddlesCoset[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + fr.Butterfly(&a[48], &a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddlesCoset[7]) + a[5].Mul(&a[5], &twiddlesCoset[7]) + a[6].Mul(&a[6], &twiddlesCoset[7]) + a[7].Mul(&a[7], &twiddlesCoset[7]) + a[12].Mul(&a[12], &twiddlesCoset[8]) + a[13].Mul(&a[13], &twiddlesCoset[8]) + a[14].Mul(&a[14], &twiddlesCoset[8]) + a[15].Mul(&a[15], &twiddlesCoset[8]) + a[20].Mul(&a[20], &twiddlesCoset[9]) + a[21].Mul(&a[21], &twiddlesCoset[9]) + a[22].Mul(&a[22], &twiddlesCoset[9]) + a[23].Mul(&a[23], &twiddlesCoset[9]) + a[28].Mul(&a[28], &twiddlesCoset[10]) + a[29].Mul(&a[29], &twiddlesCoset[10]) + a[30].Mul(&a[30], &twiddlesCoset[10]) + a[31].Mul(&a[31], &twiddlesCoset[10]) + a[36].Mul(&a[36], &twiddlesCoset[11]) + a[37].Mul(&a[37], &twiddlesCoset[11]) + a[38].Mul(&a[38], &twiddlesCoset[11]) + a[39].Mul(&a[39], &twiddlesCoset[11]) + 
a[44].Mul(&a[44], &twiddlesCoset[12]) + a[45].Mul(&a[45], &twiddlesCoset[12]) + a[46].Mul(&a[46], &twiddlesCoset[12]) + a[47].Mul(&a[47], &twiddlesCoset[12]) + a[52].Mul(&a[52], &twiddlesCoset[13]) + a[53].Mul(&a[53], &twiddlesCoset[13]) + a[54].Mul(&a[54], &twiddlesCoset[13]) + a[55].Mul(&a[55], &twiddlesCoset[13]) + a[60].Mul(&a[60], &twiddlesCoset[14]) + a[61].Mul(&a[61], &twiddlesCoset[14]) + a[62].Mul(&a[62], &twiddlesCoset[14]) + a[63].Mul(&a[63], &twiddlesCoset[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + fr.Butterfly(&a[41], &a[45]) + fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + fr.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddlesCoset[15]) + a[3].Mul(&a[3], &twiddlesCoset[15]) + a[6].Mul(&a[6], &twiddlesCoset[16]) + a[7].Mul(&a[7], &twiddlesCoset[16]) + a[10].Mul(&a[10], &twiddlesCoset[17]) + a[11].Mul(&a[11], &twiddlesCoset[17]) + a[14].Mul(&a[14], &twiddlesCoset[18]) + a[15].Mul(&a[15], &twiddlesCoset[18]) + a[18].Mul(&a[18], &twiddlesCoset[19]) + a[19].Mul(&a[19], &twiddlesCoset[19]) + a[22].Mul(&a[22], &twiddlesCoset[20]) + a[23].Mul(&a[23], &twiddlesCoset[20]) + a[26].Mul(&a[26], &twiddlesCoset[21]) + a[27].Mul(&a[27], 
&twiddlesCoset[21]) + a[30].Mul(&a[30], &twiddlesCoset[22]) + a[31].Mul(&a[31], &twiddlesCoset[22]) + a[34].Mul(&a[34], &twiddlesCoset[23]) + a[35].Mul(&a[35], &twiddlesCoset[23]) + a[38].Mul(&a[38], &twiddlesCoset[24]) + a[39].Mul(&a[39], &twiddlesCoset[24]) + a[42].Mul(&a[42], &twiddlesCoset[25]) + a[43].Mul(&a[43], &twiddlesCoset[25]) + a[46].Mul(&a[46], &twiddlesCoset[26]) + a[47].Mul(&a[47], &twiddlesCoset[26]) + a[50].Mul(&a[50], &twiddlesCoset[27]) + a[51].Mul(&a[51], &twiddlesCoset[27]) + a[54].Mul(&a[54], &twiddlesCoset[28]) + a[55].Mul(&a[55], &twiddlesCoset[28]) + a[58].Mul(&a[58], &twiddlesCoset[29]) + a[59].Mul(&a[59], &twiddlesCoset[29]) + a[62].Mul(&a[62], &twiddlesCoset[30]) + a[63].Mul(&a[63], &twiddlesCoset[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) + fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) + fr.Butterfly(&a[36], &a[38]) + fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) + fr.Butterfly(&a[56], &a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddlesCoset[31]) + a[3].Mul(&a[3], &twiddlesCoset[32]) + a[5].Mul(&a[5], &twiddlesCoset[33]) + a[7].Mul(&a[7], &twiddlesCoset[34]) + a[9].Mul(&a[9], &twiddlesCoset[35]) + a[11].Mul(&a[11], &twiddlesCoset[36]) + a[13].Mul(&a[13], &twiddlesCoset[37]) + 
a[15].Mul(&a[15], &twiddlesCoset[38]) + a[17].Mul(&a[17], &twiddlesCoset[39]) + a[19].Mul(&a[19], &twiddlesCoset[40]) + a[21].Mul(&a[21], &twiddlesCoset[41]) + a[23].Mul(&a[23], &twiddlesCoset[42]) + a[25].Mul(&a[25], &twiddlesCoset[43]) + a[27].Mul(&a[27], &twiddlesCoset[44]) + a[29].Mul(&a[29], &twiddlesCoset[45]) + a[31].Mul(&a[31], &twiddlesCoset[46]) + a[33].Mul(&a[33], &twiddlesCoset[47]) + a[35].Mul(&a[35], &twiddlesCoset[48]) + a[37].Mul(&a[37], &twiddlesCoset[49]) + a[39].Mul(&a[39], &twiddlesCoset[50]) + a[41].Mul(&a[41], &twiddlesCoset[51]) + a[43].Mul(&a[43], &twiddlesCoset[52]) + a[45].Mul(&a[45], &twiddlesCoset[53]) + a[47].Mul(&a[47], &twiddlesCoset[54]) + a[49].Mul(&a[49], &twiddlesCoset[55]) + a[51].Mul(&a[51], &twiddlesCoset[56]) + a[53].Mul(&a[53], &twiddlesCoset[57]) + a[55].Mul(&a[55], &twiddlesCoset[58]) + a[57].Mul(&a[57], &twiddlesCoset[59]) + a[59].Mul(&a[59], &twiddlesCoset[60]) + a[61].Mul(&a[61], &twiddlesCoset[61]) + a[63].Mul(&a[63], &twiddlesCoset[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) + fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) + fr.Butterfly(&a[30], &a[31]) + fr.Butterfly(&a[32], &a[33]) + fr.Butterfly(&a[34], &a[35]) + fr.Butterfly(&a[36], &a[37]) + fr.Butterfly(&a[38], &a[39]) + fr.Butterfly(&a[40], &a[41]) + fr.Butterfly(&a[42], &a[43]) + fr.Butterfly(&a[44], &a[45]) + fr.Butterfly(&a[46], &a[47]) + fr.Butterfly(&a[48], &a[49]) + fr.Butterfly(&a[50], &a[51]) + fr.Butterfly(&a[52], &a[53]) + fr.Butterfly(&a[54], &a[55]) + fr.Butterfly(&a[56], &a[57]) + fr.Butterfly(&a[58], &a[59]) + fr.Butterfly(&a[60], &a[61]) + fr.Butterfly(&a[62], &a[63]) +} + +// 
PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table +// it then return all elements in the correct order for the unrolled FFT. +func PrecomputeTwiddlesCoset(generator, shifter fr.Element) []fr.Element { + toReturn := make([]fr.Element, 63) + var r, s fr.Element + e := new(big.Int) + + s = shifter + for k := 0; k < 5; k++ { + s.Square(&s) + } + toReturn[0] = s + s = shifter + for k := 0; k < 4; k++ { + s.Square(&s) + } + toReturn[1] = s + r.Exp(generator, e.SetUint64(uint64(1<<4*1))) + toReturn[2].Mul(&r, &s) + s = shifter + for k := 0; k < 3; k++ { + s.Square(&s) + } + toReturn[3] = s + r.Exp(generator, e.SetUint64(uint64(1<<3*2))) + toReturn[4].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<3*1))) + toReturn[5].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<3*3))) + toReturn[6].Mul(&r, &s) + s = shifter + for k := 0; k < 2; k++ { + s.Square(&s) + } + toReturn[7] = s + r.Exp(generator, e.SetUint64(uint64(1<<2*4))) + toReturn[8].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*2))) + toReturn[9].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*6))) + toReturn[10].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*1))) + toReturn[11].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*5))) + toReturn[12].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*3))) + toReturn[13].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*7))) + toReturn[14].Mul(&r, &s) + s = shifter + for k := 0; k < 1; k++ { + s.Square(&s) + } + toReturn[15] = s + r.Exp(generator, e.SetUint64(uint64(1<<1*8))) + toReturn[16].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*4))) + toReturn[17].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*12))) + toReturn[18].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*2))) + toReturn[19].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*10))) + toReturn[20].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*6))) + toReturn[21].Mul(&r, &s) + r.Exp(generator, 
e.SetUint64(uint64(1<<1*14))) + toReturn[22].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*1))) + toReturn[23].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*9))) + toReturn[24].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*5))) + toReturn[25].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*13))) + toReturn[26].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*3))) + toReturn[27].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*11))) + toReturn[28].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*7))) + toReturn[29].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*15))) + toReturn[30].Mul(&r, &s) + s = shifter + for k := 0; k < 0; k++ { + s.Square(&s) + } + toReturn[31] = s + r.Exp(generator, e.SetUint64(uint64(1<<0*16))) + toReturn[32].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*8))) + toReturn[33].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*24))) + toReturn[34].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*4))) + toReturn[35].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*20))) + toReturn[36].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*12))) + toReturn[37].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*28))) + toReturn[38].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*2))) + toReturn[39].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*18))) + toReturn[40].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*10))) + toReturn[41].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*26))) + toReturn[42].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*6))) + toReturn[43].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*22))) + toReturn[44].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*14))) + toReturn[45].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*30))) + toReturn[46].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*1))) + toReturn[47].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*17))) 
+ toReturn[48].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*9))) + toReturn[49].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*25))) + toReturn[50].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*5))) + toReturn[51].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*21))) + toReturn[52].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*13))) + toReturn[53].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*29))) + toReturn[54].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*3))) + toReturn[55].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*19))) + toReturn[56].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*11))) + toReturn[57].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*27))) + toReturn[58].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*7))) + toReturn[59].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*23))) + toReturn[60].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*15))) + toReturn[61].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*31))) + toReturn[62].Mul(&r, &s) + return toReturn +} diff --git a/ecc/bls12-381/fr/fft/fft.go b/ecc/bls12-381/fr/fft/fft.go index 3b2e29739c..d2a5e7200c 100644 --- a/ecc/bls12-381/fr/fft/fft.go +++ b/ecc/bls12-381/fr/fft/fft.go @@ -29,7 +29,8 @@ const butterflyThreshold = 16 // if decimation == DIT (decimation in time), the input must be in bit-reversed order // if decimation == DIF (decimation in frequency), the output will be in bit-reversed order func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { - + // perf note; this option pattern actually allocates on the heap and comes at a cost when + // doing many small FFTs! opt := fftOptions(opts...) 
// find the stage where we should stop spawning go routines in our recursive calls @@ -199,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -290,15 +285,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -426,39 +415,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by 
internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} diff --git a/ecc/bls24-315/fr/fft/fft.go b/ecc/bls24-315/fr/fft/fft.go index 506f12fb4d..85e25dd54a 100644 --- a/ecc/bls24-315/fr/fft/fft.go +++ b/ecc/bls24-315/fr/fft/fft.go @@ -29,7 +29,8 @@ const butterflyThreshold = 16 // if decimation == DIT (decimation in time), the input must be in bit-reversed order // if decimation == DIF (decimation in frequency), the output will be in bit-reversed order func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { - + // perf note; this option pattern actually allocates on the heap and comes at a cost when + // doing many small FFTs! opt := fftOptions(opts...) 
// find the stage where we should stop spawning go routines in our recursive calls @@ -199,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -290,15 +285,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -426,39 +415,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by 
internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} diff --git a/ecc/bls24-317/fr/fft/fft.go b/ecc/bls24-317/fr/fft/fft.go index 4a418f87fb..fead81fe47 100644 --- a/ecc/bls24-317/fr/fft/fft.go +++ b/ecc/bls24-317/fr/fft/fft.go @@ -29,7 +29,8 @@ const butterflyThreshold = 16 // if decimation == DIT (decimation in time), the input must be in bit-reversed order // if decimation == DIF (decimation in frequency), the output will be in bit-reversed order func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { - + // perf note; this option pattern actually allocates on the heap and comes at a cost when + // doing many small FFTs! opt := fftOptions(opts...) 
// find the stage where we should stop spawning go routines in our recursive calls @@ -199,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -290,15 +285,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -426,39 +415,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by 
internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} diff --git a/ecc/bn254/fr/fft/fft.go b/ecc/bn254/fr/fft/fft.go index 463faffb6d..1da9a883e2 100644 --- a/ecc/bn254/fr/fft/fft.go +++ b/ecc/bn254/fr/fft/fft.go @@ -29,7 +29,8 @@ const butterflyThreshold = 16 // if decimation == DIT (decimation in time), the input must be in bit-reversed order // if decimation == DIF (decimation in frequency), the output will be in bit-reversed order func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { - + // perf note; this option pattern actually allocates on the heap and comes at a cost when + // doing many small FFTs! opt := fftOptions(opts...) 
// find the stage where we should stop spawning go routines in our recursive calls @@ -199,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -290,15 +285,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -426,39 +415,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by 
internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} diff --git a/ecc/bw6-633/fr/fft/fft.go b/ecc/bw6-633/fr/fft/fft.go index 8fd7d57e79..3cae1e9f1a 100644 --- a/ecc/bw6-633/fr/fft/fft.go +++ b/ecc/bw6-633/fr/fft/fft.go @@ -29,7 +29,8 @@ const butterflyThreshold = 16 // if decimation == DIT (decimation in time), the input must be in bit-reversed order // if decimation == DIF (decimation in frequency), the output will be in bit-reversed order func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { - + // perf note; this option pattern actually allocates on the heap and comes at a cost when + // doing many small FFTs! opt := fftOptions(opts...) 
// find the stage where we should stop spawning go routines in our recursive calls @@ -199,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -290,15 +285,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -426,39 +415,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by 
internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} diff --git a/ecc/bw6-761/fr/fft/fft.go b/ecc/bw6-761/fr/fft/fft.go index 76d6bb7a76..c9ad068c0f 100644 --- a/ecc/bw6-761/fr/fft/fft.go +++ b/ecc/bw6-761/fr/fft/fft.go @@ -29,7 +29,8 @@ const butterflyThreshold = 16 // if decimation == DIT (decimation in time), the input must be in bit-reversed order // if decimation == DIF (decimation in frequency), the output will be in bit-reversed order func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { - + // perf note; this option pattern actually allocates on the heap and comes at a cost when + // doing many small FFTs! opt := fftOptions(opts...) 
// find the stage where we should stop spawning go routines in our recursive calls @@ -199,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -290,15 +285,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -426,39 +415,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by 
internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} diff --git a/field/babybear/fft/fft.go b/field/babybear/fft/fft.go index a623c24a97..506fb93807 100644 --- a/field/babybear/fft/fft.go +++ b/field/babybear/fft/fft.go @@ -29,7 +29,8 @@ const butterflyThreshold = 16 // if decimation == DIT (decimation in time), the input must be in bit-reversed order // if decimation == DIF (decimation in frequency), the output will be in bit-reversed order func (domain *Domain) FFT(a []babybear.Element, decimation Decimation, opts ...Option) { - + // perf note; this option pattern actually allocates on the heap and comes at a cost when + // doing many small FFTs! opt := fftOptions(opts...) 
// find the stage where we should stop spawning go routines in our recursive calls @@ -199,15 +200,9 @@ func difFFT(a []babybear.Element, w babybear.Element, twiddles [][]babybear.Elem n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -290,15 +285,9 @@ func ditFFT(a []babybear.Element, w babybear.Element, twiddles [][]babybear.Elem n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -426,39 +415,3 @@ func kerDITNP_256(a []babybear.Element, twiddles [][]babybear.Element, stage int } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_32(a []babybear.Element, twiddles [][]babybear.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - babybear.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []babybear.Element, twiddles [][]babybear.Element, stage int) { - // code unrolled & 
generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - babybear.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index 2b2c0d931b..e951b4cd7b 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -37,6 +37,9 @@ type RSis struct { Domain *fft.Domain maxNbElementsToHash int + + smallFFT func([]babybear.Element) + twiddlesCoset []babybear.Element // used in conjunction with the smallFFT; } // NewRSis creates an instance of RSis. @@ -97,6 +100,18 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R maxNbElementsToHash: maxNbElementsToHash, } + r.smallFFT = func(p []babybear.Element) { + r.Domain.FFT(p, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + } + + // if we have a FFT kernel of the size of the domain cardinality, we use it. 
+ if r.Domain.Cardinality == 64 { + r.twiddlesCoset = PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + r.smallFFT = func(a []babybear.Element) { + FFT64(a, r.twiddlesCoset) + } + } + // filling A a := make([]babybear.Element, n*r.Degree) ag := make([]babybear.Element, n*r.Degree) @@ -171,7 +186,16 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k babybear.Vector, polId int) { return } - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // for perf, we use directly what's exposed; + r.smallFFT(k) + // k.Mul(k, fr.Vector(r.cosetTable)) + // if r.Domain.KernelDIF != nil { + // r.Domain.KernelDIF(k) + // } else { + // r.Domain.FFT(k, fft.DIF, fft.WithNbTasks(1)) + // } + mulModAcc(res, r.Ag[polId], k) } diff --git a/field/babybear/sis/sis_fft.go b/field/babybear/sis/sis_fft.go new file mode 100644 index 0000000000..0b30e84b20 --- /dev/null +++ b/field/babybear/sis/sis_fft.go @@ -0,0 +1,556 @@ +// Copyright 2020-2025 Consensys Software Inc. +// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package sis + +import ( + "github.com/consensys/gnark-crypto/field/babybear" + "math/big" +) + +// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements +// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) +// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset +func FFT64(a []babybear.Element, twiddlesCoset []babybear.Element) { + + a[32].Mul(&a[32], &twiddlesCoset[0]) + a[33].Mul(&a[33], &twiddlesCoset[0]) + a[34].Mul(&a[34], &twiddlesCoset[0]) + a[35].Mul(&a[35], &twiddlesCoset[0]) + a[36].Mul(&a[36], &twiddlesCoset[0]) + a[37].Mul(&a[37], &twiddlesCoset[0]) + a[38].Mul(&a[38], &twiddlesCoset[0]) + a[39].Mul(&a[39], &twiddlesCoset[0]) + a[40].Mul(&a[40], &twiddlesCoset[0]) + a[41].Mul(&a[41], &twiddlesCoset[0]) + a[42].Mul(&a[42], &twiddlesCoset[0]) + a[43].Mul(&a[43], &twiddlesCoset[0]) + a[44].Mul(&a[44], &twiddlesCoset[0]) + a[45].Mul(&a[45], &twiddlesCoset[0]) + a[46].Mul(&a[46], &twiddlesCoset[0]) + a[47].Mul(&a[47], &twiddlesCoset[0]) + a[48].Mul(&a[48], &twiddlesCoset[0]) + a[49].Mul(&a[49], &twiddlesCoset[0]) + a[50].Mul(&a[50], &twiddlesCoset[0]) + a[51].Mul(&a[51], &twiddlesCoset[0]) + a[52].Mul(&a[52], &twiddlesCoset[0]) + a[53].Mul(&a[53], &twiddlesCoset[0]) + a[54].Mul(&a[54], &twiddlesCoset[0]) + a[55].Mul(&a[55], &twiddlesCoset[0]) + a[56].Mul(&a[56], &twiddlesCoset[0]) + a[57].Mul(&a[57], &twiddlesCoset[0]) + a[58].Mul(&a[58], &twiddlesCoset[0]) + a[59].Mul(&a[59], &twiddlesCoset[0]) + a[60].Mul(&a[60], &twiddlesCoset[0]) + a[61].Mul(&a[61], &twiddlesCoset[0]) + a[62].Mul(&a[62], &twiddlesCoset[0]) + a[63].Mul(&a[63], &twiddlesCoset[0]) + babybear.Butterfly(&a[0], &a[32]) + babybear.Butterfly(&a[1], &a[33]) + babybear.Butterfly(&a[2], &a[34]) + babybear.Butterfly(&a[3], &a[35]) + babybear.Butterfly(&a[4], &a[36]) + babybear.Butterfly(&a[5], &a[37]) + babybear.Butterfly(&a[6], 
&a[38]) + babybear.Butterfly(&a[7], &a[39]) + babybear.Butterfly(&a[8], &a[40]) + babybear.Butterfly(&a[9], &a[41]) + babybear.Butterfly(&a[10], &a[42]) + babybear.Butterfly(&a[11], &a[43]) + babybear.Butterfly(&a[12], &a[44]) + babybear.Butterfly(&a[13], &a[45]) + babybear.Butterfly(&a[14], &a[46]) + babybear.Butterfly(&a[15], &a[47]) + babybear.Butterfly(&a[16], &a[48]) + babybear.Butterfly(&a[17], &a[49]) + babybear.Butterfly(&a[18], &a[50]) + babybear.Butterfly(&a[19], &a[51]) + babybear.Butterfly(&a[20], &a[52]) + babybear.Butterfly(&a[21], &a[53]) + babybear.Butterfly(&a[22], &a[54]) + babybear.Butterfly(&a[23], &a[55]) + babybear.Butterfly(&a[24], &a[56]) + babybear.Butterfly(&a[25], &a[57]) + babybear.Butterfly(&a[26], &a[58]) + babybear.Butterfly(&a[27], &a[59]) + babybear.Butterfly(&a[28], &a[60]) + babybear.Butterfly(&a[29], &a[61]) + babybear.Butterfly(&a[30], &a[62]) + babybear.Butterfly(&a[31], &a[63]) + a[16].Mul(&a[16], &twiddlesCoset[1]) + a[17].Mul(&a[17], &twiddlesCoset[1]) + a[18].Mul(&a[18], &twiddlesCoset[1]) + a[19].Mul(&a[19], &twiddlesCoset[1]) + a[20].Mul(&a[20], &twiddlesCoset[1]) + a[21].Mul(&a[21], &twiddlesCoset[1]) + a[22].Mul(&a[22], &twiddlesCoset[1]) + a[23].Mul(&a[23], &twiddlesCoset[1]) + a[24].Mul(&a[24], &twiddlesCoset[1]) + a[25].Mul(&a[25], &twiddlesCoset[1]) + a[26].Mul(&a[26], &twiddlesCoset[1]) + a[27].Mul(&a[27], &twiddlesCoset[1]) + a[28].Mul(&a[28], &twiddlesCoset[1]) + a[29].Mul(&a[29], &twiddlesCoset[1]) + a[30].Mul(&a[30], &twiddlesCoset[1]) + a[31].Mul(&a[31], &twiddlesCoset[1]) + a[48].Mul(&a[48], &twiddlesCoset[2]) + a[49].Mul(&a[49], &twiddlesCoset[2]) + a[50].Mul(&a[50], &twiddlesCoset[2]) + a[51].Mul(&a[51], &twiddlesCoset[2]) + a[52].Mul(&a[52], &twiddlesCoset[2]) + a[53].Mul(&a[53], &twiddlesCoset[2]) + a[54].Mul(&a[54], &twiddlesCoset[2]) + a[55].Mul(&a[55], &twiddlesCoset[2]) + a[56].Mul(&a[56], &twiddlesCoset[2]) + a[57].Mul(&a[57], &twiddlesCoset[2]) + a[58].Mul(&a[58], &twiddlesCoset[2]) + 
a[59].Mul(&a[59], &twiddlesCoset[2]) + a[60].Mul(&a[60], &twiddlesCoset[2]) + a[61].Mul(&a[61], &twiddlesCoset[2]) + a[62].Mul(&a[62], &twiddlesCoset[2]) + a[63].Mul(&a[63], &twiddlesCoset[2]) + babybear.Butterfly(&a[0], &a[16]) + babybear.Butterfly(&a[1], &a[17]) + babybear.Butterfly(&a[2], &a[18]) + babybear.Butterfly(&a[3], &a[19]) + babybear.Butterfly(&a[4], &a[20]) + babybear.Butterfly(&a[5], &a[21]) + babybear.Butterfly(&a[6], &a[22]) + babybear.Butterfly(&a[7], &a[23]) + babybear.Butterfly(&a[8], &a[24]) + babybear.Butterfly(&a[9], &a[25]) + babybear.Butterfly(&a[10], &a[26]) + babybear.Butterfly(&a[11], &a[27]) + babybear.Butterfly(&a[12], &a[28]) + babybear.Butterfly(&a[13], &a[29]) + babybear.Butterfly(&a[14], &a[30]) + babybear.Butterfly(&a[15], &a[31]) + babybear.Butterfly(&a[32], &a[48]) + babybear.Butterfly(&a[33], &a[49]) + babybear.Butterfly(&a[34], &a[50]) + babybear.Butterfly(&a[35], &a[51]) + babybear.Butterfly(&a[36], &a[52]) + babybear.Butterfly(&a[37], &a[53]) + babybear.Butterfly(&a[38], &a[54]) + babybear.Butterfly(&a[39], &a[55]) + babybear.Butterfly(&a[40], &a[56]) + babybear.Butterfly(&a[41], &a[57]) + babybear.Butterfly(&a[42], &a[58]) + babybear.Butterfly(&a[43], &a[59]) + babybear.Butterfly(&a[44], &a[60]) + babybear.Butterfly(&a[45], &a[61]) + babybear.Butterfly(&a[46], &a[62]) + babybear.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddlesCoset[3]) + a[9].Mul(&a[9], &twiddlesCoset[3]) + a[10].Mul(&a[10], &twiddlesCoset[3]) + a[11].Mul(&a[11], &twiddlesCoset[3]) + a[12].Mul(&a[12], &twiddlesCoset[3]) + a[13].Mul(&a[13], &twiddlesCoset[3]) + a[14].Mul(&a[14], &twiddlesCoset[3]) + a[15].Mul(&a[15], &twiddlesCoset[3]) + a[24].Mul(&a[24], &twiddlesCoset[4]) + a[25].Mul(&a[25], &twiddlesCoset[4]) + a[26].Mul(&a[26], &twiddlesCoset[4]) + a[27].Mul(&a[27], &twiddlesCoset[4]) + a[28].Mul(&a[28], &twiddlesCoset[4]) + a[29].Mul(&a[29], &twiddlesCoset[4]) + a[30].Mul(&a[30], &twiddlesCoset[4]) + a[31].Mul(&a[31], &twiddlesCoset[4]) + 
a[40].Mul(&a[40], &twiddlesCoset[5]) + a[41].Mul(&a[41], &twiddlesCoset[5]) + a[42].Mul(&a[42], &twiddlesCoset[5]) + a[43].Mul(&a[43], &twiddlesCoset[5]) + a[44].Mul(&a[44], &twiddlesCoset[5]) + a[45].Mul(&a[45], &twiddlesCoset[5]) + a[46].Mul(&a[46], &twiddlesCoset[5]) + a[47].Mul(&a[47], &twiddlesCoset[5]) + a[56].Mul(&a[56], &twiddlesCoset[6]) + a[57].Mul(&a[57], &twiddlesCoset[6]) + a[58].Mul(&a[58], &twiddlesCoset[6]) + a[59].Mul(&a[59], &twiddlesCoset[6]) + a[60].Mul(&a[60], &twiddlesCoset[6]) + a[61].Mul(&a[61], &twiddlesCoset[6]) + a[62].Mul(&a[62], &twiddlesCoset[6]) + a[63].Mul(&a[63], &twiddlesCoset[6]) + babybear.Butterfly(&a[0], &a[8]) + babybear.Butterfly(&a[1], &a[9]) + babybear.Butterfly(&a[2], &a[10]) + babybear.Butterfly(&a[3], &a[11]) + babybear.Butterfly(&a[4], &a[12]) + babybear.Butterfly(&a[5], &a[13]) + babybear.Butterfly(&a[6], &a[14]) + babybear.Butterfly(&a[7], &a[15]) + babybear.Butterfly(&a[16], &a[24]) + babybear.Butterfly(&a[17], &a[25]) + babybear.Butterfly(&a[18], &a[26]) + babybear.Butterfly(&a[19], &a[27]) + babybear.Butterfly(&a[20], &a[28]) + babybear.Butterfly(&a[21], &a[29]) + babybear.Butterfly(&a[22], &a[30]) + babybear.Butterfly(&a[23], &a[31]) + babybear.Butterfly(&a[32], &a[40]) + babybear.Butterfly(&a[33], &a[41]) + babybear.Butterfly(&a[34], &a[42]) + babybear.Butterfly(&a[35], &a[43]) + babybear.Butterfly(&a[36], &a[44]) + babybear.Butterfly(&a[37], &a[45]) + babybear.Butterfly(&a[38], &a[46]) + babybear.Butterfly(&a[39], &a[47]) + babybear.Butterfly(&a[48], &a[56]) + babybear.Butterfly(&a[49], &a[57]) + babybear.Butterfly(&a[50], &a[58]) + babybear.Butterfly(&a[51], &a[59]) + babybear.Butterfly(&a[52], &a[60]) + babybear.Butterfly(&a[53], &a[61]) + babybear.Butterfly(&a[54], &a[62]) + babybear.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddlesCoset[7]) + a[5].Mul(&a[5], &twiddlesCoset[7]) + a[6].Mul(&a[6], &twiddlesCoset[7]) + a[7].Mul(&a[7], &twiddlesCoset[7]) + a[12].Mul(&a[12], &twiddlesCoset[8]) + 
a[13].Mul(&a[13], &twiddlesCoset[8]) + a[14].Mul(&a[14], &twiddlesCoset[8]) + a[15].Mul(&a[15], &twiddlesCoset[8]) + a[20].Mul(&a[20], &twiddlesCoset[9]) + a[21].Mul(&a[21], &twiddlesCoset[9]) + a[22].Mul(&a[22], &twiddlesCoset[9]) + a[23].Mul(&a[23], &twiddlesCoset[9]) + a[28].Mul(&a[28], &twiddlesCoset[10]) + a[29].Mul(&a[29], &twiddlesCoset[10]) + a[30].Mul(&a[30], &twiddlesCoset[10]) + a[31].Mul(&a[31], &twiddlesCoset[10]) + a[36].Mul(&a[36], &twiddlesCoset[11]) + a[37].Mul(&a[37], &twiddlesCoset[11]) + a[38].Mul(&a[38], &twiddlesCoset[11]) + a[39].Mul(&a[39], &twiddlesCoset[11]) + a[44].Mul(&a[44], &twiddlesCoset[12]) + a[45].Mul(&a[45], &twiddlesCoset[12]) + a[46].Mul(&a[46], &twiddlesCoset[12]) + a[47].Mul(&a[47], &twiddlesCoset[12]) + a[52].Mul(&a[52], &twiddlesCoset[13]) + a[53].Mul(&a[53], &twiddlesCoset[13]) + a[54].Mul(&a[54], &twiddlesCoset[13]) + a[55].Mul(&a[55], &twiddlesCoset[13]) + a[60].Mul(&a[60], &twiddlesCoset[14]) + a[61].Mul(&a[61], &twiddlesCoset[14]) + a[62].Mul(&a[62], &twiddlesCoset[14]) + a[63].Mul(&a[63], &twiddlesCoset[14]) + babybear.Butterfly(&a[0], &a[4]) + babybear.Butterfly(&a[1], &a[5]) + babybear.Butterfly(&a[2], &a[6]) + babybear.Butterfly(&a[3], &a[7]) + babybear.Butterfly(&a[8], &a[12]) + babybear.Butterfly(&a[9], &a[13]) + babybear.Butterfly(&a[10], &a[14]) + babybear.Butterfly(&a[11], &a[15]) + babybear.Butterfly(&a[16], &a[20]) + babybear.Butterfly(&a[17], &a[21]) + babybear.Butterfly(&a[18], &a[22]) + babybear.Butterfly(&a[19], &a[23]) + babybear.Butterfly(&a[24], &a[28]) + babybear.Butterfly(&a[25], &a[29]) + babybear.Butterfly(&a[26], &a[30]) + babybear.Butterfly(&a[27], &a[31]) + babybear.Butterfly(&a[32], &a[36]) + babybear.Butterfly(&a[33], &a[37]) + babybear.Butterfly(&a[34], &a[38]) + babybear.Butterfly(&a[35], &a[39]) + babybear.Butterfly(&a[40], &a[44]) + babybear.Butterfly(&a[41], &a[45]) + babybear.Butterfly(&a[42], &a[46]) + babybear.Butterfly(&a[43], &a[47]) + babybear.Butterfly(&a[48], &a[52]) + 
babybear.Butterfly(&a[49], &a[53]) + babybear.Butterfly(&a[50], &a[54]) + babybear.Butterfly(&a[51], &a[55]) + babybear.Butterfly(&a[56], &a[60]) + babybear.Butterfly(&a[57], &a[61]) + babybear.Butterfly(&a[58], &a[62]) + babybear.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddlesCoset[15]) + a[3].Mul(&a[3], &twiddlesCoset[15]) + a[6].Mul(&a[6], &twiddlesCoset[16]) + a[7].Mul(&a[7], &twiddlesCoset[16]) + a[10].Mul(&a[10], &twiddlesCoset[17]) + a[11].Mul(&a[11], &twiddlesCoset[17]) + a[14].Mul(&a[14], &twiddlesCoset[18]) + a[15].Mul(&a[15], &twiddlesCoset[18]) + a[18].Mul(&a[18], &twiddlesCoset[19]) + a[19].Mul(&a[19], &twiddlesCoset[19]) + a[22].Mul(&a[22], &twiddlesCoset[20]) + a[23].Mul(&a[23], &twiddlesCoset[20]) + a[26].Mul(&a[26], &twiddlesCoset[21]) + a[27].Mul(&a[27], &twiddlesCoset[21]) + a[30].Mul(&a[30], &twiddlesCoset[22]) + a[31].Mul(&a[31], &twiddlesCoset[22]) + a[34].Mul(&a[34], &twiddlesCoset[23]) + a[35].Mul(&a[35], &twiddlesCoset[23]) + a[38].Mul(&a[38], &twiddlesCoset[24]) + a[39].Mul(&a[39], &twiddlesCoset[24]) + a[42].Mul(&a[42], &twiddlesCoset[25]) + a[43].Mul(&a[43], &twiddlesCoset[25]) + a[46].Mul(&a[46], &twiddlesCoset[26]) + a[47].Mul(&a[47], &twiddlesCoset[26]) + a[50].Mul(&a[50], &twiddlesCoset[27]) + a[51].Mul(&a[51], &twiddlesCoset[27]) + a[54].Mul(&a[54], &twiddlesCoset[28]) + a[55].Mul(&a[55], &twiddlesCoset[28]) + a[58].Mul(&a[58], &twiddlesCoset[29]) + a[59].Mul(&a[59], &twiddlesCoset[29]) + a[62].Mul(&a[62], &twiddlesCoset[30]) + a[63].Mul(&a[63], &twiddlesCoset[30]) + babybear.Butterfly(&a[0], &a[2]) + babybear.Butterfly(&a[1], &a[3]) + babybear.Butterfly(&a[4], &a[6]) + babybear.Butterfly(&a[5], &a[7]) + babybear.Butterfly(&a[8], &a[10]) + babybear.Butterfly(&a[9], &a[11]) + babybear.Butterfly(&a[12], &a[14]) + babybear.Butterfly(&a[13], &a[15]) + babybear.Butterfly(&a[16], &a[18]) + babybear.Butterfly(&a[17], &a[19]) + babybear.Butterfly(&a[20], &a[22]) + babybear.Butterfly(&a[21], &a[23]) + babybear.Butterfly(&a[24], &a[26]) 
+ babybear.Butterfly(&a[25], &a[27]) + babybear.Butterfly(&a[28], &a[30]) + babybear.Butterfly(&a[29], &a[31]) + babybear.Butterfly(&a[32], &a[34]) + babybear.Butterfly(&a[33], &a[35]) + babybear.Butterfly(&a[36], &a[38]) + babybear.Butterfly(&a[37], &a[39]) + babybear.Butterfly(&a[40], &a[42]) + babybear.Butterfly(&a[41], &a[43]) + babybear.Butterfly(&a[44], &a[46]) + babybear.Butterfly(&a[45], &a[47]) + babybear.Butterfly(&a[48], &a[50]) + babybear.Butterfly(&a[49], &a[51]) + babybear.Butterfly(&a[52], &a[54]) + babybear.Butterfly(&a[53], &a[55]) + babybear.Butterfly(&a[56], &a[58]) + babybear.Butterfly(&a[57], &a[59]) + babybear.Butterfly(&a[60], &a[62]) + babybear.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddlesCoset[31]) + a[3].Mul(&a[3], &twiddlesCoset[32]) + a[5].Mul(&a[5], &twiddlesCoset[33]) + a[7].Mul(&a[7], &twiddlesCoset[34]) + a[9].Mul(&a[9], &twiddlesCoset[35]) + a[11].Mul(&a[11], &twiddlesCoset[36]) + a[13].Mul(&a[13], &twiddlesCoset[37]) + a[15].Mul(&a[15], &twiddlesCoset[38]) + a[17].Mul(&a[17], &twiddlesCoset[39]) + a[19].Mul(&a[19], &twiddlesCoset[40]) + a[21].Mul(&a[21], &twiddlesCoset[41]) + a[23].Mul(&a[23], &twiddlesCoset[42]) + a[25].Mul(&a[25], &twiddlesCoset[43]) + a[27].Mul(&a[27], &twiddlesCoset[44]) + a[29].Mul(&a[29], &twiddlesCoset[45]) + a[31].Mul(&a[31], &twiddlesCoset[46]) + a[33].Mul(&a[33], &twiddlesCoset[47]) + a[35].Mul(&a[35], &twiddlesCoset[48]) + a[37].Mul(&a[37], &twiddlesCoset[49]) + a[39].Mul(&a[39], &twiddlesCoset[50]) + a[41].Mul(&a[41], &twiddlesCoset[51]) + a[43].Mul(&a[43], &twiddlesCoset[52]) + a[45].Mul(&a[45], &twiddlesCoset[53]) + a[47].Mul(&a[47], &twiddlesCoset[54]) + a[49].Mul(&a[49], &twiddlesCoset[55]) + a[51].Mul(&a[51], &twiddlesCoset[56]) + a[53].Mul(&a[53], &twiddlesCoset[57]) + a[55].Mul(&a[55], &twiddlesCoset[58]) + a[57].Mul(&a[57], &twiddlesCoset[59]) + a[59].Mul(&a[59], &twiddlesCoset[60]) + a[61].Mul(&a[61], &twiddlesCoset[61]) + a[63].Mul(&a[63], &twiddlesCoset[62]) + 
babybear.Butterfly(&a[0], &a[1]) + babybear.Butterfly(&a[2], &a[3]) + babybear.Butterfly(&a[4], &a[5]) + babybear.Butterfly(&a[6], &a[7]) + babybear.Butterfly(&a[8], &a[9]) + babybear.Butterfly(&a[10], &a[11]) + babybear.Butterfly(&a[12], &a[13]) + babybear.Butterfly(&a[14], &a[15]) + babybear.Butterfly(&a[16], &a[17]) + babybear.Butterfly(&a[18], &a[19]) + babybear.Butterfly(&a[20], &a[21]) + babybear.Butterfly(&a[22], &a[23]) + babybear.Butterfly(&a[24], &a[25]) + babybear.Butterfly(&a[26], &a[27]) + babybear.Butterfly(&a[28], &a[29]) + babybear.Butterfly(&a[30], &a[31]) + babybear.Butterfly(&a[32], &a[33]) + babybear.Butterfly(&a[34], &a[35]) + babybear.Butterfly(&a[36], &a[37]) + babybear.Butterfly(&a[38], &a[39]) + babybear.Butterfly(&a[40], &a[41]) + babybear.Butterfly(&a[42], &a[43]) + babybear.Butterfly(&a[44], &a[45]) + babybear.Butterfly(&a[46], &a[47]) + babybear.Butterfly(&a[48], &a[49]) + babybear.Butterfly(&a[50], &a[51]) + babybear.Butterfly(&a[52], &a[53]) + babybear.Butterfly(&a[54], &a[55]) + babybear.Butterfly(&a[56], &a[57]) + babybear.Butterfly(&a[58], &a[59]) + babybear.Butterfly(&a[60], &a[61]) + babybear.Butterfly(&a[62], &a[63]) +} + +// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table +// it then return all elements in the correct order for the unrolled FFT. 
+func PrecomputeTwiddlesCoset(generator, shifter babybear.Element) []babybear.Element { + toReturn := make([]babybear.Element, 63) + var r, s babybear.Element + e := new(big.Int) + + s = shifter + for k := 0; k < 5; k++ { + s.Square(&s) + } + toReturn[0] = s + s = shifter + for k := 0; k < 4; k++ { + s.Square(&s) + } + toReturn[1] = s + r.Exp(generator, e.SetUint64(uint64(1<<4*1))) + toReturn[2].Mul(&r, &s) + s = shifter + for k := 0; k < 3; k++ { + s.Square(&s) + } + toReturn[3] = s + r.Exp(generator, e.SetUint64(uint64(1<<3*2))) + toReturn[4].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<3*1))) + toReturn[5].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<3*3))) + toReturn[6].Mul(&r, &s) + s = shifter + for k := 0; k < 2; k++ { + s.Square(&s) + } + toReturn[7] = s + r.Exp(generator, e.SetUint64(uint64(1<<2*4))) + toReturn[8].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*2))) + toReturn[9].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*6))) + toReturn[10].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*1))) + toReturn[11].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*5))) + toReturn[12].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*3))) + toReturn[13].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*7))) + toReturn[14].Mul(&r, &s) + s = shifter + for k := 0; k < 1; k++ { + s.Square(&s) + } + toReturn[15] = s + r.Exp(generator, e.SetUint64(uint64(1<<1*8))) + toReturn[16].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*4))) + toReturn[17].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*12))) + toReturn[18].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*2))) + toReturn[19].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*10))) + toReturn[20].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*6))) + toReturn[21].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*14))) + toReturn[22].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*1))) + toReturn[23].Mul(&r, &s) + 
r.Exp(generator, e.SetUint64(uint64(1<<1*9))) + toReturn[24].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*5))) + toReturn[25].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*13))) + toReturn[26].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*3))) + toReturn[27].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*11))) + toReturn[28].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*7))) + toReturn[29].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*15))) + toReturn[30].Mul(&r, &s) + s = shifter + for k := 0; k < 0; k++ { + s.Square(&s) + } + toReturn[31] = s + r.Exp(generator, e.SetUint64(uint64(1<<0*16))) + toReturn[32].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*8))) + toReturn[33].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*24))) + toReturn[34].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*4))) + toReturn[35].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*20))) + toReturn[36].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*12))) + toReturn[37].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*28))) + toReturn[38].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*2))) + toReturn[39].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*18))) + toReturn[40].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*10))) + toReturn[41].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*26))) + toReturn[42].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*6))) + toReturn[43].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*22))) + toReturn[44].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*14))) + toReturn[45].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*30))) + toReturn[46].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*1))) + toReturn[47].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*17))) + toReturn[48].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*9))) + toReturn[49].Mul(&r, &s) + r.Exp(generator, 
e.SetUint64(uint64(1<<0*25))) + toReturn[50].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*5))) + toReturn[51].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*21))) + toReturn[52].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*13))) + toReturn[53].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*29))) + toReturn[54].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*3))) + toReturn[55].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*19))) + toReturn[56].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*11))) + toReturn[57].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*27))) + toReturn[58].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*7))) + toReturn[59].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*23))) + toReturn[60].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*15))) + toReturn[61].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*31))) + toReturn[62].Mul(&r, &s) + return toReturn +} diff --git a/field/generator/generator_sis.go b/field/generator/generator_sis.go index d4f251f712..f8f456b29c 100644 --- a/field/generator/generator_sis.go +++ b/field/generator/generator_sis.go @@ -1,7 +1,6 @@ package generator import ( - "os" "path/filepath" "github.com/consensys/bavard" @@ -18,12 +17,11 @@ func generateSIS(F *config.Field, outputDir string) error { outputDir = filepath.Join(outputDir, "sis") entries := []bavard.Entry{ + {File: filepath.Join(outputDir, "sis_fft.go"), Templates: []string{"fft.go.tmpl"}}, {File: filepath.Join(outputDir, "sis.go"), Templates: []string{"sis.go.tmpl"}}, {File: filepath.Join(outputDir, "sis_test.go"), Templates: []string{"sis.test.go.tmpl"}}, } - os.Remove(filepath.Join(outputDir, "sis_fft.go")) - funcs := make(map[string]interface{}) funcs["bitReverse"] = bitReverse diff --git a/field/generator/internal/templates/fft/fft.go.tmpl b/field/generator/internal/templates/fft/fft.go.tmpl index a20785ba28..d777b2164b 100644 --- 
a/field/generator/internal/templates/fft/fft.go.tmpl +++ b/field/generator/internal/templates/fft/fft.go.tmpl @@ -11,8 +11,6 @@ import ( {{ $sizeKernelLog2 := 8}} {{ $sizeKernel := shl 1 $sizeKernelLog2}} -{{ $sizeKernel2Log2 := 5}} -{{ $sizeKernel2 := shl 1 $sizeKernel2Log2}} // Decimation is used in the FFT call to select decimation in time or in frequency type Decimation uint8 @@ -29,7 +27,8 @@ const butterflyThreshold = 16 // if decimation == DIT (decimation in time), the input must be in bit-reversed order // if decimation == DIF (decimation in frequency), the output will be in bit-reversed order func (domain *Domain) FFT(a []{{ .FF }}.Element, decimation Decimation, opts ...Option) { - + // perf note; this option pattern actually allocates on the heap and comes at a cost when + // doing many small FFTs! opt := fftOptions(opts...) // find the stage where we should stop spawning go routines in our recursive calls @@ -202,15 +201,9 @@ func difFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == {{$sizeKernel}} { - kerDIFNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) - return - } else if n == {{$sizeKernel2}} { - kerDIFNP_{{$sizeKernel2}}(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == {{$sizeKernel}} && stage >= twiddlesStartStage { + kerDIFNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -294,15 +287,9 @@ func ditFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == {{$sizeKernel2}} { - kerDITNP_{{$sizeKernel2}}(a, twiddles, stage-twiddlesStartStage) - return - } else if n == {{$sizeKernel}} { - kerDITNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == {{$sizeKernel}} && stage >= twiddlesStartStage { + kerDITNP_{{$sizeKernel}}(a, twiddles, 
stage-twiddlesStartStage) + return } m := n >> 1 @@ -379,7 +366,6 @@ func innerDITWithoutTwiddles(a []{{ .FF }}.Element, at, w {{ .FF }}.Element, sta } {{genKernel $.FF $sizeKernel $sizeKernelLog2}} -{{genKernel $.FF $sizeKernel2 $sizeKernel2Log2}} {{define "genKernel FF sizeKernel sizeKernelLog2"}} @@ -439,6 +425,3 @@ func kerDITNP_{{.sizeKernel}}(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Elem } {{end}} - - - diff --git a/field/generator/internal/templates/sis/fft.go.tmpl b/field/generator/internal/templates/sis/fft.go.tmpl new file mode 100644 index 0000000000..777eb8610a --- /dev/null +++ b/field/generator/internal/templates/sis/fft.go.tmpl @@ -0,0 +1,82 @@ +import ( + "{{ .FieldPackagePath }}" + "math/big" +) + +// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements +// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) +// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset +func FFT64(a []{{ .FF }}.Element, twiddlesCoset []{{ .FF }}.Element) { + + {{- /* notes: + this function can be updated with larger n + nbSteps must be updated too such as 1 << nbSteps == n + butterflies and multiplication are separated for size n = 8, must check perf for larger n + */}} + {{$tIndex := 0}} + {{ $n := 64}} + {{ $m := div $n 2}} + {{ $split := 1}} + {{ $split = div $split 1}} + {{- range $step := reverse (iterate 0 6)}} + + {{- $offset := 0}} + {{- range $s := iterate 0 $split}} + {{- range $i := iterate 0 $m}} + {{- $j := add $i $offset}} + {{- $k := add $j $m}} + a[{{$k}}].Mul(&a[{{$k}}], &twiddlesCoset[{{$tIndex}}]) + {{- end}} + {{- $offset = add $offset $n}} + {{- $tIndex = add $tIndex 1}} + {{- end}} + + {{- $offset := 0}} + {{- range $s := iterate 0 $split}} + {{- range $i := iterate 0 $m}} + {{- $j := add $i $offset}} + {{- $k := add $j $m}} + {{ $.FF }}.Butterfly(&a[{{$j}}], &a[{{$k}}]) + {{- end}} + {{- $offset = add $offset $n}} + {{- end}} + + {{- $n 
= div $n 2}} + {{- $m = div $n 2}} + {{- $split = mul $split 2}} + {{- end}} +} + +// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table +// it then return all elements in the correct order for the unrolled FFT. +func PrecomputeTwiddlesCoset(generator, shifter {{ .FF }}.Element) []{{ .FF }}.Element { + toReturn := make([]{{ .FF }}.Element, 63) + var r, s {{ .FF }}.Element + e := new(big.Int) + {{ $n := 64}} + {{ $m := div $n 2}} + {{ $split := 1}} + {{ $split = div $split 1}} + {{ $j := 0}} + {{- range $step := reverse (iterate 0 6)}} + s = shifter + for k:=0; k <{{$step}};k++ { + s.Square(&s) + } + + {{- $offset := 0}} + {{- range $s := iterate 0 $split}} + {{- $exp := bitReverse $split $s}} + {{- if eq $exp 0}} + toReturn[{{$j}}] = s + {{- else}} + r.Exp(generator, e.SetUint64(uint64(1<<{{$step}} * {{$exp}}))) + toReturn[{{$j}}].Mul(&r, &s) + {{- end}} + {{- $j = add $j 1}} + {{- end}} + + {{- $split = mul $split 2}} + {{- end}} + return toReturn +} \ No newline at end of file diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index 0a2f3c7300..cb235cc9ed 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -35,8 +35,11 @@ type RSis struct { // domain for the polynomial multiplication Domain *fft.Domain - + maxNbElementsToHash int + + smallFFT func([]{{ .FF }}.Element) + twiddlesCoset []{{ .FF }}.Element // used in conjunction with the smallFFT; } // NewRSis creates an instance of RSis. @@ -97,6 +100,20 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R maxNbElementsToHash: maxNbElementsToHash, } + r.smallFFT = func(p []{{ .FF }}.Element) { + r.Domain.FFT(p, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + } + + // if we have a FFT kernel of the size of the domain cardinality, we use it. 
+ if r.Domain.Cardinality == 64 { + r.twiddlesCoset = PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + r.smallFFT = func(a []{{ .FF }}.Element) { + FFT64(a, r.twiddlesCoset) + } + } + + + // filling A a := make([]{{ .FF }}.Element, n*r.Degree) ag := make([]{{ .FF }}.Element, n*r.Degree) @@ -171,7 +188,16 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k {{ .FF }}.Vector, polId int) { return } - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // for perf, we use directly what's exposed; + r.smallFFT(k) + // k.Mul(k, fr.Vector(r.cosetTable)) + // if r.Domain.KernelDIF != nil { + // r.Domain.KernelDIF(k) + // } else { + // r.Domain.FFT(k, fft.DIF, fft.WithNbTasks(1)) + // } + mulModAcc(res, r.Ag[polId], k) } diff --git a/field/goldilocks/fft/fft.go b/field/goldilocks/fft/fft.go index 20c5e57c55..84ca5af4f0 100644 --- a/field/goldilocks/fft/fft.go +++ b/field/goldilocks/fft/fft.go @@ -29,7 +29,8 @@ const butterflyThreshold = 16 // if decimation == DIT (decimation in time), the input must be in bit-reversed order // if decimation == DIF (decimation in frequency), the output will be in bit-reversed order func (domain *Domain) FFT(a []goldilocks.Element, decimation Decimation, opts ...Option) { - + // perf note; this option pattern actually allocates on the heap and comes at a cost when + // doing many small FFTs! opt := fftOptions(opts...) 
// find the stage where we should stop spawning go routines in our recursive calls @@ -199,15 +200,9 @@ func difFFT(a []goldilocks.Element, w goldilocks.Element, twiddles [][]goldilock n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -290,15 +285,9 @@ func ditFFT(a []goldilocks.Element, w goldilocks.Element, twiddles [][]goldilock n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -426,39 +415,3 @@ func kerDITNP_256(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_32(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - goldilocks.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { - // code 
unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - goldilocks.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index c5d703c1f0..51b8256ea0 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -37,6 +37,9 @@ type RSis struct { Domain *fft.Domain maxNbElementsToHash int + + smallFFT func([]goldilocks.Element) + twiddlesCoset []goldilocks.Element // used in conjunction with the smallFFT; } // NewRSis creates an instance of RSis. @@ -97,6 +100,18 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R maxNbElementsToHash: maxNbElementsToHash, } + r.smallFFT = func(p []goldilocks.Element) { + r.Domain.FFT(p, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + } + + // if we have a FFT kernel of the size of the domain cardinality, we use it. 
+ if r.Domain.Cardinality == 64 { + r.twiddlesCoset = PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + r.smallFFT = func(a []goldilocks.Element) { + FFT64(a, r.twiddlesCoset) + } + } + // filling A a := make([]goldilocks.Element, n*r.Degree) ag := make([]goldilocks.Element, n*r.Degree) @@ -171,7 +186,16 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k goldilocks.Vector, polId int) return } - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // for perf, we use directly what's exposed; + r.smallFFT(k) + // k.Mul(k, fr.Vector(r.cosetTable)) + // if r.Domain.KernelDIF != nil { + // r.Domain.KernelDIF(k) + // } else { + // r.Domain.FFT(k, fft.DIF, fft.WithNbTasks(1)) + // } + mulModAcc(res, r.Ag[polId], k) } diff --git a/field/goldilocks/sis/sis_fft.go b/field/goldilocks/sis/sis_fft.go new file mode 100644 index 0000000000..321a84e905 --- /dev/null +++ b/field/goldilocks/sis/sis_fft.go @@ -0,0 +1,556 @@ +// Copyright 2020-2025 Consensys Software Inc. +// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package sis + +import ( + "github.com/consensys/gnark-crypto/field/goldilocks" + "math/big" +) + +// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements +// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) +// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset +func FFT64(a []goldilocks.Element, twiddlesCoset []goldilocks.Element) { + + a[32].Mul(&a[32], &twiddlesCoset[0]) + a[33].Mul(&a[33], &twiddlesCoset[0]) + a[34].Mul(&a[34], &twiddlesCoset[0]) + a[35].Mul(&a[35], &twiddlesCoset[0]) + a[36].Mul(&a[36], &twiddlesCoset[0]) + a[37].Mul(&a[37], &twiddlesCoset[0]) + a[38].Mul(&a[38], &twiddlesCoset[0]) + a[39].Mul(&a[39], &twiddlesCoset[0]) + a[40].Mul(&a[40], &twiddlesCoset[0]) + a[41].Mul(&a[41], &twiddlesCoset[0]) + a[42].Mul(&a[42], &twiddlesCoset[0]) + a[43].Mul(&a[43], &twiddlesCoset[0]) + a[44].Mul(&a[44], &twiddlesCoset[0]) + a[45].Mul(&a[45], &twiddlesCoset[0]) + a[46].Mul(&a[46], &twiddlesCoset[0]) + a[47].Mul(&a[47], &twiddlesCoset[0]) + a[48].Mul(&a[48], &twiddlesCoset[0]) + a[49].Mul(&a[49], &twiddlesCoset[0]) + a[50].Mul(&a[50], &twiddlesCoset[0]) + a[51].Mul(&a[51], &twiddlesCoset[0]) + a[52].Mul(&a[52], &twiddlesCoset[0]) + a[53].Mul(&a[53], &twiddlesCoset[0]) + a[54].Mul(&a[54], &twiddlesCoset[0]) + a[55].Mul(&a[55], &twiddlesCoset[0]) + a[56].Mul(&a[56], &twiddlesCoset[0]) + a[57].Mul(&a[57], &twiddlesCoset[0]) + a[58].Mul(&a[58], &twiddlesCoset[0]) + a[59].Mul(&a[59], &twiddlesCoset[0]) + a[60].Mul(&a[60], &twiddlesCoset[0]) + a[61].Mul(&a[61], &twiddlesCoset[0]) + a[62].Mul(&a[62], &twiddlesCoset[0]) + a[63].Mul(&a[63], &twiddlesCoset[0]) + goldilocks.Butterfly(&a[0], &a[32]) + goldilocks.Butterfly(&a[1], &a[33]) + goldilocks.Butterfly(&a[2], &a[34]) + goldilocks.Butterfly(&a[3], &a[35]) + goldilocks.Butterfly(&a[4], &a[36]) + goldilocks.Butterfly(&a[5], &a[37]) + 
goldilocks.Butterfly(&a[6], &a[38]) + goldilocks.Butterfly(&a[7], &a[39]) + goldilocks.Butterfly(&a[8], &a[40]) + goldilocks.Butterfly(&a[9], &a[41]) + goldilocks.Butterfly(&a[10], &a[42]) + goldilocks.Butterfly(&a[11], &a[43]) + goldilocks.Butterfly(&a[12], &a[44]) + goldilocks.Butterfly(&a[13], &a[45]) + goldilocks.Butterfly(&a[14], &a[46]) + goldilocks.Butterfly(&a[15], &a[47]) + goldilocks.Butterfly(&a[16], &a[48]) + goldilocks.Butterfly(&a[17], &a[49]) + goldilocks.Butterfly(&a[18], &a[50]) + goldilocks.Butterfly(&a[19], &a[51]) + goldilocks.Butterfly(&a[20], &a[52]) + goldilocks.Butterfly(&a[21], &a[53]) + goldilocks.Butterfly(&a[22], &a[54]) + goldilocks.Butterfly(&a[23], &a[55]) + goldilocks.Butterfly(&a[24], &a[56]) + goldilocks.Butterfly(&a[25], &a[57]) + goldilocks.Butterfly(&a[26], &a[58]) + goldilocks.Butterfly(&a[27], &a[59]) + goldilocks.Butterfly(&a[28], &a[60]) + goldilocks.Butterfly(&a[29], &a[61]) + goldilocks.Butterfly(&a[30], &a[62]) + goldilocks.Butterfly(&a[31], &a[63]) + a[16].Mul(&a[16], &twiddlesCoset[1]) + a[17].Mul(&a[17], &twiddlesCoset[1]) + a[18].Mul(&a[18], &twiddlesCoset[1]) + a[19].Mul(&a[19], &twiddlesCoset[1]) + a[20].Mul(&a[20], &twiddlesCoset[1]) + a[21].Mul(&a[21], &twiddlesCoset[1]) + a[22].Mul(&a[22], &twiddlesCoset[1]) + a[23].Mul(&a[23], &twiddlesCoset[1]) + a[24].Mul(&a[24], &twiddlesCoset[1]) + a[25].Mul(&a[25], &twiddlesCoset[1]) + a[26].Mul(&a[26], &twiddlesCoset[1]) + a[27].Mul(&a[27], &twiddlesCoset[1]) + a[28].Mul(&a[28], &twiddlesCoset[1]) + a[29].Mul(&a[29], &twiddlesCoset[1]) + a[30].Mul(&a[30], &twiddlesCoset[1]) + a[31].Mul(&a[31], &twiddlesCoset[1]) + a[48].Mul(&a[48], &twiddlesCoset[2]) + a[49].Mul(&a[49], &twiddlesCoset[2]) + a[50].Mul(&a[50], &twiddlesCoset[2]) + a[51].Mul(&a[51], &twiddlesCoset[2]) + a[52].Mul(&a[52], &twiddlesCoset[2]) + a[53].Mul(&a[53], &twiddlesCoset[2]) + a[54].Mul(&a[54], &twiddlesCoset[2]) + a[55].Mul(&a[55], &twiddlesCoset[2]) + a[56].Mul(&a[56], &twiddlesCoset[2]) + 
a[57].Mul(&a[57], &twiddlesCoset[2]) + a[58].Mul(&a[58], &twiddlesCoset[2]) + a[59].Mul(&a[59], &twiddlesCoset[2]) + a[60].Mul(&a[60], &twiddlesCoset[2]) + a[61].Mul(&a[61], &twiddlesCoset[2]) + a[62].Mul(&a[62], &twiddlesCoset[2]) + a[63].Mul(&a[63], &twiddlesCoset[2]) + goldilocks.Butterfly(&a[0], &a[16]) + goldilocks.Butterfly(&a[1], &a[17]) + goldilocks.Butterfly(&a[2], &a[18]) + goldilocks.Butterfly(&a[3], &a[19]) + goldilocks.Butterfly(&a[4], &a[20]) + goldilocks.Butterfly(&a[5], &a[21]) + goldilocks.Butterfly(&a[6], &a[22]) + goldilocks.Butterfly(&a[7], &a[23]) + goldilocks.Butterfly(&a[8], &a[24]) + goldilocks.Butterfly(&a[9], &a[25]) + goldilocks.Butterfly(&a[10], &a[26]) + goldilocks.Butterfly(&a[11], &a[27]) + goldilocks.Butterfly(&a[12], &a[28]) + goldilocks.Butterfly(&a[13], &a[29]) + goldilocks.Butterfly(&a[14], &a[30]) + goldilocks.Butterfly(&a[15], &a[31]) + goldilocks.Butterfly(&a[32], &a[48]) + goldilocks.Butterfly(&a[33], &a[49]) + goldilocks.Butterfly(&a[34], &a[50]) + goldilocks.Butterfly(&a[35], &a[51]) + goldilocks.Butterfly(&a[36], &a[52]) + goldilocks.Butterfly(&a[37], &a[53]) + goldilocks.Butterfly(&a[38], &a[54]) + goldilocks.Butterfly(&a[39], &a[55]) + goldilocks.Butterfly(&a[40], &a[56]) + goldilocks.Butterfly(&a[41], &a[57]) + goldilocks.Butterfly(&a[42], &a[58]) + goldilocks.Butterfly(&a[43], &a[59]) + goldilocks.Butterfly(&a[44], &a[60]) + goldilocks.Butterfly(&a[45], &a[61]) + goldilocks.Butterfly(&a[46], &a[62]) + goldilocks.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddlesCoset[3]) + a[9].Mul(&a[9], &twiddlesCoset[3]) + a[10].Mul(&a[10], &twiddlesCoset[3]) + a[11].Mul(&a[11], &twiddlesCoset[3]) + a[12].Mul(&a[12], &twiddlesCoset[3]) + a[13].Mul(&a[13], &twiddlesCoset[3]) + a[14].Mul(&a[14], &twiddlesCoset[3]) + a[15].Mul(&a[15], &twiddlesCoset[3]) + a[24].Mul(&a[24], &twiddlesCoset[4]) + a[25].Mul(&a[25], &twiddlesCoset[4]) + a[26].Mul(&a[26], &twiddlesCoset[4]) + a[27].Mul(&a[27], &twiddlesCoset[4]) + a[28].Mul(&a[28], 
&twiddlesCoset[4]) + a[29].Mul(&a[29], &twiddlesCoset[4]) + a[30].Mul(&a[30], &twiddlesCoset[4]) + a[31].Mul(&a[31], &twiddlesCoset[4]) + a[40].Mul(&a[40], &twiddlesCoset[5]) + a[41].Mul(&a[41], &twiddlesCoset[5]) + a[42].Mul(&a[42], &twiddlesCoset[5]) + a[43].Mul(&a[43], &twiddlesCoset[5]) + a[44].Mul(&a[44], &twiddlesCoset[5]) + a[45].Mul(&a[45], &twiddlesCoset[5]) + a[46].Mul(&a[46], &twiddlesCoset[5]) + a[47].Mul(&a[47], &twiddlesCoset[5]) + a[56].Mul(&a[56], &twiddlesCoset[6]) + a[57].Mul(&a[57], &twiddlesCoset[6]) + a[58].Mul(&a[58], &twiddlesCoset[6]) + a[59].Mul(&a[59], &twiddlesCoset[6]) + a[60].Mul(&a[60], &twiddlesCoset[6]) + a[61].Mul(&a[61], &twiddlesCoset[6]) + a[62].Mul(&a[62], &twiddlesCoset[6]) + a[63].Mul(&a[63], &twiddlesCoset[6]) + goldilocks.Butterfly(&a[0], &a[8]) + goldilocks.Butterfly(&a[1], &a[9]) + goldilocks.Butterfly(&a[2], &a[10]) + goldilocks.Butterfly(&a[3], &a[11]) + goldilocks.Butterfly(&a[4], &a[12]) + goldilocks.Butterfly(&a[5], &a[13]) + goldilocks.Butterfly(&a[6], &a[14]) + goldilocks.Butterfly(&a[7], &a[15]) + goldilocks.Butterfly(&a[16], &a[24]) + goldilocks.Butterfly(&a[17], &a[25]) + goldilocks.Butterfly(&a[18], &a[26]) + goldilocks.Butterfly(&a[19], &a[27]) + goldilocks.Butterfly(&a[20], &a[28]) + goldilocks.Butterfly(&a[21], &a[29]) + goldilocks.Butterfly(&a[22], &a[30]) + goldilocks.Butterfly(&a[23], &a[31]) + goldilocks.Butterfly(&a[32], &a[40]) + goldilocks.Butterfly(&a[33], &a[41]) + goldilocks.Butterfly(&a[34], &a[42]) + goldilocks.Butterfly(&a[35], &a[43]) + goldilocks.Butterfly(&a[36], &a[44]) + goldilocks.Butterfly(&a[37], &a[45]) + goldilocks.Butterfly(&a[38], &a[46]) + goldilocks.Butterfly(&a[39], &a[47]) + goldilocks.Butterfly(&a[48], &a[56]) + goldilocks.Butterfly(&a[49], &a[57]) + goldilocks.Butterfly(&a[50], &a[58]) + goldilocks.Butterfly(&a[51], &a[59]) + goldilocks.Butterfly(&a[52], &a[60]) + goldilocks.Butterfly(&a[53], &a[61]) + goldilocks.Butterfly(&a[54], &a[62]) + goldilocks.Butterfly(&a[55], &a[63]) + 
a[4].Mul(&a[4], &twiddlesCoset[7]) + a[5].Mul(&a[5], &twiddlesCoset[7]) + a[6].Mul(&a[6], &twiddlesCoset[7]) + a[7].Mul(&a[7], &twiddlesCoset[7]) + a[12].Mul(&a[12], &twiddlesCoset[8]) + a[13].Mul(&a[13], &twiddlesCoset[8]) + a[14].Mul(&a[14], &twiddlesCoset[8]) + a[15].Mul(&a[15], &twiddlesCoset[8]) + a[20].Mul(&a[20], &twiddlesCoset[9]) + a[21].Mul(&a[21], &twiddlesCoset[9]) + a[22].Mul(&a[22], &twiddlesCoset[9]) + a[23].Mul(&a[23], &twiddlesCoset[9]) + a[28].Mul(&a[28], &twiddlesCoset[10]) + a[29].Mul(&a[29], &twiddlesCoset[10]) + a[30].Mul(&a[30], &twiddlesCoset[10]) + a[31].Mul(&a[31], &twiddlesCoset[10]) + a[36].Mul(&a[36], &twiddlesCoset[11]) + a[37].Mul(&a[37], &twiddlesCoset[11]) + a[38].Mul(&a[38], &twiddlesCoset[11]) + a[39].Mul(&a[39], &twiddlesCoset[11]) + a[44].Mul(&a[44], &twiddlesCoset[12]) + a[45].Mul(&a[45], &twiddlesCoset[12]) + a[46].Mul(&a[46], &twiddlesCoset[12]) + a[47].Mul(&a[47], &twiddlesCoset[12]) + a[52].Mul(&a[52], &twiddlesCoset[13]) + a[53].Mul(&a[53], &twiddlesCoset[13]) + a[54].Mul(&a[54], &twiddlesCoset[13]) + a[55].Mul(&a[55], &twiddlesCoset[13]) + a[60].Mul(&a[60], &twiddlesCoset[14]) + a[61].Mul(&a[61], &twiddlesCoset[14]) + a[62].Mul(&a[62], &twiddlesCoset[14]) + a[63].Mul(&a[63], &twiddlesCoset[14]) + goldilocks.Butterfly(&a[0], &a[4]) + goldilocks.Butterfly(&a[1], &a[5]) + goldilocks.Butterfly(&a[2], &a[6]) + goldilocks.Butterfly(&a[3], &a[7]) + goldilocks.Butterfly(&a[8], &a[12]) + goldilocks.Butterfly(&a[9], &a[13]) + goldilocks.Butterfly(&a[10], &a[14]) + goldilocks.Butterfly(&a[11], &a[15]) + goldilocks.Butterfly(&a[16], &a[20]) + goldilocks.Butterfly(&a[17], &a[21]) + goldilocks.Butterfly(&a[18], &a[22]) + goldilocks.Butterfly(&a[19], &a[23]) + goldilocks.Butterfly(&a[24], &a[28]) + goldilocks.Butterfly(&a[25], &a[29]) + goldilocks.Butterfly(&a[26], &a[30]) + goldilocks.Butterfly(&a[27], &a[31]) + goldilocks.Butterfly(&a[32], &a[36]) + goldilocks.Butterfly(&a[33], &a[37]) + goldilocks.Butterfly(&a[34], &a[38]) + 
goldilocks.Butterfly(&a[35], &a[39]) + goldilocks.Butterfly(&a[40], &a[44]) + goldilocks.Butterfly(&a[41], &a[45]) + goldilocks.Butterfly(&a[42], &a[46]) + goldilocks.Butterfly(&a[43], &a[47]) + goldilocks.Butterfly(&a[48], &a[52]) + goldilocks.Butterfly(&a[49], &a[53]) + goldilocks.Butterfly(&a[50], &a[54]) + goldilocks.Butterfly(&a[51], &a[55]) + goldilocks.Butterfly(&a[56], &a[60]) + goldilocks.Butterfly(&a[57], &a[61]) + goldilocks.Butterfly(&a[58], &a[62]) + goldilocks.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddlesCoset[15]) + a[3].Mul(&a[3], &twiddlesCoset[15]) + a[6].Mul(&a[6], &twiddlesCoset[16]) + a[7].Mul(&a[7], &twiddlesCoset[16]) + a[10].Mul(&a[10], &twiddlesCoset[17]) + a[11].Mul(&a[11], &twiddlesCoset[17]) + a[14].Mul(&a[14], &twiddlesCoset[18]) + a[15].Mul(&a[15], &twiddlesCoset[18]) + a[18].Mul(&a[18], &twiddlesCoset[19]) + a[19].Mul(&a[19], &twiddlesCoset[19]) + a[22].Mul(&a[22], &twiddlesCoset[20]) + a[23].Mul(&a[23], &twiddlesCoset[20]) + a[26].Mul(&a[26], &twiddlesCoset[21]) + a[27].Mul(&a[27], &twiddlesCoset[21]) + a[30].Mul(&a[30], &twiddlesCoset[22]) + a[31].Mul(&a[31], &twiddlesCoset[22]) + a[34].Mul(&a[34], &twiddlesCoset[23]) + a[35].Mul(&a[35], &twiddlesCoset[23]) + a[38].Mul(&a[38], &twiddlesCoset[24]) + a[39].Mul(&a[39], &twiddlesCoset[24]) + a[42].Mul(&a[42], &twiddlesCoset[25]) + a[43].Mul(&a[43], &twiddlesCoset[25]) + a[46].Mul(&a[46], &twiddlesCoset[26]) + a[47].Mul(&a[47], &twiddlesCoset[26]) + a[50].Mul(&a[50], &twiddlesCoset[27]) + a[51].Mul(&a[51], &twiddlesCoset[27]) + a[54].Mul(&a[54], &twiddlesCoset[28]) + a[55].Mul(&a[55], &twiddlesCoset[28]) + a[58].Mul(&a[58], &twiddlesCoset[29]) + a[59].Mul(&a[59], &twiddlesCoset[29]) + a[62].Mul(&a[62], &twiddlesCoset[30]) + a[63].Mul(&a[63], &twiddlesCoset[30]) + goldilocks.Butterfly(&a[0], &a[2]) + goldilocks.Butterfly(&a[1], &a[3]) + goldilocks.Butterfly(&a[4], &a[6]) + goldilocks.Butterfly(&a[5], &a[7]) + goldilocks.Butterfly(&a[8], &a[10]) + goldilocks.Butterfly(&a[9], 
&a[11]) + goldilocks.Butterfly(&a[12], &a[14]) + goldilocks.Butterfly(&a[13], &a[15]) + goldilocks.Butterfly(&a[16], &a[18]) + goldilocks.Butterfly(&a[17], &a[19]) + goldilocks.Butterfly(&a[20], &a[22]) + goldilocks.Butterfly(&a[21], &a[23]) + goldilocks.Butterfly(&a[24], &a[26]) + goldilocks.Butterfly(&a[25], &a[27]) + goldilocks.Butterfly(&a[28], &a[30]) + goldilocks.Butterfly(&a[29], &a[31]) + goldilocks.Butterfly(&a[32], &a[34]) + goldilocks.Butterfly(&a[33], &a[35]) + goldilocks.Butterfly(&a[36], &a[38]) + goldilocks.Butterfly(&a[37], &a[39]) + goldilocks.Butterfly(&a[40], &a[42]) + goldilocks.Butterfly(&a[41], &a[43]) + goldilocks.Butterfly(&a[44], &a[46]) + goldilocks.Butterfly(&a[45], &a[47]) + goldilocks.Butterfly(&a[48], &a[50]) + goldilocks.Butterfly(&a[49], &a[51]) + goldilocks.Butterfly(&a[52], &a[54]) + goldilocks.Butterfly(&a[53], &a[55]) + goldilocks.Butterfly(&a[56], &a[58]) + goldilocks.Butterfly(&a[57], &a[59]) + goldilocks.Butterfly(&a[60], &a[62]) + goldilocks.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddlesCoset[31]) + a[3].Mul(&a[3], &twiddlesCoset[32]) + a[5].Mul(&a[5], &twiddlesCoset[33]) + a[7].Mul(&a[7], &twiddlesCoset[34]) + a[9].Mul(&a[9], &twiddlesCoset[35]) + a[11].Mul(&a[11], &twiddlesCoset[36]) + a[13].Mul(&a[13], &twiddlesCoset[37]) + a[15].Mul(&a[15], &twiddlesCoset[38]) + a[17].Mul(&a[17], &twiddlesCoset[39]) + a[19].Mul(&a[19], &twiddlesCoset[40]) + a[21].Mul(&a[21], &twiddlesCoset[41]) + a[23].Mul(&a[23], &twiddlesCoset[42]) + a[25].Mul(&a[25], &twiddlesCoset[43]) + a[27].Mul(&a[27], &twiddlesCoset[44]) + a[29].Mul(&a[29], &twiddlesCoset[45]) + a[31].Mul(&a[31], &twiddlesCoset[46]) + a[33].Mul(&a[33], &twiddlesCoset[47]) + a[35].Mul(&a[35], &twiddlesCoset[48]) + a[37].Mul(&a[37], &twiddlesCoset[49]) + a[39].Mul(&a[39], &twiddlesCoset[50]) + a[41].Mul(&a[41], &twiddlesCoset[51]) + a[43].Mul(&a[43], &twiddlesCoset[52]) + a[45].Mul(&a[45], &twiddlesCoset[53]) + a[47].Mul(&a[47], &twiddlesCoset[54]) + a[49].Mul(&a[49], 
&twiddlesCoset[55]) + a[51].Mul(&a[51], &twiddlesCoset[56]) + a[53].Mul(&a[53], &twiddlesCoset[57]) + a[55].Mul(&a[55], &twiddlesCoset[58]) + a[57].Mul(&a[57], &twiddlesCoset[59]) + a[59].Mul(&a[59], &twiddlesCoset[60]) + a[61].Mul(&a[61], &twiddlesCoset[61]) + a[63].Mul(&a[63], &twiddlesCoset[62]) + goldilocks.Butterfly(&a[0], &a[1]) + goldilocks.Butterfly(&a[2], &a[3]) + goldilocks.Butterfly(&a[4], &a[5]) + goldilocks.Butterfly(&a[6], &a[7]) + goldilocks.Butterfly(&a[8], &a[9]) + goldilocks.Butterfly(&a[10], &a[11]) + goldilocks.Butterfly(&a[12], &a[13]) + goldilocks.Butterfly(&a[14], &a[15]) + goldilocks.Butterfly(&a[16], &a[17]) + goldilocks.Butterfly(&a[18], &a[19]) + goldilocks.Butterfly(&a[20], &a[21]) + goldilocks.Butterfly(&a[22], &a[23]) + goldilocks.Butterfly(&a[24], &a[25]) + goldilocks.Butterfly(&a[26], &a[27]) + goldilocks.Butterfly(&a[28], &a[29]) + goldilocks.Butterfly(&a[30], &a[31]) + goldilocks.Butterfly(&a[32], &a[33]) + goldilocks.Butterfly(&a[34], &a[35]) + goldilocks.Butterfly(&a[36], &a[37]) + goldilocks.Butterfly(&a[38], &a[39]) + goldilocks.Butterfly(&a[40], &a[41]) + goldilocks.Butterfly(&a[42], &a[43]) + goldilocks.Butterfly(&a[44], &a[45]) + goldilocks.Butterfly(&a[46], &a[47]) + goldilocks.Butterfly(&a[48], &a[49]) + goldilocks.Butterfly(&a[50], &a[51]) + goldilocks.Butterfly(&a[52], &a[53]) + goldilocks.Butterfly(&a[54], &a[55]) + goldilocks.Butterfly(&a[56], &a[57]) + goldilocks.Butterfly(&a[58], &a[59]) + goldilocks.Butterfly(&a[60], &a[61]) + goldilocks.Butterfly(&a[62], &a[63]) +} + +// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table +// it then return all elements in the correct order for the unrolled FFT. 
+func PrecomputeTwiddlesCoset(generator, shifter goldilocks.Element) []goldilocks.Element { + toReturn := make([]goldilocks.Element, 63) + var r, s goldilocks.Element + e := new(big.Int) + + s = shifter + for k := 0; k < 5; k++ { + s.Square(&s) + } + toReturn[0] = s + s = shifter + for k := 0; k < 4; k++ { + s.Square(&s) + } + toReturn[1] = s + r.Exp(generator, e.SetUint64(uint64(1<<4*1))) + toReturn[2].Mul(&r, &s) + s = shifter + for k := 0; k < 3; k++ { + s.Square(&s) + } + toReturn[3] = s + r.Exp(generator, e.SetUint64(uint64(1<<3*2))) + toReturn[4].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<3*1))) + toReturn[5].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<3*3))) + toReturn[6].Mul(&r, &s) + s = shifter + for k := 0; k < 2; k++ { + s.Square(&s) + } + toReturn[7] = s + r.Exp(generator, e.SetUint64(uint64(1<<2*4))) + toReturn[8].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*2))) + toReturn[9].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*6))) + toReturn[10].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*1))) + toReturn[11].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*5))) + toReturn[12].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*3))) + toReturn[13].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*7))) + toReturn[14].Mul(&r, &s) + s = shifter + for k := 0; k < 1; k++ { + s.Square(&s) + } + toReturn[15] = s + r.Exp(generator, e.SetUint64(uint64(1<<1*8))) + toReturn[16].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*4))) + toReturn[17].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*12))) + toReturn[18].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*2))) + toReturn[19].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*10))) + toReturn[20].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*6))) + toReturn[21].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*14))) + toReturn[22].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*1))) + toReturn[23].Mul(&r, 
&s) + r.Exp(generator, e.SetUint64(uint64(1<<1*9))) + toReturn[24].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*5))) + toReturn[25].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*13))) + toReturn[26].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*3))) + toReturn[27].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*11))) + toReturn[28].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*7))) + toReturn[29].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*15))) + toReturn[30].Mul(&r, &s) + s = shifter + for k := 0; k < 0; k++ { + s.Square(&s) + } + toReturn[31] = s + r.Exp(generator, e.SetUint64(uint64(1<<0*16))) + toReturn[32].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*8))) + toReturn[33].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*24))) + toReturn[34].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*4))) + toReturn[35].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*20))) + toReturn[36].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*12))) + toReturn[37].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*28))) + toReturn[38].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*2))) + toReturn[39].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*18))) + toReturn[40].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*10))) + toReturn[41].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*26))) + toReturn[42].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*6))) + toReturn[43].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*22))) + toReturn[44].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*14))) + toReturn[45].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*30))) + toReturn[46].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*1))) + toReturn[47].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*17))) + toReturn[48].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*9))) + toReturn[49].Mul(&r, &s) + r.Exp(generator, 
e.SetUint64(uint64(1<<0*25))) + toReturn[50].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*5))) + toReturn[51].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*21))) + toReturn[52].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*13))) + toReturn[53].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*29))) + toReturn[54].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*3))) + toReturn[55].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*19))) + toReturn[56].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*11))) + toReturn[57].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*27))) + toReturn[58].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*7))) + toReturn[59].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*23))) + toReturn[60].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*15))) + toReturn[61].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*31))) + toReturn[62].Mul(&r, &s) + return toReturn +} diff --git a/field/koalabear/fft/fft.go b/field/koalabear/fft/fft.go index 4c5e4ecfe9..c8f1d282ba 100644 --- a/field/koalabear/fft/fft.go +++ b/field/koalabear/fft/fft.go @@ -29,7 +29,8 @@ const butterflyThreshold = 16 // if decimation == DIT (decimation in time), the input must be in bit-reversed order // if decimation == DIF (decimation in frequency), the output will be in bit-reversed order func (domain *Domain) FFT(a []koalabear.Element, decimation Decimation, opts ...Option) { - + // perf note; this option pattern actually allocates on the heap and comes at a cost when + // doing many small FFTs! opt := fftOptions(opts...) 
// find the stage where we should stop spawning go routines in our recursive calls @@ -199,15 +200,9 @@ func difFFT(a []koalabear.Element, w koalabear.Element, twiddles [][]koalabear.E n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -290,15 +285,9 @@ func ditFFT(a []koalabear.Element, w koalabear.Element, twiddles [][]koalabear.E n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -426,39 +415,3 @@ func kerDITNP_256(a []koalabear.Element, twiddles [][]koalabear.Element, stage i } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_32(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - koalabear.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { - // code 
unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - koalabear.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index 21224cceed..cfba8a332c 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -37,6 +37,9 @@ type RSis struct { Domain *fft.Domain maxNbElementsToHash int + + smallFFT func([]koalabear.Element) + twiddlesCoset []koalabear.Element // used in conjunction with the smallFFT; } // NewRSis creates an instance of RSis. @@ -97,6 +100,18 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R maxNbElementsToHash: maxNbElementsToHash, } + r.smallFFT = func(p []koalabear.Element) { + r.Domain.FFT(p, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + } + + // if we have a FFT kernel of the size of the domain cardinality, we use it. 
+ if r.Domain.Cardinality == 64 { + r.twiddlesCoset = PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + r.smallFFT = func(a []koalabear.Element) { + FFT64(a, r.twiddlesCoset) + } + } + // filling A a := make([]koalabear.Element, n*r.Degree) ag := make([]koalabear.Element, n*r.Degree) @@ -171,7 +186,16 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k koalabear.Vector, polId int) { return } - r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // for perf, we use directly what's exposed; + r.smallFFT(k) + // k.Mul(k, fr.Vector(r.cosetTable)) + // if r.Domain.KernelDIF != nil { + // r.Domain.KernelDIF(k) + // } else { + // r.Domain.FFT(k, fft.DIF, fft.WithNbTasks(1)) + // } + mulModAcc(res, r.Ag[polId], k) } diff --git a/field/koalabear/sis/sis_fft.go b/field/koalabear/sis/sis_fft.go new file mode 100644 index 0000000000..7706135c9c --- /dev/null +++ b/field/koalabear/sis/sis_fft.go @@ -0,0 +1,556 @@ +// Copyright 2020-2025 Consensys Software Inc. +// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package sis + +import ( + "github.com/consensys/gnark-crypto/field/koalabear" + "math/big" +) + +// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements +// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) +// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset +func FFT64(a []koalabear.Element, twiddlesCoset []koalabear.Element) { + + a[32].Mul(&a[32], &twiddlesCoset[0]) + a[33].Mul(&a[33], &twiddlesCoset[0]) + a[34].Mul(&a[34], &twiddlesCoset[0]) + a[35].Mul(&a[35], &twiddlesCoset[0]) + a[36].Mul(&a[36], &twiddlesCoset[0]) + a[37].Mul(&a[37], &twiddlesCoset[0]) + a[38].Mul(&a[38], &twiddlesCoset[0]) + a[39].Mul(&a[39], &twiddlesCoset[0]) + a[40].Mul(&a[40], &twiddlesCoset[0]) + a[41].Mul(&a[41], &twiddlesCoset[0]) + a[42].Mul(&a[42], &twiddlesCoset[0]) + a[43].Mul(&a[43], &twiddlesCoset[0]) + a[44].Mul(&a[44], &twiddlesCoset[0]) + a[45].Mul(&a[45], &twiddlesCoset[0]) + a[46].Mul(&a[46], &twiddlesCoset[0]) + a[47].Mul(&a[47], &twiddlesCoset[0]) + a[48].Mul(&a[48], &twiddlesCoset[0]) + a[49].Mul(&a[49], &twiddlesCoset[0]) + a[50].Mul(&a[50], &twiddlesCoset[0]) + a[51].Mul(&a[51], &twiddlesCoset[0]) + a[52].Mul(&a[52], &twiddlesCoset[0]) + a[53].Mul(&a[53], &twiddlesCoset[0]) + a[54].Mul(&a[54], &twiddlesCoset[0]) + a[55].Mul(&a[55], &twiddlesCoset[0]) + a[56].Mul(&a[56], &twiddlesCoset[0]) + a[57].Mul(&a[57], &twiddlesCoset[0]) + a[58].Mul(&a[58], &twiddlesCoset[0]) + a[59].Mul(&a[59], &twiddlesCoset[0]) + a[60].Mul(&a[60], &twiddlesCoset[0]) + a[61].Mul(&a[61], &twiddlesCoset[0]) + a[62].Mul(&a[62], &twiddlesCoset[0]) + a[63].Mul(&a[63], &twiddlesCoset[0]) + koalabear.Butterfly(&a[0], &a[32]) + koalabear.Butterfly(&a[1], &a[33]) + koalabear.Butterfly(&a[2], &a[34]) + koalabear.Butterfly(&a[3], &a[35]) + koalabear.Butterfly(&a[4], &a[36]) + koalabear.Butterfly(&a[5], &a[37]) + 
koalabear.Butterfly(&a[6], &a[38]) + koalabear.Butterfly(&a[7], &a[39]) + koalabear.Butterfly(&a[8], &a[40]) + koalabear.Butterfly(&a[9], &a[41]) + koalabear.Butterfly(&a[10], &a[42]) + koalabear.Butterfly(&a[11], &a[43]) + koalabear.Butterfly(&a[12], &a[44]) + koalabear.Butterfly(&a[13], &a[45]) + koalabear.Butterfly(&a[14], &a[46]) + koalabear.Butterfly(&a[15], &a[47]) + koalabear.Butterfly(&a[16], &a[48]) + koalabear.Butterfly(&a[17], &a[49]) + koalabear.Butterfly(&a[18], &a[50]) + koalabear.Butterfly(&a[19], &a[51]) + koalabear.Butterfly(&a[20], &a[52]) + koalabear.Butterfly(&a[21], &a[53]) + koalabear.Butterfly(&a[22], &a[54]) + koalabear.Butterfly(&a[23], &a[55]) + koalabear.Butterfly(&a[24], &a[56]) + koalabear.Butterfly(&a[25], &a[57]) + koalabear.Butterfly(&a[26], &a[58]) + koalabear.Butterfly(&a[27], &a[59]) + koalabear.Butterfly(&a[28], &a[60]) + koalabear.Butterfly(&a[29], &a[61]) + koalabear.Butterfly(&a[30], &a[62]) + koalabear.Butterfly(&a[31], &a[63]) + a[16].Mul(&a[16], &twiddlesCoset[1]) + a[17].Mul(&a[17], &twiddlesCoset[1]) + a[18].Mul(&a[18], &twiddlesCoset[1]) + a[19].Mul(&a[19], &twiddlesCoset[1]) + a[20].Mul(&a[20], &twiddlesCoset[1]) + a[21].Mul(&a[21], &twiddlesCoset[1]) + a[22].Mul(&a[22], &twiddlesCoset[1]) + a[23].Mul(&a[23], &twiddlesCoset[1]) + a[24].Mul(&a[24], &twiddlesCoset[1]) + a[25].Mul(&a[25], &twiddlesCoset[1]) + a[26].Mul(&a[26], &twiddlesCoset[1]) + a[27].Mul(&a[27], &twiddlesCoset[1]) + a[28].Mul(&a[28], &twiddlesCoset[1]) + a[29].Mul(&a[29], &twiddlesCoset[1]) + a[30].Mul(&a[30], &twiddlesCoset[1]) + a[31].Mul(&a[31], &twiddlesCoset[1]) + a[48].Mul(&a[48], &twiddlesCoset[2]) + a[49].Mul(&a[49], &twiddlesCoset[2]) + a[50].Mul(&a[50], &twiddlesCoset[2]) + a[51].Mul(&a[51], &twiddlesCoset[2]) + a[52].Mul(&a[52], &twiddlesCoset[2]) + a[53].Mul(&a[53], &twiddlesCoset[2]) + a[54].Mul(&a[54], &twiddlesCoset[2]) + a[55].Mul(&a[55], &twiddlesCoset[2]) + a[56].Mul(&a[56], &twiddlesCoset[2]) + a[57].Mul(&a[57], &twiddlesCoset[2]) + 
a[58].Mul(&a[58], &twiddlesCoset[2]) + a[59].Mul(&a[59], &twiddlesCoset[2]) + a[60].Mul(&a[60], &twiddlesCoset[2]) + a[61].Mul(&a[61], &twiddlesCoset[2]) + a[62].Mul(&a[62], &twiddlesCoset[2]) + a[63].Mul(&a[63], &twiddlesCoset[2]) + koalabear.Butterfly(&a[0], &a[16]) + koalabear.Butterfly(&a[1], &a[17]) + koalabear.Butterfly(&a[2], &a[18]) + koalabear.Butterfly(&a[3], &a[19]) + koalabear.Butterfly(&a[4], &a[20]) + koalabear.Butterfly(&a[5], &a[21]) + koalabear.Butterfly(&a[6], &a[22]) + koalabear.Butterfly(&a[7], &a[23]) + koalabear.Butterfly(&a[8], &a[24]) + koalabear.Butterfly(&a[9], &a[25]) + koalabear.Butterfly(&a[10], &a[26]) + koalabear.Butterfly(&a[11], &a[27]) + koalabear.Butterfly(&a[12], &a[28]) + koalabear.Butterfly(&a[13], &a[29]) + koalabear.Butterfly(&a[14], &a[30]) + koalabear.Butterfly(&a[15], &a[31]) + koalabear.Butterfly(&a[32], &a[48]) + koalabear.Butterfly(&a[33], &a[49]) + koalabear.Butterfly(&a[34], &a[50]) + koalabear.Butterfly(&a[35], &a[51]) + koalabear.Butterfly(&a[36], &a[52]) + koalabear.Butterfly(&a[37], &a[53]) + koalabear.Butterfly(&a[38], &a[54]) + koalabear.Butterfly(&a[39], &a[55]) + koalabear.Butterfly(&a[40], &a[56]) + koalabear.Butterfly(&a[41], &a[57]) + koalabear.Butterfly(&a[42], &a[58]) + koalabear.Butterfly(&a[43], &a[59]) + koalabear.Butterfly(&a[44], &a[60]) + koalabear.Butterfly(&a[45], &a[61]) + koalabear.Butterfly(&a[46], &a[62]) + koalabear.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddlesCoset[3]) + a[9].Mul(&a[9], &twiddlesCoset[3]) + a[10].Mul(&a[10], &twiddlesCoset[3]) + a[11].Mul(&a[11], &twiddlesCoset[3]) + a[12].Mul(&a[12], &twiddlesCoset[3]) + a[13].Mul(&a[13], &twiddlesCoset[3]) + a[14].Mul(&a[14], &twiddlesCoset[3]) + a[15].Mul(&a[15], &twiddlesCoset[3]) + a[24].Mul(&a[24], &twiddlesCoset[4]) + a[25].Mul(&a[25], &twiddlesCoset[4]) + a[26].Mul(&a[26], &twiddlesCoset[4]) + a[27].Mul(&a[27], &twiddlesCoset[4]) + a[28].Mul(&a[28], &twiddlesCoset[4]) + a[29].Mul(&a[29], &twiddlesCoset[4]) + a[30].Mul(&a[30], 
&twiddlesCoset[4]) + a[31].Mul(&a[31], &twiddlesCoset[4]) + a[40].Mul(&a[40], &twiddlesCoset[5]) + a[41].Mul(&a[41], &twiddlesCoset[5]) + a[42].Mul(&a[42], &twiddlesCoset[5]) + a[43].Mul(&a[43], &twiddlesCoset[5]) + a[44].Mul(&a[44], &twiddlesCoset[5]) + a[45].Mul(&a[45], &twiddlesCoset[5]) + a[46].Mul(&a[46], &twiddlesCoset[5]) + a[47].Mul(&a[47], &twiddlesCoset[5]) + a[56].Mul(&a[56], &twiddlesCoset[6]) + a[57].Mul(&a[57], &twiddlesCoset[6]) + a[58].Mul(&a[58], &twiddlesCoset[6]) + a[59].Mul(&a[59], &twiddlesCoset[6]) + a[60].Mul(&a[60], &twiddlesCoset[6]) + a[61].Mul(&a[61], &twiddlesCoset[6]) + a[62].Mul(&a[62], &twiddlesCoset[6]) + a[63].Mul(&a[63], &twiddlesCoset[6]) + koalabear.Butterfly(&a[0], &a[8]) + koalabear.Butterfly(&a[1], &a[9]) + koalabear.Butterfly(&a[2], &a[10]) + koalabear.Butterfly(&a[3], &a[11]) + koalabear.Butterfly(&a[4], &a[12]) + koalabear.Butterfly(&a[5], &a[13]) + koalabear.Butterfly(&a[6], &a[14]) + koalabear.Butterfly(&a[7], &a[15]) + koalabear.Butterfly(&a[16], &a[24]) + koalabear.Butterfly(&a[17], &a[25]) + koalabear.Butterfly(&a[18], &a[26]) + koalabear.Butterfly(&a[19], &a[27]) + koalabear.Butterfly(&a[20], &a[28]) + koalabear.Butterfly(&a[21], &a[29]) + koalabear.Butterfly(&a[22], &a[30]) + koalabear.Butterfly(&a[23], &a[31]) + koalabear.Butterfly(&a[32], &a[40]) + koalabear.Butterfly(&a[33], &a[41]) + koalabear.Butterfly(&a[34], &a[42]) + koalabear.Butterfly(&a[35], &a[43]) + koalabear.Butterfly(&a[36], &a[44]) + koalabear.Butterfly(&a[37], &a[45]) + koalabear.Butterfly(&a[38], &a[46]) + koalabear.Butterfly(&a[39], &a[47]) + koalabear.Butterfly(&a[48], &a[56]) + koalabear.Butterfly(&a[49], &a[57]) + koalabear.Butterfly(&a[50], &a[58]) + koalabear.Butterfly(&a[51], &a[59]) + koalabear.Butterfly(&a[52], &a[60]) + koalabear.Butterfly(&a[53], &a[61]) + koalabear.Butterfly(&a[54], &a[62]) + koalabear.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddlesCoset[7]) + a[5].Mul(&a[5], &twiddlesCoset[7]) + a[6].Mul(&a[6], &twiddlesCoset[7]) 
+ a[7].Mul(&a[7], &twiddlesCoset[7]) + a[12].Mul(&a[12], &twiddlesCoset[8]) + a[13].Mul(&a[13], &twiddlesCoset[8]) + a[14].Mul(&a[14], &twiddlesCoset[8]) + a[15].Mul(&a[15], &twiddlesCoset[8]) + a[20].Mul(&a[20], &twiddlesCoset[9]) + a[21].Mul(&a[21], &twiddlesCoset[9]) + a[22].Mul(&a[22], &twiddlesCoset[9]) + a[23].Mul(&a[23], &twiddlesCoset[9]) + a[28].Mul(&a[28], &twiddlesCoset[10]) + a[29].Mul(&a[29], &twiddlesCoset[10]) + a[30].Mul(&a[30], &twiddlesCoset[10]) + a[31].Mul(&a[31], &twiddlesCoset[10]) + a[36].Mul(&a[36], &twiddlesCoset[11]) + a[37].Mul(&a[37], &twiddlesCoset[11]) + a[38].Mul(&a[38], &twiddlesCoset[11]) + a[39].Mul(&a[39], &twiddlesCoset[11]) + a[44].Mul(&a[44], &twiddlesCoset[12]) + a[45].Mul(&a[45], &twiddlesCoset[12]) + a[46].Mul(&a[46], &twiddlesCoset[12]) + a[47].Mul(&a[47], &twiddlesCoset[12]) + a[52].Mul(&a[52], &twiddlesCoset[13]) + a[53].Mul(&a[53], &twiddlesCoset[13]) + a[54].Mul(&a[54], &twiddlesCoset[13]) + a[55].Mul(&a[55], &twiddlesCoset[13]) + a[60].Mul(&a[60], &twiddlesCoset[14]) + a[61].Mul(&a[61], &twiddlesCoset[14]) + a[62].Mul(&a[62], &twiddlesCoset[14]) + a[63].Mul(&a[63], &twiddlesCoset[14]) + koalabear.Butterfly(&a[0], &a[4]) + koalabear.Butterfly(&a[1], &a[5]) + koalabear.Butterfly(&a[2], &a[6]) + koalabear.Butterfly(&a[3], &a[7]) + koalabear.Butterfly(&a[8], &a[12]) + koalabear.Butterfly(&a[9], &a[13]) + koalabear.Butterfly(&a[10], &a[14]) + koalabear.Butterfly(&a[11], &a[15]) + koalabear.Butterfly(&a[16], &a[20]) + koalabear.Butterfly(&a[17], &a[21]) + koalabear.Butterfly(&a[18], &a[22]) + koalabear.Butterfly(&a[19], &a[23]) + koalabear.Butterfly(&a[24], &a[28]) + koalabear.Butterfly(&a[25], &a[29]) + koalabear.Butterfly(&a[26], &a[30]) + koalabear.Butterfly(&a[27], &a[31]) + koalabear.Butterfly(&a[32], &a[36]) + koalabear.Butterfly(&a[33], &a[37]) + koalabear.Butterfly(&a[34], &a[38]) + koalabear.Butterfly(&a[35], &a[39]) + koalabear.Butterfly(&a[40], &a[44]) + koalabear.Butterfly(&a[41], &a[45]) + 
koalabear.Butterfly(&a[42], &a[46]) + koalabear.Butterfly(&a[43], &a[47]) + koalabear.Butterfly(&a[48], &a[52]) + koalabear.Butterfly(&a[49], &a[53]) + koalabear.Butterfly(&a[50], &a[54]) + koalabear.Butterfly(&a[51], &a[55]) + koalabear.Butterfly(&a[56], &a[60]) + koalabear.Butterfly(&a[57], &a[61]) + koalabear.Butterfly(&a[58], &a[62]) + koalabear.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddlesCoset[15]) + a[3].Mul(&a[3], &twiddlesCoset[15]) + a[6].Mul(&a[6], &twiddlesCoset[16]) + a[7].Mul(&a[7], &twiddlesCoset[16]) + a[10].Mul(&a[10], &twiddlesCoset[17]) + a[11].Mul(&a[11], &twiddlesCoset[17]) + a[14].Mul(&a[14], &twiddlesCoset[18]) + a[15].Mul(&a[15], &twiddlesCoset[18]) + a[18].Mul(&a[18], &twiddlesCoset[19]) + a[19].Mul(&a[19], &twiddlesCoset[19]) + a[22].Mul(&a[22], &twiddlesCoset[20]) + a[23].Mul(&a[23], &twiddlesCoset[20]) + a[26].Mul(&a[26], &twiddlesCoset[21]) + a[27].Mul(&a[27], &twiddlesCoset[21]) + a[30].Mul(&a[30], &twiddlesCoset[22]) + a[31].Mul(&a[31], &twiddlesCoset[22]) + a[34].Mul(&a[34], &twiddlesCoset[23]) + a[35].Mul(&a[35], &twiddlesCoset[23]) + a[38].Mul(&a[38], &twiddlesCoset[24]) + a[39].Mul(&a[39], &twiddlesCoset[24]) + a[42].Mul(&a[42], &twiddlesCoset[25]) + a[43].Mul(&a[43], &twiddlesCoset[25]) + a[46].Mul(&a[46], &twiddlesCoset[26]) + a[47].Mul(&a[47], &twiddlesCoset[26]) + a[50].Mul(&a[50], &twiddlesCoset[27]) + a[51].Mul(&a[51], &twiddlesCoset[27]) + a[54].Mul(&a[54], &twiddlesCoset[28]) + a[55].Mul(&a[55], &twiddlesCoset[28]) + a[58].Mul(&a[58], &twiddlesCoset[29]) + a[59].Mul(&a[59], &twiddlesCoset[29]) + a[62].Mul(&a[62], &twiddlesCoset[30]) + a[63].Mul(&a[63], &twiddlesCoset[30]) + koalabear.Butterfly(&a[0], &a[2]) + koalabear.Butterfly(&a[1], &a[3]) + koalabear.Butterfly(&a[4], &a[6]) + koalabear.Butterfly(&a[5], &a[7]) + koalabear.Butterfly(&a[8], &a[10]) + koalabear.Butterfly(&a[9], &a[11]) + koalabear.Butterfly(&a[12], &a[14]) + koalabear.Butterfly(&a[13], &a[15]) + koalabear.Butterfly(&a[16], &a[18]) + 
koalabear.Butterfly(&a[17], &a[19]) + koalabear.Butterfly(&a[20], &a[22]) + koalabear.Butterfly(&a[21], &a[23]) + koalabear.Butterfly(&a[24], &a[26]) + koalabear.Butterfly(&a[25], &a[27]) + koalabear.Butterfly(&a[28], &a[30]) + koalabear.Butterfly(&a[29], &a[31]) + koalabear.Butterfly(&a[32], &a[34]) + koalabear.Butterfly(&a[33], &a[35]) + koalabear.Butterfly(&a[36], &a[38]) + koalabear.Butterfly(&a[37], &a[39]) + koalabear.Butterfly(&a[40], &a[42]) + koalabear.Butterfly(&a[41], &a[43]) + koalabear.Butterfly(&a[44], &a[46]) + koalabear.Butterfly(&a[45], &a[47]) + koalabear.Butterfly(&a[48], &a[50]) + koalabear.Butterfly(&a[49], &a[51]) + koalabear.Butterfly(&a[52], &a[54]) + koalabear.Butterfly(&a[53], &a[55]) + koalabear.Butterfly(&a[56], &a[58]) + koalabear.Butterfly(&a[57], &a[59]) + koalabear.Butterfly(&a[60], &a[62]) + koalabear.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddlesCoset[31]) + a[3].Mul(&a[3], &twiddlesCoset[32]) + a[5].Mul(&a[5], &twiddlesCoset[33]) + a[7].Mul(&a[7], &twiddlesCoset[34]) + a[9].Mul(&a[9], &twiddlesCoset[35]) + a[11].Mul(&a[11], &twiddlesCoset[36]) + a[13].Mul(&a[13], &twiddlesCoset[37]) + a[15].Mul(&a[15], &twiddlesCoset[38]) + a[17].Mul(&a[17], &twiddlesCoset[39]) + a[19].Mul(&a[19], &twiddlesCoset[40]) + a[21].Mul(&a[21], &twiddlesCoset[41]) + a[23].Mul(&a[23], &twiddlesCoset[42]) + a[25].Mul(&a[25], &twiddlesCoset[43]) + a[27].Mul(&a[27], &twiddlesCoset[44]) + a[29].Mul(&a[29], &twiddlesCoset[45]) + a[31].Mul(&a[31], &twiddlesCoset[46]) + a[33].Mul(&a[33], &twiddlesCoset[47]) + a[35].Mul(&a[35], &twiddlesCoset[48]) + a[37].Mul(&a[37], &twiddlesCoset[49]) + a[39].Mul(&a[39], &twiddlesCoset[50]) + a[41].Mul(&a[41], &twiddlesCoset[51]) + a[43].Mul(&a[43], &twiddlesCoset[52]) + a[45].Mul(&a[45], &twiddlesCoset[53]) + a[47].Mul(&a[47], &twiddlesCoset[54]) + a[49].Mul(&a[49], &twiddlesCoset[55]) + a[51].Mul(&a[51], &twiddlesCoset[56]) + a[53].Mul(&a[53], &twiddlesCoset[57]) + a[55].Mul(&a[55], &twiddlesCoset[58]) + 
a[57].Mul(&a[57], &twiddlesCoset[59]) + a[59].Mul(&a[59], &twiddlesCoset[60]) + a[61].Mul(&a[61], &twiddlesCoset[61]) + a[63].Mul(&a[63], &twiddlesCoset[62]) + koalabear.Butterfly(&a[0], &a[1]) + koalabear.Butterfly(&a[2], &a[3]) + koalabear.Butterfly(&a[4], &a[5]) + koalabear.Butterfly(&a[6], &a[7]) + koalabear.Butterfly(&a[8], &a[9]) + koalabear.Butterfly(&a[10], &a[11]) + koalabear.Butterfly(&a[12], &a[13]) + koalabear.Butterfly(&a[14], &a[15]) + koalabear.Butterfly(&a[16], &a[17]) + koalabear.Butterfly(&a[18], &a[19]) + koalabear.Butterfly(&a[20], &a[21]) + koalabear.Butterfly(&a[22], &a[23]) + koalabear.Butterfly(&a[24], &a[25]) + koalabear.Butterfly(&a[26], &a[27]) + koalabear.Butterfly(&a[28], &a[29]) + koalabear.Butterfly(&a[30], &a[31]) + koalabear.Butterfly(&a[32], &a[33]) + koalabear.Butterfly(&a[34], &a[35]) + koalabear.Butterfly(&a[36], &a[37]) + koalabear.Butterfly(&a[38], &a[39]) + koalabear.Butterfly(&a[40], &a[41]) + koalabear.Butterfly(&a[42], &a[43]) + koalabear.Butterfly(&a[44], &a[45]) + koalabear.Butterfly(&a[46], &a[47]) + koalabear.Butterfly(&a[48], &a[49]) + koalabear.Butterfly(&a[50], &a[51]) + koalabear.Butterfly(&a[52], &a[53]) + koalabear.Butterfly(&a[54], &a[55]) + koalabear.Butterfly(&a[56], &a[57]) + koalabear.Butterfly(&a[58], &a[59]) + koalabear.Butterfly(&a[60], &a[61]) + koalabear.Butterfly(&a[62], &a[63]) +} + +// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table +// it then return all elements in the correct order for the unrolled FFT. 
+func PrecomputeTwiddlesCoset(generator, shifter koalabear.Element) []koalabear.Element { + toReturn := make([]koalabear.Element, 63) + var r, s koalabear.Element + e := new(big.Int) + + s = shifter + for k := 0; k < 5; k++ { + s.Square(&s) + } + toReturn[0] = s + s = shifter + for k := 0; k < 4; k++ { + s.Square(&s) + } + toReturn[1] = s + r.Exp(generator, e.SetUint64(uint64(1<<4*1))) + toReturn[2].Mul(&r, &s) + s = shifter + for k := 0; k < 3; k++ { + s.Square(&s) + } + toReturn[3] = s + r.Exp(generator, e.SetUint64(uint64(1<<3*2))) + toReturn[4].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<3*1))) + toReturn[5].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<3*3))) + toReturn[6].Mul(&r, &s) + s = shifter + for k := 0; k < 2; k++ { + s.Square(&s) + } + toReturn[7] = s + r.Exp(generator, e.SetUint64(uint64(1<<2*4))) + toReturn[8].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*2))) + toReturn[9].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*6))) + toReturn[10].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*1))) + toReturn[11].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*5))) + toReturn[12].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*3))) + toReturn[13].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*7))) + toReturn[14].Mul(&r, &s) + s = shifter + for k := 0; k < 1; k++ { + s.Square(&s) + } + toReturn[15] = s + r.Exp(generator, e.SetUint64(uint64(1<<1*8))) + toReturn[16].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*4))) + toReturn[17].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*12))) + toReturn[18].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*2))) + toReturn[19].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*10))) + toReturn[20].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*6))) + toReturn[21].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*14))) + toReturn[22].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*1))) + toReturn[23].Mul(&r, &s) + 
r.Exp(generator, e.SetUint64(uint64(1<<1*9))) + toReturn[24].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*5))) + toReturn[25].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*13))) + toReturn[26].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*3))) + toReturn[27].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*11))) + toReturn[28].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*7))) + toReturn[29].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*15))) + toReturn[30].Mul(&r, &s) + s = shifter + for k := 0; k < 0; k++ { + s.Square(&s) + } + toReturn[31] = s + r.Exp(generator, e.SetUint64(uint64(1<<0*16))) + toReturn[32].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*8))) + toReturn[33].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*24))) + toReturn[34].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*4))) + toReturn[35].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*20))) + toReturn[36].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*12))) + toReturn[37].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*28))) + toReturn[38].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*2))) + toReturn[39].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*18))) + toReturn[40].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*10))) + toReturn[41].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*26))) + toReturn[42].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*6))) + toReturn[43].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*22))) + toReturn[44].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*14))) + toReturn[45].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*30))) + toReturn[46].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*1))) + toReturn[47].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*17))) + toReturn[48].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*9))) + toReturn[49].Mul(&r, &s) + r.Exp(generator, 
e.SetUint64(uint64(1<<0*25))) + toReturn[50].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*5))) + toReturn[51].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*21))) + toReturn[52].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*13))) + toReturn[53].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*29))) + toReturn[54].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*3))) + toReturn[55].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*19))) + toReturn[56].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*11))) + toReturn[57].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*27))) + toReturn[58].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*7))) + toReturn[59].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*23))) + toReturn[60].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*15))) + toReturn[61].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*31))) + toReturn[62].Mul(&r, &s) + return toReturn +} From 414b6fcef9063f80bbdeba10e59d538973b1e72f Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Sat, 11 Jan 2025 10:32:04 -0600 Subject: [PATCH 16/25] refactor: experiment --- ecc/bls12-377/fr/fft/fft.go | 102 ++++++++++++++++-- ecc/bls12-381/fr/fft/fft.go | 102 ++++++++++++++++-- ecc/bls24-315/fr/fft/fft.go | 102 ++++++++++++++++-- ecc/bls24-317/fr/fft/fft.go | 102 ++++++++++++++++-- ecc/bn254/fr/fft/fft.go | 102 ++++++++++++++++-- ecc/bw6-633/fr/fft/fft.go | 102 ++++++++++++++++-- ecc/bw6-761/fr/fft/fft.go | 102 ++++++++++++++++-- field/babybear/fft/fft.go | 102 ++++++++++++++++-- .../internal/templates/fft/fft.go.tmpl | 31 ++++-- field/goldilocks/fft/fft.go | 102 ++++++++++++++++-- field/koalabear/fft/fft.go | 102 ++++++++++++++++-- 11 files changed, 985 insertions(+), 66 deletions(-) diff --git a/ecc/bls12-377/fr/fft/fft.go b/ecc/bls12-377/fr/fft/fft.go index afdb821d34..b9da0e69f9 100644 --- a/ecc/bls12-377/fr/fft/fft.go +++ b/ecc/bls12-377/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a 
[]fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -285,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -415,3 +427,81 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for 
offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], 
twiddles[stage+0], 0, 32, 32) +} diff --git a/ecc/bls12-381/fr/fft/fft.go b/ecc/bls12-381/fr/fft/fft.go index d2a5e7200c..d617c893f5 100644 --- a/ecc/bls12-381/fr/fft/fft.go +++ b/ecc/bls12-381/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -285,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -415,3 +427,81 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func 
kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { 
+ innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/ecc/bls24-315/fr/fft/fft.go b/ecc/bls24-315/fr/fft/fft.go index 85e25dd54a..de7715c9fb 100644 --- a/ecc/bls24-315/fr/fft/fft.go +++ b/ecc/bls24-315/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -285,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -415,3 +427,81 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 
4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + 
innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/ecc/bls24-317/fr/fft/fft.go b/ecc/bls24-317/fr/fft/fft.go index fead81fe47..2dbebe61c0 100644 --- a/ecc/bls24-317/fr/fft/fft.go +++ b/ecc/bls24-317/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -285,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -415,3 +427,81 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 
16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles 
[][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/ecc/bn254/fr/fft/fft.go b/ecc/bn254/fr/fft/fft.go index 1da9a883e2..7ee78bda92 100644 --- a/ecc/bn254/fr/fft/fft.go +++ b/ecc/bn254/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -285,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -415,3 +427,81 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], 
twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; 
offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/ecc/bw6-633/fr/fft/fft.go b/ecc/bw6-633/fr/fft/fft.go index 3cae1e9f1a..122c890456 100644 --- a/ecc/bw6-633/fr/fft/fft.go +++ b/ecc/bw6-633/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -285,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } 
else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -415,3 +427,81 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + 
innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/ecc/bw6-761/fr/fft/fft.go b/ecc/bw6-761/fr/fft/fft.go index c9ad068c0f..9e196c8efb 100644 --- a/ecc/bw6-761/fr/fft/fft.go +++ b/ecc/bw6-761/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -285,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n 
== 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -415,3 +427,81 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + 
innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/field/babybear/fft/fft.go b/field/babybear/fft/fft.go index 506fb93807..27c6c6a123 100644 --- a/field/babybear/fft/fft.go +++ b/field/babybear/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []babybear.Element, w babybear.Element, twiddles [][]babybear.Elem n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 
{ + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -285,9 +291,15 @@ func ditFFT(a []babybear.Element, w babybear.Element, twiddles [][]babybear.Elem n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -415,3 +427,81 @@ func kerDITNP_256(a []babybear.Element, twiddles [][]babybear.Element, stage int } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []babybear.Element, twiddles [][]babybear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + babybear.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []babybear.Element, twiddles [][]babybear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + babybear.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + 
innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} + +func kerDIFNP_64(a []babybear.Element, twiddles [][]babybear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + babybear.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []babybear.Element, twiddles [][]babybear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + babybear.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/field/generator/internal/templates/fft/fft.go.tmpl b/field/generator/internal/templates/fft/fft.go.tmpl index d777b2164b..8ca7a3e74c 100644 --- a/field/generator/internal/templates/fft/fft.go.tmpl +++ 
b/field/generator/internal/templates/fft/fft.go.tmpl @@ -11,6 +11,8 @@ import ( {{ $sizeKernelLog2 := 8}} {{ $sizeKernel := shl 1 $sizeKernelLog2}} +{{ $sizeKernel2Log2 := 5}} +{{ $sizeKernel2 := shl 1 $sizeKernel2Log2}} // Decimation is used in the FFT call to select decimation in time or in frequency type Decimation uint8 @@ -201,9 +203,15 @@ func difFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E n := len(a) if n == 1 { return - } else if n == {{$sizeKernel}} && stage >= twiddlesStartStage { - kerDIFNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == {{$sizeKernel}} { + kerDIFNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) + return + } else if n == {{$sizeKernel2}} { + kerDIFNP_{{$sizeKernel2}}(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -287,9 +295,15 @@ func ditFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E n := len(a) if n == 1 { return - } else if n == {{$sizeKernel}} && stage >= twiddlesStartStage { - kerDITNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == {{$sizeKernel2}} { + kerDITNP_{{$sizeKernel2}}(a, twiddles, stage-twiddlesStartStage) + return + } else if n == {{$sizeKernel}} { + kerDITNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -366,6 +380,8 @@ func innerDITWithoutTwiddles(a []{{ .FF }}.Element, at, w {{ .FF }}.Element, sta } {{genKernel $.FF $sizeKernel $sizeKernelLog2}} +{{genKernel $.FF $sizeKernel2 $sizeKernel2Log2}} +{{genKernel $.FF 64 6}} {{define "genKernel FF sizeKernel sizeKernelLog2"}} @@ -425,3 +441,6 @@ func kerDITNP_{{.sizeKernel}}(a []{{ .FF }}.Element, twiddles [][]{{ .FF }}.Elem } {{end}} + + + diff --git a/field/goldilocks/fft/fft.go b/field/goldilocks/fft/fft.go index 84ca5af4f0..e5f6c1a970 100644 --- a/field/goldilocks/fft/fft.go +++ b/field/goldilocks/fft/fft.go @@ 
-200,9 +200,15 @@ func difFFT(a []goldilocks.Element, w goldilocks.Element, twiddles [][]goldilock n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -285,9 +291,15 @@ func ditFFT(a []goldilocks.Element, w goldilocks.Element, twiddles [][]goldilock n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -415,3 +427,81 @@ func kerDITNP_256(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + goldilocks.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 
0; offset < 32; offset += 2 { + goldilocks.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} + +func kerDIFNP_64(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + goldilocks.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + goldilocks.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 
64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/field/koalabear/fft/fft.go b/field/koalabear/fft/fft.go index c8f1d282ba..21b71a0eba 100644 --- a/field/koalabear/fft/fft.go +++ b/field/koalabear/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []koalabear.Element, w koalabear.Element, twiddles [][]koalabear.E n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 32 { + kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -285,9 +291,15 @@ func ditFFT(a []koalabear.Element, w koalabear.Element, twiddles [][]koalabear.E n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 32 { + kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -415,3 +427,81 @@ func kerDITNP_256(a []koalabear.Element, twiddles [][]koalabear.Element, stage i } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_32(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) + for offset := 0; offset < 32; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + for offset := 0; offset < 32; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 4 { + 
innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 2 { + koalabear.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_32(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 32; offset += 2 { + koalabear.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 32; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) + } + for offset := 0; offset < 32; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) + } + for offset := 0; offset < 32; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) + } + innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) +} + +func kerDIFNP_64(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + koalabear.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + koalabear.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 
{ + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} From 9ffd3eb140885f3cc9b52108b8c0d7de427b4ca3 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Sat, 11 Jan 2025 17:06:02 -0600 Subject: [PATCH 17/25] perf: minor adjustments in allocations --- ecc/bls12-377/fr/fft/fft.go | 48 +++---------------- ecc/bls12-377/fr/fft/options.go | 12 +++-- ecc/bls12-377/fr/sis/sis.go | 26 +++------- ecc/bls12-381/fr/fft/fft.go | 48 +++---------------- ecc/bls12-381/fr/fft/options.go | 12 +++-- ecc/bls24-315/fr/fft/fft.go | 48 +++---------------- ecc/bls24-315/fr/fft/options.go | 12 +++-- ecc/bls24-317/fr/fft/fft.go | 48 +++---------------- ecc/bls24-317/fr/fft/options.go | 12 +++-- ecc/bn254/fr/fft/fft.go | 48 +++---------------- ecc/bn254/fr/fft/options.go | 12 +++-- ecc/bw6-633/fr/fft/fft.go | 48 +++---------------- ecc/bw6-633/fr/fft/options.go | 12 +++-- ecc/bw6-761/fr/fft/fft.go | 48 +++---------------- ecc/bw6-761/fr/fft/options.go | 12 +++-- field/babybear/fft/fft.go | 48 +++---------------- field/babybear/fft/options.go | 12 +++-- field/babybear/sis/sis.go | 26 +++------- .../internal/templates/fft/fft.go.tmpl | 7 ++- .../internal/templates/fft/options.go.tmpl | 13 +++-- .../internal/templates/sis/sis.go.tmpl | 24 +++------- field/goldilocks/fft/fft.go | 48 +++---------------- field/goldilocks/fft/options.go | 12 +++-- field/goldilocks/sis/sis.go | 26 +++------- field/koalabear/fft/fft.go | 48 +++---------------- field/koalabear/fft/options.go | 12 +++-- field/koalabear/sis/sis.go | 26 +++------- 27 files changed, 175
insertions(+), 573 deletions(-) diff --git a/ecc/bls12-377/fr/fft/fft.go b/ecc/bls12-377/fr/fft/fft.go index b9da0e69f9..9ab74a303b 100644 --- a/ecc/bls12-377/fr/fft/fft.go +++ b/ecc/bls12-377/fr/fft/fft.go @@ -31,7 +31,7 @@ const butterflyThreshold = 16 func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { // perf note; this option pattern actually allocates on the heap and comes at a cost when // doing many small FFTs! - opt := fftOptions(opts...) + opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -109,7 +109,7 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) // coset sets the shift of the fft (0 = no shift, standard fft) // len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. func (domain *Domain) FFTInverse(a []fr.Element, decimation Decimation, opts ...Option) { - opt := fftOptions(opts...) 
+ opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -204,8 +204,8 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 256 { kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) return } @@ -292,8 +292,8 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 1 { return } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) return } else if n == 256 { kerDITNP_256(a, twiddles, stage-twiddlesStartStage) @@ -428,42 +428,6 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; 
offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} - func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl diff --git a/ecc/bls12-377/fr/fft/options.go b/ecc/bls12-377/fr/fft/options.go index b4b1c04b6d..a562b0ae72 100644 --- a/ecc/bls12-377/fr/fft/options.go +++ b/ecc/bls12-377/fr/fft/options.go @@ -14,7 +14,7 @@ import ( // Option defines option for altering the behavior of FFT methods. // See the descriptions of functions returning instances of this type for // particular options. -type Option func(*fftConfig) +type Option func(fftConfig) fftConfig type fftConfig struct { coset bool @@ -23,8 +23,9 @@ type fftConfig struct { // OnCoset if provided, FFT(a) returns the evaluation of a on a coset. 
func OnCoset() Option { - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.coset = true + return opt } } @@ -35,20 +36,21 @@ func WithNbTasks(nbTasks int) Option { } else if nbTasks > 512 { nbTasks = 512 } - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.nbTasks = nbTasks + return opt } } // default options -func fftOptions(opts ...Option) fftConfig { +func fftOptions(opts []Option) fftConfig { // apply options opt := fftConfig{ coset: false, nbTasks: runtime.NumCPU(), } for _, option := range opts { - option(&opt) + opt = option(opt) } return opt } diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 5be811d821..6732067489 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -38,8 +38,8 @@ type RSis struct { maxNbElementsToHash int - smallFFT func([]fr.Element) - twiddlesCoset []fr.Element // used in conjunction with the smallFFT; + smallFFT func([]fr.Element) + cosetTable []fr.Element // used in conjunction with the smallFFT; } // NewRSis creates an instance of RSis. @@ -100,16 +100,9 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R maxNbElementsToHash: maxNbElementsToHash, } - r.smallFFT = func(p []fr.Element) { - r.Domain.FFT(p, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - - // if we have a FFT kernel of the size of the domain cardinality, we use it. 
- if r.Domain.Cardinality == 64 { - r.twiddlesCoset = PrecomputeTwiddlesCoset(r.Domain.Generator, shift) - r.smallFFT = func(a []fr.Element) { - FFT64(a, r.twiddlesCoset) - } + r.cosetTable, err = r.Domain.CosetTable() + if err != nil { + return nil, err } // filling A @@ -188,13 +181,8 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k fr.Vector, polId int) { // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // for perf, we use directly what's exposed; - r.smallFFT(k) - // k.Mul(k, fr.Vector(r.cosetTable)) - // if r.Domain.KernelDIF != nil { - // r.Domain.KernelDIF(k) - // } else { - // r.Domain.FFT(k, fft.DIF, fft.WithNbTasks(1)) - // } + k.Mul(k, fr.Vector(r.cosetTable)) + r.Domain.FFT(k, fft.DIF) mulModAcc(res, r.Ag[polId], k) } diff --git a/ecc/bls12-381/fr/fft/fft.go b/ecc/bls12-381/fr/fft/fft.go index d617c893f5..17f7023b91 100644 --- a/ecc/bls12-381/fr/fft/fft.go +++ b/ecc/bls12-381/fr/fft/fft.go @@ -31,7 +31,7 @@ const butterflyThreshold = 16 func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { // perf note; this option pattern actually allocates on the heap and comes at a cost when // doing many small FFTs! - opt := fftOptions(opts...) + opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -109,7 +109,7 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) // coset sets the shift of the fft (0 = no shift, standard fft) // len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. func (domain *Domain) FFTInverse(a []fr.Element, decimation Decimation, opts ...Option) { - opt := fftOptions(opts...) 
+ opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -204,8 +204,8 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 256 { kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) return } @@ -292,8 +292,8 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 1 { return } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) return } else if n == 256 { kerDITNP_256(a, twiddles, stage-twiddlesStartStage) @@ -428,42 +428,6 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; 
offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} - func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl diff --git a/ecc/bls12-381/fr/fft/options.go b/ecc/bls12-381/fr/fft/options.go index 72b1911fae..e705081cda 100644 --- a/ecc/bls12-381/fr/fft/options.go +++ b/ecc/bls12-381/fr/fft/options.go @@ -14,7 +14,7 @@ import ( // Option defines option for altering the behavior of FFT methods. // See the descriptions of functions returning instances of this type for // particular options. -type Option func(*fftConfig) +type Option func(fftConfig) fftConfig type fftConfig struct { coset bool @@ -23,8 +23,9 @@ type fftConfig struct { // OnCoset if provided, FFT(a) returns the evaluation of a on a coset. 
func OnCoset() Option { - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.coset = true + return opt } } @@ -35,20 +36,21 @@ func WithNbTasks(nbTasks int) Option { } else if nbTasks > 512 { nbTasks = 512 } - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.nbTasks = nbTasks + return opt } } // default options -func fftOptions(opts ...Option) fftConfig { +func fftOptions(opts []Option) fftConfig { // apply options opt := fftConfig{ coset: false, nbTasks: runtime.NumCPU(), } for _, option := range opts { - option(&opt) + opt = option(opt) } return opt } diff --git a/ecc/bls24-315/fr/fft/fft.go b/ecc/bls24-315/fr/fft/fft.go index de7715c9fb..8630671963 100644 --- a/ecc/bls24-315/fr/fft/fft.go +++ b/ecc/bls24-315/fr/fft/fft.go @@ -31,7 +31,7 @@ const butterflyThreshold = 16 func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { // perf note; this option pattern actually allocates on the heap and comes at a cost when // doing many small FFTs! - opt := fftOptions(opts...) + opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -109,7 +109,7 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) // coset sets the shift of the fft (0 = no shift, standard fft) // len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. func (domain *Domain) FFTInverse(a []fr.Element, decimation Decimation, opts ...Option) { - opt := fftOptions(opts...) 
+ opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -204,8 +204,8 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 256 { kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) return } @@ -292,8 +292,8 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 1 { return } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) return } else if n == 256 { kerDITNP_256(a, twiddles, stage-twiddlesStartStage) @@ -428,42 +428,6 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; 
offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} - func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl diff --git a/ecc/bls24-315/fr/fft/options.go b/ecc/bls24-315/fr/fft/options.go index 86b79fa17f..8538f4cdaa 100644 --- a/ecc/bls24-315/fr/fft/options.go +++ b/ecc/bls24-315/fr/fft/options.go @@ -14,7 +14,7 @@ import ( // Option defines option for altering the behavior of FFT methods. // See the descriptions of functions returning instances of this type for // particular options. -type Option func(*fftConfig) +type Option func(fftConfig) fftConfig type fftConfig struct { coset bool @@ -23,8 +23,9 @@ type fftConfig struct { // OnCoset if provided, FFT(a) returns the evaluation of a on a coset. 
func OnCoset() Option { - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.coset = true + return opt } } @@ -35,20 +36,21 @@ func WithNbTasks(nbTasks int) Option { } else if nbTasks > 512 { nbTasks = 512 } - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.nbTasks = nbTasks + return opt } } // default options -func fftOptions(opts ...Option) fftConfig { +func fftOptions(opts []Option) fftConfig { // apply options opt := fftConfig{ coset: false, nbTasks: runtime.NumCPU(), } for _, option := range opts { - option(&opt) + opt = option(opt) } return opt } diff --git a/ecc/bls24-317/fr/fft/fft.go b/ecc/bls24-317/fr/fft/fft.go index 2dbebe61c0..42efabd2ff 100644 --- a/ecc/bls24-317/fr/fft/fft.go +++ b/ecc/bls24-317/fr/fft/fft.go @@ -31,7 +31,7 @@ const butterflyThreshold = 16 func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { // perf note; this option pattern actually allocates on the heap and comes at a cost when // doing many small FFTs! - opt := fftOptions(opts...) + opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -109,7 +109,7 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) // coset sets the shift of the fft (0 = no shift, standard fft) // len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. func (domain *Domain) FFTInverse(a []fr.Element, decimation Decimation, opts ...Option) { - opt := fftOptions(opts...) 
+ opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -204,8 +204,8 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 256 { kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) return } @@ -292,8 +292,8 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 1 { return } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) return } else if n == 256 { kerDITNP_256(a, twiddles, stage-twiddlesStartStage) @@ -428,42 +428,6 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; 
offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} - func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl diff --git a/ecc/bls24-317/fr/fft/options.go b/ecc/bls24-317/fr/fft/options.go index 7f213a41fa..9a73619358 100644 --- a/ecc/bls24-317/fr/fft/options.go +++ b/ecc/bls24-317/fr/fft/options.go @@ -14,7 +14,7 @@ import ( // Option defines option for altering the behavior of FFT methods. // See the descriptions of functions returning instances of this type for // particular options. -type Option func(*fftConfig) +type Option func(fftConfig) fftConfig type fftConfig struct { coset bool @@ -23,8 +23,9 @@ type fftConfig struct { // OnCoset if provided, FFT(a) returns the evaluation of a on a coset. 
func OnCoset() Option { - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.coset = true + return opt } } @@ -35,20 +36,21 @@ func WithNbTasks(nbTasks int) Option { } else if nbTasks > 512 { nbTasks = 512 } - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.nbTasks = nbTasks + return opt } } // default options -func fftOptions(opts ...Option) fftConfig { +func fftOptions(opts []Option) fftConfig { // apply options opt := fftConfig{ coset: false, nbTasks: runtime.NumCPU(), } for _, option := range opts { - option(&opt) + opt = option(opt) } return opt } diff --git a/ecc/bn254/fr/fft/fft.go b/ecc/bn254/fr/fft/fft.go index 7ee78bda92..4def2f70d5 100644 --- a/ecc/bn254/fr/fft/fft.go +++ b/ecc/bn254/fr/fft/fft.go @@ -31,7 +31,7 @@ const butterflyThreshold = 16 func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { // perf note; this option pattern actually allocates on the heap and comes at a cost when // doing many small FFTs! - opt := fftOptions(opts...) + opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -109,7 +109,7 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) // coset sets the shift of the fft (0 = no shift, standard fft) // len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. func (domain *Domain) FFTInverse(a []fr.Element, decimation Decimation, opts ...Option) { - opt := fftOptions(opts...) 
+ opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -204,8 +204,8 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 256 { kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) return } @@ -292,8 +292,8 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 1 { return } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) return } else if n == 256 { kerDITNP_256(a, twiddles, stage-twiddlesStartStage) @@ -428,42 +428,6 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; 
offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} - func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl diff --git a/ecc/bn254/fr/fft/options.go b/ecc/bn254/fr/fft/options.go index ae1c9b40f5..87e5bae69a 100644 --- a/ecc/bn254/fr/fft/options.go +++ b/ecc/bn254/fr/fft/options.go @@ -14,7 +14,7 @@ import ( // Option defines option for altering the behavior of FFT methods. // See the descriptions of functions returning instances of this type for // particular options. -type Option func(*fftConfig) +type Option func(fftConfig) fftConfig type fftConfig struct { coset bool @@ -23,8 +23,9 @@ type fftConfig struct { // OnCoset if provided, FFT(a) returns the evaluation of a on a coset. 
func OnCoset() Option { - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.coset = true + return opt } } @@ -35,20 +36,21 @@ func WithNbTasks(nbTasks int) Option { } else if nbTasks > 512 { nbTasks = 512 } - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.nbTasks = nbTasks + return opt } } // default options -func fftOptions(opts ...Option) fftConfig { +func fftOptions(opts []Option) fftConfig { // apply options opt := fftConfig{ coset: false, nbTasks: runtime.NumCPU(), } for _, option := range opts { - option(&opt) + opt = option(opt) } return opt } diff --git a/ecc/bw6-633/fr/fft/fft.go b/ecc/bw6-633/fr/fft/fft.go index 122c890456..51f49f4836 100644 --- a/ecc/bw6-633/fr/fft/fft.go +++ b/ecc/bw6-633/fr/fft/fft.go @@ -31,7 +31,7 @@ const butterflyThreshold = 16 func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { // perf note; this option pattern actually allocates on the heap and comes at a cost when // doing many small FFTs! - opt := fftOptions(opts...) + opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -109,7 +109,7 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) // coset sets the shift of the fft (0 = no shift, standard fft) // len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. func (domain *Domain) FFTInverse(a []fr.Element, decimation Decimation, opts ...Option) { - opt := fftOptions(opts...) 
+ opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -204,8 +204,8 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 256 { kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) return } @@ -292,8 +292,8 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 1 { return } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) return } else if n == 256 { kerDITNP_256(a, twiddles, stage-twiddlesStartStage) @@ -428,42 +428,6 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; 
offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} - func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl diff --git a/ecc/bw6-633/fr/fft/options.go b/ecc/bw6-633/fr/fft/options.go index 6b4552ec29..3b9f572b47 100644 --- a/ecc/bw6-633/fr/fft/options.go +++ b/ecc/bw6-633/fr/fft/options.go @@ -14,7 +14,7 @@ import ( // Option defines option for altering the behavior of FFT methods. // See the descriptions of functions returning instances of this type for // particular options. -type Option func(*fftConfig) +type Option func(fftConfig) fftConfig type fftConfig struct { coset bool @@ -23,8 +23,9 @@ type fftConfig struct { // OnCoset if provided, FFT(a) returns the evaluation of a on a coset. 
func OnCoset() Option { - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.coset = true + return opt } } @@ -35,20 +36,21 @@ func WithNbTasks(nbTasks int) Option { } else if nbTasks > 512 { nbTasks = 512 } - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.nbTasks = nbTasks + return opt } } // default options -func fftOptions(opts ...Option) fftConfig { +func fftOptions(opts []Option) fftConfig { // apply options opt := fftConfig{ coset: false, nbTasks: runtime.NumCPU(), } for _, option := range opts { - option(&opt) + opt = option(opt) } return opt } diff --git a/ecc/bw6-761/fr/fft/fft.go b/ecc/bw6-761/fr/fft/fft.go index 9e196c8efb..a11ce4eeef 100644 --- a/ecc/bw6-761/fr/fft/fft.go +++ b/ecc/bw6-761/fr/fft/fft.go @@ -31,7 +31,7 @@ const butterflyThreshold = 16 func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) { // perf note; this option pattern actually allocates on the heap and comes at a cost when // doing many small FFTs! - opt := fftOptions(opts...) + opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -109,7 +109,7 @@ func (domain *Domain) FFT(a []fr.Element, decimation Decimation, opts ...Option) // coset sets the shift of the fft (0 = no shift, standard fft) // len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. func (domain *Domain) FFTInverse(a []fr.Element, decimation Decimation, opts ...Option) { - opt := fftOptions(opts...) 
+ opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -204,8 +204,8 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 256 { kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) return } @@ -292,8 +292,8 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart if n == 1 { return } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) return } else if n == 256 { kerDITNP_256(a, twiddles, stage-twiddlesStartStage) @@ -428,42 +428,6 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } -func kerDIFNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; 
offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} - func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl diff --git a/ecc/bw6-761/fr/fft/options.go b/ecc/bw6-761/fr/fft/options.go index 0ccbce8ba5..276471bd1c 100644 --- a/ecc/bw6-761/fr/fft/options.go +++ b/ecc/bw6-761/fr/fft/options.go @@ -14,7 +14,7 @@ import ( // Option defines option for altering the behavior of FFT methods. // See the descriptions of functions returning instances of this type for // particular options. -type Option func(*fftConfig) +type Option func(fftConfig) fftConfig type fftConfig struct { coset bool @@ -23,8 +23,9 @@ type fftConfig struct { // OnCoset if provided, FFT(a) returns the evaluation of a on a coset. 
func OnCoset() Option { - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.coset = true + return opt } } @@ -35,20 +36,21 @@ func WithNbTasks(nbTasks int) Option { } else if nbTasks > 512 { nbTasks = 512 } - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.nbTasks = nbTasks + return opt } } // default options -func fftOptions(opts ...Option) fftConfig { +func fftOptions(opts []Option) fftConfig { // apply options opt := fftConfig{ coset: false, nbTasks: runtime.NumCPU(), } for _, option := range opts { - option(&opt) + opt = option(opt) } return opt } diff --git a/field/babybear/fft/fft.go b/field/babybear/fft/fft.go index 27c6c6a123..6ad4e6f884 100644 --- a/field/babybear/fft/fft.go +++ b/field/babybear/fft/fft.go @@ -31,7 +31,7 @@ const butterflyThreshold = 16 func (domain *Domain) FFT(a []babybear.Element, decimation Decimation, opts ...Option) { // perf note; this option pattern actually allocates on the heap and comes at a cost when // doing many small FFTs! - opt := fftOptions(opts...) + opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -109,7 +109,7 @@ func (domain *Domain) FFT(a []babybear.Element, decimation Decimation, opts ...O // coset sets the shift of the fft (0 = no shift, standard fft) // len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. func (domain *Domain) FFTInverse(a []babybear.Element, decimation Decimation, opts ...Option) { - opt := fftOptions(opts...) 
+ opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -204,8 +204,8 @@ func difFFT(a []babybear.Element, w babybear.Element, twiddles [][]babybear.Elem if n == 256 { kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) return } @@ -292,8 +292,8 @@ func ditFFT(a []babybear.Element, w babybear.Element, twiddles [][]babybear.Elem if n == 1 { return } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) return } else if n == 256 { kerDITNP_256(a, twiddles, stage-twiddlesStartStage) @@ -428,42 +428,6 @@ func kerDITNP_256(a []babybear.Element, twiddles [][]babybear.Element, stage int innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } -func kerDIFNP_32(a []babybear.Element, twiddles [][]babybear.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - babybear.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []babybear.Element, twiddles [][]babybear.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - babybear.Butterfly(&a[offset], 
&a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} - func kerDIFNP_64(a []babybear.Element, twiddles [][]babybear.Element, stage int) { // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl diff --git a/field/babybear/fft/options.go b/field/babybear/fft/options.go index da964b5c87..d6fcb9c156 100644 --- a/field/babybear/fft/options.go +++ b/field/babybear/fft/options.go @@ -14,7 +14,7 @@ import ( // Option defines option for altering the behavior of FFT methods. // See the descriptions of functions returning instances of this type for // particular options. -type Option func(*fftConfig) +type Option func(fftConfig) fftConfig type fftConfig struct { coset bool @@ -23,8 +23,9 @@ type fftConfig struct { // OnCoset if provided, FFT(a) returns the evaluation of a on a coset. 
func OnCoset() Option { - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.coset = true + return opt } } @@ -35,20 +36,21 @@ func WithNbTasks(nbTasks int) Option { } else if nbTasks > 512 { nbTasks = 512 } - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.nbTasks = nbTasks + return opt } } // default options -func fftOptions(opts ...Option) fftConfig { +func fftOptions(opts []Option) fftConfig { // apply options opt := fftConfig{ coset: false, nbTasks: runtime.NumCPU(), } for _, option := range opts { - option(&opt) + opt = option(opt) } return opt } diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index e951b4cd7b..83116b97ed 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -38,8 +38,8 @@ type RSis struct { maxNbElementsToHash int - smallFFT func([]babybear.Element) - twiddlesCoset []babybear.Element // used in conjunction with the smallFFT; + smallFFT func([]babybear.Element) + cosetTable []babybear.Element // used in conjunction with the smallFFT; } // NewRSis creates an instance of RSis. @@ -100,16 +100,9 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R maxNbElementsToHash: maxNbElementsToHash, } - r.smallFFT = func(p []babybear.Element) { - r.Domain.FFT(p, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - - // if we have a FFT kernel of the size of the domain cardinality, we use it. 
- if r.Domain.Cardinality == 64 { - r.twiddlesCoset = PrecomputeTwiddlesCoset(r.Domain.Generator, shift) - r.smallFFT = func(a []babybear.Element) { - FFT64(a, r.twiddlesCoset) - } + r.cosetTable, err = r.Domain.CosetTable() + if err != nil { + return nil, err } // filling A @@ -188,13 +181,8 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k babybear.Vector, polId int) { // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // for perf, we use directly what's exposed; - r.smallFFT(k) - // k.Mul(k, fr.Vector(r.cosetTable)) - // if r.Domain.KernelDIF != nil { - // r.Domain.KernelDIF(k) - // } else { - // r.Domain.FFT(k, fft.DIF, fft.WithNbTasks(1)) - // } + k.Mul(k, fr.Vector(r.cosetTable)) + r.Domain.FFT(k, fft.DIF) mulModAcc(res, r.Ag[polId], k) } diff --git a/field/generator/internal/templates/fft/fft.go.tmpl b/field/generator/internal/templates/fft/fft.go.tmpl index 8ca7a3e74c..c239e9701b 100644 --- a/field/generator/internal/templates/fft/fft.go.tmpl +++ b/field/generator/internal/templates/fft/fft.go.tmpl @@ -11,7 +11,7 @@ import ( {{ $sizeKernelLog2 := 8}} {{ $sizeKernel := shl 1 $sizeKernelLog2}} -{{ $sizeKernel2Log2 := 5}} +{{ $sizeKernel2Log2 := 6}} {{ $sizeKernel2 := shl 1 $sizeKernel2Log2}} // Decimation is used in the FFT call to select decimation in time or in frequency @@ -31,7 +31,7 @@ const butterflyThreshold = 16 func (domain *Domain) FFT(a []{{ .FF }}.Element, decimation Decimation, opts ...Option) { // perf note; this option pattern actually allocates on the heap and comes at a cost when // doing many small FFTs! - opt := fftOptions(opts...) + opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -111,7 +111,7 @@ func (domain *Domain) FFT(a []{{ .FF }}.Element, decimation Decimation, opts ... 
// coset sets the shift of the fft (0 = no shift, standard fft) // len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. func (domain *Domain) FFTInverse(a []{{ .FF }}.Element, decimation Decimation, opts ...Option) { - opt := fftOptions(opts...) + opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -381,7 +381,6 @@ func innerDITWithoutTwiddles(a []{{ .FF }}.Element, at, w {{ .FF }}.Element, sta {{genKernel $.FF $sizeKernel $sizeKernelLog2}} {{genKernel $.FF $sizeKernel2 $sizeKernel2Log2}} -{{genKernel $.FF 64 6}} {{define "genKernel FF sizeKernel sizeKernelLog2"}} diff --git a/field/generator/internal/templates/fft/options.go.tmpl b/field/generator/internal/templates/fft/options.go.tmpl index 6e57878ec4..87fe89ff4a 100644 --- a/field/generator/internal/templates/fft/options.go.tmpl +++ b/field/generator/internal/templates/fft/options.go.tmpl @@ -7,7 +7,7 @@ import ( // Option defines option for altering the behavior of FFT methods. // See the descriptions of functions returning instances of this type for // particular options. -type Option func(*fftConfig) +type Option func(fftConfig) fftConfig type fftConfig struct { coset bool @@ -16,8 +16,9 @@ type fftConfig struct { // OnCoset if provided, FFT(a) returns the evaluation of a on a coset. 
func OnCoset() Option { - return func(opt *fftConfig) { + return func(opt fftConfig)fftConfig { opt.coset = true + return opt } } @@ -28,20 +29,22 @@ func WithNbTasks(nbTasks int) Option { } else if nbTasks > 512 { nbTasks = 512 } - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.nbTasks = nbTasks + return opt } } + // default options -func fftOptions(opts ...Option) fftConfig { +func fftOptions(opts []Option) fftConfig { // apply options opt := fftConfig{ coset: false, nbTasks: runtime.NumCPU(), } for _, option := range opts { - option(&opt) + opt = option(opt) } return opt } diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index cb235cc9ed..573087bdf9 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -39,7 +39,7 @@ type RSis struct { maxNbElementsToHash int smallFFT func([]{{ .FF }}.Element) - twiddlesCoset []{{ .FF }}.Element // used in conjunction with the smallFFT; + cosetTable []{{ .FF }}.Element // used in conjunction with the smallFFT; } // NewRSis creates an instance of RSis. @@ -100,16 +100,9 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R maxNbElementsToHash: maxNbElementsToHash, } - r.smallFFT = func(p []{{ .FF }}.Element) { - r.Domain.FFT(p, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - - // if we have a FFT kernel of the size of the domain cardinality, we use it. 
- if r.Domain.Cardinality == 64 { - r.twiddlesCoset = PrecomputeTwiddlesCoset(r.Domain.Generator, shift) - r.smallFFT = func(a []{{ .FF }}.Element) { - FFT64(a, r.twiddlesCoset) - } + r.cosetTable, err = r.Domain.CosetTable() + if err != nil { + return nil, err } @@ -190,13 +183,8 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k {{ .FF }}.Vector, polId int) { // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // for perf, we use directly what's exposed; - r.smallFFT(k) - // k.Mul(k, fr.Vector(r.cosetTable)) - // if r.Domain.KernelDIF != nil { - // r.Domain.KernelDIF(k) - // } else { - // r.Domain.FFT(k, fft.DIF, fft.WithNbTasks(1)) - // } + k.Mul(k, fr.Vector(r.cosetTable)) + r.Domain.FFT(k, fft.DIF) mulModAcc(res, r.Ag[polId], k) } diff --git a/field/goldilocks/fft/fft.go b/field/goldilocks/fft/fft.go index e5f6c1a970..140d40be8b 100644 --- a/field/goldilocks/fft/fft.go +++ b/field/goldilocks/fft/fft.go @@ -31,7 +31,7 @@ const butterflyThreshold = 16 func (domain *Domain) FFT(a []goldilocks.Element, decimation Decimation, opts ...Option) { // perf note; this option pattern actually allocates on the heap and comes at a cost when // doing many small FFTs! - opt := fftOptions(opts...) + opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -109,7 +109,7 @@ func (domain *Domain) FFT(a []goldilocks.Element, decimation Decimation, opts .. // coset sets the shift of the fft (0 = no shift, standard fft) // len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. func (domain *Domain) FFTInverse(a []goldilocks.Element, decimation Decimation, opts ...Option) { - opt := fftOptions(opts...) 
+ opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -204,8 +204,8 @@ func difFFT(a []goldilocks.Element, w goldilocks.Element, twiddles [][]goldilock if n == 256 { kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) return } @@ -292,8 +292,8 @@ func ditFFT(a []goldilocks.Element, w goldilocks.Element, twiddles [][]goldilock if n == 1 { return } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) return } else if n == 256 { kerDITNP_256(a, twiddles, stage-twiddlesStartStage) @@ -428,42 +428,6 @@ func kerDITNP_256(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } -func kerDIFNP_32(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - goldilocks.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - 
goldilocks.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} - func kerDIFNP_64(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl diff --git a/field/goldilocks/fft/options.go b/field/goldilocks/fft/options.go index dca1d37633..db171a3724 100644 --- a/field/goldilocks/fft/options.go +++ b/field/goldilocks/fft/options.go @@ -14,7 +14,7 @@ import ( // Option defines option for altering the behavior of FFT methods. // See the descriptions of functions returning instances of this type for // particular options. -type Option func(*fftConfig) +type Option func(fftConfig) fftConfig type fftConfig struct { coset bool @@ -23,8 +23,9 @@ type fftConfig struct { // OnCoset if provided, FFT(a) returns the evaluation of a on a coset. 
func OnCoset() Option { - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.coset = true + return opt } } @@ -35,20 +36,21 @@ func WithNbTasks(nbTasks int) Option { } else if nbTasks > 512 { nbTasks = 512 } - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.nbTasks = nbTasks + return opt } } // default options -func fftOptions(opts ...Option) fftConfig { +func fftOptions(opts []Option) fftConfig { // apply options opt := fftConfig{ coset: false, nbTasks: runtime.NumCPU(), } for _, option := range opts { - option(&opt) + opt = option(opt) } return opt } diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index 51b8256ea0..a734bc6d2b 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -38,8 +38,8 @@ type RSis struct { maxNbElementsToHash int - smallFFT func([]goldilocks.Element) - twiddlesCoset []goldilocks.Element // used in conjunction with the smallFFT; + smallFFT func([]goldilocks.Element) + cosetTable []goldilocks.Element // used in conjunction with the smallFFT; } // NewRSis creates an instance of RSis. @@ -100,16 +100,9 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R maxNbElementsToHash: maxNbElementsToHash, } - r.smallFFT = func(p []goldilocks.Element) { - r.Domain.FFT(p, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - - // if we have a FFT kernel of the size of the domain cardinality, we use it. 
- if r.Domain.Cardinality == 64 { - r.twiddlesCoset = PrecomputeTwiddlesCoset(r.Domain.Generator, shift) - r.smallFFT = func(a []goldilocks.Element) { - FFT64(a, r.twiddlesCoset) - } + r.cosetTable, err = r.Domain.CosetTable() + if err != nil { + return nil, err } // filling A @@ -188,13 +181,8 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k goldilocks.Vector, polId int) // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // for perf, we use directly what's exposed; - r.smallFFT(k) - // k.Mul(k, fr.Vector(r.cosetTable)) - // if r.Domain.KernelDIF != nil { - // r.Domain.KernelDIF(k) - // } else { - // r.Domain.FFT(k, fft.DIF, fft.WithNbTasks(1)) - // } + k.Mul(k, fr.Vector(r.cosetTable)) + r.Domain.FFT(k, fft.DIF) mulModAcc(res, r.Ag[polId], k) } diff --git a/field/koalabear/fft/fft.go b/field/koalabear/fft/fft.go index 21b71a0eba..850876deab 100644 --- a/field/koalabear/fft/fft.go +++ b/field/koalabear/fft/fft.go @@ -31,7 +31,7 @@ const butterflyThreshold = 16 func (domain *Domain) FFT(a []koalabear.Element, decimation Decimation, opts ...Option) { // perf note; this option pattern actually allocates on the heap and comes at a cost when // doing many small FFTs! - opt := fftOptions(opts...) + opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -109,7 +109,7 @@ func (domain *Domain) FFT(a []koalabear.Element, decimation Decimation, opts ... // coset sets the shift of the fft (0 = no shift, standard fft) // len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. func (domain *Domain) FFTInverse(a []koalabear.Element, decimation Decimation, opts ...Option) { - opt := fftOptions(opts...) 
+ opt := fftOptions(opts) // find the stage where we should stop spawning go routines in our recursive calls // (ie when we have as many go routines running as we have available CPUs) @@ -204,8 +204,8 @@ func difFFT(a []koalabear.Element, w koalabear.Element, twiddles [][]koalabear.E if n == 256 { kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) return - } else if n == 32 { - kerDIFNP_32(a, twiddles, stage-twiddlesStartStage) + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) return } @@ -292,8 +292,8 @@ func ditFFT(a []koalabear.Element, w koalabear.Element, twiddles [][]koalabear.E if n == 1 { return } else if stage >= twiddlesStartStage { - if n == 32 { - kerDITNP_32(a, twiddles, stage-twiddlesStartStage) + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) return } else if n == 256 { kerDITNP_256(a, twiddles, stage-twiddlesStartStage) @@ -428,42 +428,6 @@ func kerDITNP_256(a []koalabear.Element, twiddles [][]koalabear.Element, stage i innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } -func kerDIFNP_32(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) - for offset := 0; offset < 32; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - for offset := 0; offset < 32; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 2 { - koalabear.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_32(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 32; offset += 2 { - koalabear.Butterfly(&a[offset], 
&a[offset+1]) - } - for offset := 0; offset < 32; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+3], 0, 2, 2) - } - for offset := 0; offset < 32; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+2], 0, 4, 4) - } - for offset := 0; offset < 32; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+1], 0, 8, 8) - } - innerDITWithTwiddles(a[:32], twiddles[stage+0], 0, 16, 16) -} - func kerDIFNP_64(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl diff --git a/field/koalabear/fft/options.go b/field/koalabear/fft/options.go index e29ff3b6bd..7c4ba3ffb9 100644 --- a/field/koalabear/fft/options.go +++ b/field/koalabear/fft/options.go @@ -14,7 +14,7 @@ import ( // Option defines option for altering the behavior of FFT methods. // See the descriptions of functions returning instances of this type for // particular options. -type Option func(*fftConfig) +type Option func(fftConfig) fftConfig type fftConfig struct { coset bool @@ -23,8 +23,9 @@ type fftConfig struct { // OnCoset if provided, FFT(a) returns the evaluation of a on a coset. 
func OnCoset() Option { - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.coset = true + return opt } } @@ -35,20 +36,21 @@ func WithNbTasks(nbTasks int) Option { } else if nbTasks > 512 { nbTasks = 512 } - return func(opt *fftConfig) { + return func(opt fftConfig) fftConfig { opt.nbTasks = nbTasks + return opt } } // default options -func fftOptions(opts ...Option) fftConfig { +func fftOptions(opts []Option) fftConfig { // apply options opt := fftConfig{ coset: false, nbTasks: runtime.NumCPU(), } for _, option := range opts { - option(&opt) + opt = option(opt) } return opt } diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index cfba8a332c..49c1645ef0 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -38,8 +38,8 @@ type RSis struct { maxNbElementsToHash int - smallFFT func([]koalabear.Element) - twiddlesCoset []koalabear.Element // used in conjunction with the smallFFT; + smallFFT func([]koalabear.Element) + cosetTable []koalabear.Element // used in conjunction with the smallFFT; } // NewRSis creates an instance of RSis. @@ -100,16 +100,9 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R maxNbElementsToHash: maxNbElementsToHash, } - r.smallFFT = func(p []koalabear.Element) { - r.Domain.FFT(p, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - } - - // if we have a FFT kernel of the size of the domain cardinality, we use it. 
- if r.Domain.Cardinality == 64 { - r.twiddlesCoset = PrecomputeTwiddlesCoset(r.Domain.Generator, shift) - r.smallFFT = func(a []koalabear.Element) { - FFT64(a, r.twiddlesCoset) - } + r.cosetTable, err = r.Domain.CosetTable() + if err != nil { + return nil, err } // filling A @@ -188,13 +181,8 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k koalabear.Vector, polId int) { // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // for perf, we use directly what's exposed; - r.smallFFT(k) - // k.Mul(k, fr.Vector(r.cosetTable)) - // if r.Domain.KernelDIF != nil { - // r.Domain.KernelDIF(k) - // } else { - // r.Domain.FFT(k, fft.DIF, fft.WithNbTasks(1)) - // } + k.Mul(k, fr.Vector(r.cosetTable)) + r.Domain.FFT(k, fft.DIF) mulModAcc(res, r.Ag[polId], k) } From c96ec1581ca8af924d382b0462e367a2f9e36c94 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Sun, 12 Jan 2025 17:15:42 -0600 Subject: [PATCH 18/25] refactor: checkkpoint --- ecc/bls12-377/fr/sis/sis.go | 19 ++++++++++++++++--- ecc/bn254/fr/sis/sis_fft.go | 5 +++++ field/babybear/sis/sis.go | 19 ++++++++++++++++--- .../internal/templates/sis/sis.go.tmpl | 18 +++++++++++++++--- field/goldilocks/sis/sis.go | 19 ++++++++++++++++--- field/koalabear/sis/sis.go | 19 ++++++++++++++++--- 6 files changed, 84 insertions(+), 15 deletions(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 6732067489..1d0ef1fb22 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -38,7 +38,7 @@ type RSis struct { maxNbElementsToHash int - smallFFT func([]fr.Element) + smallFFT func(fr.Vector) cosetTable []fr.Element // used in conjunction with the smallFFT; } @@ -105,6 +105,18 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, err } + if r.Domain.Cardinality == 64 { + twiddlesCoset := PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + r.smallFFT = func(p fr.Vector) { + FFT64(p, twiddlesCoset) + } + } else { + r.smallFFT = func(p 
fr.Vector) { + p.Mul(p, fr.Vector(r.cosetTable)) + r.Domain.FFT(p, fft.DIF) + } + } + // filling A a := make([]fr.Element, n*r.Degree) ag := make([]fr.Element, n*r.Degree) @@ -181,8 +193,9 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k fr.Vector, polId int) { // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // for perf, we use directly what's exposed; - k.Mul(k, fr.Vector(r.cosetTable)) - r.Domain.FFT(k, fft.DIF) + // k.Mul(k, fr.Vector(r.cosetTable)) + // r.Domain.FFT(k, fft.DIF) + r.smallFFT(k) mulModAcc(res, r.Ag[polId], k) } diff --git a/ecc/bn254/fr/sis/sis_fft.go b/ecc/bn254/fr/sis/sis_fft.go index 05cded5aa0..297952c6b7 100644 --- a/ecc/bn254/fr/sis/sis_fft.go +++ b/ecc/bn254/fr/sis/sis_fft.go @@ -10,6 +10,11 @@ import ( "math/big" ) +// Option 1; we put the FFT unrolled in SIS. +// Option 2; we keep the FFT in fft, but have more optimized kernels. +// with option 2 we likely add N multiplications unless we mutate the cosets +// or expose the kernels to supply the twiddleCosets as arguments. 
+ // FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements // equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index 83116b97ed..feff0fa40b 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -38,7 +38,7 @@ type RSis struct { maxNbElementsToHash int - smallFFT func([]babybear.Element) + smallFFT func(babybear.Vector) cosetTable []babybear.Element // used in conjunction with the smallFFT; } @@ -105,6 +105,18 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, err } + if r.Domain.Cardinality == 64 { + twiddlesCoset := PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + r.smallFFT = func(p babybear.Vector) { + FFT64(p, twiddlesCoset) + } + } else { + r.smallFFT = func(p babybear.Vector) { + p.Mul(p, babybear.Vector(r.cosetTable)) + r.Domain.FFT(p, fft.DIF) + } + } + // filling A a := make([]babybear.Element, n*r.Degree) ag := make([]babybear.Element, n*r.Degree) @@ -181,8 +193,9 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k babybear.Vector, polId int) { // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // for perf, we use directly what's exposed; - k.Mul(k, fr.Vector(r.cosetTable)) - r.Domain.FFT(k, fft.DIF) + // k.Mul(k, fr.Vector(r.cosetTable)) + // r.Domain.FFT(k, fft.DIF) + r.smallFFT(k) mulModAcc(res, r.Ag[polId], k) } diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index 573087bdf9..c4ffe2ad66 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -38,7 +38,7 @@ type RSis struct { maxNbElementsToHash int - smallFFT func([]{{ .FF }}.Element) + smallFFT func({{ .FF }}.Vector) cosetTable []{{ .FF }}.Element // used 
in conjunction with the smallFFT; } @@ -105,6 +105,17 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, err } + if r.Domain.Cardinality == 64 { + twiddlesCoset := PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + r.smallFFT = func(p {{ .FF }}.Vector) { + FFT64(p, twiddlesCoset) + } + } else { + r.smallFFT = func(p {{ .FF }}.Vector) { + p.Mul(p, {{.FF}}.Vector(r.cosetTable)) + r.Domain.FFT(p, fft.DIF) + } + } // filling A @@ -183,8 +194,9 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k {{ .FF }}.Vector, polId int) { // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // for perf, we use directly what's exposed; - k.Mul(k, fr.Vector(r.cosetTable)) - r.Domain.FFT(k, fft.DIF) + // k.Mul(k, fr.Vector(r.cosetTable)) + // r.Domain.FFT(k, fft.DIF) + r.smallFFT(k) mulModAcc(res, r.Ag[polId], k) } diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index a734bc6d2b..253552f278 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -38,7 +38,7 @@ type RSis struct { maxNbElementsToHash int - smallFFT func([]goldilocks.Element) + smallFFT func(goldilocks.Vector) cosetTable []goldilocks.Element // used in conjunction with the smallFFT; } @@ -105,6 +105,18 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, err } + if r.Domain.Cardinality == 64 { + twiddlesCoset := PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + r.smallFFT = func(p goldilocks.Vector) { + FFT64(p, twiddlesCoset) + } + } else { + r.smallFFT = func(p goldilocks.Vector) { + p.Mul(p, goldilocks.Vector(r.cosetTable)) + r.Domain.FFT(p, fft.DIF) + } + } + // filling A a := make([]goldilocks.Element, n*r.Degree) ag := make([]goldilocks.Element, n*r.Degree) @@ -181,8 +193,9 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k goldilocks.Vector, polId int) // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // for perf, we use directly what's exposed; - 
k.Mul(k, fr.Vector(r.cosetTable)) - r.Domain.FFT(k, fft.DIF) + // k.Mul(k, fr.Vector(r.cosetTable)) + // r.Domain.FFT(k, fft.DIF) + r.smallFFT(k) mulModAcc(res, r.Ag[polId], k) } diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index 49c1645ef0..fb36d12c97 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -38,7 +38,7 @@ type RSis struct { maxNbElementsToHash int - smallFFT func([]koalabear.Element) + smallFFT func(koalabear.Vector) cosetTable []koalabear.Element // used in conjunction with the smallFFT; } @@ -105,6 +105,18 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, err } + if r.Domain.Cardinality == 64 { + twiddlesCoset := PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + r.smallFFT = func(p koalabear.Vector) { + FFT64(p, twiddlesCoset) + } + } else { + r.smallFFT = func(p koalabear.Vector) { + p.Mul(p, koalabear.Vector(r.cosetTable)) + r.Domain.FFT(p, fft.DIF) + } + } + // filling A a := make([]koalabear.Element, n*r.Degree) ag := make([]koalabear.Element, n*r.Degree) @@ -181,8 +193,9 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k koalabear.Vector, polId int) { // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // for perf, we use directly what's exposed; - k.Mul(k, fr.Vector(r.cosetTable)) - r.Domain.FFT(k, fft.DIF) + // k.Mul(k, fr.Vector(r.cosetTable)) + // r.Domain.FFT(k, fft.DIF) + r.smallFFT(k) mulModAcc(res, r.Ag[polId], k) } From 789b304334df804655458c73b039cca59ed77ae5 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Mon, 13 Jan 2025 19:01:43 +0000 Subject: [PATCH 19/25] style: add comments --- ecc/bls12-377/fr/sis/sis.go | 13 +++++++------ ecc/bls12-377/fr/sis/sis_fft.go | 10 +++++----- field/babybear/sis/sis.go | 13 +++++++------ field/babybear/sis/sis_fft.go | 10 +++++----- field/generator/internal/templates/sis/fft.go.tmpl | 10 +++++----- field/generator/internal/templates/sis/sis.go.tmpl | 13 +++++++------ 
field/goldilocks/sis/sis.go | 13 +++++++------ field/goldilocks/sis/sis_fft.go | 10 +++++----- field/koalabear/sis/sis.go | 13 +++++++------ field/koalabear/sis/sis_fft.go | 10 +++++----- 10 files changed, 60 insertions(+), 55 deletions(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 1d0ef1fb22..456a7916b7 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -105,10 +105,13 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, err } + // perf note: we have a dedicated path for 64, as it correspond to the parameters + // used by linea-monorepo prover with bls12377 curve. + // once the linea prover switches to smaller fields, this path can be removed. if r.Domain.Cardinality == 64 { - twiddlesCoset := PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + twiddlesCoset := precomputeTwiddlesCoset(r.Domain.Generator, shift) r.smallFFT = func(p fr.Vector) { - FFT64(p, twiddlesCoset) + fft64(p, twiddlesCoset) } } else { r.smallFFT = func(p fr.Vector) { @@ -191,10 +194,8 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k fr.Vector, polId int) { return } - // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - // for perf, we use directly what's exposed; - // k.Mul(k, fr.Vector(r.cosetTable)) - // r.Domain.FFT(k, fft.DIF) + // this is equivalent to: + // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) r.smallFFT(k) mulModAcc(res, r.Ag[polId], k) diff --git a/ecc/bls12-377/fr/sis/sis_fft.go b/ecc/bls12-377/fr/sis/sis_fft.go index f4f4db3abb..a6731836e9 100644 --- a/ecc/bls12-377/fr/sis/sis_fft.go +++ b/ecc/bls12-377/fr/sis/sis_fft.go @@ -10,10 +10,10 @@ import ( "math/big" ) -// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements +// fft64 unrolls an FFT with domain.Cardinality == 64 // equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from 
twiddles and coset table, see PrecomputeTwiddlesCoset -func FFT64(a []fr.Element, twiddlesCoset []fr.Element) { +// twiddlesCoset must be pre-computed from twiddles and coset table, see precomputeTwiddlesCoset +func fft64(a []fr.Element, twiddlesCoset []fr.Element) { a[32].Mul(&a[32], &twiddlesCoset[0]) a[33].Mul(&a[33], &twiddlesCoset[0]) @@ -401,9 +401,9 @@ func FFT64(a []fr.Element, twiddlesCoset []fr.Element) { fr.Butterfly(&a[62], &a[63]) } -// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table +// precomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table // it then return all elements in the correct order for the unrolled FFT. -func PrecomputeTwiddlesCoset(generator, shifter fr.Element) []fr.Element { +func precomputeTwiddlesCoset(generator, shifter fr.Element) []fr.Element { toReturn := make([]fr.Element, 63) var r, s fr.Element e := new(big.Int) diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index feff0fa40b..6c5a893a19 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -105,10 +105,13 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, err } + // perf note: we have a dedicated path for 64, as it correspond to the parameters + // used by linea-monorepo prover with bls12377 curve. + // once the linea prover switches to smaller fields, this path can be removed. 
if r.Domain.Cardinality == 64 { - twiddlesCoset := PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + twiddlesCoset := precomputeTwiddlesCoset(r.Domain.Generator, shift) r.smallFFT = func(p babybear.Vector) { - FFT64(p, twiddlesCoset) + fft64(p, twiddlesCoset) } } else { r.smallFFT = func(p babybear.Vector) { @@ -191,10 +194,8 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k babybear.Vector, polId int) { return } - // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - // for perf, we use directly what's exposed; - // k.Mul(k, fr.Vector(r.cosetTable)) - // r.Domain.FFT(k, fft.DIF) + // this is equivalent to: + // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) r.smallFFT(k) mulModAcc(res, r.Ag[polId], k) diff --git a/field/babybear/sis/sis_fft.go b/field/babybear/sis/sis_fft.go index 0b30e84b20..b609f67415 100644 --- a/field/babybear/sis/sis_fft.go +++ b/field/babybear/sis/sis_fft.go @@ -10,10 +10,10 @@ import ( "math/big" ) -// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements +// fft64 unrolls an FFT with domain.Cardinality == 64 // equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset -func FFT64(a []babybear.Element, twiddlesCoset []babybear.Element) { +// twiddlesCoset must be pre-computed from twiddles and coset table, see precomputeTwiddlesCoset +func fft64(a []babybear.Element, twiddlesCoset []babybear.Element) { a[32].Mul(&a[32], &twiddlesCoset[0]) a[33].Mul(&a[33], &twiddlesCoset[0]) @@ -401,9 +401,9 @@ func FFT64(a []babybear.Element, twiddlesCoset []babybear.Element) { babybear.Butterfly(&a[62], &a[63]) } -// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table +// precomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table // it then return all elements in the correct order for the unrolled FFT. 
-func PrecomputeTwiddlesCoset(generator, shifter babybear.Element) []babybear.Element { +func precomputeTwiddlesCoset(generator, shifter babybear.Element) []babybear.Element { toReturn := make([]babybear.Element, 63) var r, s babybear.Element e := new(big.Int) diff --git a/field/generator/internal/templates/sis/fft.go.tmpl b/field/generator/internal/templates/sis/fft.go.tmpl index 777eb8610a..5a7afde5e1 100644 --- a/field/generator/internal/templates/sis/fft.go.tmpl +++ b/field/generator/internal/templates/sis/fft.go.tmpl @@ -3,10 +3,10 @@ import ( "math/big" ) -// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements +// fft64 unrolls an FFT with domain.Cardinality == 64 // equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset -func FFT64(a []{{ .FF }}.Element, twiddlesCoset []{{ .FF }}.Element) { +// twiddlesCoset must be pre-computed from twiddles and coset table, see precomputeTwiddlesCoset +func fft64(a []{{ .FF }}.Element, twiddlesCoset []{{ .FF }}.Element) { {{- /* notes: this function can be updated with larger n @@ -47,9 +47,9 @@ func FFT64(a []{{ .FF }}.Element, twiddlesCoset []{{ .FF }}.Element) { {{- end}} } -// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table +// precomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table // it then return all elements in the correct order for the unrolled FFT. 
-func PrecomputeTwiddlesCoset(generator, shifter {{ .FF }}.Element) []{{ .FF }}.Element { +func precomputeTwiddlesCoset(generator, shifter {{ .FF }}.Element) []{{ .FF }}.Element { toReturn := make([]{{ .FF }}.Element, 63) var r, s {{ .FF }}.Element e := new(big.Int) diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index c4ffe2ad66..1a38efea1d 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -105,10 +105,13 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, err } + // perf note: we have a dedicated path for 64, as it correspond to the parameters + // used by linea-monorepo prover with bls12377 curve. + // once the linea prover switches to smaller fields, this path can be removed. if r.Domain.Cardinality == 64 { - twiddlesCoset := PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + twiddlesCoset := precomputeTwiddlesCoset(r.Domain.Generator, shift) r.smallFFT = func(p {{ .FF }}.Vector) { - FFT64(p, twiddlesCoset) + fft64(p, twiddlesCoset) } } else { r.smallFFT = func(p {{ .FF }}.Vector) { @@ -192,10 +195,8 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k {{ .FF }}.Vector, polId int) { return } - // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - // for perf, we use directly what's exposed; - // k.Mul(k, fr.Vector(r.cosetTable)) - // r.Domain.FFT(k, fft.DIF) + // this is equivalent to: + // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) r.smallFFT(k) mulModAcc(res, r.Ag[polId], k) diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index 253552f278..08de0f5ab0 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -105,10 +105,13 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, err } + // perf note: we have a dedicated path for 64, as it correspond to the 
parameters + // used by linea-monorepo prover with bls12377 curve. + // once the linea prover switches to smaller fields, this path can be removed. if r.Domain.Cardinality == 64 { - twiddlesCoset := PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + twiddlesCoset := precomputeTwiddlesCoset(r.Domain.Generator, shift) r.smallFFT = func(p goldilocks.Vector) { - FFT64(p, twiddlesCoset) + fft64(p, twiddlesCoset) } } else { r.smallFFT = func(p goldilocks.Vector) { @@ -191,10 +194,8 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k goldilocks.Vector, polId int) return } - // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - // for perf, we use directly what's exposed; - // k.Mul(k, fr.Vector(r.cosetTable)) - // r.Domain.FFT(k, fft.DIF) + // this is equivalent to: + // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) r.smallFFT(k) mulModAcc(res, r.Ag[polId], k) diff --git a/field/goldilocks/sis/sis_fft.go b/field/goldilocks/sis/sis_fft.go index 321a84e905..f22f578847 100644 --- a/field/goldilocks/sis/sis_fft.go +++ b/field/goldilocks/sis/sis_fft.go @@ -10,10 +10,10 @@ import ( "math/big" ) -// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) on 64 elements +// fft64 unrolls an FFT with domain.Cardinality == 64 // equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset -func FFT64(a []goldilocks.Element, twiddlesCoset []goldilocks.Element) { +// twiddlesCoset must be pre-computed from twiddles and coset table, see precomputeTwiddlesCoset +func fft64(a []goldilocks.Element, twiddlesCoset []goldilocks.Element) { a[32].Mul(&a[32], &twiddlesCoset[0]) a[33].Mul(&a[33], &twiddlesCoset[0]) @@ -401,9 +401,9 @@ func FFT64(a []goldilocks.Element, twiddlesCoset []goldilocks.Element) { goldilocks.Butterfly(&a[62], &a[63]) } -// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table +// 
precomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table // it then return all elements in the correct order for the unrolled FFT. -func PrecomputeTwiddlesCoset(generator, shifter goldilocks.Element) []goldilocks.Element { +func precomputeTwiddlesCoset(generator, shifter goldilocks.Element) []goldilocks.Element { toReturn := make([]goldilocks.Element, 63) var r, s goldilocks.Element e := new(big.Int) diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index fb36d12c97..39f0b9f63a 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -105,10 +105,13 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R return nil, err } + // perf note: we have a dedicated path for 64, as it correspond to the parameters + // used by linea-monorepo prover with bls12377 curve. + // once the linea prover switches to smaller fields, this path can be removed. if r.Domain.Cardinality == 64 { - twiddlesCoset := PrecomputeTwiddlesCoset(r.Domain.Generator, shift) + twiddlesCoset := precomputeTwiddlesCoset(r.Domain.Generator, shift) r.smallFFT = func(p koalabear.Vector) { - FFT64(p, twiddlesCoset) + fft64(p, twiddlesCoset) } } else { r.smallFFT = func(p koalabear.Vector) { @@ -191,10 +194,8 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k koalabear.Vector, polId int) { return } - // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - // for perf, we use directly what's exposed; - // k.Mul(k, fr.Vector(r.cosetTable)) - // r.Domain.FFT(k, fft.DIF) + // this is equivalent to: + // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) r.smallFFT(k) mulModAcc(res, r.Ag[polId], k) diff --git a/field/koalabear/sis/sis_fft.go b/field/koalabear/sis/sis_fft.go index 7706135c9c..80e63acf11 100644 --- a/field/koalabear/sis/sis_fft.go +++ b/field/koalabear/sis/sis_fft.go @@ -10,10 +10,10 @@ import ( "math/big" ) -// FFT64 is generated by gnark-crypto and contains the unrolled code for FFT (DIF) 
on 64 elements +// fft64 unrolls an FFT with domain.Cardinality == 64 // equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see PrecomputeTwiddlesCoset -func FFT64(a []koalabear.Element, twiddlesCoset []koalabear.Element) { +// twiddlesCoset must be pre-computed from twiddles and coset table, see precomputeTwiddlesCoset +func fft64(a []koalabear.Element, twiddlesCoset []koalabear.Element) { a[32].Mul(&a[32], &twiddlesCoset[0]) a[33].Mul(&a[33], &twiddlesCoset[0]) @@ -401,9 +401,9 @@ func FFT64(a []koalabear.Element, twiddlesCoset []koalabear.Element) { koalabear.Butterfly(&a[62], &a[63]) } -// PrecomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table +// precomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table // it then return all elements in the correct order for the unrolled FFT. -func PrecomputeTwiddlesCoset(generator, shifter koalabear.Element) []koalabear.Element { +func precomputeTwiddlesCoset(generator, shifter koalabear.Element) []koalabear.Element { toReturn := make([]koalabear.Element, 63) var r, s koalabear.Element e := new(big.Int) From c21fcb5b37ee990e6c038597e4d33e7fb287970b Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Mon, 13 Jan 2025 19:03:19 +0000 Subject: [PATCH 20/25] test: restored unrolled FFT test --- ecc/bls12-377/fr/sis/sis_test.go | 29 +++++++++++++++++ field/babybear/sis/sis_test.go | 29 +++++++++++++++++ .../internal/templates/sis/sis.test.go.tmpl | 31 +++++++++++++++++++ field/goldilocks/sis/sis_test.go | 29 +++++++++++++++++ field/koalabear/sis/sis_test.go | 29 +++++++++++++++++ 5 files changed, 147 insertions(+) diff --git a/ecc/bls12-377/fr/sis/sis_test.go b/ecc/bls12-377/fr/sis/sis_test.go index 88dc2b7276..3f57ee2431 100644 --- a/ecc/bls12-377/fr/sis/sis_test.go +++ b/ecc/bls12-377/fr/sis/sis_test.go @@ -265,3 +265,32 @@ func benchmarkSIS(b *testing.B, input []fr.Element, sparse bool, 
logTwoBound, lo }) } + +func TestUnrolledFFT(t *testing.T) { + assert := require.New(t) + + var shift fr.Element + shift.SetRandom() + + const size = 64 + domain := fft.NewDomain(size, fft.WithShift(shift)) + + k1 := make([]fr.Element, size) + for i := 0; i < size; i++ { + k1[i].SetRandom() + } + k2 := make([]fr.Element, size) + copy(k2, k1) + + // default FFT + domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + + // unrolled FFT + twiddlesCoset := precomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) + fft64(k2, twiddlesCoset) + + // compare results + for i := 0; i < size; i++ { + assert.True(k1[i].Equal(&k2[i]), "i = %d", i) + } +} diff --git a/field/babybear/sis/sis_test.go b/field/babybear/sis/sis_test.go index 6d041515e7..cab86f4ea9 100644 --- a/field/babybear/sis/sis_test.go +++ b/field/babybear/sis/sis_test.go @@ -265,3 +265,32 @@ func benchmarkSIS(b *testing.B, input []babybear.Element, sparse bool, logTwoBou }) } + +func TestUnrolledFFT(t *testing.T) { + assert := require.New(t) + + var shift babybear.Element + shift.SetRandom() + + const size = 64 + domain := fft.NewDomain(size, fft.WithShift(shift)) + + k1 := make([]babybear.Element, size) + for i := 0; i < size; i++ { + k1[i].SetRandom() + } + k2 := make([]babybear.Element, size) + copy(k2, k1) + + // default FFT + domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + + // unrolled FFT + twiddlesCoset := precomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) + fft64(k2, twiddlesCoset) + + // compare results + for i := 0; i < size; i++ { + assert.True(k1[i].Equal(&k2[i]), "i = %d", i) + } +} diff --git a/field/generator/internal/templates/sis/sis.test.go.tmpl b/field/generator/internal/templates/sis/sis.test.go.tmpl index 9233eaf850..48d985bdec 100644 --- a/field/generator/internal/templates/sis/sis.test.go.tmpl +++ b/field/generator/internal/templates/sis/sis.test.go.tmpl @@ -262,3 +262,34 @@ func benchmarkSIS(b *testing.B, input []{{ .FF }}.Element, 
sparse bool, logTwoBo }) } + + + +func TestUnrolledFFT(t *testing.T) { + assert := require.New(t) + + var shift {{ .FF }}.Element + shift.SetRandom() + + const size = 64 + domain := fft.NewDomain(size, fft.WithShift(shift)) + + k1 := make([]{{ .FF }}.Element, size) + for i := 0; i < size; i++ { + k1[i].SetRandom() + } + k2 := make([]{{ .FF }}.Element, size) + copy(k2, k1) + + // default FFT + domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + + // unrolled FFT + twiddlesCoset := precomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) + fft64(k2, twiddlesCoset) + + // compare results + for i := 0; i < size; i++ { + assert.True(k1[i].Equal(&k2[i]), "i = %d", i) + } +} diff --git a/field/goldilocks/sis/sis_test.go b/field/goldilocks/sis/sis_test.go index 480cbb8c33..6c13d45602 100644 --- a/field/goldilocks/sis/sis_test.go +++ b/field/goldilocks/sis/sis_test.go @@ -265,3 +265,32 @@ func benchmarkSIS(b *testing.B, input []goldilocks.Element, sparse bool, logTwoB }) } + +func TestUnrolledFFT(t *testing.T) { + assert := require.New(t) + + var shift goldilocks.Element + shift.SetRandom() + + const size = 64 + domain := fft.NewDomain(size, fft.WithShift(shift)) + + k1 := make([]goldilocks.Element, size) + for i := 0; i < size; i++ { + k1[i].SetRandom() + } + k2 := make([]goldilocks.Element, size) + copy(k2, k1) + + // default FFT + domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + + // unrolled FFT + twiddlesCoset := precomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) + fft64(k2, twiddlesCoset) + + // compare results + for i := 0; i < size; i++ { + assert.True(k1[i].Equal(&k2[i]), "i = %d", i) + } +} diff --git a/field/koalabear/sis/sis_test.go b/field/koalabear/sis/sis_test.go index d05364dea6..33c8fb1d48 100644 --- a/field/koalabear/sis/sis_test.go +++ b/field/koalabear/sis/sis_test.go @@ -265,3 +265,32 @@ func benchmarkSIS(b *testing.B, input []koalabear.Element, sparse bool, logTwoBo }) } + +func TestUnrolledFFT(t 
*testing.T) { + assert := require.New(t) + + var shift koalabear.Element + shift.SetRandom() + + const size = 64 + domain := fft.NewDomain(size, fft.WithShift(shift)) + + k1 := make([]koalabear.Element, size) + for i := 0; i < size; i++ { + k1[i].SetRandom() + } + k2 := make([]koalabear.Element, size) + copy(k2, k1) + + // default FFT + domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + + // unrolled FFT + twiddlesCoset := precomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) + fft64(k2, twiddlesCoset) + + // compare results + for i := 0; i < size; i++ { + assert.True(k1[i].Equal(&k2[i]), "i = %d", i) + } +} From a71664b65ccca1d357a359a44d70186be08b6a59 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Mon, 13 Jan 2025 19:20:56 +0000 Subject: [PATCH 21/25] style: code cleaning --- ecc/bls12-377/fr/sis/sis.go | 34 +++++++---------- field/babybear/sis/sis.go | 34 +++++++---------- .../internal/templates/sis/sis.go.tmpl | 37 +++++++------------ field/goldilocks/sis/sis.go | 34 +++++++---------- field/koalabear/sis/sis.go | 34 +++++++---------- 5 files changed, 65 insertions(+), 108 deletions(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 456a7916b7..2fa03fd1b7 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -119,7 +119,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R r.Domain.FFT(p, fft.DIF) } } - // filling A a := make([]fr.Element, n*r.Degree) ag := make([]fr.Element, n*r.Degree) @@ -135,7 +134,8 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + r.smallFFT(r.Ag[i]) } }) @@ -198,20 +198,11 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k fr.Vector, polId int) { // 
r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) r.smallFFT(k) - mulModAcc(res, r.Ag[polId], k) -} - -// mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. -// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes -// and that they are in evaluation form on √(g) * -// The result is not FFTinversed. The fft inverse is done once every -// multiplications are done. -// then accumulates the mulMod result in res. -// qLagrangeCosetBitReversed and res are mutated. -// pLagrangeCosetBitReversed is not mutated. -func mulModAcc(res, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed fr.Vector) { - qLagrangeCosetBitReversed.Mul(qLagrangeCosetBitReversed, pLagrangeCosetBitReversed) - res.Add(res, qLagrangeCosetBitReversed) + // we compute k * r.Ag[polId] in ℤ_{p}[X]/Xᵈ+1. + // k and r.Ag[polId] are in evaluation form on √(g) * + // we accumulate the result in res; the FFT inverse is done once every multiplications are done. + k.Mul(k, fr.Vector(r.Ag[polId])) + res.Add(res, k) } func deriveRandomElementFromSeed(seed, i, j int64) fr.Element { @@ -236,15 +227,18 @@ type ElementIterator interface { Next() (fr.Element, bool) } +// VectorIterator iterates over a vector of field element. type VectorIterator struct { v fr.Vector i int } +// NewVectorIterator creates a new VectorIterator func NewVectorIterator(v fr.Vector) *VectorIterator { return &VectorIterator{v: v} } +// Next returns the next element of the vector. func (vi *VectorIterator) Next() (fr.Element, bool) { if vi.i == len(vi.v) { return fr.Element{}, false @@ -253,7 +247,7 @@ func (vi *VectorIterator) Next() (fr.Element, bool) { return vi.v[vi.i-1], true } -// LimbIterator iterates over a vector of field element, limb by limb. +// LimbIterator iterates over a stream of field elements, limb by limb. 
type LimbIterator struct { it ElementIterator buf [fr.Bytes]byte @@ -264,8 +258,7 @@ type LimbIterator struct { } // NewLimbIterator creates a new LimbIterator -// v: the vector to read -// limbSize: the size of the limb in bytes (1, 2, 4 or 8) +// it is an iterator over a stream of field elements // The elements are interpreted in little endian. // The limb is also in little endian. func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { @@ -291,8 +284,6 @@ func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { } // NextLimb returns the next limb of the vector. -// This does not perform any bound check, may trigger an out of bound panic. -// If underlying vector is "out of limb" func (vr *LimbIterator) NextLimb() (uint64, bool) { if vr.j == fr.Bytes { next, ok := vr.it.Next() @@ -305,6 +296,7 @@ func (vr *LimbIterator) NextLimb() (uint64, bool) { return vr.next(vr.buf[:], &vr.j), true } +// Reset resets the iterator with a new ElementIterator. func (vr *LimbIterator) Reset(it ElementIterator) { vr.it = it vr.j = fr.Bytes diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index 6c5a893a19..2f3f2b3dd0 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -119,7 +119,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R r.Domain.FFT(p, fft.DIF) } } - // filling A a := make([]babybear.Element, n*r.Degree) ag := make([]babybear.Element, n*r.Degree) @@ -135,7 +134,8 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + r.smallFFT(r.Ag[i]) } }) @@ -198,20 +198,11 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k babybear.Vector, polId int) { // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) 
r.smallFFT(k) - mulModAcc(res, r.Ag[polId], k) -} - -// mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. -// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes -// and that they are in evaluation form on √(g) * -// The result is not FFTinversed. The fft inverse is done once every -// multiplications are done. -// then accumulates the mulMod result in res. -// qLagrangeCosetBitReversed and res are mutated. -// pLagrangeCosetBitReversed is not mutated. -func mulModAcc(res, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed babybear.Vector) { - qLagrangeCosetBitReversed.Mul(qLagrangeCosetBitReversed, pLagrangeCosetBitReversed) - res.Add(res, qLagrangeCosetBitReversed) + // we compute k * r.Ag[polId] in ℤ_{p}[X]/Xᵈ+1. + // k and r.Ag[polId] are in evaluation form on √(g) * + // we accumulate the result in res; the FFT inverse is done once every multiplications are done. + k.Mul(k, babybear.Vector(r.Ag[polId])) + res.Add(res, k) } func deriveRandomElementFromSeed(seed, i, j int64) babybear.Element { @@ -236,15 +227,18 @@ type ElementIterator interface { Next() (babybear.Element, bool) } +// VectorIterator iterates over a vector of field element. type VectorIterator struct { v babybear.Vector i int } +// NewVectorIterator creates a new VectorIterator func NewVectorIterator(v babybear.Vector) *VectorIterator { return &VectorIterator{v: v} } +// Next returns the next element of the vector. func (vi *VectorIterator) Next() (babybear.Element, bool) { if vi.i == len(vi.v) { return babybear.Element{}, false @@ -253,7 +247,7 @@ func (vi *VectorIterator) Next() (babybear.Element, bool) { return vi.v[vi.i-1], true } -// LimbIterator iterates over a vector of field element, limb by limb. +// LimbIterator iterates over a stream of field elements, limb by limb. 
type LimbIterator struct { it ElementIterator buf [babybear.Bytes]byte @@ -264,8 +258,7 @@ type LimbIterator struct { } // NewLimbIterator creates a new LimbIterator -// v: the vector to read -// limbSize: the size of the limb in bytes (1, 2, 4 or 8) +// it is an iterator over a stream of field elements // The elements are interpreted in little endian. // The limb is also in little endian. func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { @@ -287,8 +280,6 @@ func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { } // NextLimb returns the next limb of the vector. -// This does not perform any bound check, may trigger an out of bound panic. -// If underlying vector is "out of limb" func (vr *LimbIterator) NextLimb() (uint32, bool) { if vr.j == babybear.Bytes { next, ok := vr.it.Next() @@ -301,6 +292,7 @@ func (vr *LimbIterator) NextLimb() (uint32, bool) { return vr.next(vr.buf[:], &vr.j), true } +// Reset resets the iterator with a new ElementIterator. 
func (vr *LimbIterator) Reset(it ElementIterator) { vr.it = it vr.j = babybear.Bytes diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index 1a38efea1d..6f2269190b 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -119,8 +119,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R r.Domain.FFT(p, fft.DIF) } } - - // filling A a := make([]{{ .FF }}.Element, n*r.Degree) ag := make([]{{ .FF }}.Element, n*r.Degree) @@ -136,7 +134,8 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + r.smallFFT(r.Ag[i]) } }) @@ -198,24 +197,14 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k {{ .FF }}.Vector, polId int) { // this is equivalent to: // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) r.smallFFT(k) - - mulModAcc(res, r.Ag[polId], k) -} -// mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. -// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes -// and that they are in evaluation form on √(g) * -// The result is not FFTinversed. The fft inverse is done once every -// multiplications are done. -// then accumulates the mulMod result in res. -// qLagrangeCosetBitReversed and res are mutated. -// pLagrangeCosetBitReversed is not mutated. -func mulModAcc(res, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed {{.FF}}.Vector) { - qLagrangeCosetBitReversed.Mul(qLagrangeCosetBitReversed, pLagrangeCosetBitReversed) - res.Add(res, qLagrangeCosetBitReversed) + // we compute k * r.Ag[polId] in ℤ_{p}[X]/Xᵈ+1. 
+ // k and r.Ag[polId] are in evaluation form on √(g) * + // we accumulate the result in res; the FFT inverse is done once every multiplications are done. + k.Mul(k, {{.FF}}.Vector(r.Ag[polId])) + res.Add(res, k) } - func deriveRandomElementFromSeed(seed, i, j int64) {{ .FF }}.Element { var buf [3 + 3*8]byte copy(buf[:3], "SIS") @@ -239,15 +228,18 @@ type ElementIterator interface { Next() ({{ .FF }}.Element, bool) } +// VectorIterator iterates over a vector of field element. type VectorIterator struct { v {{ .FF }}.Vector i int } +// NewVectorIterator creates a new VectorIterator func NewVectorIterator(v {{ .FF }}.Vector) *VectorIterator { return &VectorIterator{v: v} } +// Next returns the next element of the vector. func (vi *VectorIterator) Next() ({{ .FF }}.Element, bool) { if vi.i == len(vi.v) { return {{ .FF }}.Element{}, false @@ -258,7 +250,7 @@ func (vi *VectorIterator) Next() ({{ .FF }}.Element, bool) { -// LimbIterator iterates over a vector of field element, limb by limb. +// LimbIterator iterates over a stream of field elements, limb by limb. type LimbIterator struct { it ElementIterator buf [{{ .FF }}.Bytes]byte @@ -269,8 +261,7 @@ type LimbIterator struct { } // NewLimbIterator creates a new LimbIterator -// v: the vector to read -// limbSize: the size of the limb in bytes (1, 2, 4 or 8) +// it is an iterator over a stream of field elements // The elements are interpreted in little endian. // The limb is also in little endian. func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { @@ -297,8 +288,6 @@ func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { } // NextLimb returns the next limb of the vector. -// This does not perform any bound check, may trigger an out of bound panic. 
-// If underlying vector is "out of limb" func (vr *LimbIterator) NextLimb() ({{$tReturn}}, bool) { if vr.j == {{ .FF }}.Bytes { next, ok := vr.it.Next() @@ -311,13 +300,13 @@ func (vr *LimbIterator) NextLimb() ({{$tReturn}}, bool) { return vr.next(vr.buf[:], &vr.j), true } +// Reset resets the iterator with a new ElementIterator. func (vr *LimbIterator) Reset(it ElementIterator) { vr.it = it vr.j = {{ .FF }}.Bytes } - func nextUint8(buf []byte, pos *int) {{$tReturn}} { r := {{$tReturn}} (buf[*pos]) *pos++ diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index 08de0f5ab0..c810ffffd1 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -119,7 +119,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R r.Domain.FFT(p, fft.DIF) } } - // filling A a := make([]goldilocks.Element, n*r.Degree) ag := make([]goldilocks.Element, n*r.Degree) @@ -135,7 +134,8 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + r.smallFFT(r.Ag[i]) } }) @@ -198,20 +198,11 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k goldilocks.Vector, polId int) // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) r.smallFFT(k) - mulModAcc(res, r.Ag[polId], k) -} - -// mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. -// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes -// and that they are in evaluation form on √(g) * -// The result is not FFTinversed. The fft inverse is done once every -// multiplications are done. -// then accumulates the mulMod result in res. -// qLagrangeCosetBitReversed and res are mutated. -// pLagrangeCosetBitReversed is not mutated. 
-func mulModAcc(res, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed goldilocks.Vector) { - qLagrangeCosetBitReversed.Mul(qLagrangeCosetBitReversed, pLagrangeCosetBitReversed) - res.Add(res, qLagrangeCosetBitReversed) + // we compute k * r.Ag[polId] in ℤ_{p}[X]/Xᵈ+1. + // k and r.Ag[polId] are in evaluation form on √(g) * + // we accumulate the result in res; the FFT inverse is done once every multiplications are done. + k.Mul(k, goldilocks.Vector(r.Ag[polId])) + res.Add(res, k) } func deriveRandomElementFromSeed(seed, i, j int64) goldilocks.Element { @@ -236,15 +227,18 @@ type ElementIterator interface { Next() (goldilocks.Element, bool) } +// VectorIterator iterates over a vector of field element. type VectorIterator struct { v goldilocks.Vector i int } +// NewVectorIterator creates a new VectorIterator func NewVectorIterator(v goldilocks.Vector) *VectorIterator { return &VectorIterator{v: v} } +// Next returns the next element of the vector. func (vi *VectorIterator) Next() (goldilocks.Element, bool) { if vi.i == len(vi.v) { return goldilocks.Element{}, false @@ -253,7 +247,7 @@ func (vi *VectorIterator) Next() (goldilocks.Element, bool) { return vi.v[vi.i-1], true } -// LimbIterator iterates over a vector of field element, limb by limb. +// LimbIterator iterates over a stream of field elements, limb by limb. type LimbIterator struct { it ElementIterator buf [goldilocks.Bytes]byte @@ -264,8 +258,7 @@ type LimbIterator struct { } // NewLimbIterator creates a new LimbIterator -// v: the vector to read -// limbSize: the size of the limb in bytes (1, 2, 4 or 8) +// it is an iterator over a stream of field elements // The elements are interpreted in little endian. // The limb is also in little endian. func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { @@ -291,8 +284,6 @@ func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { } // NextLimb returns the next limb of the vector. 
-// This does not perform any bound check, may trigger an out of bound panic. -// If underlying vector is "out of limb" func (vr *LimbIterator) NextLimb() (uint64, bool) { if vr.j == goldilocks.Bytes { next, ok := vr.it.Next() @@ -305,6 +296,7 @@ func (vr *LimbIterator) NextLimb() (uint64, bool) { return vr.next(vr.buf[:], &vr.j), true } +// Reset resets the iterator with a new ElementIterator. func (vr *LimbIterator) Reset(it ElementIterator) { vr.it = it vr.j = goldilocks.Bytes diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index 39f0b9f63a..ec5364b58e 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -119,7 +119,6 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R r.Domain.FFT(p, fft.DIF) } } - // filling A a := make([]koalabear.Element, n*r.Degree) ag := make([]koalabear.Element, n*r.Degree) @@ -135,7 +134,8 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + // r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + r.smallFFT(r.Ag[i]) } }) @@ -198,20 +198,11 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k koalabear.Vector, polId int) { // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) r.smallFFT(k) - mulModAcc(res, r.Ag[polId], k) -} - -// mulModAcc computes p * q in ℤ_{p}[X]/Xᵈ+1. -// Is assumed that pLagrangeShifted and qLagrangeShifted are of the correct sizes -// and that they are in evaluation form on √(g) * -// The result is not FFTinversed. The fft inverse is done once every -// multiplications are done. -// then accumulates the mulMod result in res. -// qLagrangeCosetBitReversed and res are mutated. -// pLagrangeCosetBitReversed is not mutated. 
-func mulModAcc(res, pLagrangeCosetBitReversed, qLagrangeCosetBitReversed koalabear.Vector) { - qLagrangeCosetBitReversed.Mul(qLagrangeCosetBitReversed, pLagrangeCosetBitReversed) - res.Add(res, qLagrangeCosetBitReversed) + // we compute k * r.Ag[polId] in ℤ_{p}[X]/Xᵈ+1. + // k and r.Ag[polId] are in evaluation form on √(g) * + // we accumulate the result in res; the FFT inverse is done once every multiplications are done. + k.Mul(k, koalabear.Vector(r.Ag[polId])) + res.Add(res, k) } func deriveRandomElementFromSeed(seed, i, j int64) koalabear.Element { @@ -236,15 +227,18 @@ type ElementIterator interface { Next() (koalabear.Element, bool) } +// VectorIterator iterates over a vector of field element. type VectorIterator struct { v koalabear.Vector i int } +// NewVectorIterator creates a new VectorIterator func NewVectorIterator(v koalabear.Vector) *VectorIterator { return &VectorIterator{v: v} } +// Next returns the next element of the vector. func (vi *VectorIterator) Next() (koalabear.Element, bool) { if vi.i == len(vi.v) { return koalabear.Element{}, false @@ -253,7 +247,7 @@ func (vi *VectorIterator) Next() (koalabear.Element, bool) { return vi.v[vi.i-1], true } -// LimbIterator iterates over a vector of field element, limb by limb. +// LimbIterator iterates over a stream of field elements, limb by limb. type LimbIterator struct { it ElementIterator buf [koalabear.Bytes]byte @@ -264,8 +258,7 @@ type LimbIterator struct { } // NewLimbIterator creates a new LimbIterator -// v: the vector to read -// limbSize: the size of the limb in bytes (1, 2, 4 or 8) +// it is an iterator over a stream of field elements // The elements are interpreted in little endian. // The limb is also in little endian. func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { @@ -287,8 +280,6 @@ func NewLimbIterator(it ElementIterator, limbSize int) *LimbIterator { } // NextLimb returns the next limb of the vector. 
-// This does not perform any bound check, may trigger an out of bound panic. -// If underlying vector is "out of limb" func (vr *LimbIterator) NextLimb() (uint32, bool) { if vr.j == koalabear.Bytes { next, ok := vr.it.Next() @@ -301,6 +292,7 @@ func (vr *LimbIterator) NextLimb() (uint32, bool) { return vr.next(vr.buf[:], &vr.j), true } +// Reset resets the iterator with a new ElementIterator. func (vr *LimbIterator) Reset(it ElementIterator) { vr.it = it vr.j = koalabear.Bytes From 5ed9c488ecd1d96ccf9e7a8b56a289a89cb0f2ff Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Mon, 13 Jan 2025 19:23:55 +0000 Subject: [PATCH 22/25] refactor: remove unrelated fft changes --- ecc/bls12-377/fr/fft/fft.go | 75 +++---------------- ecc/bls12-381/fr/fft/fft.go | 75 +++---------------- ecc/bls24-315/fr/fft/fft.go | 75 +++---------------- ecc/bls24-317/fr/fft/fft.go | 75 +++---------------- ecc/bn254/fr/fft/fft.go | 75 +++---------------- ecc/bw6-633/fr/fft/fft.go | 75 +++---------------- ecc/bw6-761/fr/fft/fft.go | 75 +++---------------- field/babybear/fft/fft.go | 75 +++---------------- .../internal/templates/fft/fft.go.tmpl | 37 +++------ field/goldilocks/fft/fft.go | 75 +++---------------- field/koalabear/fft/fft.go | 75 +++---------------- 11 files changed, 121 insertions(+), 666 deletions(-) diff --git a/ecc/bls12-377/fr/fft/fft.go b/ecc/bls12-377/fr/fft/fft.go index 9ab74a303b..9d9801f0ab 100644 --- a/ecc/bls12-377/fr/fft/fft.go +++ b/ecc/bls12-377/fr/fft/fft.go @@ -200,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 64 { - kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -265,11 +259,12 @@ func 
innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[i]) } - // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - v1 := fr.Vector(a[start+m : end+m]) - v2 := fr.Vector(twiddles[start:end]) - v1.Mul(v1, v2) + // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + // v1 := fr.Vector(a[start+m:end+m]) + // v2 := fr.Vector(twiddles[start:end]) + // v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -291,15 +286,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 64 { - kerDITNP_64(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -427,45 +416,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) - for offset := 0; offset < 64; offset += 32 { - innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - for offset := 0; offset < 64; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for 
offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 64; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 32 { - innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) -} diff --git a/ecc/bls12-381/fr/fft/fft.go b/ecc/bls12-381/fr/fft/fft.go index 17f7023b91..5e5c18c38b 100644 --- a/ecc/bls12-381/fr/fft/fft.go +++ b/ecc/bls12-381/fr/fft/fft.go @@ -200,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 64 { - kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -265,11 +259,12 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[i]) } - // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - v1 := fr.Vector(a[start+m : end+m]) - v2 := fr.Vector(twiddles[start:end]) - v1.Mul(v1, v2) + // // TODO @gbotrel: here the butterfly for most cases could leave 
the result not reduced mod q + // v1 := fr.Vector(a[start+m:end+m]) + // v2 := fr.Vector(twiddles[start:end]) + // v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -291,15 +286,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 64 { - kerDITNP_64(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -427,45 +416,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) - for offset := 0; offset < 64; offset += 32 { - innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - for offset := 0; offset < 64; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 64; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], 
twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 32 { - innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) -} diff --git a/ecc/bls24-315/fr/fft/fft.go b/ecc/bls24-315/fr/fft/fft.go index 8630671963..f70fe454a5 100644 --- a/ecc/bls24-315/fr/fft/fft.go +++ b/ecc/bls24-315/fr/fft/fft.go @@ -200,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 64 { - kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -265,11 +259,12 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[i]) } - // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - v1 := fr.Vector(a[start+m : end+m]) - v2 := fr.Vector(twiddles[start:end]) - v1.Mul(v1, v2) + // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + // v1 := fr.Vector(a[start+m:end+m]) + // v2 := fr.Vector(twiddles[start:end]) + // v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -291,15 +286,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 64 { - kerDITNP_64(a, 
twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -427,45 +416,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) - for offset := 0; offset < 64; offset += 32 { - innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - for offset := 0; offset < 64; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 64; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 32 { - innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - innerDITWithTwiddles(a[:64], 
twiddles[stage+0], 0, 32, 32) -} diff --git a/ecc/bls24-317/fr/fft/fft.go b/ecc/bls24-317/fr/fft/fft.go index 42efabd2ff..664b50ba47 100644 --- a/ecc/bls24-317/fr/fft/fft.go +++ b/ecc/bls24-317/fr/fft/fft.go @@ -200,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 64 { - kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -265,11 +259,12 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[i]) } - // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - v1 := fr.Vector(a[start+m : end+m]) - v2 := fr.Vector(twiddles[start:end]) - v1.Mul(v1, v2) + // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + // v1 := fr.Vector(a[start+m:end+m]) + // v2 := fr.Vector(twiddles[start:end]) + // v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -291,15 +286,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 64 { - kerDITNP_64(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -427,45 +416,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - 
-func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) - for offset := 0; offset < 64; offset += 32 { - innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - for offset := 0; offset < 64; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 64; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 32 { - innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) -} diff --git a/ecc/bn254/fr/fft/fft.go b/ecc/bn254/fr/fft/fft.go index 4def2f70d5..3042d342c3 100644 --- a/ecc/bn254/fr/fft/fft.go +++ b/ecc/bn254/fr/fft/fft.go @@ -200,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) 
- return - } else if n == 64 { - kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -265,11 +259,12 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[i]) } - // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - v1 := fr.Vector(a[start+m : end+m]) - v2 := fr.Vector(twiddles[start:end]) - v1.Mul(v1, v2) + // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + // v1 := fr.Vector(a[start+m:end+m]) + // v2 := fr.Vector(twiddles[start:end]) + // v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -291,15 +286,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 64 { - kerDITNP_64(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -427,45 +416,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) - for offset := 0; offset < 64; offset += 32 { - innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - for offset := 0; offset < 64; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) 
- } - for offset := 0; offset < 64; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 64; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 32 { - innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) -} diff --git a/ecc/bw6-633/fr/fft/fft.go b/ecc/bw6-633/fr/fft/fft.go index 51f49f4836..02cbc1d8e1 100644 --- a/ecc/bw6-633/fr/fft/fft.go +++ b/ecc/bw6-633/fr/fft/fft.go @@ -200,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 64 { - kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -265,11 +259,12 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[i]) } - // TODO 
@gbotrel: here the butterfly for most cases could leave the result not reduced mod q - v1 := fr.Vector(a[start+m : end+m]) - v2 := fr.Vector(twiddles[start:end]) - v1.Mul(v1, v2) + // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + // v1 := fr.Vector(a[start+m:end+m]) + // v2 := fr.Vector(twiddles[start:end]) + // v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -291,15 +286,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 64 { - kerDITNP_64(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -427,45 +416,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) - for offset := 0; offset < 64; offset += 32 { - innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - for offset := 0; offset < 64; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & 
generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 64; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 32 { - innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) -} diff --git a/ecc/bw6-761/fr/fft/fft.go b/ecc/bw6-761/fr/fft/fft.go index a11ce4eeef..cc8f7fb631 100644 --- a/ecc/bw6-761/fr/fft/fft.go +++ b/ecc/bw6-761/fr/fft/fft.go @@ -200,15 +200,9 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 64 { - kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -265,11 +259,12 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[i]) } - // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - v1 := fr.Vector(a[start+m : end+m]) - v2 := fr.Vector(twiddles[start:end]) - v1.Mul(v1, v2) + // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + // v1 := fr.Vector(a[start+m:end+m]) + // v2 := fr.Vector(twiddles[start:end]) + // v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w 
fr.Element, start, end, m int) { @@ -291,15 +286,9 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 64 { - kerDITNP_64(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -427,45 +416,3 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) - for offset := 0; offset < 64; offset += 32 { - innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - for offset := 0; offset < 64; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 64; offset += 2 { - fr.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 64; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 16 { 
- innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 32 { - innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) -} diff --git a/field/babybear/fft/fft.go b/field/babybear/fft/fft.go index 6ad4e6f884..aaf0d41a6f 100644 --- a/field/babybear/fft/fft.go +++ b/field/babybear/fft/fft.go @@ -200,15 +200,9 @@ func difFFT(a []babybear.Element, w babybear.Element, twiddles [][]babybear.Elem n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 64 { - kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -265,11 +259,12 @@ func innerDIFWithTwiddles(a []babybear.Element, twiddles []babybear.Element, sta } for i := start; i < end; i++ { babybear.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[i]) } - // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - v1 := babybear.Vector(a[start+m : end+m]) - v2 := babybear.Vector(twiddles[start:end]) - v1.Mul(v1, v2) + // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + // v1 := babybear.Vector(a[start+m:end+m]) + // v2 := babybear.Vector(twiddles[start:end]) + // v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []babybear.Element, at, w babybear.Element, start, end, m int) { @@ -291,15 +286,9 @@ func ditFFT(a []babybear.Element, w babybear.Element, twiddles [][]babybear.Elem n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 64 { - kerDITNP_64(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage 
>= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -427,45 +416,3 @@ func kerDITNP_256(a []babybear.Element, twiddles [][]babybear.Element, stage int } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_64(a []babybear.Element, twiddles [][]babybear.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) - for offset := 0; offset < 64; offset += 32 { - innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - for offset := 0; offset < 64; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 2 { - babybear.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_64(a []babybear.Element, twiddles [][]babybear.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 64; offset += 2 { - babybear.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 64; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 32 { - innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) -} diff --git a/field/generator/internal/templates/fft/fft.go.tmpl 
b/field/generator/internal/templates/fft/fft.go.tmpl index c239e9701b..e1fee64a28 100644 --- a/field/generator/internal/templates/fft/fft.go.tmpl +++ b/field/generator/internal/templates/fft/fft.go.tmpl @@ -11,9 +11,6 @@ import ( {{ $sizeKernelLog2 := 8}} {{ $sizeKernel := shl 1 $sizeKernelLog2}} -{{ $sizeKernel2Log2 := 6}} -{{ $sizeKernel2 := shl 1 $sizeKernel2Log2}} - // Decimation is used in the FFT call to select decimation in time or in frequency type Decimation uint8 @@ -203,15 +200,9 @@ func difFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == {{$sizeKernel}} { - kerDIFNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) - return - } else if n == {{$sizeKernel2}} { - kerDIFNP_{{$sizeKernel2}}(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == {{$sizeKernel}} && stage >= twiddlesStartStage { + kerDIFNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -268,11 +259,12 @@ func innerDIFWithTwiddles(a []{{ .FF }}.Element, twiddles []{{ .FF }}.Element, s } for i := start; i < end; i++ { {{ .FF }}.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[i]) } - // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - v1 := {{ .FF }}.Vector(a[start+m:end+m]) - v2 := {{ .FF }}.Vector(twiddles[start:end]) - v1.Mul(v1, v2) + // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + // v1 := {{ .FF }}.Vector(a[start+m:end+m]) + // v2 := {{ .FF }}.Vector(twiddles[start:end]) + // v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []{{ .FF }}.Element, at, w {{ .FF }}.Element, start, end, m int) { @@ -295,15 +287,9 @@ func ditFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == {{$sizeKernel2}} { - kerDITNP_{{$sizeKernel2}}(a, twiddles, 
stage-twiddlesStartStage) - return - } else if n == {{$sizeKernel}} { - kerDITNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == {{$sizeKernel}} && stage >= twiddlesStartStage { + kerDITNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -380,7 +366,6 @@ func innerDITWithoutTwiddles(a []{{ .FF }}.Element, at, w {{ .FF }}.Element, sta } {{genKernel $.FF $sizeKernel $sizeKernelLog2}} -{{genKernel $.FF $sizeKernel2 $sizeKernel2Log2}} {{define "genKernel FF sizeKernel sizeKernelLog2"}} diff --git a/field/goldilocks/fft/fft.go b/field/goldilocks/fft/fft.go index 140d40be8b..74e6aeb0ff 100644 --- a/field/goldilocks/fft/fft.go +++ b/field/goldilocks/fft/fft.go @@ -200,15 +200,9 @@ func difFFT(a []goldilocks.Element, w goldilocks.Element, twiddles [][]goldilock n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 64 { - kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -265,11 +259,12 @@ func innerDIFWithTwiddles(a []goldilocks.Element, twiddles []goldilocks.Element, } for i := start; i < end; i++ { goldilocks.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[i]) } - // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - v1 := goldilocks.Vector(a[start+m : end+m]) - v2 := goldilocks.Vector(twiddles[start:end]) - v1.Mul(v1, v2) + // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + // v1 := goldilocks.Vector(a[start+m:end+m]) + // v2 := goldilocks.Vector(twiddles[start:end]) + // v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []goldilocks.Element, at, w goldilocks.Element, start, end, m int) { @@ -291,15 +286,9 @@ func ditFFT(a []goldilocks.Element, w 
goldilocks.Element, twiddles [][]goldilock n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 64 { - kerDITNP_64(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -427,45 +416,3 @@ func kerDITNP_256(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_64(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) - for offset := 0; offset < 64; offset += 32 { - innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - for offset := 0; offset < 64; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 2 { - goldilocks.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_64(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 64; offset += 2 { - goldilocks.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 64; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 16 { - 
innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 32 { - innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) -} diff --git a/field/koalabear/fft/fft.go b/field/koalabear/fft/fft.go index 850876deab..c502976b8e 100644 --- a/field/koalabear/fft/fft.go +++ b/field/koalabear/fft/fft.go @@ -200,15 +200,9 @@ func difFFT(a []koalabear.Element, w koalabear.Element, twiddles [][]koalabear.E n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 256 { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 64 { - kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 && stage >= twiddlesStartStage { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -265,11 +259,12 @@ func innerDIFWithTwiddles(a []koalabear.Element, twiddles []koalabear.Element, s } for i := start; i < end; i++ { koalabear.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[i]) } - // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - v1 := koalabear.Vector(a[start+m : end+m]) - v2 := koalabear.Vector(twiddles[start:end]) - v1.Mul(v1, v2) + // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + // v1 := koalabear.Vector(a[start+m:end+m]) + // v2 := koalabear.Vector(twiddles[start:end]) + // v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []koalabear.Element, at, w koalabear.Element, start, end, m int) { @@ -291,15 +286,9 @@ func ditFFT(a []koalabear.Element, w koalabear.Element, twiddles [][]koalabear.E n := len(a) if n == 1 { return - } else if stage >= twiddlesStartStage { - if n == 64 { - kerDITNP_64(a, twiddles, stage-twiddlesStartStage) - return - } else if n == 256 { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return - } - + } else if n == 256 
&& stage >= twiddlesStartStage { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return } m := n >> 1 @@ -427,45 +416,3 @@ func kerDITNP_256(a []koalabear.Element, twiddles [][]koalabear.Element, stage i } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } - -func kerDIFNP_64(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) - for offset := 0; offset < 64; offset += 32 { - innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - for offset := 0; offset < 64; offset += 16 { - innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 8 { - innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 4 { - innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 2 { - koalabear.Butterfly(&a[offset], &a[offset+1]) - } -} - -func kerDITNP_64(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { - // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl - - for offset := 0; offset < 64; offset += 2 { - koalabear.Butterfly(&a[offset], &a[offset+1]) - } - for offset := 0; offset < 64; offset += 4 { - innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) - } - for offset := 0; offset < 64; offset += 8 { - innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) - } - for offset := 0; offset < 64; offset += 16 { - innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) - } - for offset := 0; offset < 64; offset += 32 { - innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) - } - innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) -} From 3e3505b2f27d65dfecfe875d7899845a7c9f4645 Mon Sep 17 00:00:00 2001 From: 
Gautam Botrel Date: Mon, 13 Jan 2025 19:31:28 +0000 Subject: [PATCH 23/25] Revert "refactor: remove unrelated fft changes" This reverts commit 5ed9c488ecd1d96ccf9e7a8b56a289a89cb0f2ff. --- ecc/bls12-377/fr/fft/fft.go | 75 ++++++++++++++++--- ecc/bls12-381/fr/fft/fft.go | 75 ++++++++++++++++--- ecc/bls24-315/fr/fft/fft.go | 75 ++++++++++++++++--- ecc/bls24-317/fr/fft/fft.go | 75 ++++++++++++++++--- ecc/bn254/fr/fft/fft.go | 75 ++++++++++++++++--- ecc/bw6-633/fr/fft/fft.go | 75 ++++++++++++++++--- ecc/bw6-761/fr/fft/fft.go | 75 ++++++++++++++++--- field/babybear/fft/fft.go | 75 ++++++++++++++++--- .../internal/templates/fft/fft.go.tmpl | 37 ++++++--- field/goldilocks/fft/fft.go | 75 ++++++++++++++++--- field/koalabear/fft/fft.go | 75 ++++++++++++++++--- 11 files changed, 666 insertions(+), 121 deletions(-) diff --git a/ecc/bls12-377/fr/fft/fft.go b/ecc/bls12-377/fr/fft/fft.go index 9d9801f0ab..9ab74a303b 100644 --- a/ecc/bls12-377/fr/fft/fft.go +++ b/ecc/bls12-377/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -259,12 +265,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } - // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - // v1 := fr.Vector(a[start+m:end+m]) - // v2 := fr.Vector(twiddles[start:end]) - // v1.Mul(v1, v2) + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : 
end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -286,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -416,3 +427,45 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + 
innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/ecc/bls12-381/fr/fft/fft.go b/ecc/bls12-381/fr/fft/fft.go index 5e5c18c38b..17f7023b91 100644 --- a/ecc/bls12-381/fr/fft/fft.go +++ b/ecc/bls12-381/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -259,12 +265,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } - // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - // v1 := fr.Vector(a[start+m:end+m]) - // v2 := fr.Vector(twiddles[start:end]) - // v1.Mul(v1, v2) + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -286,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= 
twiddlesStartStage { + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -416,3 +427,45 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git 
a/ecc/bls24-315/fr/fft/fft.go b/ecc/bls24-315/fr/fft/fft.go index f70fe454a5..8630671963 100644 --- a/ecc/bls24-315/fr/fft/fft.go +++ b/ecc/bls24-315/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -259,12 +265,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } - // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - // v1 := fr.Vector(a[start+m:end+m]) - // v2 := fr.Vector(twiddles[start:end]) - // v1.Mul(v1, v2) + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -286,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -416,3 +427,45 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_64(a []fr.Element, twiddles 
[][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/ecc/bls24-317/fr/fft/fft.go b/ecc/bls24-317/fr/fft/fft.go index 664b50ba47..42efabd2ff 100644 --- a/ecc/bls24-317/fr/fft/fft.go +++ b/ecc/bls24-317/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= 
twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -259,12 +265,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } - // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - // v1 := fr.Vector(a[start+m:end+m]) - // v2 := fr.Vector(twiddles[start:end]) - // v1.Mul(v1, v2) + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -286,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -416,3 +427,45 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; 
offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/ecc/bn254/fr/fft/fft.go b/ecc/bn254/fr/fft/fft.go index 3042d342c3..4def2f70d5 100644 --- a/ecc/bn254/fr/fft/fft.go +++ b/ecc/bn254/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -259,12 +265,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } - // // TODO @gbotrel: here the butterfly for 
most cases could leave the result not reduced mod q - // v1 := fr.Vector(a[start+m:end+m]) - // v2 := fr.Vector(twiddles[start:end]) - // v1.Mul(v1, v2) + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -286,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -416,3 +427,45 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by 
internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/ecc/bw6-633/fr/fft/fft.go b/ecc/bw6-633/fr/fft/fft.go index 02cbc1d8e1..51f49f4836 100644 --- a/ecc/bw6-633/fr/fft/fft.go +++ b/ecc/bw6-633/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -259,12 +265,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } - // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - // v1 := fr.Vector(a[start+m:end+m]) - // v2 := fr.Vector(twiddles[start:end]) - // v1.Mul(v1, v2) + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, 
end, m int) { @@ -286,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -416,3 +427,45 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + 
innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/ecc/bw6-761/fr/fft/fft.go b/ecc/bw6-761/fr/fft/fft.go index cc8f7fb631..a11ce4eeef 100644 --- a/ecc/bw6-761/fr/fft/fft.go +++ b/ecc/bw6-761/fr/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -259,12 +265,11 @@ func innerDIFWithTwiddles(a []fr.Element, twiddles []fr.Element, start, end, m i } for i := start; i < end; i++ { fr.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } - // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - // v1 := fr.Vector(a[start+m:end+m]) - // v2 := fr.Vector(twiddles[start:end]) - // v1.Mul(v1, v2) + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := fr.Vector(a[start+m : end+m]) + v2 := fr.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []fr.Element, at, w fr.Element, start, end, m int) { @@ -286,9 +291,15 @@ func ditFFT(a []fr.Element, w fr.Element, twiddles [][]fr.Element, twiddlesStart n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, 
twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -416,3 +427,45 @@ func kerDITNP_256(a []fr.Element, twiddles [][]fr.Element, stage int) { } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []fr.Element, twiddles [][]fr.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + fr.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/field/babybear/fft/fft.go b/field/babybear/fft/fft.go index aaf0d41a6f..6ad4e6f884 100644 --- a/field/babybear/fft/fft.go +++ b/field/babybear/fft/fft.go @@ -200,9 
+200,15 @@ func difFFT(a []babybear.Element, w babybear.Element, twiddles [][]babybear.Elem n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -259,12 +265,11 @@ func innerDIFWithTwiddles(a []babybear.Element, twiddles []babybear.Element, sta } for i := start; i < end; i++ { babybear.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } - // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - // v1 := babybear.Vector(a[start+m:end+m]) - // v2 := babybear.Vector(twiddles[start:end]) - // v1.Mul(v1, v2) + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := babybear.Vector(a[start+m : end+m]) + v2 := babybear.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []babybear.Element, at, w babybear.Element, start, end, m int) { @@ -286,9 +291,15 @@ func ditFFT(a []babybear.Element, w babybear.Element, twiddles [][]babybear.Elem n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -416,3 +427,45 @@ func kerDITNP_256(a []babybear.Element, twiddles [][]babybear.Element, stage int } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_64(a []babybear.Element, twiddles [][]babybear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + 
innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + babybear.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []babybear.Element, twiddles [][]babybear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + babybear.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/field/generator/internal/templates/fft/fft.go.tmpl b/field/generator/internal/templates/fft/fft.go.tmpl index e1fee64a28..c239e9701b 100644 --- a/field/generator/internal/templates/fft/fft.go.tmpl +++ b/field/generator/internal/templates/fft/fft.go.tmpl @@ -11,6 +11,9 @@ import ( {{ $sizeKernelLog2 := 8}} {{ $sizeKernel := shl 1 $sizeKernelLog2}} +{{ $sizeKernel2Log2 := 6}} +{{ $sizeKernel2 := shl 1 $sizeKernel2Log2}} + // Decimation is used in the FFT call to select decimation in time or in frequency type 
Decimation uint8 @@ -200,9 +203,15 @@ func difFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E n := len(a) if n == 1 { return - } else if n == {{$sizeKernel}} && stage >= twiddlesStartStage { - kerDIFNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == {{$sizeKernel}} { + kerDIFNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) + return + } else if n == {{$sizeKernel2}} { + kerDIFNP_{{$sizeKernel2}}(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -259,12 +268,11 @@ func innerDIFWithTwiddles(a []{{ .FF }}.Element, twiddles []{{ .FF }}.Element, s } for i := start; i < end; i++ { {{ .FF }}.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } - // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - // v1 := {{ .FF }}.Vector(a[start+m:end+m]) - // v2 := {{ .FF }}.Vector(twiddles[start:end]) - // v1.Mul(v1, v2) + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := {{ .FF }}.Vector(a[start+m:end+m]) + v2 := {{ .FF }}.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []{{ .FF }}.Element, at, w {{ .FF }}.Element, start, end, m int) { @@ -287,9 +295,15 @@ func ditFFT(a []{{ .FF }}.Element, w {{ .FF }}.Element, twiddles [][]{{ .FF }}.E n := len(a) if n == 1 { return - } else if n == {{$sizeKernel}} && stage >= twiddlesStartStage { - kerDITNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == {{$sizeKernel2}} { + kerDITNP_{{$sizeKernel2}}(a, twiddles, stage-twiddlesStartStage) + return + } else if n == {{$sizeKernel}} { + kerDITNP_{{$sizeKernel}}(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -366,6 +380,7 @@ func innerDITWithoutTwiddles(a []{{ .FF }}.Element, at, w {{ .FF }}.Element, sta } {{genKernel $.FF $sizeKernel $sizeKernelLog2}} 
+{{genKernel $.FF $sizeKernel2 $sizeKernel2Log2}} {{define "genKernel FF sizeKernel sizeKernelLog2"}} diff --git a/field/goldilocks/fft/fft.go b/field/goldilocks/fft/fft.go index 74e6aeb0ff..140d40be8b 100644 --- a/field/goldilocks/fft/fft.go +++ b/field/goldilocks/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a []goldilocks.Element, w goldilocks.Element, twiddles [][]goldilock n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -259,12 +265,11 @@ func innerDIFWithTwiddles(a []goldilocks.Element, twiddles []goldilocks.Element, } for i := start; i < end; i++ { goldilocks.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } - // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - // v1 := goldilocks.Vector(a[start+m:end+m]) - // v2 := goldilocks.Vector(twiddles[start:end]) - // v1.Mul(v1, v2) + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := goldilocks.Vector(a[start+m : end+m]) + v2 := goldilocks.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []goldilocks.Element, at, w goldilocks.Element, start, end, m int) { @@ -286,9 +291,15 @@ func ditFFT(a []goldilocks.Element, w goldilocks.Element, twiddles [][]goldilock n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -416,3 +427,45 @@ func 
kerDITNP_256(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_64(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + goldilocks.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []goldilocks.Element, twiddles [][]goldilocks.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + goldilocks.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} diff --git a/field/koalabear/fft/fft.go b/field/koalabear/fft/fft.go index c502976b8e..850876deab 100644 --- a/field/koalabear/fft/fft.go +++ b/field/koalabear/fft/fft.go @@ -200,9 +200,15 @@ func difFFT(a 
[]koalabear.Element, w koalabear.Element, twiddles [][]koalabear.E n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 256 { + kerDIFNP_256(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 64 { + kerDIFNP_64(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -259,12 +265,11 @@ func innerDIFWithTwiddles(a []koalabear.Element, twiddles []koalabear.Element, s } for i := start; i < end; i++ { koalabear.Butterfly(&a[i], &a[i+m]) - a[i+m].Mul(&a[i+m], &twiddles[i]) } - // // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q - // v1 := koalabear.Vector(a[start+m:end+m]) - // v2 := koalabear.Vector(twiddles[start:end]) - // v1.Mul(v1, v2) + // TODO @gbotrel: here the butterfly for most cases could leave the result not reduced mod q + v1 := koalabear.Vector(a[start+m : end+m]) + v2 := koalabear.Vector(twiddles[start:end]) + v1.Mul(v1, v2) } func innerDIFWithoutTwiddles(a []koalabear.Element, at, w koalabear.Element, start, end, m int) { @@ -286,9 +291,15 @@ func ditFFT(a []koalabear.Element, w koalabear.Element, twiddles [][]koalabear.E n := len(a) if n == 1 { return - } else if n == 256 && stage >= twiddlesStartStage { - kerDITNP_256(a, twiddles, stage-twiddlesStartStage) - return + } else if stage >= twiddlesStartStage { + if n == 64 { + kerDITNP_64(a, twiddles, stage-twiddlesStartStage) + return + } else if n == 256 { + kerDITNP_256(a, twiddles, stage-twiddlesStartStage) + return + } + } m := n >> 1 @@ -416,3 +427,45 @@ func kerDITNP_256(a []koalabear.Element, twiddles [][]koalabear.Element, stage i } innerDITWithTwiddles(a[:256], twiddles[stage+0], 0, 128, 128) } + +func kerDIFNP_64(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + 
innerDIFWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) + for offset := 0; offset < 64; offset += 32 { + innerDIFWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + for offset := 0; offset < 64; offset += 16 { + innerDIFWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 8 { + innerDIFWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 4 { + innerDIFWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 2 { + koalabear.Butterfly(&a[offset], &a[offset+1]) + } +} + +func kerDITNP_64(a []koalabear.Element, twiddles [][]koalabear.Element, stage int) { + // code unrolled & generated by internal/generator/fft/template/fft.go.tmpl + + for offset := 0; offset < 64; offset += 2 { + koalabear.Butterfly(&a[offset], &a[offset+1]) + } + for offset := 0; offset < 64; offset += 4 { + innerDITWithTwiddles(a[offset:offset+4], twiddles[stage+4], 0, 2, 2) + } + for offset := 0; offset < 64; offset += 8 { + innerDITWithTwiddles(a[offset:offset+8], twiddles[stage+3], 0, 4, 4) + } + for offset := 0; offset < 64; offset += 16 { + innerDITWithTwiddles(a[offset:offset+16], twiddles[stage+2], 0, 8, 8) + } + for offset := 0; offset < 64; offset += 32 { + innerDITWithTwiddles(a[offset:offset+32], twiddles[stage+1], 0, 16, 16) + } + innerDITWithTwiddles(a[:64], twiddles[stage+0], 0, 32, 32) +} From eda21ee58300657586b6454a5a262b2098bddab5 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Tue, 14 Jan 2025 02:31:02 +0000 Subject: [PATCH 24/25] refactor: ports unrolled fft from linea sis --- ecc/bls12-377/fr/sis/sis.go | 53 +- ecc/bls12-377/fr/sis/sis_fft.go | 5720 ++++++++++++++++- ecc/bls12-377/fr/sis/sis_test.go | 54 +- field/babybear/sis/sis.go | 42 +- field/babybear/sis/sis_fft.go | 556 -- field/babybear/sis/sis_test.go | 29 - field/generator/generator_sis.go | 179 +- .../internal/templates/sis/fft.go.tmpl | 
58 +- .../internal/templates/sis/sis.go.tmpl | 74 +- .../internal/templates/sis/sis.test.go.tmpl | 62 +- field/goldilocks/sis/sis.go | 42 +- field/goldilocks/sis/sis_fft.go | 556 -- field/goldilocks/sis/sis_test.go | 29 - field/koalabear/sis/sis.go | 42 +- field/koalabear/sis/sis_fft.go | 556 -- field/koalabear/sis/sis_test.go | 29 - 16 files changed, 5722 insertions(+), 2359 deletions(-) delete mode 100644 field/babybear/sis/sis_fft.go delete mode 100644 field/goldilocks/sis/sis_fft.go delete mode 100644 field/koalabear/sis/sis_fft.go diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 2fa03fd1b7..36b62a9294 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -37,9 +37,9 @@ type RSis struct { Domain *fft.Domain maxNbElementsToHash int + smallFFT func(k fr.Vector, mask uint64) - smallFFT func(fr.Vector) - cosetTable []fr.Element // used in conjunction with the smallFFT; + kz fr.Vector // zeroes used to zeroize the limbs buffer faster. } // NewRSis creates an instance of RSis. @@ -97,28 +97,30 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R Domain: fft.NewDomain(uint64(degree), fft.WithShift(shift)), A: make([][]fr.Element, n), Ag: make([][]fr.Element, n), + kz: make(fr.Vector, degree), maxNbElementsToHash: maxNbElementsToHash, } - - r.cosetTable, err = r.Domain.CosetTable() - if err != nil { - return nil, err - } - - // perf note: we have a dedicated path for 64, as it correspond to the parameters - // used by linea-monorepo prover with bls12377 curve. - // once the linea prover switches to smaller fields, this path can be removed. 
- if r.Domain.Cardinality == 64 { + if r.Degree == 64 { + // precompute twiddles for the unrolled FFT twiddlesCoset := precomputeTwiddlesCoset(r.Domain.Generator, shift) - r.smallFFT = func(p fr.Vector) { - fft64(p, twiddlesCoset) + r.smallFFT = func(k fr.Vector, mask uint64) { + if mask == ^uint64(0) { + mask = uint64(len(partialFFT_64) - 1) + } + partialFFT_64[mask](k, twiddlesCoset) } } else { - r.smallFFT = func(p fr.Vector) { - p.Mul(p, fr.Vector(r.cosetTable)) - r.Domain.FFT(p, fft.DIF) + cosetTable, err := r.Domain.CosetTable() + if err != nil { + return nil, err + } + + r.smallFFT = func(k fr.Vector, mask uint64) { + k.Mul(k, fr.Vector(cosetTable)) + r.Domain.FFT(k, fft.DIF) } } + // filling A a := make([]fr.Element, n*r.Degree) ag := make([]fr.Element, n*r.Degree) @@ -134,8 +136,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - // r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - r.smallFFT(r.Ag[i]) + r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } }) @@ -163,7 +164,7 @@ func (r *RSis) Hash(v, res []fr.Element) error { // inner hash it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8) for i := 0; i < len(r.Ag); i++ { - r.InnerHash(it, res, k, i) + r.InnerHash(it, res, k, r.kz, i, ^uint64(0)) } // reduces mod Xᵈ+1 @@ -172,20 +173,15 @@ func (r *RSis) Hash(v, res []fr.Element) error { return nil } -func (r *RSis) InnerHash(it *LimbIterator, res, k fr.Vector, polId int) { +func (r *RSis) InnerHash(it *LimbIterator, res, k, kz fr.Vector, polId int, mask uint64) { + copy(k, kz) zero := uint64(0) for j := 0; j < r.Degree; j++ { l, ok := it.NextLimb() if !ok { - // we need to pad; note that we should use a deterministic padding - // other than 0, but it is not an issue for the current use cases. 
- for m := j; m < r.Degree; m++ { - k[m].SetZero() - } break } zero |= l - k[j].SetZero() k[j][0] = l } if zero == 0 { @@ -193,10 +189,9 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k fr.Vector, polId int) { // we can skip this, FFT(0) = 0 return } - // this is equivalent to: // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - r.smallFFT(k) + r.smallFFT(k, mask) // we compute k * r.Ag[polId] in ℤ_{p}[X]/Xᵈ+1. // k and r.Ag[polId] are in evaluation form on √(g) * diff --git a/ecc/bls12-377/fr/sis/sis_fft.go b/ecc/bls12-377/fr/sis/sis_fft.go index a6731836e9..e7d86e8f2d 100644 --- a/ecc/bls12-377/fr/sis/sis_fft.go +++ b/ecc/bls12-377/fr/sis/sis_fft.go @@ -10,43 +10,5217 @@ import ( "math/big" ) -// fft64 unrolls an FFT with domain.Cardinality == 64 -// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see precomputeTwiddlesCoset -func fft64(a []fr.Element, twiddlesCoset []fr.Element) { +// precomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table +// it then return all elements in the correct order for the unrolled FFT. 
+func precomputeTwiddlesCoset(generator, shifter fr.Element) []fr.Element { + toReturn := make([]fr.Element, 63) + var r, s fr.Element + e := new(big.Int) + + s = shifter + for k := 0; k < 5; k++ { + s.Square(&s) + } + toReturn[0] = s + s = shifter + for k := 0; k < 4; k++ { + s.Square(&s) + } + toReturn[1] = s + r.Exp(generator, e.SetUint64(uint64(1<<4*1))) + toReturn[2].Mul(&r, &s) + s = shifter + for k := 0; k < 3; k++ { + s.Square(&s) + } + toReturn[3] = s + r.Exp(generator, e.SetUint64(uint64(1<<3*2))) + toReturn[4].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<3*1))) + toReturn[5].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<3*3))) + toReturn[6].Mul(&r, &s) + s = shifter + for k := 0; k < 2; k++ { + s.Square(&s) + } + toReturn[7] = s + r.Exp(generator, e.SetUint64(uint64(1<<2*4))) + toReturn[8].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*2))) + toReturn[9].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*6))) + toReturn[10].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*1))) + toReturn[11].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*5))) + toReturn[12].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*3))) + toReturn[13].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<2*7))) + toReturn[14].Mul(&r, &s) + s = shifter + for k := 0; k < 1; k++ { + s.Square(&s) + } + toReturn[15] = s + r.Exp(generator, e.SetUint64(uint64(1<<1*8))) + toReturn[16].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*4))) + toReturn[17].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*12))) + toReturn[18].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*2))) + toReturn[19].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*10))) + toReturn[20].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*6))) + toReturn[21].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*14))) + toReturn[22].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*1))) + toReturn[23].Mul(&r, &s) + r.Exp(generator, 
e.SetUint64(uint64(1<<1*9))) + toReturn[24].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*5))) + toReturn[25].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*13))) + toReturn[26].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*3))) + toReturn[27].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*11))) + toReturn[28].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*7))) + toReturn[29].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<1*15))) + toReturn[30].Mul(&r, &s) + s = shifter + for k := 0; k < 0; k++ { + s.Square(&s) + } + toReturn[31] = s + r.Exp(generator, e.SetUint64(uint64(1<<0*16))) + toReturn[32].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*8))) + toReturn[33].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*24))) + toReturn[34].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*4))) + toReturn[35].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*20))) + toReturn[36].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*12))) + toReturn[37].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*28))) + toReturn[38].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*2))) + toReturn[39].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*18))) + toReturn[40].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*10))) + toReturn[41].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*26))) + toReturn[42].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*6))) + toReturn[43].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*22))) + toReturn[44].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*14))) + toReturn[45].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*30))) + toReturn[46].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*1))) + toReturn[47].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*17))) + toReturn[48].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*9))) + toReturn[49].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*25))) 
+ toReturn[50].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*5))) + toReturn[51].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*21))) + toReturn[52].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*13))) + toReturn[53].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*29))) + toReturn[54].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*3))) + toReturn[55].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*19))) + toReturn[56].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*11))) + toReturn[57].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*27))) + toReturn[58].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*7))) + toReturn[59].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*23))) + toReturn[60].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*15))) + toReturn[61].Mul(&r, &s) + r.Exp(generator, e.SetUint64(uint64(1<<0*31))) + toReturn[62].Mul(&r, &s) + return toReturn +} + +var partialFFT_64 = []func(a, twiddles fr.Vector){ + partialFFT_0, + partialFFT_1, + partialFFT_2, + partialFFT_3, + partialFFT_4, + partialFFT_5, + partialFFT_6, + partialFFT_7, + partialFFT_8, + partialFFT_9, + partialFFT_10, + partialFFT_11, + partialFFT_12, + partialFFT_13, + partialFFT_14, + partialFFT_15, +} + +func partialFFT_0(a, twiddles fr.Vector) { +} + +func partialFFT_1(a, twiddles fr.Vector) { + fr.Butterfly(&a[0], &a[32]) + fr.Butterfly(&a[1], &a[33]) + fr.Butterfly(&a[2], &a[34]) + fr.Butterfly(&a[3], &a[35]) + fr.Butterfly(&a[4], &a[36]) + fr.Butterfly(&a[5], &a[37]) + fr.Butterfly(&a[6], &a[38]) + fr.Butterfly(&a[7], &a[39]) + fr.Butterfly(&a[8], &a[40]) + fr.Butterfly(&a[9], &a[41]) + fr.Butterfly(&a[10], &a[42]) + fr.Butterfly(&a[11], &a[43]) + fr.Butterfly(&a[12], &a[44]) + fr.Butterfly(&a[13], &a[45]) + fr.Butterfly(&a[14], &a[46]) + fr.Butterfly(&a[15], &a[47]) + fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], 
&a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], &a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], &a[58]) + fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + a[12].Mul(&a[12], &twiddles[3]) + a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], &twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], &twiddles[5]) + a[45].Mul(&a[45], &twiddles[5]) + a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + 
fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + fr.Butterfly(&a[48], &a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + a[6].Mul(&a[6], &twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], &twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], &twiddles[11]) + a[39].Mul(&a[39], &twiddles[11]) + a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], &twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + 
a[63].Mul(&a[63], &twiddles[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + fr.Butterfly(&a[41], &a[45]) + fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + fr.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) + a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], &twiddles[22]) + a[34].Mul(&a[34], &twiddles[23]) + a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], &twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + a[55].Mul(&a[55], &twiddles[28]) + 
a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) + fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) + fr.Butterfly(&a[36], &a[38]) + fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) + fr.Butterfly(&a[56], &a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], &twiddles[43]) + a[27].Mul(&a[27], &twiddles[44]) + a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], &twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + 
a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) + fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) + fr.Butterfly(&a[30], &a[31]) + fr.Butterfly(&a[32], &a[33]) + fr.Butterfly(&a[34], &a[35]) + fr.Butterfly(&a[36], &a[37]) + fr.Butterfly(&a[38], &a[39]) + fr.Butterfly(&a[40], &a[41]) + fr.Butterfly(&a[42], &a[43]) + fr.Butterfly(&a[44], &a[45]) + fr.Butterfly(&a[46], &a[47]) + fr.Butterfly(&a[48], &a[49]) + fr.Butterfly(&a[50], &a[51]) + fr.Butterfly(&a[52], &a[53]) + fr.Butterfly(&a[54], &a[55]) + fr.Butterfly(&a[56], &a[57]) + fr.Butterfly(&a[58], &a[59]) + fr.Butterfly(&a[60], &a[61]) + fr.Butterfly(&a[62], &a[63]) +} + +func partialFFT_2(a, twiddles fr.Vector) { + fr.Butterfly(&a[16], &a[48]) + fr.Butterfly(&a[17], &a[49]) + fr.Butterfly(&a[18], &a[50]) + fr.Butterfly(&a[19], &a[51]) + fr.Butterfly(&a[20], &a[52]) + fr.Butterfly(&a[21], &a[53]) + fr.Butterfly(&a[22], &a[54]) + fr.Butterfly(&a[23], &a[55]) + fr.Butterfly(&a[24], &a[56]) + fr.Butterfly(&a[25], &a[57]) + fr.Butterfly(&a[26], &a[58]) + fr.Butterfly(&a[27], &a[59]) + fr.Butterfly(&a[28], &a[60]) + fr.Butterfly(&a[29], &a[61]) + fr.Butterfly(&a[30], &a[62]) + fr.Butterfly(&a[31], &a[63]) + a[16].Mul(&a[16], &twiddles[1]) + a[17].Mul(&a[17], &twiddles[1]) + a[18].Mul(&a[18], &twiddles[1]) + a[19].Mul(&a[19], &twiddles[1]) + a[20].Mul(&a[20], &twiddles[1]) + a[21].Mul(&a[21], &twiddles[1]) + a[22].Mul(&a[22], 
&twiddles[1]) + a[23].Mul(&a[23], &twiddles[1]) + a[24].Mul(&a[24], &twiddles[1]) + a[25].Mul(&a[25], &twiddles[1]) + a[26].Mul(&a[26], &twiddles[1]) + a[27].Mul(&a[27], &twiddles[1]) + a[28].Mul(&a[28], &twiddles[1]) + a[29].Mul(&a[29], &twiddles[1]) + a[30].Mul(&a[30], &twiddles[1]) + a[31].Mul(&a[31], &twiddles[1]) + a[48].Mul(&a[48], &twiddles[2]) + a[49].Mul(&a[49], &twiddles[2]) + a[50].Mul(&a[50], &twiddles[2]) + a[51].Mul(&a[51], &twiddles[2]) + a[52].Mul(&a[52], &twiddles[2]) + a[53].Mul(&a[53], &twiddles[2]) + a[54].Mul(&a[54], &twiddles[2]) + a[55].Mul(&a[55], &twiddles[2]) + a[56].Mul(&a[56], &twiddles[2]) + a[57].Mul(&a[57], &twiddles[2]) + a[58].Mul(&a[58], &twiddles[2]) + a[59].Mul(&a[59], &twiddles[2]) + a[60].Mul(&a[60], &twiddles[2]) + a[61].Mul(&a[61], &twiddles[2]) + a[62].Mul(&a[62], &twiddles[2]) + a[63].Mul(&a[63], &twiddles[2]) + fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], &a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], &a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], &a[58]) + fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + a[12].Mul(&a[12], 
&twiddles[3]) + a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], &twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], &twiddles[5]) + a[45].Mul(&a[45], &twiddles[5]) + a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + fr.Butterfly(&a[48], &a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + a[6].Mul(&a[6], 
&twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], &twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], &twiddles[11]) + a[39].Mul(&a[39], &twiddles[11]) + a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], &twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + a[63].Mul(&a[63], &twiddles[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + fr.Butterfly(&a[41], &a[45]) + fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + fr.Butterfly(&a[59], &a[63]) 
+ a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) + a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], &twiddles[22]) + a[34].Mul(&a[34], &twiddles[23]) + a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], &twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + a[55].Mul(&a[55], &twiddles[28]) + a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) + fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) + fr.Butterfly(&a[36], &a[38]) + fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) + fr.Butterfly(&a[56], 
&a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], &twiddles[43]) + a[27].Mul(&a[27], &twiddles[44]) + a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], &twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) + fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) + fr.Butterfly(&a[30], &a[31]) + fr.Butterfly(&a[32], &a[33]) + fr.Butterfly(&a[34], &a[35]) + fr.Butterfly(&a[36], &a[37]) + fr.Butterfly(&a[38], &a[39]) + fr.Butterfly(&a[40], &a[41]) + fr.Butterfly(&a[42], &a[43]) + fr.Butterfly(&a[44], &a[45]) + fr.Butterfly(&a[46], &a[47]) + fr.Butterfly(&a[48], &a[49]) + fr.Butterfly(&a[50], 
&a[51]) + fr.Butterfly(&a[52], &a[53]) + fr.Butterfly(&a[54], &a[55]) + fr.Butterfly(&a[56], &a[57]) + fr.Butterfly(&a[58], &a[59]) + fr.Butterfly(&a[60], &a[61]) + fr.Butterfly(&a[62], &a[63]) +} + +func partialFFT_3(a, twiddles fr.Vector) { + fr.Butterfly(&a[0], &a[32]) + fr.Butterfly(&a[1], &a[33]) + fr.Butterfly(&a[2], &a[34]) + fr.Butterfly(&a[3], &a[35]) + fr.Butterfly(&a[4], &a[36]) + fr.Butterfly(&a[5], &a[37]) + fr.Butterfly(&a[6], &a[38]) + fr.Butterfly(&a[7], &a[39]) + fr.Butterfly(&a[8], &a[40]) + fr.Butterfly(&a[9], &a[41]) + fr.Butterfly(&a[10], &a[42]) + fr.Butterfly(&a[11], &a[43]) + fr.Butterfly(&a[12], &a[44]) + fr.Butterfly(&a[13], &a[45]) + fr.Butterfly(&a[14], &a[46]) + fr.Butterfly(&a[15], &a[47]) + fr.Butterfly(&a[16], &a[48]) + fr.Butterfly(&a[17], &a[49]) + fr.Butterfly(&a[18], &a[50]) + fr.Butterfly(&a[19], &a[51]) + fr.Butterfly(&a[20], &a[52]) + fr.Butterfly(&a[21], &a[53]) + fr.Butterfly(&a[22], &a[54]) + fr.Butterfly(&a[23], &a[55]) + fr.Butterfly(&a[24], &a[56]) + fr.Butterfly(&a[25], &a[57]) + fr.Butterfly(&a[26], &a[58]) + fr.Butterfly(&a[27], &a[59]) + fr.Butterfly(&a[28], &a[60]) + fr.Butterfly(&a[29], &a[61]) + fr.Butterfly(&a[30], &a[62]) + fr.Butterfly(&a[31], &a[63]) + a[16].Mul(&a[16], &twiddles[1]) + a[17].Mul(&a[17], &twiddles[1]) + a[18].Mul(&a[18], &twiddles[1]) + a[19].Mul(&a[19], &twiddles[1]) + a[20].Mul(&a[20], &twiddles[1]) + a[21].Mul(&a[21], &twiddles[1]) + a[22].Mul(&a[22], &twiddles[1]) + a[23].Mul(&a[23], &twiddles[1]) + a[24].Mul(&a[24], &twiddles[1]) + a[25].Mul(&a[25], &twiddles[1]) + a[26].Mul(&a[26], &twiddles[1]) + a[27].Mul(&a[27], &twiddles[1]) + a[28].Mul(&a[28], &twiddles[1]) + a[29].Mul(&a[29], &twiddles[1]) + a[30].Mul(&a[30], &twiddles[1]) + a[31].Mul(&a[31], &twiddles[1]) + a[48].Mul(&a[48], &twiddles[2]) + a[49].Mul(&a[49], &twiddles[2]) + a[50].Mul(&a[50], &twiddles[2]) + a[51].Mul(&a[51], &twiddles[2]) + a[52].Mul(&a[52], &twiddles[2]) + a[53].Mul(&a[53], &twiddles[2]) + a[54].Mul(&a[54], 
&twiddles[2]) + a[55].Mul(&a[55], &twiddles[2]) + a[56].Mul(&a[56], &twiddles[2]) + a[57].Mul(&a[57], &twiddles[2]) + a[58].Mul(&a[58], &twiddles[2]) + a[59].Mul(&a[59], &twiddles[2]) + a[60].Mul(&a[60], &twiddles[2]) + a[61].Mul(&a[61], &twiddles[2]) + a[62].Mul(&a[62], &twiddles[2]) + a[63].Mul(&a[63], &twiddles[2]) + fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], &a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], &a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], &a[58]) + fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + a[12].Mul(&a[12], &twiddles[3]) + a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], &twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], 
&twiddles[5]) + a[45].Mul(&a[45], &twiddles[5]) + a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + fr.Butterfly(&a[48], &a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + a[6].Mul(&a[6], &twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], &twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], 
&twiddles[11]) + a[39].Mul(&a[39], &twiddles[11]) + a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], &twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + a[63].Mul(&a[63], &twiddles[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + fr.Butterfly(&a[41], &a[45]) + fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + fr.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) + a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], 
&twiddles[22]) + a[34].Mul(&a[34], &twiddles[23]) + a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], &twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + a[55].Mul(&a[55], &twiddles[28]) + a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) + fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) + fr.Butterfly(&a[36], &a[38]) + fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) + fr.Butterfly(&a[56], &a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], 
&twiddles[43]) + a[27].Mul(&a[27], &twiddles[44]) + a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], &twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) + fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) + fr.Butterfly(&a[30], &a[31]) + fr.Butterfly(&a[32], &a[33]) + fr.Butterfly(&a[34], &a[35]) + fr.Butterfly(&a[36], &a[37]) + fr.Butterfly(&a[38], &a[39]) + fr.Butterfly(&a[40], &a[41]) + fr.Butterfly(&a[42], &a[43]) + fr.Butterfly(&a[44], &a[45]) + fr.Butterfly(&a[46], &a[47]) + fr.Butterfly(&a[48], &a[49]) + fr.Butterfly(&a[50], &a[51]) + fr.Butterfly(&a[52], &a[53]) + fr.Butterfly(&a[54], &a[55]) + fr.Butterfly(&a[56], &a[57]) + fr.Butterfly(&a[58], &a[59]) + fr.Butterfly(&a[60], &a[61]) + fr.Butterfly(&a[62], &a[63]) +} + +func partialFFT_4(a, twiddles fr.Vector) { + a[32].Mul(&a[32], &twiddles[0]) + a[33].Mul(&a[33], &twiddles[0]) + a[34].Mul(&a[34], &twiddles[0]) + a[35].Mul(&a[35], &twiddles[0]) + a[36].Mul(&a[36], &twiddles[0]) + a[37].Mul(&a[37], &twiddles[0]) + a[38].Mul(&a[38], &twiddles[0]) + a[39].Mul(&a[39], &twiddles[0]) + 
a[40].Mul(&a[40], &twiddles[0]) + a[41].Mul(&a[41], &twiddles[0]) + a[42].Mul(&a[42], &twiddles[0]) + a[43].Mul(&a[43], &twiddles[0]) + a[44].Mul(&a[44], &twiddles[0]) + a[45].Mul(&a[45], &twiddles[0]) + a[46].Mul(&a[46], &twiddles[0]) + a[47].Mul(&a[47], &twiddles[0]) + fr.Butterfly(&a[0], &a[32]) + fr.Butterfly(&a[1], &a[33]) + fr.Butterfly(&a[2], &a[34]) + fr.Butterfly(&a[3], &a[35]) + fr.Butterfly(&a[4], &a[36]) + fr.Butterfly(&a[5], &a[37]) + fr.Butterfly(&a[6], &a[38]) + fr.Butterfly(&a[7], &a[39]) + fr.Butterfly(&a[8], &a[40]) + fr.Butterfly(&a[9], &a[41]) + fr.Butterfly(&a[10], &a[42]) + fr.Butterfly(&a[11], &a[43]) + fr.Butterfly(&a[12], &a[44]) + fr.Butterfly(&a[13], &a[45]) + fr.Butterfly(&a[14], &a[46]) + fr.Butterfly(&a[15], &a[47]) + fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], &a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], &a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], &a[58]) + fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + a[12].Mul(&a[12], &twiddles[3]) + a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], 
&twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], &twiddles[5]) + a[45].Mul(&a[45], &twiddles[5]) + a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + fr.Butterfly(&a[48], &a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + a[6].Mul(&a[6], &twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], 
&twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], &twiddles[11]) + a[39].Mul(&a[39], &twiddles[11]) + a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], &twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + a[63].Mul(&a[63], &twiddles[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + fr.Butterfly(&a[41], &a[45]) + fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + fr.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) 
+ a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], &twiddles[22]) + a[34].Mul(&a[34], &twiddles[23]) + a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], &twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + a[55].Mul(&a[55], &twiddles[28]) + a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) + fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) + fr.Butterfly(&a[36], &a[38]) + fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) + fr.Butterfly(&a[56], &a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + 
a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], &twiddles[43]) + a[27].Mul(&a[27], &twiddles[44]) + a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], &twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) + fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) + fr.Butterfly(&a[30], &a[31]) + fr.Butterfly(&a[32], &a[33]) + fr.Butterfly(&a[34], &a[35]) + fr.Butterfly(&a[36], &a[37]) + fr.Butterfly(&a[38], &a[39]) + fr.Butterfly(&a[40], &a[41]) + fr.Butterfly(&a[42], &a[43]) + fr.Butterfly(&a[44], &a[45]) + fr.Butterfly(&a[46], &a[47]) + fr.Butterfly(&a[48], &a[49]) + fr.Butterfly(&a[50], &a[51]) + fr.Butterfly(&a[52], &a[53]) + fr.Butterfly(&a[54], &a[55]) + fr.Butterfly(&a[56], &a[57]) + 
fr.Butterfly(&a[58], &a[59]) + fr.Butterfly(&a[60], &a[61]) + fr.Butterfly(&a[62], &a[63]) +} + +func partialFFT_5(a, twiddles fr.Vector) { + a[32].Mul(&a[32], &twiddles[0]) + a[33].Mul(&a[33], &twiddles[0]) + a[34].Mul(&a[34], &twiddles[0]) + a[35].Mul(&a[35], &twiddles[0]) + a[36].Mul(&a[36], &twiddles[0]) + a[37].Mul(&a[37], &twiddles[0]) + a[38].Mul(&a[38], &twiddles[0]) + a[39].Mul(&a[39], &twiddles[0]) + a[40].Mul(&a[40], &twiddles[0]) + a[41].Mul(&a[41], &twiddles[0]) + a[42].Mul(&a[42], &twiddles[0]) + a[43].Mul(&a[43], &twiddles[0]) + a[44].Mul(&a[44], &twiddles[0]) + a[45].Mul(&a[45], &twiddles[0]) + a[46].Mul(&a[46], &twiddles[0]) + a[47].Mul(&a[47], &twiddles[0]) + fr.Butterfly(&a[0], &a[32]) + fr.Butterfly(&a[1], &a[33]) + fr.Butterfly(&a[2], &a[34]) + fr.Butterfly(&a[3], &a[35]) + fr.Butterfly(&a[4], &a[36]) + fr.Butterfly(&a[5], &a[37]) + fr.Butterfly(&a[6], &a[38]) + fr.Butterfly(&a[7], &a[39]) + fr.Butterfly(&a[8], &a[40]) + fr.Butterfly(&a[9], &a[41]) + fr.Butterfly(&a[10], &a[42]) + fr.Butterfly(&a[11], &a[43]) + fr.Butterfly(&a[12], &a[44]) + fr.Butterfly(&a[13], &a[45]) + fr.Butterfly(&a[14], &a[46]) + fr.Butterfly(&a[15], &a[47]) + fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], &a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], &a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], &a[58]) + 
fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + a[12].Mul(&a[12], &twiddles[3]) + a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], &twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], &twiddles[5]) + a[45].Mul(&a[45], &twiddles[5]) + a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + fr.Butterfly(&a[48], 
&a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + a[6].Mul(&a[6], &twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], &twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], &twiddles[11]) + a[39].Mul(&a[39], &twiddles[11]) + a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], &twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + a[63].Mul(&a[63], &twiddles[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + fr.Butterfly(&a[41], &a[45]) + 
fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + fr.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) + a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], &twiddles[22]) + a[34].Mul(&a[34], &twiddles[23]) + a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], &twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + a[55].Mul(&a[55], &twiddles[28]) + a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) + fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) + fr.Butterfly(&a[36], &a[38]) 
+ fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) + fr.Butterfly(&a[56], &a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], &twiddles[43]) + a[27].Mul(&a[27], &twiddles[44]) + a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], &twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) + fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) + fr.Butterfly(&a[30], &a[31]) 
+ fr.Butterfly(&a[32], &a[33]) + fr.Butterfly(&a[34], &a[35]) + fr.Butterfly(&a[36], &a[37]) + fr.Butterfly(&a[38], &a[39]) + fr.Butterfly(&a[40], &a[41]) + fr.Butterfly(&a[42], &a[43]) + fr.Butterfly(&a[44], &a[45]) + fr.Butterfly(&a[46], &a[47]) + fr.Butterfly(&a[48], &a[49]) + fr.Butterfly(&a[50], &a[51]) + fr.Butterfly(&a[52], &a[53]) + fr.Butterfly(&a[54], &a[55]) + fr.Butterfly(&a[56], &a[57]) + fr.Butterfly(&a[58], &a[59]) + fr.Butterfly(&a[60], &a[61]) + fr.Butterfly(&a[62], &a[63]) +} + +func partialFFT_6(a, twiddles fr.Vector) { + a[32].Mul(&a[32], &twiddles[0]) + a[33].Mul(&a[33], &twiddles[0]) + a[34].Mul(&a[34], &twiddles[0]) + a[35].Mul(&a[35], &twiddles[0]) + a[36].Mul(&a[36], &twiddles[0]) + a[37].Mul(&a[37], &twiddles[0]) + a[38].Mul(&a[38], &twiddles[0]) + a[39].Mul(&a[39], &twiddles[0]) + a[40].Mul(&a[40], &twiddles[0]) + a[41].Mul(&a[41], &twiddles[0]) + a[42].Mul(&a[42], &twiddles[0]) + a[43].Mul(&a[43], &twiddles[0]) + a[44].Mul(&a[44], &twiddles[0]) + a[45].Mul(&a[45], &twiddles[0]) + a[46].Mul(&a[46], &twiddles[0]) + a[47].Mul(&a[47], &twiddles[0]) + fr.Butterfly(&a[0], &a[32]) + fr.Butterfly(&a[1], &a[33]) + fr.Butterfly(&a[2], &a[34]) + fr.Butterfly(&a[3], &a[35]) + fr.Butterfly(&a[4], &a[36]) + fr.Butterfly(&a[5], &a[37]) + fr.Butterfly(&a[6], &a[38]) + fr.Butterfly(&a[7], &a[39]) + fr.Butterfly(&a[8], &a[40]) + fr.Butterfly(&a[9], &a[41]) + fr.Butterfly(&a[10], &a[42]) + fr.Butterfly(&a[11], &a[43]) + fr.Butterfly(&a[12], &a[44]) + fr.Butterfly(&a[13], &a[45]) + fr.Butterfly(&a[14], &a[46]) + fr.Butterfly(&a[15], &a[47]) + fr.Butterfly(&a[16], &a[48]) + fr.Butterfly(&a[17], &a[49]) + fr.Butterfly(&a[18], &a[50]) + fr.Butterfly(&a[19], &a[51]) + fr.Butterfly(&a[20], &a[52]) + fr.Butterfly(&a[21], &a[53]) + fr.Butterfly(&a[22], &a[54]) + fr.Butterfly(&a[23], &a[55]) + fr.Butterfly(&a[24], &a[56]) + fr.Butterfly(&a[25], &a[57]) + fr.Butterfly(&a[26], &a[58]) + fr.Butterfly(&a[27], &a[59]) + fr.Butterfly(&a[28], &a[60]) + 
fr.Butterfly(&a[29], &a[61]) + fr.Butterfly(&a[30], &a[62]) + fr.Butterfly(&a[31], &a[63]) + a[16].Mul(&a[16], &twiddles[1]) + a[17].Mul(&a[17], &twiddles[1]) + a[18].Mul(&a[18], &twiddles[1]) + a[19].Mul(&a[19], &twiddles[1]) + a[20].Mul(&a[20], &twiddles[1]) + a[21].Mul(&a[21], &twiddles[1]) + a[22].Mul(&a[22], &twiddles[1]) + a[23].Mul(&a[23], &twiddles[1]) + a[24].Mul(&a[24], &twiddles[1]) + a[25].Mul(&a[25], &twiddles[1]) + a[26].Mul(&a[26], &twiddles[1]) + a[27].Mul(&a[27], &twiddles[1]) + a[28].Mul(&a[28], &twiddles[1]) + a[29].Mul(&a[29], &twiddles[1]) + a[30].Mul(&a[30], &twiddles[1]) + a[31].Mul(&a[31], &twiddles[1]) + a[48].Mul(&a[48], &twiddles[2]) + a[49].Mul(&a[49], &twiddles[2]) + a[50].Mul(&a[50], &twiddles[2]) + a[51].Mul(&a[51], &twiddles[2]) + a[52].Mul(&a[52], &twiddles[2]) + a[53].Mul(&a[53], &twiddles[2]) + a[54].Mul(&a[54], &twiddles[2]) + a[55].Mul(&a[55], &twiddles[2]) + a[56].Mul(&a[56], &twiddles[2]) + a[57].Mul(&a[57], &twiddles[2]) + a[58].Mul(&a[58], &twiddles[2]) + a[59].Mul(&a[59], &twiddles[2]) + a[60].Mul(&a[60], &twiddles[2]) + a[61].Mul(&a[61], &twiddles[2]) + a[62].Mul(&a[62], &twiddles[2]) + a[63].Mul(&a[63], &twiddles[2]) + fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], &a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], &a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], 
&a[58]) + fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + a[12].Mul(&a[12], &twiddles[3]) + a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], &twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], &twiddles[5]) + a[45].Mul(&a[45], &twiddles[5]) + a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + 
fr.Butterfly(&a[48], &a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + a[6].Mul(&a[6], &twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], &twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], &twiddles[11]) + a[39].Mul(&a[39], &twiddles[11]) + a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], &twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + a[63].Mul(&a[63], &twiddles[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + 
fr.Butterfly(&a[41], &a[45]) + fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + fr.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) + a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], &twiddles[22]) + a[34].Mul(&a[34], &twiddles[23]) + a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], &twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + a[55].Mul(&a[55], &twiddles[28]) + a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) + fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) 
+ fr.Butterfly(&a[36], &a[38]) + fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) + fr.Butterfly(&a[56], &a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], &twiddles[43]) + a[27].Mul(&a[27], &twiddles[44]) + a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], &twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) + fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) 
+	fr.Butterfly(&a[30], &a[31])
+	fr.Butterfly(&a[32], &a[33])
+	fr.Butterfly(&a[34], &a[35])
+	fr.Butterfly(&a[36], &a[37])
+	fr.Butterfly(&a[38], &a[39])
+	fr.Butterfly(&a[40], &a[41])
+	fr.Butterfly(&a[42], &a[43])
+	fr.Butterfly(&a[44], &a[45])
+	fr.Butterfly(&a[46], &a[47])
+	fr.Butterfly(&a[48], &a[49])
+	fr.Butterfly(&a[50], &a[51])
+	fr.Butterfly(&a[52], &a[53])
+	fr.Butterfly(&a[54], &a[55])
+	fr.Butterfly(&a[56], &a[57])
+	fr.Butterfly(&a[58], &a[59])
+	fr.Butterfly(&a[60], &a[61])
+	fr.Butterfly(&a[62], &a[63])
+}
+
+// partialFFT_7 applies six multiply-then-butterfly stages in place to the
+// 64 elements a[0..63], reading twiddles[0..62]. a must hold at least 64
+// elements and twiddles at least 63.
+//
+// Only the first stage is specialised: a[32..47] is scaled by twiddles[0],
+// a[48..63] is left unscaled, and then all 32 stride-32 butterflies run.
+// The remaining five stages are the complete network.
+//
+// NOTE(review): skipping the scaling of a[48..63] is only sound if those
+// entries are zero on entry. Presumably the suffix 7 (0b0111) flags the
+// 16-element input blocks that may be non-zero; confirm against the
+// generator and the call site.
+func partialFFT_7(a, twiddles fr.Vector) {
+	// Stage 1, stride 32.
+	for i := 32; i < 48; i++ {
+		a[i].Mul(&a[i], &twiddles[0])
+	}
+	for i := 0; i < 32; i++ {
+		fr.Butterfly(&a[i], &a[i+32])
+	}
+
+	// Stages 2-6, strides 16, 8, 4, 2, 1. Within a stage the upper half of
+	// every 2*stride-wide group is scaled by that group's own twiddle
+	// (twiddles[1..62], consumed in order), and only once the whole stage is
+	// scaled are the butterflies applied, exactly as in the unrolled form.
+	next := 1
+	for stride := 16; stride > 0; stride /= 2 {
+		for base := stride; base < 64; base += 2 * stride {
+			for i := base; i < base+stride; i++ {
+				a[i].Mul(&a[i], &twiddles[next])
+			}
+			next++
+		}
+		for base := 0; base < 64; base += 2 * stride {
+			for i := base; i < base+stride; i++ {
+				fr.Butterfly(&a[i], &a[i+stride])
+			}
+		}
+	}
+}
+
+// partialFFT_8 applies six multiply-then-butterfly stages in place to the
+// 64 elements a[0..63], reading twiddles[0..62]. a must hold at least 64
+// elements and twiddles at least 63.
+//
+// Only the first stage is specialised: a[48..63] is scaled by twiddles[0]
+// and just the 16 butterflies pairing a[16..31] with a[48..63] run; the
+// pairs (a[i], a[i+32]) for i in 0..15 are not touched. The remaining five
+// stages are the complete network.
+//
+// NOTE(review): skipping those pairs is presumably sound only when a[0..15]
+// and a[32..47] are zero on entry. The suffix 8 (0b1000) looks like a mask
+// of the 16-element input blocks that may be non-zero; confirm against the
+// generator and the call site.
+func partialFFT_8(a, twiddles fr.Vector) {
+	// Stage 1, stride 32, restricted to the second and fourth quarters.
+	for i := 48; i < 64; i++ {
+		a[i].Mul(&a[i], &twiddles[0])
+	}
+	for i := 16; i < 32; i++ {
+		fr.Butterfly(&a[i], &a[i+32])
+	}
+
+	// Stages 2-6, strides 16, 8, 4, 2, 1. Within a stage the upper half of
+	// every 2*stride-wide group is scaled by that group's own twiddle
+	// (twiddles[1..62], consumed in order), and only once the whole stage is
+	// scaled are the butterflies applied, exactly as in the unrolled form.
+	next := 1
+	for stride := 16; stride > 0; stride /= 2 {
+		for base := stride; base < 64; base += 2 * stride {
+			for i := base; i < base+stride; i++ {
+				a[i].Mul(&a[i], &twiddles[next])
+			}
+			next++
+		}
+		for base := 0; base < 64; base += 2 * stride {
+			for i := base; i < base+stride; i++ {
+				fr.Butterfly(&a[i], &a[i+stride])
+			}
+		}
+	}
+}
+
+func partialFFT_9(a, twiddles fr.Vector) {
+	a[48].Mul(&a[48], &twiddles[0])
+	a[49].Mul(&a[49], &twiddles[0])
+	a[50].Mul(&a[50], &twiddles[0])
+	a[51].Mul(&a[51], &twiddles[0])
+	a[52].Mul(&a[52], &twiddles[0])
+	a[53].Mul(&a[53], &twiddles[0])
+	a[54].Mul(&a[54], &twiddles[0])
+	a[55].Mul(&a[55], &twiddles[0])
+	a[56].Mul(&a[56], &twiddles[0])
+	a[57].Mul(&a[57], &twiddles[0])
+	a[58].Mul(&a[58], &twiddles[0])
+	a[59].Mul(&a[59], &twiddles[0])
+	a[60].Mul(&a[60], &twiddles[0])
+	a[61].Mul(&a[61], &twiddles[0])
+	a[62].Mul(&a[62], &twiddles[0])
+	a[63].Mul(&a[63], &twiddles[0])
+	fr.Butterfly(&a[0], &a[32])
+	fr.Butterfly(&a[1], &a[33])
+	fr.Butterfly(&a[2], &a[34])
+	fr.Butterfly(&a[3], &a[35])
+	fr.Butterfly(&a[4], &a[36])
+	fr.Butterfly(&a[5], &a[37])
+	fr.Butterfly(&a[6], &a[38])
+	fr.Butterfly(&a[7], &a[39])
+	fr.Butterfly(&a[8], &a[40])
+	fr.Butterfly(&a[9], &a[41])
+	fr.Butterfly(&a[10], &a[42])
+	fr.Butterfly(&a[11], &a[43])
+	fr.Butterfly(&a[12], &a[44])
+	fr.Butterfly(&a[13], &a[45])
+	fr.Butterfly(&a[14], &a[46])
+	fr.Butterfly(&a[15], &a[47])
+	fr.Butterfly(&a[16], &a[48])
+	fr.Butterfly(&a[17], &a[49])
+	fr.Butterfly(&a[18], &a[50])
+	fr.Butterfly(&a[19], &a[51])
+	fr.Butterfly(&a[20], &a[52])
+	fr.Butterfly(&a[21], &a[53])
+	fr.Butterfly(&a[22], &a[54])
+	fr.Butterfly(&a[23], &a[55])
+	fr.Butterfly(&a[24], &a[56])
+	fr.Butterfly(&a[25], &a[57])
+	fr.Butterfly(&a[26], &a[58])
+	fr.Butterfly(&a[27], &a[59])
+	fr.Butterfly(&a[28], &a[60])
+	fr.Butterfly(&a[29], &a[61])
+	fr.Butterfly(&a[30], &a[62])
+	fr.Butterfly(&a[31], &a[63])
+	a[16].Mul(&a[16], &twiddles[1])
+	a[17].Mul(&a[17], &twiddles[1])
+	a[18].Mul(&a[18], &twiddles[1])
+	a[19].Mul(&a[19], &twiddles[1])
+	a[20].Mul(&a[20], &twiddles[1])
+	a[21].Mul(&a[21], &twiddles[1])
+	a[22].Mul(&a[22], &twiddles[1])
+	a[23].Mul(&a[23], &twiddles[1])
+	a[24].Mul(&a[24], &twiddles[1])
+	a[25].Mul(&a[25],
&twiddles[1]) + a[26].Mul(&a[26], &twiddles[1]) + a[27].Mul(&a[27], &twiddles[1]) + a[28].Mul(&a[28], &twiddles[1]) + a[29].Mul(&a[29], &twiddles[1]) + a[30].Mul(&a[30], &twiddles[1]) + a[31].Mul(&a[31], &twiddles[1]) + a[48].Mul(&a[48], &twiddles[2]) + a[49].Mul(&a[49], &twiddles[2]) + a[50].Mul(&a[50], &twiddles[2]) + a[51].Mul(&a[51], &twiddles[2]) + a[52].Mul(&a[52], &twiddles[2]) + a[53].Mul(&a[53], &twiddles[2]) + a[54].Mul(&a[54], &twiddles[2]) + a[55].Mul(&a[55], &twiddles[2]) + a[56].Mul(&a[56], &twiddles[2]) + a[57].Mul(&a[57], &twiddles[2]) + a[58].Mul(&a[58], &twiddles[2]) + a[59].Mul(&a[59], &twiddles[2]) + a[60].Mul(&a[60], &twiddles[2]) + a[61].Mul(&a[61], &twiddles[2]) + a[62].Mul(&a[62], &twiddles[2]) + a[63].Mul(&a[63], &twiddles[2]) + fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], &a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], &a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], &a[58]) + fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + a[12].Mul(&a[12], &twiddles[3]) + a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], 
&twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], &twiddles[5]) + a[45].Mul(&a[45], &twiddles[5]) + a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + fr.Butterfly(&a[48], &a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + a[6].Mul(&a[6], &twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], 
&twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], &twiddles[11]) + a[39].Mul(&a[39], &twiddles[11]) + a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], &twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + a[63].Mul(&a[63], &twiddles[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + fr.Butterfly(&a[41], &a[45]) + fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + fr.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) 
+ a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], &twiddles[22]) + a[34].Mul(&a[34], &twiddles[23]) + a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], &twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + a[55].Mul(&a[55], &twiddles[28]) + a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) + fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) + fr.Butterfly(&a[36], &a[38]) + fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) + fr.Butterfly(&a[56], &a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + 
a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], &twiddles[43]) + a[27].Mul(&a[27], &twiddles[44]) + a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], &twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) + fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) + fr.Butterfly(&a[30], &a[31]) + fr.Butterfly(&a[32], &a[33]) + fr.Butterfly(&a[34], &a[35]) + fr.Butterfly(&a[36], &a[37]) + fr.Butterfly(&a[38], &a[39]) + fr.Butterfly(&a[40], &a[41]) + fr.Butterfly(&a[42], &a[43]) + fr.Butterfly(&a[44], &a[45]) + fr.Butterfly(&a[46], &a[47]) + fr.Butterfly(&a[48], &a[49]) + fr.Butterfly(&a[50], &a[51]) + fr.Butterfly(&a[52], &a[53]) + fr.Butterfly(&a[54], &a[55]) + fr.Butterfly(&a[56], &a[57]) + 
fr.Butterfly(&a[58], &a[59]) + fr.Butterfly(&a[60], &a[61]) + fr.Butterfly(&a[62], &a[63]) +} + +func partialFFT_10(a, twiddles fr.Vector) { + a[48].Mul(&a[48], &twiddles[0]) + a[49].Mul(&a[49], &twiddles[0]) + a[50].Mul(&a[50], &twiddles[0]) + a[51].Mul(&a[51], &twiddles[0]) + a[52].Mul(&a[52], &twiddles[0]) + a[53].Mul(&a[53], &twiddles[0]) + a[54].Mul(&a[54], &twiddles[0]) + a[55].Mul(&a[55], &twiddles[0]) + a[56].Mul(&a[56], &twiddles[0]) + a[57].Mul(&a[57], &twiddles[0]) + a[58].Mul(&a[58], &twiddles[0]) + a[59].Mul(&a[59], &twiddles[0]) + a[60].Mul(&a[60], &twiddles[0]) + a[61].Mul(&a[61], &twiddles[0]) + a[62].Mul(&a[62], &twiddles[0]) + a[63].Mul(&a[63], &twiddles[0]) + fr.Butterfly(&a[16], &a[48]) + fr.Butterfly(&a[17], &a[49]) + fr.Butterfly(&a[18], &a[50]) + fr.Butterfly(&a[19], &a[51]) + fr.Butterfly(&a[20], &a[52]) + fr.Butterfly(&a[21], &a[53]) + fr.Butterfly(&a[22], &a[54]) + fr.Butterfly(&a[23], &a[55]) + fr.Butterfly(&a[24], &a[56]) + fr.Butterfly(&a[25], &a[57]) + fr.Butterfly(&a[26], &a[58]) + fr.Butterfly(&a[27], &a[59]) + fr.Butterfly(&a[28], &a[60]) + fr.Butterfly(&a[29], &a[61]) + fr.Butterfly(&a[30], &a[62]) + fr.Butterfly(&a[31], &a[63]) + a[16].Mul(&a[16], &twiddles[1]) + a[17].Mul(&a[17], &twiddles[1]) + a[18].Mul(&a[18], &twiddles[1]) + a[19].Mul(&a[19], &twiddles[1]) + a[20].Mul(&a[20], &twiddles[1]) + a[21].Mul(&a[21], &twiddles[1]) + a[22].Mul(&a[22], &twiddles[1]) + a[23].Mul(&a[23], &twiddles[1]) + a[24].Mul(&a[24], &twiddles[1]) + a[25].Mul(&a[25], &twiddles[1]) + a[26].Mul(&a[26], &twiddles[1]) + a[27].Mul(&a[27], &twiddles[1]) + a[28].Mul(&a[28], &twiddles[1]) + a[29].Mul(&a[29], &twiddles[1]) + a[30].Mul(&a[30], &twiddles[1]) + a[31].Mul(&a[31], &twiddles[1]) + a[48].Mul(&a[48], &twiddles[2]) + a[49].Mul(&a[49], &twiddles[2]) + a[50].Mul(&a[50], &twiddles[2]) + a[51].Mul(&a[51], &twiddles[2]) + a[52].Mul(&a[52], &twiddles[2]) + a[53].Mul(&a[53], &twiddles[2]) + a[54].Mul(&a[54], &twiddles[2]) + a[55].Mul(&a[55], &twiddles[2]) + 
a[56].Mul(&a[56], &twiddles[2]) + a[57].Mul(&a[57], &twiddles[2]) + a[58].Mul(&a[58], &twiddles[2]) + a[59].Mul(&a[59], &twiddles[2]) + a[60].Mul(&a[60], &twiddles[2]) + a[61].Mul(&a[61], &twiddles[2]) + a[62].Mul(&a[62], &twiddles[2]) + a[63].Mul(&a[63], &twiddles[2]) + fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], &a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], &a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], &a[58]) + fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + a[12].Mul(&a[12], &twiddles[3]) + a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], &twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], &twiddles[5]) + a[45].Mul(&a[45], &twiddles[5]) + 
a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + fr.Butterfly(&a[48], &a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + a[6].Mul(&a[6], &twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], &twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], &twiddles[11]) + a[39].Mul(&a[39], &twiddles[11]) 
+ a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], &twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + a[63].Mul(&a[63], &twiddles[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + fr.Butterfly(&a[41], &a[45]) + fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + fr.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) + a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], &twiddles[22]) + a[34].Mul(&a[34], &twiddles[23]) + 
a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], &twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + a[55].Mul(&a[55], &twiddles[28]) + a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) + fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) + fr.Butterfly(&a[36], &a[38]) + fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) + fr.Butterfly(&a[56], &a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], &twiddles[43]) + a[27].Mul(&a[27], &twiddles[44]) + 
a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], &twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) + fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) + fr.Butterfly(&a[30], &a[31]) + fr.Butterfly(&a[32], &a[33]) + fr.Butterfly(&a[34], &a[35]) + fr.Butterfly(&a[36], &a[37]) + fr.Butterfly(&a[38], &a[39]) + fr.Butterfly(&a[40], &a[41]) + fr.Butterfly(&a[42], &a[43]) + fr.Butterfly(&a[44], &a[45]) + fr.Butterfly(&a[46], &a[47]) + fr.Butterfly(&a[48], &a[49]) + fr.Butterfly(&a[50], &a[51]) + fr.Butterfly(&a[52], &a[53]) + fr.Butterfly(&a[54], &a[55]) + fr.Butterfly(&a[56], &a[57]) + fr.Butterfly(&a[58], &a[59]) + fr.Butterfly(&a[60], &a[61]) + fr.Butterfly(&a[62], &a[63]) +} + +func partialFFT_11(a, twiddles fr.Vector) { + a[48].Mul(&a[48], &twiddles[0]) + a[49].Mul(&a[49], &twiddles[0]) + a[50].Mul(&a[50], &twiddles[0]) + a[51].Mul(&a[51], &twiddles[0]) + a[52].Mul(&a[52], &twiddles[0]) + a[53].Mul(&a[53], &twiddles[0]) + a[54].Mul(&a[54], &twiddles[0]) + a[55].Mul(&a[55], &twiddles[0]) + a[56].Mul(&a[56], &twiddles[0]) + a[57].Mul(&a[57], &twiddles[0]) 
+ a[58].Mul(&a[58], &twiddles[0]) + a[59].Mul(&a[59], &twiddles[0]) + a[60].Mul(&a[60], &twiddles[0]) + a[61].Mul(&a[61], &twiddles[0]) + a[62].Mul(&a[62], &twiddles[0]) + a[63].Mul(&a[63], &twiddles[0]) + fr.Butterfly(&a[0], &a[32]) + fr.Butterfly(&a[1], &a[33]) + fr.Butterfly(&a[2], &a[34]) + fr.Butterfly(&a[3], &a[35]) + fr.Butterfly(&a[4], &a[36]) + fr.Butterfly(&a[5], &a[37]) + fr.Butterfly(&a[6], &a[38]) + fr.Butterfly(&a[7], &a[39]) + fr.Butterfly(&a[8], &a[40]) + fr.Butterfly(&a[9], &a[41]) + fr.Butterfly(&a[10], &a[42]) + fr.Butterfly(&a[11], &a[43]) + fr.Butterfly(&a[12], &a[44]) + fr.Butterfly(&a[13], &a[45]) + fr.Butterfly(&a[14], &a[46]) + fr.Butterfly(&a[15], &a[47]) + fr.Butterfly(&a[16], &a[48]) + fr.Butterfly(&a[17], &a[49]) + fr.Butterfly(&a[18], &a[50]) + fr.Butterfly(&a[19], &a[51]) + fr.Butterfly(&a[20], &a[52]) + fr.Butterfly(&a[21], &a[53]) + fr.Butterfly(&a[22], &a[54]) + fr.Butterfly(&a[23], &a[55]) + fr.Butterfly(&a[24], &a[56]) + fr.Butterfly(&a[25], &a[57]) + fr.Butterfly(&a[26], &a[58]) + fr.Butterfly(&a[27], &a[59]) + fr.Butterfly(&a[28], &a[60]) + fr.Butterfly(&a[29], &a[61]) + fr.Butterfly(&a[30], &a[62]) + fr.Butterfly(&a[31], &a[63]) + a[16].Mul(&a[16], &twiddles[1]) + a[17].Mul(&a[17], &twiddles[1]) + a[18].Mul(&a[18], &twiddles[1]) + a[19].Mul(&a[19], &twiddles[1]) + a[20].Mul(&a[20], &twiddles[1]) + a[21].Mul(&a[21], &twiddles[1]) + a[22].Mul(&a[22], &twiddles[1]) + a[23].Mul(&a[23], &twiddles[1]) + a[24].Mul(&a[24], &twiddles[1]) + a[25].Mul(&a[25], &twiddles[1]) + a[26].Mul(&a[26], &twiddles[1]) + a[27].Mul(&a[27], &twiddles[1]) + a[28].Mul(&a[28], &twiddles[1]) + a[29].Mul(&a[29], &twiddles[1]) + a[30].Mul(&a[30], &twiddles[1]) + a[31].Mul(&a[31], &twiddles[1]) + a[48].Mul(&a[48], &twiddles[2]) + a[49].Mul(&a[49], &twiddles[2]) + a[50].Mul(&a[50], &twiddles[2]) + a[51].Mul(&a[51], &twiddles[2]) + a[52].Mul(&a[52], &twiddles[2]) + a[53].Mul(&a[53], &twiddles[2]) + a[54].Mul(&a[54], &twiddles[2]) + a[55].Mul(&a[55], 
&twiddles[2]) + a[56].Mul(&a[56], &twiddles[2]) + a[57].Mul(&a[57], &twiddles[2]) + a[58].Mul(&a[58], &twiddles[2]) + a[59].Mul(&a[59], &twiddles[2]) + a[60].Mul(&a[60], &twiddles[2]) + a[61].Mul(&a[61], &twiddles[2]) + a[62].Mul(&a[62], &twiddles[2]) + a[63].Mul(&a[63], &twiddles[2]) + fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], &a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], &a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], &a[58]) + fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + a[12].Mul(&a[12], &twiddles[3]) + a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], &twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], &twiddles[5]) + a[45].Mul(&a[45], 
&twiddles[5]) + a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + fr.Butterfly(&a[48], &a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + a[6].Mul(&a[6], &twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], &twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], &twiddles[11]) + 
a[39].Mul(&a[39], &twiddles[11]) + a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], &twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + a[63].Mul(&a[63], &twiddles[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + fr.Butterfly(&a[41], &a[45]) + fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + fr.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) + a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], &twiddles[22]) + 
a[34].Mul(&a[34], &twiddles[23]) + a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], &twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + a[55].Mul(&a[55], &twiddles[28]) + a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) + fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) + fr.Butterfly(&a[36], &a[38]) + fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) + fr.Butterfly(&a[56], &a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], &twiddles[43]) + 
a[27].Mul(&a[27], &twiddles[44]) + a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], &twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) + fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) + fr.Butterfly(&a[30], &a[31]) + fr.Butterfly(&a[32], &a[33]) + fr.Butterfly(&a[34], &a[35]) + fr.Butterfly(&a[36], &a[37]) + fr.Butterfly(&a[38], &a[39]) + fr.Butterfly(&a[40], &a[41]) + fr.Butterfly(&a[42], &a[43]) + fr.Butterfly(&a[44], &a[45]) + fr.Butterfly(&a[46], &a[47]) + fr.Butterfly(&a[48], &a[49]) + fr.Butterfly(&a[50], &a[51]) + fr.Butterfly(&a[52], &a[53]) + fr.Butterfly(&a[54], &a[55]) + fr.Butterfly(&a[56], &a[57]) + fr.Butterfly(&a[58], &a[59]) + fr.Butterfly(&a[60], &a[61]) + fr.Butterfly(&a[62], &a[63]) +} + +func partialFFT_12(a, twiddles fr.Vector) { + a[32].Mul(&a[32], &twiddles[0]) + a[33].Mul(&a[33], &twiddles[0]) + a[34].Mul(&a[34], &twiddles[0]) + a[35].Mul(&a[35], &twiddles[0]) + a[36].Mul(&a[36], &twiddles[0]) + a[37].Mul(&a[37], &twiddles[0]) + a[38].Mul(&a[38], &twiddles[0]) + a[39].Mul(&a[39], &twiddles[0]) + a[40].Mul(&a[40], 
&twiddles[0]) + a[41].Mul(&a[41], &twiddles[0]) + a[42].Mul(&a[42], &twiddles[0]) + a[43].Mul(&a[43], &twiddles[0]) + a[44].Mul(&a[44], &twiddles[0]) + a[45].Mul(&a[45], &twiddles[0]) + a[46].Mul(&a[46], &twiddles[0]) + a[47].Mul(&a[47], &twiddles[0]) + a[48].Mul(&a[48], &twiddles[0]) + a[49].Mul(&a[49], &twiddles[0]) + a[50].Mul(&a[50], &twiddles[0]) + a[51].Mul(&a[51], &twiddles[0]) + a[52].Mul(&a[52], &twiddles[0]) + a[53].Mul(&a[53], &twiddles[0]) + a[54].Mul(&a[54], &twiddles[0]) + a[55].Mul(&a[55], &twiddles[0]) + a[56].Mul(&a[56], &twiddles[0]) + a[57].Mul(&a[57], &twiddles[0]) + a[58].Mul(&a[58], &twiddles[0]) + a[59].Mul(&a[59], &twiddles[0]) + a[60].Mul(&a[60], &twiddles[0]) + a[61].Mul(&a[61], &twiddles[0]) + a[62].Mul(&a[62], &twiddles[0]) + a[63].Mul(&a[63], &twiddles[0]) + fr.Butterfly(&a[0], &a[32]) + fr.Butterfly(&a[1], &a[33]) + fr.Butterfly(&a[2], &a[34]) + fr.Butterfly(&a[3], &a[35]) + fr.Butterfly(&a[4], &a[36]) + fr.Butterfly(&a[5], &a[37]) + fr.Butterfly(&a[6], &a[38]) + fr.Butterfly(&a[7], &a[39]) + fr.Butterfly(&a[8], &a[40]) + fr.Butterfly(&a[9], &a[41]) + fr.Butterfly(&a[10], &a[42]) + fr.Butterfly(&a[11], &a[43]) + fr.Butterfly(&a[12], &a[44]) + fr.Butterfly(&a[13], &a[45]) + fr.Butterfly(&a[14], &a[46]) + fr.Butterfly(&a[15], &a[47]) + fr.Butterfly(&a[16], &a[48]) + fr.Butterfly(&a[17], &a[49]) + fr.Butterfly(&a[18], &a[50]) + fr.Butterfly(&a[19], &a[51]) + fr.Butterfly(&a[20], &a[52]) + fr.Butterfly(&a[21], &a[53]) + fr.Butterfly(&a[22], &a[54]) + fr.Butterfly(&a[23], &a[55]) + fr.Butterfly(&a[24], &a[56]) + fr.Butterfly(&a[25], &a[57]) + fr.Butterfly(&a[26], &a[58]) + fr.Butterfly(&a[27], &a[59]) + fr.Butterfly(&a[28], &a[60]) + fr.Butterfly(&a[29], &a[61]) + fr.Butterfly(&a[30], &a[62]) + fr.Butterfly(&a[31], &a[63]) + a[16].Mul(&a[16], &twiddles[1]) + a[17].Mul(&a[17], &twiddles[1]) + a[18].Mul(&a[18], &twiddles[1]) + a[19].Mul(&a[19], &twiddles[1]) + a[20].Mul(&a[20], &twiddles[1]) + a[21].Mul(&a[21], &twiddles[1]) + 
a[22].Mul(&a[22], &twiddles[1]) + a[23].Mul(&a[23], &twiddles[1]) + a[24].Mul(&a[24], &twiddles[1]) + a[25].Mul(&a[25], &twiddles[1]) + a[26].Mul(&a[26], &twiddles[1]) + a[27].Mul(&a[27], &twiddles[1]) + a[28].Mul(&a[28], &twiddles[1]) + a[29].Mul(&a[29], &twiddles[1]) + a[30].Mul(&a[30], &twiddles[1]) + a[31].Mul(&a[31], &twiddles[1]) + a[48].Mul(&a[48], &twiddles[2]) + a[49].Mul(&a[49], &twiddles[2]) + a[50].Mul(&a[50], &twiddles[2]) + a[51].Mul(&a[51], &twiddles[2]) + a[52].Mul(&a[52], &twiddles[2]) + a[53].Mul(&a[53], &twiddles[2]) + a[54].Mul(&a[54], &twiddles[2]) + a[55].Mul(&a[55], &twiddles[2]) + a[56].Mul(&a[56], &twiddles[2]) + a[57].Mul(&a[57], &twiddles[2]) + a[58].Mul(&a[58], &twiddles[2]) + a[59].Mul(&a[59], &twiddles[2]) + a[60].Mul(&a[60], &twiddles[2]) + a[61].Mul(&a[61], &twiddles[2]) + a[62].Mul(&a[62], &twiddles[2]) + a[63].Mul(&a[63], &twiddles[2]) + fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], &a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], &a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], &a[58]) + fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + 
a[12].Mul(&a[12], &twiddles[3]) + a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], &twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], &twiddles[5]) + a[45].Mul(&a[45], &twiddles[5]) + a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + fr.Butterfly(&a[48], &a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + 
a[6].Mul(&a[6], &twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], &twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], &twiddles[11]) + a[39].Mul(&a[39], &twiddles[11]) + a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], &twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + a[63].Mul(&a[63], &twiddles[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + fr.Butterfly(&a[41], &a[45]) + fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + 
fr.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) + a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], &twiddles[22]) + a[34].Mul(&a[34], &twiddles[23]) + a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], &twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + a[55].Mul(&a[55], &twiddles[28]) + a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) + fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) + fr.Butterfly(&a[36], &a[38]) + fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) 
+ fr.Butterfly(&a[56], &a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], &twiddles[43]) + a[27].Mul(&a[27], &twiddles[44]) + a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], &twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) + fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) + fr.Butterfly(&a[30], &a[31]) + fr.Butterfly(&a[32], &a[33]) + fr.Butterfly(&a[34], &a[35]) + fr.Butterfly(&a[36], &a[37]) + fr.Butterfly(&a[38], &a[39]) + fr.Butterfly(&a[40], &a[41]) + fr.Butterfly(&a[42], &a[43]) + fr.Butterfly(&a[44], &a[45]) + fr.Butterfly(&a[46], &a[47]) + fr.Butterfly(&a[48], &a[49]) 
+ fr.Butterfly(&a[50], &a[51]) + fr.Butterfly(&a[52], &a[53]) + fr.Butterfly(&a[54], &a[55]) + fr.Butterfly(&a[56], &a[57]) + fr.Butterfly(&a[58], &a[59]) + fr.Butterfly(&a[60], &a[61]) + fr.Butterfly(&a[62], &a[63]) +} + +func partialFFT_13(a, twiddles fr.Vector) { + a[32].Mul(&a[32], &twiddles[0]) + a[33].Mul(&a[33], &twiddles[0]) + a[34].Mul(&a[34], &twiddles[0]) + a[35].Mul(&a[35], &twiddles[0]) + a[36].Mul(&a[36], &twiddles[0]) + a[37].Mul(&a[37], &twiddles[0]) + a[38].Mul(&a[38], &twiddles[0]) + a[39].Mul(&a[39], &twiddles[0]) + a[40].Mul(&a[40], &twiddles[0]) + a[41].Mul(&a[41], &twiddles[0]) + a[42].Mul(&a[42], &twiddles[0]) + a[43].Mul(&a[43], &twiddles[0]) + a[44].Mul(&a[44], &twiddles[0]) + a[45].Mul(&a[45], &twiddles[0]) + a[46].Mul(&a[46], &twiddles[0]) + a[47].Mul(&a[47], &twiddles[0]) + a[48].Mul(&a[48], &twiddles[0]) + a[49].Mul(&a[49], &twiddles[0]) + a[50].Mul(&a[50], &twiddles[0]) + a[51].Mul(&a[51], &twiddles[0]) + a[52].Mul(&a[52], &twiddles[0]) + a[53].Mul(&a[53], &twiddles[0]) + a[54].Mul(&a[54], &twiddles[0]) + a[55].Mul(&a[55], &twiddles[0]) + a[56].Mul(&a[56], &twiddles[0]) + a[57].Mul(&a[57], &twiddles[0]) + a[58].Mul(&a[58], &twiddles[0]) + a[59].Mul(&a[59], &twiddles[0]) + a[60].Mul(&a[60], &twiddles[0]) + a[61].Mul(&a[61], &twiddles[0]) + a[62].Mul(&a[62], &twiddles[0]) + a[63].Mul(&a[63], &twiddles[0]) + fr.Butterfly(&a[0], &a[32]) + fr.Butterfly(&a[1], &a[33]) + fr.Butterfly(&a[2], &a[34]) + fr.Butterfly(&a[3], &a[35]) + fr.Butterfly(&a[4], &a[36]) + fr.Butterfly(&a[5], &a[37]) + fr.Butterfly(&a[6], &a[38]) + fr.Butterfly(&a[7], &a[39]) + fr.Butterfly(&a[8], &a[40]) + fr.Butterfly(&a[9], &a[41]) + fr.Butterfly(&a[10], &a[42]) + fr.Butterfly(&a[11], &a[43]) + fr.Butterfly(&a[12], &a[44]) + fr.Butterfly(&a[13], &a[45]) + fr.Butterfly(&a[14], &a[46]) + fr.Butterfly(&a[15], &a[47]) + fr.Butterfly(&a[16], &a[48]) + fr.Butterfly(&a[17], &a[49]) + fr.Butterfly(&a[18], &a[50]) + fr.Butterfly(&a[19], &a[51]) + fr.Butterfly(&a[20], &a[52]) + 
fr.Butterfly(&a[21], &a[53]) + fr.Butterfly(&a[22], &a[54]) + fr.Butterfly(&a[23], &a[55]) + fr.Butterfly(&a[24], &a[56]) + fr.Butterfly(&a[25], &a[57]) + fr.Butterfly(&a[26], &a[58]) + fr.Butterfly(&a[27], &a[59]) + fr.Butterfly(&a[28], &a[60]) + fr.Butterfly(&a[29], &a[61]) + fr.Butterfly(&a[30], &a[62]) + fr.Butterfly(&a[31], &a[63]) + a[16].Mul(&a[16], &twiddles[1]) + a[17].Mul(&a[17], &twiddles[1]) + a[18].Mul(&a[18], &twiddles[1]) + a[19].Mul(&a[19], &twiddles[1]) + a[20].Mul(&a[20], &twiddles[1]) + a[21].Mul(&a[21], &twiddles[1]) + a[22].Mul(&a[22], &twiddles[1]) + a[23].Mul(&a[23], &twiddles[1]) + a[24].Mul(&a[24], &twiddles[1]) + a[25].Mul(&a[25], &twiddles[1]) + a[26].Mul(&a[26], &twiddles[1]) + a[27].Mul(&a[27], &twiddles[1]) + a[28].Mul(&a[28], &twiddles[1]) + a[29].Mul(&a[29], &twiddles[1]) + a[30].Mul(&a[30], &twiddles[1]) + a[31].Mul(&a[31], &twiddles[1]) + a[48].Mul(&a[48], &twiddles[2]) + a[49].Mul(&a[49], &twiddles[2]) + a[50].Mul(&a[50], &twiddles[2]) + a[51].Mul(&a[51], &twiddles[2]) + a[52].Mul(&a[52], &twiddles[2]) + a[53].Mul(&a[53], &twiddles[2]) + a[54].Mul(&a[54], &twiddles[2]) + a[55].Mul(&a[55], &twiddles[2]) + a[56].Mul(&a[56], &twiddles[2]) + a[57].Mul(&a[57], &twiddles[2]) + a[58].Mul(&a[58], &twiddles[2]) + a[59].Mul(&a[59], &twiddles[2]) + a[60].Mul(&a[60], &twiddles[2]) + a[61].Mul(&a[61], &twiddles[2]) + a[62].Mul(&a[62], &twiddles[2]) + a[63].Mul(&a[63], &twiddles[2]) + fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], &a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], 
&a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], &a[58]) + fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + a[12].Mul(&a[12], &twiddles[3]) + a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], &twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], &twiddles[5]) + a[45].Mul(&a[45], &twiddles[5]) + a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + 
fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + fr.Butterfly(&a[48], &a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + a[6].Mul(&a[6], &twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], &twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], &twiddles[11]) + a[39].Mul(&a[39], &twiddles[11]) + a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], &twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + a[63].Mul(&a[63], &twiddles[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + 
fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + fr.Butterfly(&a[41], &a[45]) + fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + fr.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) + a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], &twiddles[22]) + a[34].Mul(&a[34], &twiddles[23]) + a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], &twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + a[55].Mul(&a[55], &twiddles[28]) + a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) 
+ fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) + fr.Butterfly(&a[36], &a[38]) + fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) + fr.Butterfly(&a[56], &a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], &twiddles[43]) + a[27].Mul(&a[27], &twiddles[44]) + a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], &twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) 
+ fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) + fr.Butterfly(&a[30], &a[31]) + fr.Butterfly(&a[32], &a[33]) + fr.Butterfly(&a[34], &a[35]) + fr.Butterfly(&a[36], &a[37]) + fr.Butterfly(&a[38], &a[39]) + fr.Butterfly(&a[40], &a[41]) + fr.Butterfly(&a[42], &a[43]) + fr.Butterfly(&a[44], &a[45]) + fr.Butterfly(&a[46], &a[47]) + fr.Butterfly(&a[48], &a[49]) + fr.Butterfly(&a[50], &a[51]) + fr.Butterfly(&a[52], &a[53]) + fr.Butterfly(&a[54], &a[55]) + fr.Butterfly(&a[56], &a[57]) + fr.Butterfly(&a[58], &a[59]) + fr.Butterfly(&a[60], &a[61]) + fr.Butterfly(&a[62], &a[63]) +} + +func partialFFT_14(a, twiddles fr.Vector) { + a[32].Mul(&a[32], &twiddles[0]) + a[33].Mul(&a[33], &twiddles[0]) + a[34].Mul(&a[34], &twiddles[0]) + a[35].Mul(&a[35], &twiddles[0]) + a[36].Mul(&a[36], &twiddles[0]) + a[37].Mul(&a[37], &twiddles[0]) + a[38].Mul(&a[38], &twiddles[0]) + a[39].Mul(&a[39], &twiddles[0]) + a[40].Mul(&a[40], &twiddles[0]) + a[41].Mul(&a[41], &twiddles[0]) + a[42].Mul(&a[42], &twiddles[0]) + a[43].Mul(&a[43], &twiddles[0]) + a[44].Mul(&a[44], &twiddles[0]) + a[45].Mul(&a[45], &twiddles[0]) + a[46].Mul(&a[46], &twiddles[0]) + a[47].Mul(&a[47], &twiddles[0]) + a[48].Mul(&a[48], &twiddles[0]) + a[49].Mul(&a[49], &twiddles[0]) + a[50].Mul(&a[50], &twiddles[0]) + a[51].Mul(&a[51], &twiddles[0]) + a[52].Mul(&a[52], &twiddles[0]) + a[53].Mul(&a[53], &twiddles[0]) + a[54].Mul(&a[54], &twiddles[0]) + a[55].Mul(&a[55], &twiddles[0]) + a[56].Mul(&a[56], &twiddles[0]) + a[57].Mul(&a[57], &twiddles[0]) + a[58].Mul(&a[58], &twiddles[0]) + a[59].Mul(&a[59], &twiddles[0]) + a[60].Mul(&a[60], &twiddles[0]) + a[61].Mul(&a[61], &twiddles[0]) + a[62].Mul(&a[62], &twiddles[0]) + a[63].Mul(&a[63], &twiddles[0]) + fr.Butterfly(&a[0], &a[32]) + fr.Butterfly(&a[1], &a[33]) + fr.Butterfly(&a[2], 
&a[34]) + fr.Butterfly(&a[3], &a[35]) + fr.Butterfly(&a[4], &a[36]) + fr.Butterfly(&a[5], &a[37]) + fr.Butterfly(&a[6], &a[38]) + fr.Butterfly(&a[7], &a[39]) + fr.Butterfly(&a[8], &a[40]) + fr.Butterfly(&a[9], &a[41]) + fr.Butterfly(&a[10], &a[42]) + fr.Butterfly(&a[11], &a[43]) + fr.Butterfly(&a[12], &a[44]) + fr.Butterfly(&a[13], &a[45]) + fr.Butterfly(&a[14], &a[46]) + fr.Butterfly(&a[15], &a[47]) + fr.Butterfly(&a[16], &a[48]) + fr.Butterfly(&a[17], &a[49]) + fr.Butterfly(&a[18], &a[50]) + fr.Butterfly(&a[19], &a[51]) + fr.Butterfly(&a[20], &a[52]) + fr.Butterfly(&a[21], &a[53]) + fr.Butterfly(&a[22], &a[54]) + fr.Butterfly(&a[23], &a[55]) + fr.Butterfly(&a[24], &a[56]) + fr.Butterfly(&a[25], &a[57]) + fr.Butterfly(&a[26], &a[58]) + fr.Butterfly(&a[27], &a[59]) + fr.Butterfly(&a[28], &a[60]) + fr.Butterfly(&a[29], &a[61]) + fr.Butterfly(&a[30], &a[62]) + fr.Butterfly(&a[31], &a[63]) + a[16].Mul(&a[16], &twiddles[1]) + a[17].Mul(&a[17], &twiddles[1]) + a[18].Mul(&a[18], &twiddles[1]) + a[19].Mul(&a[19], &twiddles[1]) + a[20].Mul(&a[20], &twiddles[1]) + a[21].Mul(&a[21], &twiddles[1]) + a[22].Mul(&a[22], &twiddles[1]) + a[23].Mul(&a[23], &twiddles[1]) + a[24].Mul(&a[24], &twiddles[1]) + a[25].Mul(&a[25], &twiddles[1]) + a[26].Mul(&a[26], &twiddles[1]) + a[27].Mul(&a[27], &twiddles[1]) + a[28].Mul(&a[28], &twiddles[1]) + a[29].Mul(&a[29], &twiddles[1]) + a[30].Mul(&a[30], &twiddles[1]) + a[31].Mul(&a[31], &twiddles[1]) + a[48].Mul(&a[48], &twiddles[2]) + a[49].Mul(&a[49], &twiddles[2]) + a[50].Mul(&a[50], &twiddles[2]) + a[51].Mul(&a[51], &twiddles[2]) + a[52].Mul(&a[52], &twiddles[2]) + a[53].Mul(&a[53], &twiddles[2]) + a[54].Mul(&a[54], &twiddles[2]) + a[55].Mul(&a[55], &twiddles[2]) + a[56].Mul(&a[56], &twiddles[2]) + a[57].Mul(&a[57], &twiddles[2]) + a[58].Mul(&a[58], &twiddles[2]) + a[59].Mul(&a[59], &twiddles[2]) + a[60].Mul(&a[60], &twiddles[2]) + a[61].Mul(&a[61], &twiddles[2]) + a[62].Mul(&a[62], &twiddles[2]) + a[63].Mul(&a[63], &twiddles[2]) + 
fr.Butterfly(&a[0], &a[16]) + fr.Butterfly(&a[1], &a[17]) + fr.Butterfly(&a[2], &a[18]) + fr.Butterfly(&a[3], &a[19]) + fr.Butterfly(&a[4], &a[20]) + fr.Butterfly(&a[5], &a[21]) + fr.Butterfly(&a[6], &a[22]) + fr.Butterfly(&a[7], &a[23]) + fr.Butterfly(&a[8], &a[24]) + fr.Butterfly(&a[9], &a[25]) + fr.Butterfly(&a[10], &a[26]) + fr.Butterfly(&a[11], &a[27]) + fr.Butterfly(&a[12], &a[28]) + fr.Butterfly(&a[13], &a[29]) + fr.Butterfly(&a[14], &a[30]) + fr.Butterfly(&a[15], &a[31]) + fr.Butterfly(&a[32], &a[48]) + fr.Butterfly(&a[33], &a[49]) + fr.Butterfly(&a[34], &a[50]) + fr.Butterfly(&a[35], &a[51]) + fr.Butterfly(&a[36], &a[52]) + fr.Butterfly(&a[37], &a[53]) + fr.Butterfly(&a[38], &a[54]) + fr.Butterfly(&a[39], &a[55]) + fr.Butterfly(&a[40], &a[56]) + fr.Butterfly(&a[41], &a[57]) + fr.Butterfly(&a[42], &a[58]) + fr.Butterfly(&a[43], &a[59]) + fr.Butterfly(&a[44], &a[60]) + fr.Butterfly(&a[45], &a[61]) + fr.Butterfly(&a[46], &a[62]) + fr.Butterfly(&a[47], &a[63]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + a[12].Mul(&a[12], &twiddles[3]) + a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], &twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], &twiddles[5]) + a[45].Mul(&a[45], &twiddles[5]) + a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + 
a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) + fr.Butterfly(&a[0], &a[8]) + fr.Butterfly(&a[1], &a[9]) + fr.Butterfly(&a[2], &a[10]) + fr.Butterfly(&a[3], &a[11]) + fr.Butterfly(&a[4], &a[12]) + fr.Butterfly(&a[5], &a[13]) + fr.Butterfly(&a[6], &a[14]) + fr.Butterfly(&a[7], &a[15]) + fr.Butterfly(&a[16], &a[24]) + fr.Butterfly(&a[17], &a[25]) + fr.Butterfly(&a[18], &a[26]) + fr.Butterfly(&a[19], &a[27]) + fr.Butterfly(&a[20], &a[28]) + fr.Butterfly(&a[21], &a[29]) + fr.Butterfly(&a[22], &a[30]) + fr.Butterfly(&a[23], &a[31]) + fr.Butterfly(&a[32], &a[40]) + fr.Butterfly(&a[33], &a[41]) + fr.Butterfly(&a[34], &a[42]) + fr.Butterfly(&a[35], &a[43]) + fr.Butterfly(&a[36], &a[44]) + fr.Butterfly(&a[37], &a[45]) + fr.Butterfly(&a[38], &a[46]) + fr.Butterfly(&a[39], &a[47]) + fr.Butterfly(&a[48], &a[56]) + fr.Butterfly(&a[49], &a[57]) + fr.Butterfly(&a[50], &a[58]) + fr.Butterfly(&a[51], &a[59]) + fr.Butterfly(&a[52], &a[60]) + fr.Butterfly(&a[53], &a[61]) + fr.Butterfly(&a[54], &a[62]) + fr.Butterfly(&a[55], &a[63]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + a[6].Mul(&a[6], &twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], &twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], &twiddles[11]) + a[39].Mul(&a[39], &twiddles[11]) + a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], 
&twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + a[63].Mul(&a[63], &twiddles[14]) + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + fr.Butterfly(&a[8], &a[12]) + fr.Butterfly(&a[9], &a[13]) + fr.Butterfly(&a[10], &a[14]) + fr.Butterfly(&a[11], &a[15]) + fr.Butterfly(&a[16], &a[20]) + fr.Butterfly(&a[17], &a[21]) + fr.Butterfly(&a[18], &a[22]) + fr.Butterfly(&a[19], &a[23]) + fr.Butterfly(&a[24], &a[28]) + fr.Butterfly(&a[25], &a[29]) + fr.Butterfly(&a[26], &a[30]) + fr.Butterfly(&a[27], &a[31]) + fr.Butterfly(&a[32], &a[36]) + fr.Butterfly(&a[33], &a[37]) + fr.Butterfly(&a[34], &a[38]) + fr.Butterfly(&a[35], &a[39]) + fr.Butterfly(&a[40], &a[44]) + fr.Butterfly(&a[41], &a[45]) + fr.Butterfly(&a[42], &a[46]) + fr.Butterfly(&a[43], &a[47]) + fr.Butterfly(&a[48], &a[52]) + fr.Butterfly(&a[49], &a[53]) + fr.Butterfly(&a[50], &a[54]) + fr.Butterfly(&a[51], &a[55]) + fr.Butterfly(&a[56], &a[60]) + fr.Butterfly(&a[57], &a[61]) + fr.Butterfly(&a[58], &a[62]) + fr.Butterfly(&a[59], &a[63]) + a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) + a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], &twiddles[22]) + a[34].Mul(&a[34], &twiddles[23]) + a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], 
&twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + a[55].Mul(&a[55], &twiddles[28]) + a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[8], &a[10]) + fr.Butterfly(&a[9], &a[11]) + fr.Butterfly(&a[12], &a[14]) + fr.Butterfly(&a[13], &a[15]) + fr.Butterfly(&a[16], &a[18]) + fr.Butterfly(&a[17], &a[19]) + fr.Butterfly(&a[20], &a[22]) + fr.Butterfly(&a[21], &a[23]) + fr.Butterfly(&a[24], &a[26]) + fr.Butterfly(&a[25], &a[27]) + fr.Butterfly(&a[28], &a[30]) + fr.Butterfly(&a[29], &a[31]) + fr.Butterfly(&a[32], &a[34]) + fr.Butterfly(&a[33], &a[35]) + fr.Butterfly(&a[36], &a[38]) + fr.Butterfly(&a[37], &a[39]) + fr.Butterfly(&a[40], &a[42]) + fr.Butterfly(&a[41], &a[43]) + fr.Butterfly(&a[44], &a[46]) + fr.Butterfly(&a[45], &a[47]) + fr.Butterfly(&a[48], &a[50]) + fr.Butterfly(&a[49], &a[51]) + fr.Butterfly(&a[52], &a[54]) + fr.Butterfly(&a[53], &a[55]) + fr.Butterfly(&a[56], &a[58]) + fr.Butterfly(&a[57], &a[59]) + fr.Butterfly(&a[60], &a[62]) + fr.Butterfly(&a[61], &a[63]) + a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], &twiddles[43]) + a[27].Mul(&a[27], &twiddles[44]) + a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], 
&twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[8], &a[9]) + fr.Butterfly(&a[10], &a[11]) + fr.Butterfly(&a[12], &a[13]) + fr.Butterfly(&a[14], &a[15]) + fr.Butterfly(&a[16], &a[17]) + fr.Butterfly(&a[18], &a[19]) + fr.Butterfly(&a[20], &a[21]) + fr.Butterfly(&a[22], &a[23]) + fr.Butterfly(&a[24], &a[25]) + fr.Butterfly(&a[26], &a[27]) + fr.Butterfly(&a[28], &a[29]) + fr.Butterfly(&a[30], &a[31]) + fr.Butterfly(&a[32], &a[33]) + fr.Butterfly(&a[34], &a[35]) + fr.Butterfly(&a[36], &a[37]) + fr.Butterfly(&a[38], &a[39]) + fr.Butterfly(&a[40], &a[41]) + fr.Butterfly(&a[42], &a[43]) + fr.Butterfly(&a[44], &a[45]) + fr.Butterfly(&a[46], &a[47]) + fr.Butterfly(&a[48], &a[49]) + fr.Butterfly(&a[50], &a[51]) + fr.Butterfly(&a[52], &a[53]) + fr.Butterfly(&a[54], &a[55]) + fr.Butterfly(&a[56], &a[57]) + fr.Butterfly(&a[58], &a[59]) + fr.Butterfly(&a[60], &a[61]) + fr.Butterfly(&a[62], &a[63]) +} - a[32].Mul(&a[32], &twiddlesCoset[0]) - a[33].Mul(&a[33], &twiddlesCoset[0]) - a[34].Mul(&a[34], &twiddlesCoset[0]) - a[35].Mul(&a[35], &twiddlesCoset[0]) - a[36].Mul(&a[36], &twiddlesCoset[0]) - a[37].Mul(&a[37], &twiddlesCoset[0]) - a[38].Mul(&a[38], &twiddlesCoset[0]) - a[39].Mul(&a[39], &twiddlesCoset[0]) - a[40].Mul(&a[40], &twiddlesCoset[0]) - a[41].Mul(&a[41], &twiddlesCoset[0]) - a[42].Mul(&a[42], &twiddlesCoset[0]) - a[43].Mul(&a[43], &twiddlesCoset[0]) - a[44].Mul(&a[44], &twiddlesCoset[0]) - a[45].Mul(&a[45], &twiddlesCoset[0]) - a[46].Mul(&a[46], &twiddlesCoset[0]) - a[47].Mul(&a[47], &twiddlesCoset[0]) - a[48].Mul(&a[48], 
&twiddlesCoset[0]) - a[49].Mul(&a[49], &twiddlesCoset[0]) - a[50].Mul(&a[50], &twiddlesCoset[0]) - a[51].Mul(&a[51], &twiddlesCoset[0]) - a[52].Mul(&a[52], &twiddlesCoset[0]) - a[53].Mul(&a[53], &twiddlesCoset[0]) - a[54].Mul(&a[54], &twiddlesCoset[0]) - a[55].Mul(&a[55], &twiddlesCoset[0]) - a[56].Mul(&a[56], &twiddlesCoset[0]) - a[57].Mul(&a[57], &twiddlesCoset[0]) - a[58].Mul(&a[58], &twiddlesCoset[0]) - a[59].Mul(&a[59], &twiddlesCoset[0]) - a[60].Mul(&a[60], &twiddlesCoset[0]) - a[61].Mul(&a[61], &twiddlesCoset[0]) - a[62].Mul(&a[62], &twiddlesCoset[0]) - a[63].Mul(&a[63], &twiddlesCoset[0]) +func partialFFT_15(a, twiddles fr.Vector) { + a[32].Mul(&a[32], &twiddles[0]) + a[33].Mul(&a[33], &twiddles[0]) + a[34].Mul(&a[34], &twiddles[0]) + a[35].Mul(&a[35], &twiddles[0]) + a[36].Mul(&a[36], &twiddles[0]) + a[37].Mul(&a[37], &twiddles[0]) + a[38].Mul(&a[38], &twiddles[0]) + a[39].Mul(&a[39], &twiddles[0]) + a[40].Mul(&a[40], &twiddles[0]) + a[41].Mul(&a[41], &twiddles[0]) + a[42].Mul(&a[42], &twiddles[0]) + a[43].Mul(&a[43], &twiddles[0]) + a[44].Mul(&a[44], &twiddles[0]) + a[45].Mul(&a[45], &twiddles[0]) + a[46].Mul(&a[46], &twiddles[0]) + a[47].Mul(&a[47], &twiddles[0]) + a[48].Mul(&a[48], &twiddles[0]) + a[49].Mul(&a[49], &twiddles[0]) + a[50].Mul(&a[50], &twiddles[0]) + a[51].Mul(&a[51], &twiddles[0]) + a[52].Mul(&a[52], &twiddles[0]) + a[53].Mul(&a[53], &twiddles[0]) + a[54].Mul(&a[54], &twiddles[0]) + a[55].Mul(&a[55], &twiddles[0]) + a[56].Mul(&a[56], &twiddles[0]) + a[57].Mul(&a[57], &twiddles[0]) + a[58].Mul(&a[58], &twiddles[0]) + a[59].Mul(&a[59], &twiddles[0]) + a[60].Mul(&a[60], &twiddles[0]) + a[61].Mul(&a[61], &twiddles[0]) + a[62].Mul(&a[62], &twiddles[0]) + a[63].Mul(&a[63], &twiddles[0]) fr.Butterfly(&a[0], &a[32]) fr.Butterfly(&a[1], &a[33]) fr.Butterfly(&a[2], &a[34]) @@ -79,38 +5253,38 @@ func fft64(a []fr.Element, twiddlesCoset []fr.Element) { fr.Butterfly(&a[29], &a[61]) fr.Butterfly(&a[30], &a[62]) fr.Butterfly(&a[31], &a[63]) - 
a[16].Mul(&a[16], &twiddlesCoset[1]) - a[17].Mul(&a[17], &twiddlesCoset[1]) - a[18].Mul(&a[18], &twiddlesCoset[1]) - a[19].Mul(&a[19], &twiddlesCoset[1]) - a[20].Mul(&a[20], &twiddlesCoset[1]) - a[21].Mul(&a[21], &twiddlesCoset[1]) - a[22].Mul(&a[22], &twiddlesCoset[1]) - a[23].Mul(&a[23], &twiddlesCoset[1]) - a[24].Mul(&a[24], &twiddlesCoset[1]) - a[25].Mul(&a[25], &twiddlesCoset[1]) - a[26].Mul(&a[26], &twiddlesCoset[1]) - a[27].Mul(&a[27], &twiddlesCoset[1]) - a[28].Mul(&a[28], &twiddlesCoset[1]) - a[29].Mul(&a[29], &twiddlesCoset[1]) - a[30].Mul(&a[30], &twiddlesCoset[1]) - a[31].Mul(&a[31], &twiddlesCoset[1]) - a[48].Mul(&a[48], &twiddlesCoset[2]) - a[49].Mul(&a[49], &twiddlesCoset[2]) - a[50].Mul(&a[50], &twiddlesCoset[2]) - a[51].Mul(&a[51], &twiddlesCoset[2]) - a[52].Mul(&a[52], &twiddlesCoset[2]) - a[53].Mul(&a[53], &twiddlesCoset[2]) - a[54].Mul(&a[54], &twiddlesCoset[2]) - a[55].Mul(&a[55], &twiddlesCoset[2]) - a[56].Mul(&a[56], &twiddlesCoset[2]) - a[57].Mul(&a[57], &twiddlesCoset[2]) - a[58].Mul(&a[58], &twiddlesCoset[2]) - a[59].Mul(&a[59], &twiddlesCoset[2]) - a[60].Mul(&a[60], &twiddlesCoset[2]) - a[61].Mul(&a[61], &twiddlesCoset[2]) - a[62].Mul(&a[62], &twiddlesCoset[2]) - a[63].Mul(&a[63], &twiddlesCoset[2]) + a[16].Mul(&a[16], &twiddles[1]) + a[17].Mul(&a[17], &twiddles[1]) + a[18].Mul(&a[18], &twiddles[1]) + a[19].Mul(&a[19], &twiddles[1]) + a[20].Mul(&a[20], &twiddles[1]) + a[21].Mul(&a[21], &twiddles[1]) + a[22].Mul(&a[22], &twiddles[1]) + a[23].Mul(&a[23], &twiddles[1]) + a[24].Mul(&a[24], &twiddles[1]) + a[25].Mul(&a[25], &twiddles[1]) + a[26].Mul(&a[26], &twiddles[1]) + a[27].Mul(&a[27], &twiddles[1]) + a[28].Mul(&a[28], &twiddles[1]) + a[29].Mul(&a[29], &twiddles[1]) + a[30].Mul(&a[30], &twiddles[1]) + a[31].Mul(&a[31], &twiddles[1]) + a[48].Mul(&a[48], &twiddles[2]) + a[49].Mul(&a[49], &twiddles[2]) + a[50].Mul(&a[50], &twiddles[2]) + a[51].Mul(&a[51], &twiddles[2]) + a[52].Mul(&a[52], &twiddles[2]) + a[53].Mul(&a[53], &twiddles[2]) + 
a[54].Mul(&a[54], &twiddles[2]) + a[55].Mul(&a[55], &twiddles[2]) + a[56].Mul(&a[56], &twiddles[2]) + a[57].Mul(&a[57], &twiddles[2]) + a[58].Mul(&a[58], &twiddles[2]) + a[59].Mul(&a[59], &twiddles[2]) + a[60].Mul(&a[60], &twiddles[2]) + a[61].Mul(&a[61], &twiddles[2]) + a[62].Mul(&a[62], &twiddles[2]) + a[63].Mul(&a[63], &twiddles[2]) fr.Butterfly(&a[0], &a[16]) fr.Butterfly(&a[1], &a[17]) fr.Butterfly(&a[2], &a[18]) @@ -143,38 +5317,38 @@ func fft64(a []fr.Element, twiddlesCoset []fr.Element) { fr.Butterfly(&a[45], &a[61]) fr.Butterfly(&a[46], &a[62]) fr.Butterfly(&a[47], &a[63]) - a[8].Mul(&a[8], &twiddlesCoset[3]) - a[9].Mul(&a[9], &twiddlesCoset[3]) - a[10].Mul(&a[10], &twiddlesCoset[3]) - a[11].Mul(&a[11], &twiddlesCoset[3]) - a[12].Mul(&a[12], &twiddlesCoset[3]) - a[13].Mul(&a[13], &twiddlesCoset[3]) - a[14].Mul(&a[14], &twiddlesCoset[3]) - a[15].Mul(&a[15], &twiddlesCoset[3]) - a[24].Mul(&a[24], &twiddlesCoset[4]) - a[25].Mul(&a[25], &twiddlesCoset[4]) - a[26].Mul(&a[26], &twiddlesCoset[4]) - a[27].Mul(&a[27], &twiddlesCoset[4]) - a[28].Mul(&a[28], &twiddlesCoset[4]) - a[29].Mul(&a[29], &twiddlesCoset[4]) - a[30].Mul(&a[30], &twiddlesCoset[4]) - a[31].Mul(&a[31], &twiddlesCoset[4]) - a[40].Mul(&a[40], &twiddlesCoset[5]) - a[41].Mul(&a[41], &twiddlesCoset[5]) - a[42].Mul(&a[42], &twiddlesCoset[5]) - a[43].Mul(&a[43], &twiddlesCoset[5]) - a[44].Mul(&a[44], &twiddlesCoset[5]) - a[45].Mul(&a[45], &twiddlesCoset[5]) - a[46].Mul(&a[46], &twiddlesCoset[5]) - a[47].Mul(&a[47], &twiddlesCoset[5]) - a[56].Mul(&a[56], &twiddlesCoset[6]) - a[57].Mul(&a[57], &twiddlesCoset[6]) - a[58].Mul(&a[58], &twiddlesCoset[6]) - a[59].Mul(&a[59], &twiddlesCoset[6]) - a[60].Mul(&a[60], &twiddlesCoset[6]) - a[61].Mul(&a[61], &twiddlesCoset[6]) - a[62].Mul(&a[62], &twiddlesCoset[6]) - a[63].Mul(&a[63], &twiddlesCoset[6]) + a[8].Mul(&a[8], &twiddles[3]) + a[9].Mul(&a[9], &twiddles[3]) + a[10].Mul(&a[10], &twiddles[3]) + a[11].Mul(&a[11], &twiddles[3]) + a[12].Mul(&a[12], &twiddles[3]) 
+ a[13].Mul(&a[13], &twiddles[3]) + a[14].Mul(&a[14], &twiddles[3]) + a[15].Mul(&a[15], &twiddles[3]) + a[24].Mul(&a[24], &twiddles[4]) + a[25].Mul(&a[25], &twiddles[4]) + a[26].Mul(&a[26], &twiddles[4]) + a[27].Mul(&a[27], &twiddles[4]) + a[28].Mul(&a[28], &twiddles[4]) + a[29].Mul(&a[29], &twiddles[4]) + a[30].Mul(&a[30], &twiddles[4]) + a[31].Mul(&a[31], &twiddles[4]) + a[40].Mul(&a[40], &twiddles[5]) + a[41].Mul(&a[41], &twiddles[5]) + a[42].Mul(&a[42], &twiddles[5]) + a[43].Mul(&a[43], &twiddles[5]) + a[44].Mul(&a[44], &twiddles[5]) + a[45].Mul(&a[45], &twiddles[5]) + a[46].Mul(&a[46], &twiddles[5]) + a[47].Mul(&a[47], &twiddles[5]) + a[56].Mul(&a[56], &twiddles[6]) + a[57].Mul(&a[57], &twiddles[6]) + a[58].Mul(&a[58], &twiddles[6]) + a[59].Mul(&a[59], &twiddles[6]) + a[60].Mul(&a[60], &twiddles[6]) + a[61].Mul(&a[61], &twiddles[6]) + a[62].Mul(&a[62], &twiddles[6]) + a[63].Mul(&a[63], &twiddles[6]) fr.Butterfly(&a[0], &a[8]) fr.Butterfly(&a[1], &a[9]) fr.Butterfly(&a[2], &a[10]) @@ -207,38 +5381,38 @@ func fft64(a []fr.Element, twiddlesCoset []fr.Element) { fr.Butterfly(&a[53], &a[61]) fr.Butterfly(&a[54], &a[62]) fr.Butterfly(&a[55], &a[63]) - a[4].Mul(&a[4], &twiddlesCoset[7]) - a[5].Mul(&a[5], &twiddlesCoset[7]) - a[6].Mul(&a[6], &twiddlesCoset[7]) - a[7].Mul(&a[7], &twiddlesCoset[7]) - a[12].Mul(&a[12], &twiddlesCoset[8]) - a[13].Mul(&a[13], &twiddlesCoset[8]) - a[14].Mul(&a[14], &twiddlesCoset[8]) - a[15].Mul(&a[15], &twiddlesCoset[8]) - a[20].Mul(&a[20], &twiddlesCoset[9]) - a[21].Mul(&a[21], &twiddlesCoset[9]) - a[22].Mul(&a[22], &twiddlesCoset[9]) - a[23].Mul(&a[23], &twiddlesCoset[9]) - a[28].Mul(&a[28], &twiddlesCoset[10]) - a[29].Mul(&a[29], &twiddlesCoset[10]) - a[30].Mul(&a[30], &twiddlesCoset[10]) - a[31].Mul(&a[31], &twiddlesCoset[10]) - a[36].Mul(&a[36], &twiddlesCoset[11]) - a[37].Mul(&a[37], &twiddlesCoset[11]) - a[38].Mul(&a[38], &twiddlesCoset[11]) - a[39].Mul(&a[39], &twiddlesCoset[11]) - a[44].Mul(&a[44], &twiddlesCoset[12]) - 
a[45].Mul(&a[45], &twiddlesCoset[12]) - a[46].Mul(&a[46], &twiddlesCoset[12]) - a[47].Mul(&a[47], &twiddlesCoset[12]) - a[52].Mul(&a[52], &twiddlesCoset[13]) - a[53].Mul(&a[53], &twiddlesCoset[13]) - a[54].Mul(&a[54], &twiddlesCoset[13]) - a[55].Mul(&a[55], &twiddlesCoset[13]) - a[60].Mul(&a[60], &twiddlesCoset[14]) - a[61].Mul(&a[61], &twiddlesCoset[14]) - a[62].Mul(&a[62], &twiddlesCoset[14]) - a[63].Mul(&a[63], &twiddlesCoset[14]) + a[4].Mul(&a[4], &twiddles[7]) + a[5].Mul(&a[5], &twiddles[7]) + a[6].Mul(&a[6], &twiddles[7]) + a[7].Mul(&a[7], &twiddles[7]) + a[12].Mul(&a[12], &twiddles[8]) + a[13].Mul(&a[13], &twiddles[8]) + a[14].Mul(&a[14], &twiddles[8]) + a[15].Mul(&a[15], &twiddles[8]) + a[20].Mul(&a[20], &twiddles[9]) + a[21].Mul(&a[21], &twiddles[9]) + a[22].Mul(&a[22], &twiddles[9]) + a[23].Mul(&a[23], &twiddles[9]) + a[28].Mul(&a[28], &twiddles[10]) + a[29].Mul(&a[29], &twiddles[10]) + a[30].Mul(&a[30], &twiddles[10]) + a[31].Mul(&a[31], &twiddles[10]) + a[36].Mul(&a[36], &twiddles[11]) + a[37].Mul(&a[37], &twiddles[11]) + a[38].Mul(&a[38], &twiddles[11]) + a[39].Mul(&a[39], &twiddles[11]) + a[44].Mul(&a[44], &twiddles[12]) + a[45].Mul(&a[45], &twiddles[12]) + a[46].Mul(&a[46], &twiddles[12]) + a[47].Mul(&a[47], &twiddles[12]) + a[52].Mul(&a[52], &twiddles[13]) + a[53].Mul(&a[53], &twiddles[13]) + a[54].Mul(&a[54], &twiddles[13]) + a[55].Mul(&a[55], &twiddles[13]) + a[60].Mul(&a[60], &twiddles[14]) + a[61].Mul(&a[61], &twiddles[14]) + a[62].Mul(&a[62], &twiddles[14]) + a[63].Mul(&a[63], &twiddles[14]) fr.Butterfly(&a[0], &a[4]) fr.Butterfly(&a[1], &a[5]) fr.Butterfly(&a[2], &a[6]) @@ -271,38 +5445,38 @@ func fft64(a []fr.Element, twiddlesCoset []fr.Element) { fr.Butterfly(&a[57], &a[61]) fr.Butterfly(&a[58], &a[62]) fr.Butterfly(&a[59], &a[63]) - a[2].Mul(&a[2], &twiddlesCoset[15]) - a[3].Mul(&a[3], &twiddlesCoset[15]) - a[6].Mul(&a[6], &twiddlesCoset[16]) - a[7].Mul(&a[7], &twiddlesCoset[16]) - a[10].Mul(&a[10], &twiddlesCoset[17]) - a[11].Mul(&a[11], 
&twiddlesCoset[17]) - a[14].Mul(&a[14], &twiddlesCoset[18]) - a[15].Mul(&a[15], &twiddlesCoset[18]) - a[18].Mul(&a[18], &twiddlesCoset[19]) - a[19].Mul(&a[19], &twiddlesCoset[19]) - a[22].Mul(&a[22], &twiddlesCoset[20]) - a[23].Mul(&a[23], &twiddlesCoset[20]) - a[26].Mul(&a[26], &twiddlesCoset[21]) - a[27].Mul(&a[27], &twiddlesCoset[21]) - a[30].Mul(&a[30], &twiddlesCoset[22]) - a[31].Mul(&a[31], &twiddlesCoset[22]) - a[34].Mul(&a[34], &twiddlesCoset[23]) - a[35].Mul(&a[35], &twiddlesCoset[23]) - a[38].Mul(&a[38], &twiddlesCoset[24]) - a[39].Mul(&a[39], &twiddlesCoset[24]) - a[42].Mul(&a[42], &twiddlesCoset[25]) - a[43].Mul(&a[43], &twiddlesCoset[25]) - a[46].Mul(&a[46], &twiddlesCoset[26]) - a[47].Mul(&a[47], &twiddlesCoset[26]) - a[50].Mul(&a[50], &twiddlesCoset[27]) - a[51].Mul(&a[51], &twiddlesCoset[27]) - a[54].Mul(&a[54], &twiddlesCoset[28]) - a[55].Mul(&a[55], &twiddlesCoset[28]) - a[58].Mul(&a[58], &twiddlesCoset[29]) - a[59].Mul(&a[59], &twiddlesCoset[29]) - a[62].Mul(&a[62], &twiddlesCoset[30]) - a[63].Mul(&a[63], &twiddlesCoset[30]) + a[2].Mul(&a[2], &twiddles[15]) + a[3].Mul(&a[3], &twiddles[15]) + a[6].Mul(&a[6], &twiddles[16]) + a[7].Mul(&a[7], &twiddles[16]) + a[10].Mul(&a[10], &twiddles[17]) + a[11].Mul(&a[11], &twiddles[17]) + a[14].Mul(&a[14], &twiddles[18]) + a[15].Mul(&a[15], &twiddles[18]) + a[18].Mul(&a[18], &twiddles[19]) + a[19].Mul(&a[19], &twiddles[19]) + a[22].Mul(&a[22], &twiddles[20]) + a[23].Mul(&a[23], &twiddles[20]) + a[26].Mul(&a[26], &twiddles[21]) + a[27].Mul(&a[27], &twiddles[21]) + a[30].Mul(&a[30], &twiddles[22]) + a[31].Mul(&a[31], &twiddles[22]) + a[34].Mul(&a[34], &twiddles[23]) + a[35].Mul(&a[35], &twiddles[23]) + a[38].Mul(&a[38], &twiddles[24]) + a[39].Mul(&a[39], &twiddles[24]) + a[42].Mul(&a[42], &twiddles[25]) + a[43].Mul(&a[43], &twiddles[25]) + a[46].Mul(&a[46], &twiddles[26]) + a[47].Mul(&a[47], &twiddles[26]) + a[50].Mul(&a[50], &twiddles[27]) + a[51].Mul(&a[51], &twiddles[27]) + a[54].Mul(&a[54], &twiddles[28]) + 
a[55].Mul(&a[55], &twiddles[28]) + a[58].Mul(&a[58], &twiddles[29]) + a[59].Mul(&a[59], &twiddles[29]) + a[62].Mul(&a[62], &twiddles[30]) + a[63].Mul(&a[63], &twiddles[30]) fr.Butterfly(&a[0], &a[2]) fr.Butterfly(&a[1], &a[3]) fr.Butterfly(&a[4], &a[6]) @@ -335,38 +5509,38 @@ func fft64(a []fr.Element, twiddlesCoset []fr.Element) { fr.Butterfly(&a[57], &a[59]) fr.Butterfly(&a[60], &a[62]) fr.Butterfly(&a[61], &a[63]) - a[1].Mul(&a[1], &twiddlesCoset[31]) - a[3].Mul(&a[3], &twiddlesCoset[32]) - a[5].Mul(&a[5], &twiddlesCoset[33]) - a[7].Mul(&a[7], &twiddlesCoset[34]) - a[9].Mul(&a[9], &twiddlesCoset[35]) - a[11].Mul(&a[11], &twiddlesCoset[36]) - a[13].Mul(&a[13], &twiddlesCoset[37]) - a[15].Mul(&a[15], &twiddlesCoset[38]) - a[17].Mul(&a[17], &twiddlesCoset[39]) - a[19].Mul(&a[19], &twiddlesCoset[40]) - a[21].Mul(&a[21], &twiddlesCoset[41]) - a[23].Mul(&a[23], &twiddlesCoset[42]) - a[25].Mul(&a[25], &twiddlesCoset[43]) - a[27].Mul(&a[27], &twiddlesCoset[44]) - a[29].Mul(&a[29], &twiddlesCoset[45]) - a[31].Mul(&a[31], &twiddlesCoset[46]) - a[33].Mul(&a[33], &twiddlesCoset[47]) - a[35].Mul(&a[35], &twiddlesCoset[48]) - a[37].Mul(&a[37], &twiddlesCoset[49]) - a[39].Mul(&a[39], &twiddlesCoset[50]) - a[41].Mul(&a[41], &twiddlesCoset[51]) - a[43].Mul(&a[43], &twiddlesCoset[52]) - a[45].Mul(&a[45], &twiddlesCoset[53]) - a[47].Mul(&a[47], &twiddlesCoset[54]) - a[49].Mul(&a[49], &twiddlesCoset[55]) - a[51].Mul(&a[51], &twiddlesCoset[56]) - a[53].Mul(&a[53], &twiddlesCoset[57]) - a[55].Mul(&a[55], &twiddlesCoset[58]) - a[57].Mul(&a[57], &twiddlesCoset[59]) - a[59].Mul(&a[59], &twiddlesCoset[60]) - a[61].Mul(&a[61], &twiddlesCoset[61]) - a[63].Mul(&a[63], &twiddlesCoset[62]) + a[1].Mul(&a[1], &twiddles[31]) + a[3].Mul(&a[3], &twiddles[32]) + a[5].Mul(&a[5], &twiddles[33]) + a[7].Mul(&a[7], &twiddles[34]) + a[9].Mul(&a[9], &twiddles[35]) + a[11].Mul(&a[11], &twiddles[36]) + a[13].Mul(&a[13], &twiddles[37]) + a[15].Mul(&a[15], &twiddles[38]) + a[17].Mul(&a[17], &twiddles[39]) + 
a[19].Mul(&a[19], &twiddles[40]) + a[21].Mul(&a[21], &twiddles[41]) + a[23].Mul(&a[23], &twiddles[42]) + a[25].Mul(&a[25], &twiddles[43]) + a[27].Mul(&a[27], &twiddles[44]) + a[29].Mul(&a[29], &twiddles[45]) + a[31].Mul(&a[31], &twiddles[46]) + a[33].Mul(&a[33], &twiddles[47]) + a[35].Mul(&a[35], &twiddles[48]) + a[37].Mul(&a[37], &twiddles[49]) + a[39].Mul(&a[39], &twiddles[50]) + a[41].Mul(&a[41], &twiddles[51]) + a[43].Mul(&a[43], &twiddles[52]) + a[45].Mul(&a[45], &twiddles[53]) + a[47].Mul(&a[47], &twiddles[54]) + a[49].Mul(&a[49], &twiddles[55]) + a[51].Mul(&a[51], &twiddles[56]) + a[53].Mul(&a[53], &twiddles[57]) + a[55].Mul(&a[55], &twiddles[58]) + a[57].Mul(&a[57], &twiddles[59]) + a[59].Mul(&a[59], &twiddles[60]) + a[61].Mul(&a[61], &twiddles[61]) + a[63].Mul(&a[63], &twiddles[62]) fr.Butterfly(&a[0], &a[1]) fr.Butterfly(&a[2], &a[3]) fr.Butterfly(&a[4], &a[5]) @@ -400,157 +5574,3 @@ func fft64(a []fr.Element, twiddlesCoset []fr.Element) { fr.Butterfly(&a[60], &a[61]) fr.Butterfly(&a[62], &a[63]) } - -// precomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table -// it then return all elements in the correct order for the unrolled FFT. 
-func precomputeTwiddlesCoset(generator, shifter fr.Element) []fr.Element { - toReturn := make([]fr.Element, 63) - var r, s fr.Element - e := new(big.Int) - - s = shifter - for k := 0; k < 5; k++ { - s.Square(&s) - } - toReturn[0] = s - s = shifter - for k := 0; k < 4; k++ { - s.Square(&s) - } - toReturn[1] = s - r.Exp(generator, e.SetUint64(uint64(1<<4*1))) - toReturn[2].Mul(&r, &s) - s = shifter - for k := 0; k < 3; k++ { - s.Square(&s) - } - toReturn[3] = s - r.Exp(generator, e.SetUint64(uint64(1<<3*2))) - toReturn[4].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*1))) - toReturn[5].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*3))) - toReturn[6].Mul(&r, &s) - s = shifter - for k := 0; k < 2; k++ { - s.Square(&s) - } - toReturn[7] = s - r.Exp(generator, e.SetUint64(uint64(1<<2*4))) - toReturn[8].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*2))) - toReturn[9].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*6))) - toReturn[10].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*1))) - toReturn[11].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*5))) - toReturn[12].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*3))) - toReturn[13].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*7))) - toReturn[14].Mul(&r, &s) - s = shifter - for k := 0; k < 1; k++ { - s.Square(&s) - } - toReturn[15] = s - r.Exp(generator, e.SetUint64(uint64(1<<1*8))) - toReturn[16].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*4))) - toReturn[17].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*12))) - toReturn[18].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*2))) - toReturn[19].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*10))) - toReturn[20].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*6))) - toReturn[21].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*14))) - toReturn[22].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*1))) - toReturn[23].Mul(&r, &s) - r.Exp(generator, 
e.SetUint64(uint64(1<<1*9))) - toReturn[24].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*5))) - toReturn[25].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*13))) - toReturn[26].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*3))) - toReturn[27].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*11))) - toReturn[28].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*7))) - toReturn[29].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*15))) - toReturn[30].Mul(&r, &s) - s = shifter - for k := 0; k < 0; k++ { - s.Square(&s) - } - toReturn[31] = s - r.Exp(generator, e.SetUint64(uint64(1<<0*16))) - toReturn[32].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*8))) - toReturn[33].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*24))) - toReturn[34].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*4))) - toReturn[35].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*20))) - toReturn[36].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*12))) - toReturn[37].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*28))) - toReturn[38].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*2))) - toReturn[39].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*18))) - toReturn[40].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*10))) - toReturn[41].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*26))) - toReturn[42].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*6))) - toReturn[43].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*22))) - toReturn[44].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*14))) - toReturn[45].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*30))) - toReturn[46].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*1))) - toReturn[47].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*17))) - toReturn[48].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*9))) - toReturn[49].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*25))) 
- toReturn[50].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*5))) - toReturn[51].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*21))) - toReturn[52].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*13))) - toReturn[53].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*29))) - toReturn[54].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*3))) - toReturn[55].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*19))) - toReturn[56].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*11))) - toReturn[57].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*27))) - toReturn[58].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*7))) - toReturn[59].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*23))) - toReturn[60].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*15))) - toReturn[61].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*31))) - toReturn[62].Mul(&r, &s) - return toReturn -} diff --git a/ecc/bls12-377/fr/sis/sis_test.go b/ecc/bls12-377/fr/sis/sis_test.go index 3f57ee2431..9835765c37 100644 --- a/ecc/bls12-377/fr/sis/sis_test.go +++ b/ecc/bls12-377/fr/sis/sis_test.go @@ -266,31 +266,49 @@ func benchmarkSIS(b *testing.B, input []fr.Element, sparse bool, logTwoBound, lo }) } -func TestUnrolledFFT(t *testing.T) { +func TestPartialFFT(t *testing.T) { assert := require.New(t) - var shift fr.Element - shift.SetRandom() + var ( + domain = fft.NewDomain(64) + twiddles = precomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) + ) - const size = 64 - domain := fft.NewDomain(size, fft.WithShift(shift)) + for mask := 0; mask < 16; mask++ { - k1 := make([]fr.Element, size) - for i := 0; i < size; i++ { - k1[i].SetRandom() + var ( + a = vec123456() + b = vec123456() + ) + + zeroizeWithMask(a, mask) + zeroizeWithMask(b, mask) + + domain.FFT(a, fft.DIF, fft.OnCoset()) + partialFFT_64[mask](b, twiddles) + for i := range a { + assert.True(a[i].Equal(&b[i]), "mismatch at index %d", i) + } } - k2 := 
make([]fr.Element, size) - copy(k2, k1) - // default FFT - domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) +} - // unrolled FFT - twiddlesCoset := precomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) - fft64(k2, twiddlesCoset) +func vec123456() []fr.Element { + vec := make([]fr.Element, 64) + for i := range vec { + vec[i].SetInt64(int64(i)) + } + return vec +} - // compare results - for i := 0; i < size; i++ { - assert.True(k1[i].Equal(&k2[i]), "i = %d", i) +func zeroizeWithMask(v []fr.Element, mask int) { + for i := 0; i < 4; i++ { + if (mask>>i)&1 == 1 { + continue + } + + for j := 0; j < 16; j++ { + v[16*i+j].SetZero() + } } } diff --git a/field/babybear/sis/sis.go b/field/babybear/sis/sis.go index 2f3f2b3dd0..9222343f8b 100644 --- a/field/babybear/sis/sis.go +++ b/field/babybear/sis/sis.go @@ -38,8 +38,7 @@ type RSis struct { maxNbElementsToHash int - smallFFT func(babybear.Vector) - cosetTable []babybear.Element // used in conjunction with the smallFFT; + kz babybear.Vector // zeroes used to zeroize the limbs buffer faster. } // NewRSis creates an instance of RSis. @@ -97,28 +96,10 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R Domain: fft.NewDomain(uint64(degree), fft.WithShift(shift)), A: make([][]babybear.Element, n), Ag: make([][]babybear.Element, n), + kz: make(babybear.Vector, degree), maxNbElementsToHash: maxNbElementsToHash, } - r.cosetTable, err = r.Domain.CosetTable() - if err != nil { - return nil, err - } - - // perf note: we have a dedicated path for 64, as it correspond to the parameters - // used by linea-monorepo prover with bls12377 curve. - // once the linea prover switches to smaller fields, this path can be removed. 
- if r.Domain.Cardinality == 64 { - twiddlesCoset := precomputeTwiddlesCoset(r.Domain.Generator, shift) - r.smallFFT = func(p babybear.Vector) { - fft64(p, twiddlesCoset) - } - } else { - r.smallFFT = func(p babybear.Vector) { - p.Mul(p, babybear.Vector(r.cosetTable)) - r.Domain.FFT(p, fft.DIF) - } - } // filling A a := make([]babybear.Element, n*r.Degree) ag := make([]babybear.Element, n*r.Degree) @@ -134,8 +115,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - // r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - r.smallFFT(r.Ag[i]) + r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } }) @@ -163,7 +143,7 @@ func (r *RSis) Hash(v, res []babybear.Element) error { // inner hash it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8) for i := 0; i < len(r.Ag); i++ { - r.InnerHash(it, res, k, i) + r.InnerHash(it, res, k, r.kz, i, ^uint64(0)) } // reduces mod Xᵈ+1 @@ -172,20 +152,15 @@ func (r *RSis) Hash(v, res []babybear.Element) error { return nil } -func (r *RSis) InnerHash(it *LimbIterator, res, k babybear.Vector, polId int) { +func (r *RSis) InnerHash(it *LimbIterator, res, k, kz babybear.Vector, polId int, mask uint64) { + copy(k, kz) zero := uint32(0) for j := 0; j < r.Degree; j++ { l, ok := it.NextLimb() if !ok { - // we need to pad; note that we should use a deterministic padding - // other than 0, but it is not an issue for the current use cases. 
- for m := j; m < r.Degree; m++ { - k[m].SetZero() - } break } zero |= l - k[j].SetZero() k[j][0] = l } if zero == 0 { @@ -193,10 +168,7 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k babybear.Vector, polId int) { // we can skip this, FFT(0) = 0 return } - - // this is equivalent to: - // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - r.smallFFT(k) + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // we compute k * r.Ag[polId] in ℤ_{p}[X]/Xᵈ+1. // k and r.Ag[polId] are in evaluation form on √(g) * diff --git a/field/babybear/sis/sis_fft.go b/field/babybear/sis/sis_fft.go deleted file mode 100644 index b609f67415..0000000000 --- a/field/babybear/sis/sis_fft.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2020-2025 Consensys Software Inc. -// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. - -// Code generated by consensys/gnark-crypto DO NOT EDIT - -package sis - -import ( - "github.com/consensys/gnark-crypto/field/babybear" - "math/big" -) - -// fft64 unrolls an FFT with domain.Cardinality == 64 -// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see precomputeTwiddlesCoset -func fft64(a []babybear.Element, twiddlesCoset []babybear.Element) { - - a[32].Mul(&a[32], &twiddlesCoset[0]) - a[33].Mul(&a[33], &twiddlesCoset[0]) - a[34].Mul(&a[34], &twiddlesCoset[0]) - a[35].Mul(&a[35], &twiddlesCoset[0]) - a[36].Mul(&a[36], &twiddlesCoset[0]) - a[37].Mul(&a[37], &twiddlesCoset[0]) - a[38].Mul(&a[38], &twiddlesCoset[0]) - a[39].Mul(&a[39], &twiddlesCoset[0]) - a[40].Mul(&a[40], &twiddlesCoset[0]) - a[41].Mul(&a[41], &twiddlesCoset[0]) - a[42].Mul(&a[42], &twiddlesCoset[0]) - a[43].Mul(&a[43], &twiddlesCoset[0]) - a[44].Mul(&a[44], &twiddlesCoset[0]) - a[45].Mul(&a[45], &twiddlesCoset[0]) - a[46].Mul(&a[46], &twiddlesCoset[0]) - a[47].Mul(&a[47], &twiddlesCoset[0]) - a[48].Mul(&a[48], &twiddlesCoset[0]) - 
a[49].Mul(&a[49], &twiddlesCoset[0]) - a[50].Mul(&a[50], &twiddlesCoset[0]) - a[51].Mul(&a[51], &twiddlesCoset[0]) - a[52].Mul(&a[52], &twiddlesCoset[0]) - a[53].Mul(&a[53], &twiddlesCoset[0]) - a[54].Mul(&a[54], &twiddlesCoset[0]) - a[55].Mul(&a[55], &twiddlesCoset[0]) - a[56].Mul(&a[56], &twiddlesCoset[0]) - a[57].Mul(&a[57], &twiddlesCoset[0]) - a[58].Mul(&a[58], &twiddlesCoset[0]) - a[59].Mul(&a[59], &twiddlesCoset[0]) - a[60].Mul(&a[60], &twiddlesCoset[0]) - a[61].Mul(&a[61], &twiddlesCoset[0]) - a[62].Mul(&a[62], &twiddlesCoset[0]) - a[63].Mul(&a[63], &twiddlesCoset[0]) - babybear.Butterfly(&a[0], &a[32]) - babybear.Butterfly(&a[1], &a[33]) - babybear.Butterfly(&a[2], &a[34]) - babybear.Butterfly(&a[3], &a[35]) - babybear.Butterfly(&a[4], &a[36]) - babybear.Butterfly(&a[5], &a[37]) - babybear.Butterfly(&a[6], &a[38]) - babybear.Butterfly(&a[7], &a[39]) - babybear.Butterfly(&a[8], &a[40]) - babybear.Butterfly(&a[9], &a[41]) - babybear.Butterfly(&a[10], &a[42]) - babybear.Butterfly(&a[11], &a[43]) - babybear.Butterfly(&a[12], &a[44]) - babybear.Butterfly(&a[13], &a[45]) - babybear.Butterfly(&a[14], &a[46]) - babybear.Butterfly(&a[15], &a[47]) - babybear.Butterfly(&a[16], &a[48]) - babybear.Butterfly(&a[17], &a[49]) - babybear.Butterfly(&a[18], &a[50]) - babybear.Butterfly(&a[19], &a[51]) - babybear.Butterfly(&a[20], &a[52]) - babybear.Butterfly(&a[21], &a[53]) - babybear.Butterfly(&a[22], &a[54]) - babybear.Butterfly(&a[23], &a[55]) - babybear.Butterfly(&a[24], &a[56]) - babybear.Butterfly(&a[25], &a[57]) - babybear.Butterfly(&a[26], &a[58]) - babybear.Butterfly(&a[27], &a[59]) - babybear.Butterfly(&a[28], &a[60]) - babybear.Butterfly(&a[29], &a[61]) - babybear.Butterfly(&a[30], &a[62]) - babybear.Butterfly(&a[31], &a[63]) - a[16].Mul(&a[16], &twiddlesCoset[1]) - a[17].Mul(&a[17], &twiddlesCoset[1]) - a[18].Mul(&a[18], &twiddlesCoset[1]) - a[19].Mul(&a[19], &twiddlesCoset[1]) - a[20].Mul(&a[20], &twiddlesCoset[1]) - a[21].Mul(&a[21], &twiddlesCoset[1]) - 
a[22].Mul(&a[22], &twiddlesCoset[1]) - a[23].Mul(&a[23], &twiddlesCoset[1]) - a[24].Mul(&a[24], &twiddlesCoset[1]) - a[25].Mul(&a[25], &twiddlesCoset[1]) - a[26].Mul(&a[26], &twiddlesCoset[1]) - a[27].Mul(&a[27], &twiddlesCoset[1]) - a[28].Mul(&a[28], &twiddlesCoset[1]) - a[29].Mul(&a[29], &twiddlesCoset[1]) - a[30].Mul(&a[30], &twiddlesCoset[1]) - a[31].Mul(&a[31], &twiddlesCoset[1]) - a[48].Mul(&a[48], &twiddlesCoset[2]) - a[49].Mul(&a[49], &twiddlesCoset[2]) - a[50].Mul(&a[50], &twiddlesCoset[2]) - a[51].Mul(&a[51], &twiddlesCoset[2]) - a[52].Mul(&a[52], &twiddlesCoset[2]) - a[53].Mul(&a[53], &twiddlesCoset[2]) - a[54].Mul(&a[54], &twiddlesCoset[2]) - a[55].Mul(&a[55], &twiddlesCoset[2]) - a[56].Mul(&a[56], &twiddlesCoset[2]) - a[57].Mul(&a[57], &twiddlesCoset[2]) - a[58].Mul(&a[58], &twiddlesCoset[2]) - a[59].Mul(&a[59], &twiddlesCoset[2]) - a[60].Mul(&a[60], &twiddlesCoset[2]) - a[61].Mul(&a[61], &twiddlesCoset[2]) - a[62].Mul(&a[62], &twiddlesCoset[2]) - a[63].Mul(&a[63], &twiddlesCoset[2]) - babybear.Butterfly(&a[0], &a[16]) - babybear.Butterfly(&a[1], &a[17]) - babybear.Butterfly(&a[2], &a[18]) - babybear.Butterfly(&a[3], &a[19]) - babybear.Butterfly(&a[4], &a[20]) - babybear.Butterfly(&a[5], &a[21]) - babybear.Butterfly(&a[6], &a[22]) - babybear.Butterfly(&a[7], &a[23]) - babybear.Butterfly(&a[8], &a[24]) - babybear.Butterfly(&a[9], &a[25]) - babybear.Butterfly(&a[10], &a[26]) - babybear.Butterfly(&a[11], &a[27]) - babybear.Butterfly(&a[12], &a[28]) - babybear.Butterfly(&a[13], &a[29]) - babybear.Butterfly(&a[14], &a[30]) - babybear.Butterfly(&a[15], &a[31]) - babybear.Butterfly(&a[32], &a[48]) - babybear.Butterfly(&a[33], &a[49]) - babybear.Butterfly(&a[34], &a[50]) - babybear.Butterfly(&a[35], &a[51]) - babybear.Butterfly(&a[36], &a[52]) - babybear.Butterfly(&a[37], &a[53]) - babybear.Butterfly(&a[38], &a[54]) - babybear.Butterfly(&a[39], &a[55]) - babybear.Butterfly(&a[40], &a[56]) - babybear.Butterfly(&a[41], &a[57]) - babybear.Butterfly(&a[42], 
&a[58]) - babybear.Butterfly(&a[43], &a[59]) - babybear.Butterfly(&a[44], &a[60]) - babybear.Butterfly(&a[45], &a[61]) - babybear.Butterfly(&a[46], &a[62]) - babybear.Butterfly(&a[47], &a[63]) - a[8].Mul(&a[8], &twiddlesCoset[3]) - a[9].Mul(&a[9], &twiddlesCoset[3]) - a[10].Mul(&a[10], &twiddlesCoset[3]) - a[11].Mul(&a[11], &twiddlesCoset[3]) - a[12].Mul(&a[12], &twiddlesCoset[3]) - a[13].Mul(&a[13], &twiddlesCoset[3]) - a[14].Mul(&a[14], &twiddlesCoset[3]) - a[15].Mul(&a[15], &twiddlesCoset[3]) - a[24].Mul(&a[24], &twiddlesCoset[4]) - a[25].Mul(&a[25], &twiddlesCoset[4]) - a[26].Mul(&a[26], &twiddlesCoset[4]) - a[27].Mul(&a[27], &twiddlesCoset[4]) - a[28].Mul(&a[28], &twiddlesCoset[4]) - a[29].Mul(&a[29], &twiddlesCoset[4]) - a[30].Mul(&a[30], &twiddlesCoset[4]) - a[31].Mul(&a[31], &twiddlesCoset[4]) - a[40].Mul(&a[40], &twiddlesCoset[5]) - a[41].Mul(&a[41], &twiddlesCoset[5]) - a[42].Mul(&a[42], &twiddlesCoset[5]) - a[43].Mul(&a[43], &twiddlesCoset[5]) - a[44].Mul(&a[44], &twiddlesCoset[5]) - a[45].Mul(&a[45], &twiddlesCoset[5]) - a[46].Mul(&a[46], &twiddlesCoset[5]) - a[47].Mul(&a[47], &twiddlesCoset[5]) - a[56].Mul(&a[56], &twiddlesCoset[6]) - a[57].Mul(&a[57], &twiddlesCoset[6]) - a[58].Mul(&a[58], &twiddlesCoset[6]) - a[59].Mul(&a[59], &twiddlesCoset[6]) - a[60].Mul(&a[60], &twiddlesCoset[6]) - a[61].Mul(&a[61], &twiddlesCoset[6]) - a[62].Mul(&a[62], &twiddlesCoset[6]) - a[63].Mul(&a[63], &twiddlesCoset[6]) - babybear.Butterfly(&a[0], &a[8]) - babybear.Butterfly(&a[1], &a[9]) - babybear.Butterfly(&a[2], &a[10]) - babybear.Butterfly(&a[3], &a[11]) - babybear.Butterfly(&a[4], &a[12]) - babybear.Butterfly(&a[5], &a[13]) - babybear.Butterfly(&a[6], &a[14]) - babybear.Butterfly(&a[7], &a[15]) - babybear.Butterfly(&a[16], &a[24]) - babybear.Butterfly(&a[17], &a[25]) - babybear.Butterfly(&a[18], &a[26]) - babybear.Butterfly(&a[19], &a[27]) - babybear.Butterfly(&a[20], &a[28]) - babybear.Butterfly(&a[21], &a[29]) - babybear.Butterfly(&a[22], &a[30]) - 
babybear.Butterfly(&a[23], &a[31]) - babybear.Butterfly(&a[32], &a[40]) - babybear.Butterfly(&a[33], &a[41]) - babybear.Butterfly(&a[34], &a[42]) - babybear.Butterfly(&a[35], &a[43]) - babybear.Butterfly(&a[36], &a[44]) - babybear.Butterfly(&a[37], &a[45]) - babybear.Butterfly(&a[38], &a[46]) - babybear.Butterfly(&a[39], &a[47]) - babybear.Butterfly(&a[48], &a[56]) - babybear.Butterfly(&a[49], &a[57]) - babybear.Butterfly(&a[50], &a[58]) - babybear.Butterfly(&a[51], &a[59]) - babybear.Butterfly(&a[52], &a[60]) - babybear.Butterfly(&a[53], &a[61]) - babybear.Butterfly(&a[54], &a[62]) - babybear.Butterfly(&a[55], &a[63]) - a[4].Mul(&a[4], &twiddlesCoset[7]) - a[5].Mul(&a[5], &twiddlesCoset[7]) - a[6].Mul(&a[6], &twiddlesCoset[7]) - a[7].Mul(&a[7], &twiddlesCoset[7]) - a[12].Mul(&a[12], &twiddlesCoset[8]) - a[13].Mul(&a[13], &twiddlesCoset[8]) - a[14].Mul(&a[14], &twiddlesCoset[8]) - a[15].Mul(&a[15], &twiddlesCoset[8]) - a[20].Mul(&a[20], &twiddlesCoset[9]) - a[21].Mul(&a[21], &twiddlesCoset[9]) - a[22].Mul(&a[22], &twiddlesCoset[9]) - a[23].Mul(&a[23], &twiddlesCoset[9]) - a[28].Mul(&a[28], &twiddlesCoset[10]) - a[29].Mul(&a[29], &twiddlesCoset[10]) - a[30].Mul(&a[30], &twiddlesCoset[10]) - a[31].Mul(&a[31], &twiddlesCoset[10]) - a[36].Mul(&a[36], &twiddlesCoset[11]) - a[37].Mul(&a[37], &twiddlesCoset[11]) - a[38].Mul(&a[38], &twiddlesCoset[11]) - a[39].Mul(&a[39], &twiddlesCoset[11]) - a[44].Mul(&a[44], &twiddlesCoset[12]) - a[45].Mul(&a[45], &twiddlesCoset[12]) - a[46].Mul(&a[46], &twiddlesCoset[12]) - a[47].Mul(&a[47], &twiddlesCoset[12]) - a[52].Mul(&a[52], &twiddlesCoset[13]) - a[53].Mul(&a[53], &twiddlesCoset[13]) - a[54].Mul(&a[54], &twiddlesCoset[13]) - a[55].Mul(&a[55], &twiddlesCoset[13]) - a[60].Mul(&a[60], &twiddlesCoset[14]) - a[61].Mul(&a[61], &twiddlesCoset[14]) - a[62].Mul(&a[62], &twiddlesCoset[14]) - a[63].Mul(&a[63], &twiddlesCoset[14]) - babybear.Butterfly(&a[0], &a[4]) - babybear.Butterfly(&a[1], &a[5]) - babybear.Butterfly(&a[2], &a[6]) - 
babybear.Butterfly(&a[3], &a[7]) - babybear.Butterfly(&a[8], &a[12]) - babybear.Butterfly(&a[9], &a[13]) - babybear.Butterfly(&a[10], &a[14]) - babybear.Butterfly(&a[11], &a[15]) - babybear.Butterfly(&a[16], &a[20]) - babybear.Butterfly(&a[17], &a[21]) - babybear.Butterfly(&a[18], &a[22]) - babybear.Butterfly(&a[19], &a[23]) - babybear.Butterfly(&a[24], &a[28]) - babybear.Butterfly(&a[25], &a[29]) - babybear.Butterfly(&a[26], &a[30]) - babybear.Butterfly(&a[27], &a[31]) - babybear.Butterfly(&a[32], &a[36]) - babybear.Butterfly(&a[33], &a[37]) - babybear.Butterfly(&a[34], &a[38]) - babybear.Butterfly(&a[35], &a[39]) - babybear.Butterfly(&a[40], &a[44]) - babybear.Butterfly(&a[41], &a[45]) - babybear.Butterfly(&a[42], &a[46]) - babybear.Butterfly(&a[43], &a[47]) - babybear.Butterfly(&a[48], &a[52]) - babybear.Butterfly(&a[49], &a[53]) - babybear.Butterfly(&a[50], &a[54]) - babybear.Butterfly(&a[51], &a[55]) - babybear.Butterfly(&a[56], &a[60]) - babybear.Butterfly(&a[57], &a[61]) - babybear.Butterfly(&a[58], &a[62]) - babybear.Butterfly(&a[59], &a[63]) - a[2].Mul(&a[2], &twiddlesCoset[15]) - a[3].Mul(&a[3], &twiddlesCoset[15]) - a[6].Mul(&a[6], &twiddlesCoset[16]) - a[7].Mul(&a[7], &twiddlesCoset[16]) - a[10].Mul(&a[10], &twiddlesCoset[17]) - a[11].Mul(&a[11], &twiddlesCoset[17]) - a[14].Mul(&a[14], &twiddlesCoset[18]) - a[15].Mul(&a[15], &twiddlesCoset[18]) - a[18].Mul(&a[18], &twiddlesCoset[19]) - a[19].Mul(&a[19], &twiddlesCoset[19]) - a[22].Mul(&a[22], &twiddlesCoset[20]) - a[23].Mul(&a[23], &twiddlesCoset[20]) - a[26].Mul(&a[26], &twiddlesCoset[21]) - a[27].Mul(&a[27], &twiddlesCoset[21]) - a[30].Mul(&a[30], &twiddlesCoset[22]) - a[31].Mul(&a[31], &twiddlesCoset[22]) - a[34].Mul(&a[34], &twiddlesCoset[23]) - a[35].Mul(&a[35], &twiddlesCoset[23]) - a[38].Mul(&a[38], &twiddlesCoset[24]) - a[39].Mul(&a[39], &twiddlesCoset[24]) - a[42].Mul(&a[42], &twiddlesCoset[25]) - a[43].Mul(&a[43], &twiddlesCoset[25]) - a[46].Mul(&a[46], &twiddlesCoset[26]) - a[47].Mul(&a[47], 
&twiddlesCoset[26]) - a[50].Mul(&a[50], &twiddlesCoset[27]) - a[51].Mul(&a[51], &twiddlesCoset[27]) - a[54].Mul(&a[54], &twiddlesCoset[28]) - a[55].Mul(&a[55], &twiddlesCoset[28]) - a[58].Mul(&a[58], &twiddlesCoset[29]) - a[59].Mul(&a[59], &twiddlesCoset[29]) - a[62].Mul(&a[62], &twiddlesCoset[30]) - a[63].Mul(&a[63], &twiddlesCoset[30]) - babybear.Butterfly(&a[0], &a[2]) - babybear.Butterfly(&a[1], &a[3]) - babybear.Butterfly(&a[4], &a[6]) - babybear.Butterfly(&a[5], &a[7]) - babybear.Butterfly(&a[8], &a[10]) - babybear.Butterfly(&a[9], &a[11]) - babybear.Butterfly(&a[12], &a[14]) - babybear.Butterfly(&a[13], &a[15]) - babybear.Butterfly(&a[16], &a[18]) - babybear.Butterfly(&a[17], &a[19]) - babybear.Butterfly(&a[20], &a[22]) - babybear.Butterfly(&a[21], &a[23]) - babybear.Butterfly(&a[24], &a[26]) - babybear.Butterfly(&a[25], &a[27]) - babybear.Butterfly(&a[28], &a[30]) - babybear.Butterfly(&a[29], &a[31]) - babybear.Butterfly(&a[32], &a[34]) - babybear.Butterfly(&a[33], &a[35]) - babybear.Butterfly(&a[36], &a[38]) - babybear.Butterfly(&a[37], &a[39]) - babybear.Butterfly(&a[40], &a[42]) - babybear.Butterfly(&a[41], &a[43]) - babybear.Butterfly(&a[44], &a[46]) - babybear.Butterfly(&a[45], &a[47]) - babybear.Butterfly(&a[48], &a[50]) - babybear.Butterfly(&a[49], &a[51]) - babybear.Butterfly(&a[52], &a[54]) - babybear.Butterfly(&a[53], &a[55]) - babybear.Butterfly(&a[56], &a[58]) - babybear.Butterfly(&a[57], &a[59]) - babybear.Butterfly(&a[60], &a[62]) - babybear.Butterfly(&a[61], &a[63]) - a[1].Mul(&a[1], &twiddlesCoset[31]) - a[3].Mul(&a[3], &twiddlesCoset[32]) - a[5].Mul(&a[5], &twiddlesCoset[33]) - a[7].Mul(&a[7], &twiddlesCoset[34]) - a[9].Mul(&a[9], &twiddlesCoset[35]) - a[11].Mul(&a[11], &twiddlesCoset[36]) - a[13].Mul(&a[13], &twiddlesCoset[37]) - a[15].Mul(&a[15], &twiddlesCoset[38]) - a[17].Mul(&a[17], &twiddlesCoset[39]) - a[19].Mul(&a[19], &twiddlesCoset[40]) - a[21].Mul(&a[21], &twiddlesCoset[41]) - a[23].Mul(&a[23], &twiddlesCoset[42]) - 
a[25].Mul(&a[25], &twiddlesCoset[43]) - a[27].Mul(&a[27], &twiddlesCoset[44]) - a[29].Mul(&a[29], &twiddlesCoset[45]) - a[31].Mul(&a[31], &twiddlesCoset[46]) - a[33].Mul(&a[33], &twiddlesCoset[47]) - a[35].Mul(&a[35], &twiddlesCoset[48]) - a[37].Mul(&a[37], &twiddlesCoset[49]) - a[39].Mul(&a[39], &twiddlesCoset[50]) - a[41].Mul(&a[41], &twiddlesCoset[51]) - a[43].Mul(&a[43], &twiddlesCoset[52]) - a[45].Mul(&a[45], &twiddlesCoset[53]) - a[47].Mul(&a[47], &twiddlesCoset[54]) - a[49].Mul(&a[49], &twiddlesCoset[55]) - a[51].Mul(&a[51], &twiddlesCoset[56]) - a[53].Mul(&a[53], &twiddlesCoset[57]) - a[55].Mul(&a[55], &twiddlesCoset[58]) - a[57].Mul(&a[57], &twiddlesCoset[59]) - a[59].Mul(&a[59], &twiddlesCoset[60]) - a[61].Mul(&a[61], &twiddlesCoset[61]) - a[63].Mul(&a[63], &twiddlesCoset[62]) - babybear.Butterfly(&a[0], &a[1]) - babybear.Butterfly(&a[2], &a[3]) - babybear.Butterfly(&a[4], &a[5]) - babybear.Butterfly(&a[6], &a[7]) - babybear.Butterfly(&a[8], &a[9]) - babybear.Butterfly(&a[10], &a[11]) - babybear.Butterfly(&a[12], &a[13]) - babybear.Butterfly(&a[14], &a[15]) - babybear.Butterfly(&a[16], &a[17]) - babybear.Butterfly(&a[18], &a[19]) - babybear.Butterfly(&a[20], &a[21]) - babybear.Butterfly(&a[22], &a[23]) - babybear.Butterfly(&a[24], &a[25]) - babybear.Butterfly(&a[26], &a[27]) - babybear.Butterfly(&a[28], &a[29]) - babybear.Butterfly(&a[30], &a[31]) - babybear.Butterfly(&a[32], &a[33]) - babybear.Butterfly(&a[34], &a[35]) - babybear.Butterfly(&a[36], &a[37]) - babybear.Butterfly(&a[38], &a[39]) - babybear.Butterfly(&a[40], &a[41]) - babybear.Butterfly(&a[42], &a[43]) - babybear.Butterfly(&a[44], &a[45]) - babybear.Butterfly(&a[46], &a[47]) - babybear.Butterfly(&a[48], &a[49]) - babybear.Butterfly(&a[50], &a[51]) - babybear.Butterfly(&a[52], &a[53]) - babybear.Butterfly(&a[54], &a[55]) - babybear.Butterfly(&a[56], &a[57]) - babybear.Butterfly(&a[58], &a[59]) - babybear.Butterfly(&a[60], &a[61]) - babybear.Butterfly(&a[62], &a[63]) -} - -// 
precomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table -// it then return all elements in the correct order for the unrolled FFT. -func precomputeTwiddlesCoset(generator, shifter babybear.Element) []babybear.Element { - toReturn := make([]babybear.Element, 63) - var r, s babybear.Element - e := new(big.Int) - - s = shifter - for k := 0; k < 5; k++ { - s.Square(&s) - } - toReturn[0] = s - s = shifter - for k := 0; k < 4; k++ { - s.Square(&s) - } - toReturn[1] = s - r.Exp(generator, e.SetUint64(uint64(1<<4*1))) - toReturn[2].Mul(&r, &s) - s = shifter - for k := 0; k < 3; k++ { - s.Square(&s) - } - toReturn[3] = s - r.Exp(generator, e.SetUint64(uint64(1<<3*2))) - toReturn[4].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*1))) - toReturn[5].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*3))) - toReturn[6].Mul(&r, &s) - s = shifter - for k := 0; k < 2; k++ { - s.Square(&s) - } - toReturn[7] = s - r.Exp(generator, e.SetUint64(uint64(1<<2*4))) - toReturn[8].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*2))) - toReturn[9].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*6))) - toReturn[10].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*1))) - toReturn[11].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*5))) - toReturn[12].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*3))) - toReturn[13].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*7))) - toReturn[14].Mul(&r, &s) - s = shifter - for k := 0; k < 1; k++ { - s.Square(&s) - } - toReturn[15] = s - r.Exp(generator, e.SetUint64(uint64(1<<1*8))) - toReturn[16].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*4))) - toReturn[17].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*12))) - toReturn[18].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*2))) - toReturn[19].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*10))) - toReturn[20].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*6))) - toReturn[21].Mul(&r, &s) - 
r.Exp(generator, e.SetUint64(uint64(1<<1*14))) - toReturn[22].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*1))) - toReturn[23].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*9))) - toReturn[24].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*5))) - toReturn[25].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*13))) - toReturn[26].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*3))) - toReturn[27].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*11))) - toReturn[28].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*7))) - toReturn[29].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*15))) - toReturn[30].Mul(&r, &s) - s = shifter - for k := 0; k < 0; k++ { - s.Square(&s) - } - toReturn[31] = s - r.Exp(generator, e.SetUint64(uint64(1<<0*16))) - toReturn[32].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*8))) - toReturn[33].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*24))) - toReturn[34].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*4))) - toReturn[35].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*20))) - toReturn[36].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*12))) - toReturn[37].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*28))) - toReturn[38].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*2))) - toReturn[39].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*18))) - toReturn[40].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*10))) - toReturn[41].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*26))) - toReturn[42].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*6))) - toReturn[43].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*22))) - toReturn[44].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*14))) - toReturn[45].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*30))) - toReturn[46].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*1))) - toReturn[47].Mul(&r, &s) - r.Exp(generator, 
e.SetUint64(uint64(1<<0*17))) - toReturn[48].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*9))) - toReturn[49].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*25))) - toReturn[50].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*5))) - toReturn[51].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*21))) - toReturn[52].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*13))) - toReturn[53].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*29))) - toReturn[54].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*3))) - toReturn[55].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*19))) - toReturn[56].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*11))) - toReturn[57].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*27))) - toReturn[58].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*7))) - toReturn[59].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*23))) - toReturn[60].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*15))) - toReturn[61].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*31))) - toReturn[62].Mul(&r, &s) - return toReturn -} diff --git a/field/babybear/sis/sis_test.go b/field/babybear/sis/sis_test.go index cab86f4ea9..6d041515e7 100644 --- a/field/babybear/sis/sis_test.go +++ b/field/babybear/sis/sis_test.go @@ -265,32 +265,3 @@ func benchmarkSIS(b *testing.B, input []babybear.Element, sparse bool, logTwoBou }) } - -func TestUnrolledFFT(t *testing.T) { - assert := require.New(t) - - var shift babybear.Element - shift.SetRandom() - - const size = 64 - domain := fft.NewDomain(size, fft.WithShift(shift)) - - k1 := make([]babybear.Element, size) - for i := 0; i < size; i++ { - k1[i].SetRandom() - } - k2 := make([]babybear.Element, size) - copy(k2, k1) - - // default FFT - domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - - // unrolled FFT - twiddlesCoset := precomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) - fft64(k2, twiddlesCoset) - - // 
compare results - for i := 0; i < size; i++ { - assert.True(k1[i].Equal(&k2[i]), "i = %d", i) - } -} diff --git a/field/generator/generator_sis.go b/field/generator/generator_sis.go index f8f456b29c..4a12b19379 100644 --- a/field/generator/generator_sis.go +++ b/field/generator/generator_sis.go @@ -1,7 +1,10 @@ package generator import ( + "fmt" + "math/big" "path/filepath" + "strings" "github.com/consensys/bavard" "github.com/consensys/gnark-crypto/field/generator/config" @@ -17,24 +20,34 @@ func generateSIS(F *config.Field, outputDir string) error { outputDir = filepath.Join(outputDir, "sis") entries := []bavard.Entry{ - {File: filepath.Join(outputDir, "sis_fft.go"), Templates: []string{"fft.go.tmpl"}}, {File: filepath.Join(outputDir, "sis.go"), Templates: []string{"sis.go.tmpl"}}, {File: filepath.Join(outputDir, "sis_test.go"), Templates: []string{"sis.test.go.tmpl"}}, } - - funcs := make(map[string]interface{}) - funcs["bitReverse"] = bitReverse - - bavardOpts := []func(*bavard.Bavard) error{bavard.Funcs(funcs)} + // only on field byte size == 32, we unroll a 64-wide FFT (used in linea for bls12-377) + if F.NbBytes == 32 { + entries = append(entries, bavard.Entry{File: filepath.Join(outputDir, "sis_fft.go"), Templates: []string{"fft.go.tmpl"}}) + // TODO @gbotrel : add test for fft + } type sisTemplateData struct { FF string FieldPackagePath string + HasUnrolledFFT bool } data := &sisTemplateData{ FF: F.PackageName, FieldPackagePath: fieldImportPath, + HasUnrolledFFT: F.NbBytes == 32, + } + + funcs := make(map[string]interface{}) + funcs["bitReverse"] = bitReverse + funcs["pow"] = pow + + bavardOpts := []func(*bavard.Bavard) error{bavard.Funcs(funcs)} + if data.HasUnrolledFFT { + funcs["partialFFT"] = partialFFT } bgen := bavard.NewBatchGenerator("Consensys Software Inc.", 2020, "consensys/gnark-crypto") @@ -51,3 +64,157 @@ func generateSIS(F *config.Field, outputDir string) error { return runFormatters(outputDir) } + +// From 
linea-monorepo/prover/crypto/ringsis/templates/partial_fft.go at 6e15740 + +func partialFFT(domainSize, numField int, mask int64) string { + + gen := initializePartialFFTCodeGen(int64(domainSize), int64(numField), mask) + + gen.header() + gen.indent() + + var ( + numStages int = log2Ceil(int(domainSize)) + numSplits int = 1 + splitSize int = int(domainSize) + ) + + for level := 0; level < numStages; level++ { + for s := 0; s < numSplits; s++ { + for k := 0; k < splitSize/2; k++ { + gen.twiddleMulLine(s*splitSize+splitSize/2+k, numSplits-1+s) + } + } + + for s := 0; s < numSplits; s++ { + for k := 0; k < splitSize/2; k++ { + gen.butterFlyLine(s*splitSize+k, s*splitSize+splitSize/2+k) + } + } + + splitSize /= 2 + numSplits *= 2 + } + + gen.desindent() + gen.tail() + return gen.Builder.String() +} + +func initializePartialFFTCodeGen(domainSize, numField, mask int64) PartialFFTCodeGen { + res := PartialFFTCodeGen{ + DomainSize: int(domainSize), + NumField: int(numField), + Mask: int(mask), + IsZero: make([]bool, domainSize), + Builder: &strings.Builder{}, + NumIndent: 0, + } + + for i := range res.IsZero { + var ( + fieldSize = domainSize / numField + bit = i / int(fieldSize) + isZero = ((mask >> bit) & 1) == 0 + ) + + res.IsZero[i] = isZero + } + + return res +} + +type PartialFFTCodeGen struct { + DomainSize int + NumField int + Mask int + Builder *strings.Builder + NumIndent int + IsZero []bool +} + +func (p *PartialFFTCodeGen) header() { + writeIndent(p.Builder, p.NumIndent) + line := fmt.Sprintf("func partialFFT_%v(a, twiddles fr.Vector) {\n", p.Mask) + p.Builder.WriteString(line) +} + +func (p *PartialFFTCodeGen) tail() { + writeIndent(p.Builder, p.NumIndent) + p.Builder.WriteString("}\n") +} + +func (p *PartialFFTCodeGen) butterFlyLine(i, j int) { + allZeroes := p.IsZero[i] && p.IsZero[j] + if allZeroes { + return + } + + p.IsZero[i] = false + p.IsZero[j] = false + + writeIndent(p.Builder, p.NumIndent) + + line := fmt.Sprintf("fr.Butterfly(&a[%v], &a[%v])\n", i, 
j) + if _, err := p.Builder.WriteString(line); err != nil { + panic(err) + } +} + +func (p *PartialFFTCodeGen) twiddleMulLine(i, twidPos int) { + if p.IsZero[i] { + return + } + + writeIndent(p.Builder, p.NumIndent) + + line := fmt.Sprintf("a[%v].Mul(&a[%v], &twiddles[%v])\n", i, i, twidPos) + if _, err := p.Builder.WriteString(line); err != nil { + panic(err) + } +} + +func (p *PartialFFTCodeGen) desindent() { + p.NumIndent-- +} + +func (p *PartialFFTCodeGen) indent() { + p.NumIndent++ +} + +func writeIndent(w *strings.Builder, n int) { + for i := 0; i < n; i++ { + w.WriteString("\t") + } +} + +func log2Floor(a int) int { + res := 0 + for i := a; i > 1; i = i >> 1 { + res++ + } + return res +} + +func log2Ceil(a int) int { + floor := log2Floor(a) + if a != 1< copy(r.Ag[i], r.A[i]) - // r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - r.smallFFT(r.Ag[i]) + r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } }) @@ -163,7 +172,7 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { // inner hash it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8) for i := 0; i < len(r.Ag); i++ { - r.InnerHash(it, res, k, i) + r.InnerHash(it, res, k, r.kz, i, ^uint64(0)) } // reduces mod Xᵈ+1 @@ -172,20 +181,15 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error { return nil } -func (r *RSis) InnerHash(it *LimbIterator, res, k {{ .FF }}.Vector, polId int) { +func (r *RSis) InnerHash(it *LimbIterator, res, k, kz {{ .FF }}.Vector, polId int, mask uint64) { + copy(k, kz) zero := {{$tReturn}}(0) for j := 0; j < r.Degree; j++ { l, ok := it.NextLimb() if !ok { - // we need to pad; note that we should use a deterministic padding - // other than 0, but it is not an issue for the current use cases. 
- for m := j; m < r.Degree; m++ { - k[m].SetZero() - } break } zero |= l - k[j].SetZero() k[j][0] = l } if zero == 0 { @@ -194,9 +198,13 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k {{ .FF }}.Vector, polId int) { return } - // this is equivalent to: - // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - r.smallFFT(k) + {{- if .HasUnrolledFFT}} + // this is equivalent to: + // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + r.smallFFT(k, mask) + {{- else}} + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) + {{- end}} // we compute k * r.Ag[polId] in ℤ_{p}[X]/Xᵈ+1. // k and r.Ag[polId] are in evaluation form on √(g) * diff --git a/field/generator/internal/templates/sis/sis.test.go.tmpl b/field/generator/internal/templates/sis/sis.test.go.tmpl index 48d985bdec..e1ef0c7bb3 100644 --- a/field/generator/internal/templates/sis/sis.test.go.tmpl +++ b/field/generator/internal/templates/sis/sis.test.go.tmpl @@ -263,33 +263,59 @@ func benchmarkSIS(b *testing.B, input []{{ .FF }}.Element, sparse bool, logTwoBo }) } +{{- if .HasUnrolledFFT}} -func TestUnrolledFFT(t *testing.T) { +{{- $bitPerField := 256}} +{{- $limbPerField := 16}} +{{- $fieldPerPoly := 4}} +{{- $numMask := pow 2 $fieldPerPoly}} + +func TestPartialFFT(t *testing.T) { assert := require.New(t) - var shift {{ .FF }}.Element - shift.SetRandom() + var ( + domain = fft.NewDomain(64) + twiddles = precomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) + ) + + for mask := 0; mask < {{$numMask}}; mask++ { + + var ( + a = vec123456() + b = vec123456() + ) - const size = 64 - domain := fft.NewDomain(size, fft.WithShift(shift)) + zeroizeWithMask(a, mask) + zeroizeWithMask(b, mask) - k1 := make([]{{ .FF }}.Element, size) - for i := 0; i < size; i++ { - k1[i].SetRandom() + domain.FFT(a, fft.DIF, fft.OnCoset()) + partialFFT_64[mask](b, twiddles) + for i := range a { + assert.True(a[i].Equal(&b[i]), "mismatch at index %d", i) + } } - k2 := make([]{{ .FF }}.Element, 
size) - copy(k2, k1) - // default FFT - domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) +} + +func vec123456() []fr.Element { + vec := make([]fr.Element, 64) + for i := range vec { + vec[i].SetInt64(int64(i)) + } + return vec +} - // unrolled FFT - twiddlesCoset := precomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) - fft64(k2, twiddlesCoset) +func zeroizeWithMask(v []fr.Element, mask int) { + for i := 0; i < {{$fieldPerPoly}}; i++ { + if (mask>>i)&1 == 1 { + continue + } - // compare results - for i := 0; i < size; i++ { - assert.True(k1[i].Equal(&k2[i]), "i = %d", i) + for j := 0; j < {{$limbPerField}}; j++ { + v[{{$limbPerField}}*i+j].SetZero() + } } } + +{{- end}} diff --git a/field/goldilocks/sis/sis.go b/field/goldilocks/sis/sis.go index c810ffffd1..b12abde180 100644 --- a/field/goldilocks/sis/sis.go +++ b/field/goldilocks/sis/sis.go @@ -38,8 +38,7 @@ type RSis struct { maxNbElementsToHash int - smallFFT func(goldilocks.Vector) - cosetTable []goldilocks.Element // used in conjunction with the smallFFT; + kz goldilocks.Vector // zeroes used to zeroize the limbs buffer faster. } // NewRSis creates an instance of RSis. @@ -97,28 +96,10 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R Domain: fft.NewDomain(uint64(degree), fft.WithShift(shift)), A: make([][]goldilocks.Element, n), Ag: make([][]goldilocks.Element, n), + kz: make(goldilocks.Vector, degree), maxNbElementsToHash: maxNbElementsToHash, } - r.cosetTable, err = r.Domain.CosetTable() - if err != nil { - return nil, err - } - - // perf note: we have a dedicated path for 64, as it correspond to the parameters - // used by linea-monorepo prover with bls12377 curve. - // once the linea prover switches to smaller fields, this path can be removed. 
- if r.Domain.Cardinality == 64 { - twiddlesCoset := precomputeTwiddlesCoset(r.Domain.Generator, shift) - r.smallFFT = func(p goldilocks.Vector) { - fft64(p, twiddlesCoset) - } - } else { - r.smallFFT = func(p goldilocks.Vector) { - p.Mul(p, goldilocks.Vector(r.cosetTable)) - r.Domain.FFT(p, fft.DIF) - } - } // filling A a := make([]goldilocks.Element, n*r.Degree) ag := make([]goldilocks.Element, n*r.Degree) @@ -134,8 +115,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - // r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - r.smallFFT(r.Ag[i]) + r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } }) @@ -163,7 +143,7 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { // inner hash it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8) for i := 0; i < len(r.Ag); i++ { - r.InnerHash(it, res, k, i) + r.InnerHash(it, res, k, r.kz, i, ^uint64(0)) } // reduces mod Xᵈ+1 @@ -172,20 +152,15 @@ func (r *RSis) Hash(v, res []goldilocks.Element) error { return nil } -func (r *RSis) InnerHash(it *LimbIterator, res, k goldilocks.Vector, polId int) { +func (r *RSis) InnerHash(it *LimbIterator, res, k, kz goldilocks.Vector, polId int, mask uint64) { + copy(k, kz) zero := uint64(0) for j := 0; j < r.Degree; j++ { l, ok := it.NextLimb() if !ok { - // we need to pad; note that we should use a deterministic padding - // other than 0, but it is not an issue for the current use cases. 
- for m := j; m < r.Degree; m++ { - k[m].SetZero() - } break } zero |= l - k[j].SetZero() k[j][0] = l } if zero == 0 { @@ -193,10 +168,7 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k goldilocks.Vector, polId int) // we can skip this, FFT(0) = 0 return } - - // this is equivalent to: - // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - r.smallFFT(k) + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // we compute k * r.Ag[polId] in ℤ_{p}[X]/Xᵈ+1. // k and r.Ag[polId] are in evaluation form on √(g) * diff --git a/field/goldilocks/sis/sis_fft.go b/field/goldilocks/sis/sis_fft.go deleted file mode 100644 index f22f578847..0000000000 --- a/field/goldilocks/sis/sis_fft.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2020-2025 Consensys Software Inc. -// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. - -// Code generated by consensys/gnark-crypto DO NOT EDIT - -package sis - -import ( - "github.com/consensys/gnark-crypto/field/goldilocks" - "math/big" -) - -// fft64 unrolls an FFT with domain.Cardinality == 64 -// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see precomputeTwiddlesCoset -func fft64(a []goldilocks.Element, twiddlesCoset []goldilocks.Element) { - - a[32].Mul(&a[32], &twiddlesCoset[0]) - a[33].Mul(&a[33], &twiddlesCoset[0]) - a[34].Mul(&a[34], &twiddlesCoset[0]) - a[35].Mul(&a[35], &twiddlesCoset[0]) - a[36].Mul(&a[36], &twiddlesCoset[0]) - a[37].Mul(&a[37], &twiddlesCoset[0]) - a[38].Mul(&a[38], &twiddlesCoset[0]) - a[39].Mul(&a[39], &twiddlesCoset[0]) - a[40].Mul(&a[40], &twiddlesCoset[0]) - a[41].Mul(&a[41], &twiddlesCoset[0]) - a[42].Mul(&a[42], &twiddlesCoset[0]) - a[43].Mul(&a[43], &twiddlesCoset[0]) - a[44].Mul(&a[44], &twiddlesCoset[0]) - a[45].Mul(&a[45], &twiddlesCoset[0]) - a[46].Mul(&a[46], &twiddlesCoset[0]) - a[47].Mul(&a[47], &twiddlesCoset[0]) - a[48].Mul(&a[48], 
&twiddlesCoset[0]) - a[49].Mul(&a[49], &twiddlesCoset[0]) - a[50].Mul(&a[50], &twiddlesCoset[0]) - a[51].Mul(&a[51], &twiddlesCoset[0]) - a[52].Mul(&a[52], &twiddlesCoset[0]) - a[53].Mul(&a[53], &twiddlesCoset[0]) - a[54].Mul(&a[54], &twiddlesCoset[0]) - a[55].Mul(&a[55], &twiddlesCoset[0]) - a[56].Mul(&a[56], &twiddlesCoset[0]) - a[57].Mul(&a[57], &twiddlesCoset[0]) - a[58].Mul(&a[58], &twiddlesCoset[0]) - a[59].Mul(&a[59], &twiddlesCoset[0]) - a[60].Mul(&a[60], &twiddlesCoset[0]) - a[61].Mul(&a[61], &twiddlesCoset[0]) - a[62].Mul(&a[62], &twiddlesCoset[0]) - a[63].Mul(&a[63], &twiddlesCoset[0]) - goldilocks.Butterfly(&a[0], &a[32]) - goldilocks.Butterfly(&a[1], &a[33]) - goldilocks.Butterfly(&a[2], &a[34]) - goldilocks.Butterfly(&a[3], &a[35]) - goldilocks.Butterfly(&a[4], &a[36]) - goldilocks.Butterfly(&a[5], &a[37]) - goldilocks.Butterfly(&a[6], &a[38]) - goldilocks.Butterfly(&a[7], &a[39]) - goldilocks.Butterfly(&a[8], &a[40]) - goldilocks.Butterfly(&a[9], &a[41]) - goldilocks.Butterfly(&a[10], &a[42]) - goldilocks.Butterfly(&a[11], &a[43]) - goldilocks.Butterfly(&a[12], &a[44]) - goldilocks.Butterfly(&a[13], &a[45]) - goldilocks.Butterfly(&a[14], &a[46]) - goldilocks.Butterfly(&a[15], &a[47]) - goldilocks.Butterfly(&a[16], &a[48]) - goldilocks.Butterfly(&a[17], &a[49]) - goldilocks.Butterfly(&a[18], &a[50]) - goldilocks.Butterfly(&a[19], &a[51]) - goldilocks.Butterfly(&a[20], &a[52]) - goldilocks.Butterfly(&a[21], &a[53]) - goldilocks.Butterfly(&a[22], &a[54]) - goldilocks.Butterfly(&a[23], &a[55]) - goldilocks.Butterfly(&a[24], &a[56]) - goldilocks.Butterfly(&a[25], &a[57]) - goldilocks.Butterfly(&a[26], &a[58]) - goldilocks.Butterfly(&a[27], &a[59]) - goldilocks.Butterfly(&a[28], &a[60]) - goldilocks.Butterfly(&a[29], &a[61]) - goldilocks.Butterfly(&a[30], &a[62]) - goldilocks.Butterfly(&a[31], &a[63]) - a[16].Mul(&a[16], &twiddlesCoset[1]) - a[17].Mul(&a[17], &twiddlesCoset[1]) - a[18].Mul(&a[18], &twiddlesCoset[1]) - a[19].Mul(&a[19], &twiddlesCoset[1]) - 
a[20].Mul(&a[20], &twiddlesCoset[1]) - a[21].Mul(&a[21], &twiddlesCoset[1]) - a[22].Mul(&a[22], &twiddlesCoset[1]) - a[23].Mul(&a[23], &twiddlesCoset[1]) - a[24].Mul(&a[24], &twiddlesCoset[1]) - a[25].Mul(&a[25], &twiddlesCoset[1]) - a[26].Mul(&a[26], &twiddlesCoset[1]) - a[27].Mul(&a[27], &twiddlesCoset[1]) - a[28].Mul(&a[28], &twiddlesCoset[1]) - a[29].Mul(&a[29], &twiddlesCoset[1]) - a[30].Mul(&a[30], &twiddlesCoset[1]) - a[31].Mul(&a[31], &twiddlesCoset[1]) - a[48].Mul(&a[48], &twiddlesCoset[2]) - a[49].Mul(&a[49], &twiddlesCoset[2]) - a[50].Mul(&a[50], &twiddlesCoset[2]) - a[51].Mul(&a[51], &twiddlesCoset[2]) - a[52].Mul(&a[52], &twiddlesCoset[2]) - a[53].Mul(&a[53], &twiddlesCoset[2]) - a[54].Mul(&a[54], &twiddlesCoset[2]) - a[55].Mul(&a[55], &twiddlesCoset[2]) - a[56].Mul(&a[56], &twiddlesCoset[2]) - a[57].Mul(&a[57], &twiddlesCoset[2]) - a[58].Mul(&a[58], &twiddlesCoset[2]) - a[59].Mul(&a[59], &twiddlesCoset[2]) - a[60].Mul(&a[60], &twiddlesCoset[2]) - a[61].Mul(&a[61], &twiddlesCoset[2]) - a[62].Mul(&a[62], &twiddlesCoset[2]) - a[63].Mul(&a[63], &twiddlesCoset[2]) - goldilocks.Butterfly(&a[0], &a[16]) - goldilocks.Butterfly(&a[1], &a[17]) - goldilocks.Butterfly(&a[2], &a[18]) - goldilocks.Butterfly(&a[3], &a[19]) - goldilocks.Butterfly(&a[4], &a[20]) - goldilocks.Butterfly(&a[5], &a[21]) - goldilocks.Butterfly(&a[6], &a[22]) - goldilocks.Butterfly(&a[7], &a[23]) - goldilocks.Butterfly(&a[8], &a[24]) - goldilocks.Butterfly(&a[9], &a[25]) - goldilocks.Butterfly(&a[10], &a[26]) - goldilocks.Butterfly(&a[11], &a[27]) - goldilocks.Butterfly(&a[12], &a[28]) - goldilocks.Butterfly(&a[13], &a[29]) - goldilocks.Butterfly(&a[14], &a[30]) - goldilocks.Butterfly(&a[15], &a[31]) - goldilocks.Butterfly(&a[32], &a[48]) - goldilocks.Butterfly(&a[33], &a[49]) - goldilocks.Butterfly(&a[34], &a[50]) - goldilocks.Butterfly(&a[35], &a[51]) - goldilocks.Butterfly(&a[36], &a[52]) - goldilocks.Butterfly(&a[37], &a[53]) - goldilocks.Butterfly(&a[38], &a[54]) - 
goldilocks.Butterfly(&a[39], &a[55]) - goldilocks.Butterfly(&a[40], &a[56]) - goldilocks.Butterfly(&a[41], &a[57]) - goldilocks.Butterfly(&a[42], &a[58]) - goldilocks.Butterfly(&a[43], &a[59]) - goldilocks.Butterfly(&a[44], &a[60]) - goldilocks.Butterfly(&a[45], &a[61]) - goldilocks.Butterfly(&a[46], &a[62]) - goldilocks.Butterfly(&a[47], &a[63]) - a[8].Mul(&a[8], &twiddlesCoset[3]) - a[9].Mul(&a[9], &twiddlesCoset[3]) - a[10].Mul(&a[10], &twiddlesCoset[3]) - a[11].Mul(&a[11], &twiddlesCoset[3]) - a[12].Mul(&a[12], &twiddlesCoset[3]) - a[13].Mul(&a[13], &twiddlesCoset[3]) - a[14].Mul(&a[14], &twiddlesCoset[3]) - a[15].Mul(&a[15], &twiddlesCoset[3]) - a[24].Mul(&a[24], &twiddlesCoset[4]) - a[25].Mul(&a[25], &twiddlesCoset[4]) - a[26].Mul(&a[26], &twiddlesCoset[4]) - a[27].Mul(&a[27], &twiddlesCoset[4]) - a[28].Mul(&a[28], &twiddlesCoset[4]) - a[29].Mul(&a[29], &twiddlesCoset[4]) - a[30].Mul(&a[30], &twiddlesCoset[4]) - a[31].Mul(&a[31], &twiddlesCoset[4]) - a[40].Mul(&a[40], &twiddlesCoset[5]) - a[41].Mul(&a[41], &twiddlesCoset[5]) - a[42].Mul(&a[42], &twiddlesCoset[5]) - a[43].Mul(&a[43], &twiddlesCoset[5]) - a[44].Mul(&a[44], &twiddlesCoset[5]) - a[45].Mul(&a[45], &twiddlesCoset[5]) - a[46].Mul(&a[46], &twiddlesCoset[5]) - a[47].Mul(&a[47], &twiddlesCoset[5]) - a[56].Mul(&a[56], &twiddlesCoset[6]) - a[57].Mul(&a[57], &twiddlesCoset[6]) - a[58].Mul(&a[58], &twiddlesCoset[6]) - a[59].Mul(&a[59], &twiddlesCoset[6]) - a[60].Mul(&a[60], &twiddlesCoset[6]) - a[61].Mul(&a[61], &twiddlesCoset[6]) - a[62].Mul(&a[62], &twiddlesCoset[6]) - a[63].Mul(&a[63], &twiddlesCoset[6]) - goldilocks.Butterfly(&a[0], &a[8]) - goldilocks.Butterfly(&a[1], &a[9]) - goldilocks.Butterfly(&a[2], &a[10]) - goldilocks.Butterfly(&a[3], &a[11]) - goldilocks.Butterfly(&a[4], &a[12]) - goldilocks.Butterfly(&a[5], &a[13]) - goldilocks.Butterfly(&a[6], &a[14]) - goldilocks.Butterfly(&a[7], &a[15]) - goldilocks.Butterfly(&a[16], &a[24]) - goldilocks.Butterfly(&a[17], &a[25]) - 
goldilocks.Butterfly(&a[18], &a[26]) - goldilocks.Butterfly(&a[19], &a[27]) - goldilocks.Butterfly(&a[20], &a[28]) - goldilocks.Butterfly(&a[21], &a[29]) - goldilocks.Butterfly(&a[22], &a[30]) - goldilocks.Butterfly(&a[23], &a[31]) - goldilocks.Butterfly(&a[32], &a[40]) - goldilocks.Butterfly(&a[33], &a[41]) - goldilocks.Butterfly(&a[34], &a[42]) - goldilocks.Butterfly(&a[35], &a[43]) - goldilocks.Butterfly(&a[36], &a[44]) - goldilocks.Butterfly(&a[37], &a[45]) - goldilocks.Butterfly(&a[38], &a[46]) - goldilocks.Butterfly(&a[39], &a[47]) - goldilocks.Butterfly(&a[48], &a[56]) - goldilocks.Butterfly(&a[49], &a[57]) - goldilocks.Butterfly(&a[50], &a[58]) - goldilocks.Butterfly(&a[51], &a[59]) - goldilocks.Butterfly(&a[52], &a[60]) - goldilocks.Butterfly(&a[53], &a[61]) - goldilocks.Butterfly(&a[54], &a[62]) - goldilocks.Butterfly(&a[55], &a[63]) - a[4].Mul(&a[4], &twiddlesCoset[7]) - a[5].Mul(&a[5], &twiddlesCoset[7]) - a[6].Mul(&a[6], &twiddlesCoset[7]) - a[7].Mul(&a[7], &twiddlesCoset[7]) - a[12].Mul(&a[12], &twiddlesCoset[8]) - a[13].Mul(&a[13], &twiddlesCoset[8]) - a[14].Mul(&a[14], &twiddlesCoset[8]) - a[15].Mul(&a[15], &twiddlesCoset[8]) - a[20].Mul(&a[20], &twiddlesCoset[9]) - a[21].Mul(&a[21], &twiddlesCoset[9]) - a[22].Mul(&a[22], &twiddlesCoset[9]) - a[23].Mul(&a[23], &twiddlesCoset[9]) - a[28].Mul(&a[28], &twiddlesCoset[10]) - a[29].Mul(&a[29], &twiddlesCoset[10]) - a[30].Mul(&a[30], &twiddlesCoset[10]) - a[31].Mul(&a[31], &twiddlesCoset[10]) - a[36].Mul(&a[36], &twiddlesCoset[11]) - a[37].Mul(&a[37], &twiddlesCoset[11]) - a[38].Mul(&a[38], &twiddlesCoset[11]) - a[39].Mul(&a[39], &twiddlesCoset[11]) - a[44].Mul(&a[44], &twiddlesCoset[12]) - a[45].Mul(&a[45], &twiddlesCoset[12]) - a[46].Mul(&a[46], &twiddlesCoset[12]) - a[47].Mul(&a[47], &twiddlesCoset[12]) - a[52].Mul(&a[52], &twiddlesCoset[13]) - a[53].Mul(&a[53], &twiddlesCoset[13]) - a[54].Mul(&a[54], &twiddlesCoset[13]) - a[55].Mul(&a[55], &twiddlesCoset[13]) - a[60].Mul(&a[60], &twiddlesCoset[14]) - 
a[61].Mul(&a[61], &twiddlesCoset[14]) - a[62].Mul(&a[62], &twiddlesCoset[14]) - a[63].Mul(&a[63], &twiddlesCoset[14]) - goldilocks.Butterfly(&a[0], &a[4]) - goldilocks.Butterfly(&a[1], &a[5]) - goldilocks.Butterfly(&a[2], &a[6]) - goldilocks.Butterfly(&a[3], &a[7]) - goldilocks.Butterfly(&a[8], &a[12]) - goldilocks.Butterfly(&a[9], &a[13]) - goldilocks.Butterfly(&a[10], &a[14]) - goldilocks.Butterfly(&a[11], &a[15]) - goldilocks.Butterfly(&a[16], &a[20]) - goldilocks.Butterfly(&a[17], &a[21]) - goldilocks.Butterfly(&a[18], &a[22]) - goldilocks.Butterfly(&a[19], &a[23]) - goldilocks.Butterfly(&a[24], &a[28]) - goldilocks.Butterfly(&a[25], &a[29]) - goldilocks.Butterfly(&a[26], &a[30]) - goldilocks.Butterfly(&a[27], &a[31]) - goldilocks.Butterfly(&a[32], &a[36]) - goldilocks.Butterfly(&a[33], &a[37]) - goldilocks.Butterfly(&a[34], &a[38]) - goldilocks.Butterfly(&a[35], &a[39]) - goldilocks.Butterfly(&a[40], &a[44]) - goldilocks.Butterfly(&a[41], &a[45]) - goldilocks.Butterfly(&a[42], &a[46]) - goldilocks.Butterfly(&a[43], &a[47]) - goldilocks.Butterfly(&a[48], &a[52]) - goldilocks.Butterfly(&a[49], &a[53]) - goldilocks.Butterfly(&a[50], &a[54]) - goldilocks.Butterfly(&a[51], &a[55]) - goldilocks.Butterfly(&a[56], &a[60]) - goldilocks.Butterfly(&a[57], &a[61]) - goldilocks.Butterfly(&a[58], &a[62]) - goldilocks.Butterfly(&a[59], &a[63]) - a[2].Mul(&a[2], &twiddlesCoset[15]) - a[3].Mul(&a[3], &twiddlesCoset[15]) - a[6].Mul(&a[6], &twiddlesCoset[16]) - a[7].Mul(&a[7], &twiddlesCoset[16]) - a[10].Mul(&a[10], &twiddlesCoset[17]) - a[11].Mul(&a[11], &twiddlesCoset[17]) - a[14].Mul(&a[14], &twiddlesCoset[18]) - a[15].Mul(&a[15], &twiddlesCoset[18]) - a[18].Mul(&a[18], &twiddlesCoset[19]) - a[19].Mul(&a[19], &twiddlesCoset[19]) - a[22].Mul(&a[22], &twiddlesCoset[20]) - a[23].Mul(&a[23], &twiddlesCoset[20]) - a[26].Mul(&a[26], &twiddlesCoset[21]) - a[27].Mul(&a[27], &twiddlesCoset[21]) - a[30].Mul(&a[30], &twiddlesCoset[22]) - a[31].Mul(&a[31], &twiddlesCoset[22]) - 
a[34].Mul(&a[34], &twiddlesCoset[23]) - a[35].Mul(&a[35], &twiddlesCoset[23]) - a[38].Mul(&a[38], &twiddlesCoset[24]) - a[39].Mul(&a[39], &twiddlesCoset[24]) - a[42].Mul(&a[42], &twiddlesCoset[25]) - a[43].Mul(&a[43], &twiddlesCoset[25]) - a[46].Mul(&a[46], &twiddlesCoset[26]) - a[47].Mul(&a[47], &twiddlesCoset[26]) - a[50].Mul(&a[50], &twiddlesCoset[27]) - a[51].Mul(&a[51], &twiddlesCoset[27]) - a[54].Mul(&a[54], &twiddlesCoset[28]) - a[55].Mul(&a[55], &twiddlesCoset[28]) - a[58].Mul(&a[58], &twiddlesCoset[29]) - a[59].Mul(&a[59], &twiddlesCoset[29]) - a[62].Mul(&a[62], &twiddlesCoset[30]) - a[63].Mul(&a[63], &twiddlesCoset[30]) - goldilocks.Butterfly(&a[0], &a[2]) - goldilocks.Butterfly(&a[1], &a[3]) - goldilocks.Butterfly(&a[4], &a[6]) - goldilocks.Butterfly(&a[5], &a[7]) - goldilocks.Butterfly(&a[8], &a[10]) - goldilocks.Butterfly(&a[9], &a[11]) - goldilocks.Butterfly(&a[12], &a[14]) - goldilocks.Butterfly(&a[13], &a[15]) - goldilocks.Butterfly(&a[16], &a[18]) - goldilocks.Butterfly(&a[17], &a[19]) - goldilocks.Butterfly(&a[20], &a[22]) - goldilocks.Butterfly(&a[21], &a[23]) - goldilocks.Butterfly(&a[24], &a[26]) - goldilocks.Butterfly(&a[25], &a[27]) - goldilocks.Butterfly(&a[28], &a[30]) - goldilocks.Butterfly(&a[29], &a[31]) - goldilocks.Butterfly(&a[32], &a[34]) - goldilocks.Butterfly(&a[33], &a[35]) - goldilocks.Butterfly(&a[36], &a[38]) - goldilocks.Butterfly(&a[37], &a[39]) - goldilocks.Butterfly(&a[40], &a[42]) - goldilocks.Butterfly(&a[41], &a[43]) - goldilocks.Butterfly(&a[44], &a[46]) - goldilocks.Butterfly(&a[45], &a[47]) - goldilocks.Butterfly(&a[48], &a[50]) - goldilocks.Butterfly(&a[49], &a[51]) - goldilocks.Butterfly(&a[52], &a[54]) - goldilocks.Butterfly(&a[53], &a[55]) - goldilocks.Butterfly(&a[56], &a[58]) - goldilocks.Butterfly(&a[57], &a[59]) - goldilocks.Butterfly(&a[60], &a[62]) - goldilocks.Butterfly(&a[61], &a[63]) - a[1].Mul(&a[1], &twiddlesCoset[31]) - a[3].Mul(&a[3], &twiddlesCoset[32]) - a[5].Mul(&a[5], &twiddlesCoset[33]) - 
a[7].Mul(&a[7], &twiddlesCoset[34]) - a[9].Mul(&a[9], &twiddlesCoset[35]) - a[11].Mul(&a[11], &twiddlesCoset[36]) - a[13].Mul(&a[13], &twiddlesCoset[37]) - a[15].Mul(&a[15], &twiddlesCoset[38]) - a[17].Mul(&a[17], &twiddlesCoset[39]) - a[19].Mul(&a[19], &twiddlesCoset[40]) - a[21].Mul(&a[21], &twiddlesCoset[41]) - a[23].Mul(&a[23], &twiddlesCoset[42]) - a[25].Mul(&a[25], &twiddlesCoset[43]) - a[27].Mul(&a[27], &twiddlesCoset[44]) - a[29].Mul(&a[29], &twiddlesCoset[45]) - a[31].Mul(&a[31], &twiddlesCoset[46]) - a[33].Mul(&a[33], &twiddlesCoset[47]) - a[35].Mul(&a[35], &twiddlesCoset[48]) - a[37].Mul(&a[37], &twiddlesCoset[49]) - a[39].Mul(&a[39], &twiddlesCoset[50]) - a[41].Mul(&a[41], &twiddlesCoset[51]) - a[43].Mul(&a[43], &twiddlesCoset[52]) - a[45].Mul(&a[45], &twiddlesCoset[53]) - a[47].Mul(&a[47], &twiddlesCoset[54]) - a[49].Mul(&a[49], &twiddlesCoset[55]) - a[51].Mul(&a[51], &twiddlesCoset[56]) - a[53].Mul(&a[53], &twiddlesCoset[57]) - a[55].Mul(&a[55], &twiddlesCoset[58]) - a[57].Mul(&a[57], &twiddlesCoset[59]) - a[59].Mul(&a[59], &twiddlesCoset[60]) - a[61].Mul(&a[61], &twiddlesCoset[61]) - a[63].Mul(&a[63], &twiddlesCoset[62]) - goldilocks.Butterfly(&a[0], &a[1]) - goldilocks.Butterfly(&a[2], &a[3]) - goldilocks.Butterfly(&a[4], &a[5]) - goldilocks.Butterfly(&a[6], &a[7]) - goldilocks.Butterfly(&a[8], &a[9]) - goldilocks.Butterfly(&a[10], &a[11]) - goldilocks.Butterfly(&a[12], &a[13]) - goldilocks.Butterfly(&a[14], &a[15]) - goldilocks.Butterfly(&a[16], &a[17]) - goldilocks.Butterfly(&a[18], &a[19]) - goldilocks.Butterfly(&a[20], &a[21]) - goldilocks.Butterfly(&a[22], &a[23]) - goldilocks.Butterfly(&a[24], &a[25]) - goldilocks.Butterfly(&a[26], &a[27]) - goldilocks.Butterfly(&a[28], &a[29]) - goldilocks.Butterfly(&a[30], &a[31]) - goldilocks.Butterfly(&a[32], &a[33]) - goldilocks.Butterfly(&a[34], &a[35]) - goldilocks.Butterfly(&a[36], &a[37]) - goldilocks.Butterfly(&a[38], &a[39]) - goldilocks.Butterfly(&a[40], &a[41]) - goldilocks.Butterfly(&a[42], 
&a[43]) - goldilocks.Butterfly(&a[44], &a[45]) - goldilocks.Butterfly(&a[46], &a[47]) - goldilocks.Butterfly(&a[48], &a[49]) - goldilocks.Butterfly(&a[50], &a[51]) - goldilocks.Butterfly(&a[52], &a[53]) - goldilocks.Butterfly(&a[54], &a[55]) - goldilocks.Butterfly(&a[56], &a[57]) - goldilocks.Butterfly(&a[58], &a[59]) - goldilocks.Butterfly(&a[60], &a[61]) - goldilocks.Butterfly(&a[62], &a[63]) -} - -// precomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table -// it then return all elements in the correct order for the unrolled FFT. -func precomputeTwiddlesCoset(generator, shifter goldilocks.Element) []goldilocks.Element { - toReturn := make([]goldilocks.Element, 63) - var r, s goldilocks.Element - e := new(big.Int) - - s = shifter - for k := 0; k < 5; k++ { - s.Square(&s) - } - toReturn[0] = s - s = shifter - for k := 0; k < 4; k++ { - s.Square(&s) - } - toReturn[1] = s - r.Exp(generator, e.SetUint64(uint64(1<<4*1))) - toReturn[2].Mul(&r, &s) - s = shifter - for k := 0; k < 3; k++ { - s.Square(&s) - } - toReturn[3] = s - r.Exp(generator, e.SetUint64(uint64(1<<3*2))) - toReturn[4].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*1))) - toReturn[5].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*3))) - toReturn[6].Mul(&r, &s) - s = shifter - for k := 0; k < 2; k++ { - s.Square(&s) - } - toReturn[7] = s - r.Exp(generator, e.SetUint64(uint64(1<<2*4))) - toReturn[8].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*2))) - toReturn[9].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*6))) - toReturn[10].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*1))) - toReturn[11].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*5))) - toReturn[12].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*3))) - toReturn[13].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*7))) - toReturn[14].Mul(&r, &s) - s = shifter - for k := 0; k < 1; k++ { - s.Square(&s) - } - toReturn[15] = s - r.Exp(generator, 
e.SetUint64(uint64(1<<1*8))) - toReturn[16].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*4))) - toReturn[17].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*12))) - toReturn[18].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*2))) - toReturn[19].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*10))) - toReturn[20].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*6))) - toReturn[21].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*14))) - toReturn[22].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*1))) - toReturn[23].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*9))) - toReturn[24].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*5))) - toReturn[25].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*13))) - toReturn[26].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*3))) - toReturn[27].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*11))) - toReturn[28].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*7))) - toReturn[29].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*15))) - toReturn[30].Mul(&r, &s) - s = shifter - for k := 0; k < 0; k++ { - s.Square(&s) - } - toReturn[31] = s - r.Exp(generator, e.SetUint64(uint64(1<<0*16))) - toReturn[32].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*8))) - toReturn[33].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*24))) - toReturn[34].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*4))) - toReturn[35].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*20))) - toReturn[36].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*12))) - toReturn[37].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*28))) - toReturn[38].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*2))) - toReturn[39].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*18))) - toReturn[40].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*10))) - toReturn[41].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*26))) - 
toReturn[42].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*6))) - toReturn[43].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*22))) - toReturn[44].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*14))) - toReturn[45].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*30))) - toReturn[46].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*1))) - toReturn[47].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*17))) - toReturn[48].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*9))) - toReturn[49].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*25))) - toReturn[50].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*5))) - toReturn[51].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*21))) - toReturn[52].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*13))) - toReturn[53].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*29))) - toReturn[54].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*3))) - toReturn[55].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*19))) - toReturn[56].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*11))) - toReturn[57].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*27))) - toReturn[58].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*7))) - toReturn[59].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*23))) - toReturn[60].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*15))) - toReturn[61].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*31))) - toReturn[62].Mul(&r, &s) - return toReturn -} diff --git a/field/goldilocks/sis/sis_test.go b/field/goldilocks/sis/sis_test.go index 6c13d45602..480cbb8c33 100644 --- a/field/goldilocks/sis/sis_test.go +++ b/field/goldilocks/sis/sis_test.go @@ -265,32 +265,3 @@ func benchmarkSIS(b *testing.B, input []goldilocks.Element, sparse bool, logTwoB }) } - -func TestUnrolledFFT(t *testing.T) { - assert := require.New(t) - - var shift goldilocks.Element - shift.SetRandom() - - const size = 
64 - domain := fft.NewDomain(size, fft.WithShift(shift)) - - k1 := make([]goldilocks.Element, size) - for i := 0; i < size; i++ { - k1[i].SetRandom() - } - k2 := make([]goldilocks.Element, size) - copy(k2, k1) - - // default FFT - domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - - // unrolled FFT - twiddlesCoset := precomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) - fft64(k2, twiddlesCoset) - - // compare results - for i := 0; i < size; i++ { - assert.True(k1[i].Equal(&k2[i]), "i = %d", i) - } -} diff --git a/field/koalabear/sis/sis.go b/field/koalabear/sis/sis.go index ec5364b58e..85aeddb23c 100644 --- a/field/koalabear/sis/sis.go +++ b/field/koalabear/sis/sis.go @@ -38,8 +38,7 @@ type RSis struct { maxNbElementsToHash int - smallFFT func(koalabear.Vector) - cosetTable []koalabear.Element // used in conjunction with the smallFFT; + kz koalabear.Vector // zeroes used to zeroize the limbs buffer faster. } // NewRSis creates an instance of RSis. @@ -97,28 +96,10 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R Domain: fft.NewDomain(uint64(degree), fft.WithShift(shift)), A: make([][]koalabear.Element, n), Ag: make([][]koalabear.Element, n), + kz: make(koalabear.Vector, degree), maxNbElementsToHash: maxNbElementsToHash, } - r.cosetTable, err = r.Domain.CosetTable() - if err != nil { - return nil, err - } - - // perf note: we have a dedicated path for 64, as it correspond to the parameters - // used by linea-monorepo prover with bls12377 curve. - // once the linea prover switches to smaller fields, this path can be removed. 
- if r.Domain.Cardinality == 64 { - twiddlesCoset := precomputeTwiddlesCoset(r.Domain.Generator, shift) - r.smallFFT = func(p koalabear.Vector) { - fft64(p, twiddlesCoset) - } - } else { - r.smallFFT = func(p koalabear.Vector) { - p.Mul(p, koalabear.Vector(r.cosetTable)) - r.Domain.FFT(p, fft.DIF) - } - } // filling A a := make([]koalabear.Element, n*r.Degree) ag := make([]koalabear.Element, n*r.Degree) @@ -134,8 +115,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R // fill Ag the evaluation form of the polynomials in A on the coset √(g) * copy(r.Ag[i], r.A[i]) - // r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - r.smallFFT(r.Ag[i]) + r.Domain.FFT(r.Ag[i], fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) } }) @@ -163,7 +143,7 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { // inner hash it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8) for i := 0; i < len(r.Ag); i++ { - r.InnerHash(it, res, k, i) + r.InnerHash(it, res, k, r.kz, i, ^uint64(0)) } // reduces mod Xᵈ+1 @@ -172,20 +152,15 @@ func (r *RSis) Hash(v, res []koalabear.Element) error { return nil } -func (r *RSis) InnerHash(it *LimbIterator, res, k koalabear.Vector, polId int) { +func (r *RSis) InnerHash(it *LimbIterator, res, k, kz koalabear.Vector, polId int, mask uint64) { + copy(k, kz) zero := uint32(0) for j := 0; j < r.Degree; j++ { l, ok := it.NextLimb() if !ok { - // we need to pad; note that we should use a deterministic padding - // other than 0, but it is not an issue for the current use cases. 
- for m := j; m < r.Degree; m++ { - k[m].SetZero() - } break } zero |= l - k[j].SetZero() k[j][0] = l } if zero == 0 { @@ -193,10 +168,7 @@ func (r *RSis) InnerHash(it *LimbIterator, res, k koalabear.Vector, polId int) { // we can skip this, FFT(0) = 0 return } - - // this is equivalent to: - // r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - r.smallFFT(k) + r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) // we compute k * r.Ag[polId] in ℤ_{p}[X]/Xᵈ+1. // k and r.Ag[polId] are in evaluation form on √(g) * diff --git a/field/koalabear/sis/sis_fft.go b/field/koalabear/sis/sis_fft.go deleted file mode 100644 index 80e63acf11..0000000000 --- a/field/koalabear/sis/sis_fft.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2020-2025 Consensys Software Inc. -// Licensed under the Apache License, Version 2.0. See the LICENSE file for details. - -// Code generated by consensys/gnark-crypto DO NOT EDIT - -package sis - -import ( - "github.com/consensys/gnark-crypto/field/koalabear" - "math/big" -) - -// fft64 unrolls an FFT with domain.Cardinality == 64 -// equivalent code: r.Domain.FFT(k, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) -// twiddlesCoset must be pre-computed from twiddles and coset table, see precomputeTwiddlesCoset -func fft64(a []koalabear.Element, twiddlesCoset []koalabear.Element) { - - a[32].Mul(&a[32], &twiddlesCoset[0]) - a[33].Mul(&a[33], &twiddlesCoset[0]) - a[34].Mul(&a[34], &twiddlesCoset[0]) - a[35].Mul(&a[35], &twiddlesCoset[0]) - a[36].Mul(&a[36], &twiddlesCoset[0]) - a[37].Mul(&a[37], &twiddlesCoset[0]) - a[38].Mul(&a[38], &twiddlesCoset[0]) - a[39].Mul(&a[39], &twiddlesCoset[0]) - a[40].Mul(&a[40], &twiddlesCoset[0]) - a[41].Mul(&a[41], &twiddlesCoset[0]) - a[42].Mul(&a[42], &twiddlesCoset[0]) - a[43].Mul(&a[43], &twiddlesCoset[0]) - a[44].Mul(&a[44], &twiddlesCoset[0]) - a[45].Mul(&a[45], &twiddlesCoset[0]) - a[46].Mul(&a[46], &twiddlesCoset[0]) - a[47].Mul(&a[47], &twiddlesCoset[0]) - a[48].Mul(&a[48], &twiddlesCoset[0]) 
- a[49].Mul(&a[49], &twiddlesCoset[0]) - a[50].Mul(&a[50], &twiddlesCoset[0]) - a[51].Mul(&a[51], &twiddlesCoset[0]) - a[52].Mul(&a[52], &twiddlesCoset[0]) - a[53].Mul(&a[53], &twiddlesCoset[0]) - a[54].Mul(&a[54], &twiddlesCoset[0]) - a[55].Mul(&a[55], &twiddlesCoset[0]) - a[56].Mul(&a[56], &twiddlesCoset[0]) - a[57].Mul(&a[57], &twiddlesCoset[0]) - a[58].Mul(&a[58], &twiddlesCoset[0]) - a[59].Mul(&a[59], &twiddlesCoset[0]) - a[60].Mul(&a[60], &twiddlesCoset[0]) - a[61].Mul(&a[61], &twiddlesCoset[0]) - a[62].Mul(&a[62], &twiddlesCoset[0]) - a[63].Mul(&a[63], &twiddlesCoset[0]) - koalabear.Butterfly(&a[0], &a[32]) - koalabear.Butterfly(&a[1], &a[33]) - koalabear.Butterfly(&a[2], &a[34]) - koalabear.Butterfly(&a[3], &a[35]) - koalabear.Butterfly(&a[4], &a[36]) - koalabear.Butterfly(&a[5], &a[37]) - koalabear.Butterfly(&a[6], &a[38]) - koalabear.Butterfly(&a[7], &a[39]) - koalabear.Butterfly(&a[8], &a[40]) - koalabear.Butterfly(&a[9], &a[41]) - koalabear.Butterfly(&a[10], &a[42]) - koalabear.Butterfly(&a[11], &a[43]) - koalabear.Butterfly(&a[12], &a[44]) - koalabear.Butterfly(&a[13], &a[45]) - koalabear.Butterfly(&a[14], &a[46]) - koalabear.Butterfly(&a[15], &a[47]) - koalabear.Butterfly(&a[16], &a[48]) - koalabear.Butterfly(&a[17], &a[49]) - koalabear.Butterfly(&a[18], &a[50]) - koalabear.Butterfly(&a[19], &a[51]) - koalabear.Butterfly(&a[20], &a[52]) - koalabear.Butterfly(&a[21], &a[53]) - koalabear.Butterfly(&a[22], &a[54]) - koalabear.Butterfly(&a[23], &a[55]) - koalabear.Butterfly(&a[24], &a[56]) - koalabear.Butterfly(&a[25], &a[57]) - koalabear.Butterfly(&a[26], &a[58]) - koalabear.Butterfly(&a[27], &a[59]) - koalabear.Butterfly(&a[28], &a[60]) - koalabear.Butterfly(&a[29], &a[61]) - koalabear.Butterfly(&a[30], &a[62]) - koalabear.Butterfly(&a[31], &a[63]) - a[16].Mul(&a[16], &twiddlesCoset[1]) - a[17].Mul(&a[17], &twiddlesCoset[1]) - a[18].Mul(&a[18], &twiddlesCoset[1]) - a[19].Mul(&a[19], &twiddlesCoset[1]) - a[20].Mul(&a[20], &twiddlesCoset[1]) - 
a[21].Mul(&a[21], &twiddlesCoset[1]) - a[22].Mul(&a[22], &twiddlesCoset[1]) - a[23].Mul(&a[23], &twiddlesCoset[1]) - a[24].Mul(&a[24], &twiddlesCoset[1]) - a[25].Mul(&a[25], &twiddlesCoset[1]) - a[26].Mul(&a[26], &twiddlesCoset[1]) - a[27].Mul(&a[27], &twiddlesCoset[1]) - a[28].Mul(&a[28], &twiddlesCoset[1]) - a[29].Mul(&a[29], &twiddlesCoset[1]) - a[30].Mul(&a[30], &twiddlesCoset[1]) - a[31].Mul(&a[31], &twiddlesCoset[1]) - a[48].Mul(&a[48], &twiddlesCoset[2]) - a[49].Mul(&a[49], &twiddlesCoset[2]) - a[50].Mul(&a[50], &twiddlesCoset[2]) - a[51].Mul(&a[51], &twiddlesCoset[2]) - a[52].Mul(&a[52], &twiddlesCoset[2]) - a[53].Mul(&a[53], &twiddlesCoset[2]) - a[54].Mul(&a[54], &twiddlesCoset[2]) - a[55].Mul(&a[55], &twiddlesCoset[2]) - a[56].Mul(&a[56], &twiddlesCoset[2]) - a[57].Mul(&a[57], &twiddlesCoset[2]) - a[58].Mul(&a[58], &twiddlesCoset[2]) - a[59].Mul(&a[59], &twiddlesCoset[2]) - a[60].Mul(&a[60], &twiddlesCoset[2]) - a[61].Mul(&a[61], &twiddlesCoset[2]) - a[62].Mul(&a[62], &twiddlesCoset[2]) - a[63].Mul(&a[63], &twiddlesCoset[2]) - koalabear.Butterfly(&a[0], &a[16]) - koalabear.Butterfly(&a[1], &a[17]) - koalabear.Butterfly(&a[2], &a[18]) - koalabear.Butterfly(&a[3], &a[19]) - koalabear.Butterfly(&a[4], &a[20]) - koalabear.Butterfly(&a[5], &a[21]) - koalabear.Butterfly(&a[6], &a[22]) - koalabear.Butterfly(&a[7], &a[23]) - koalabear.Butterfly(&a[8], &a[24]) - koalabear.Butterfly(&a[9], &a[25]) - koalabear.Butterfly(&a[10], &a[26]) - koalabear.Butterfly(&a[11], &a[27]) - koalabear.Butterfly(&a[12], &a[28]) - koalabear.Butterfly(&a[13], &a[29]) - koalabear.Butterfly(&a[14], &a[30]) - koalabear.Butterfly(&a[15], &a[31]) - koalabear.Butterfly(&a[32], &a[48]) - koalabear.Butterfly(&a[33], &a[49]) - koalabear.Butterfly(&a[34], &a[50]) - koalabear.Butterfly(&a[35], &a[51]) - koalabear.Butterfly(&a[36], &a[52]) - koalabear.Butterfly(&a[37], &a[53]) - koalabear.Butterfly(&a[38], &a[54]) - koalabear.Butterfly(&a[39], &a[55]) - koalabear.Butterfly(&a[40], &a[56]) - 
koalabear.Butterfly(&a[41], &a[57]) - koalabear.Butterfly(&a[42], &a[58]) - koalabear.Butterfly(&a[43], &a[59]) - koalabear.Butterfly(&a[44], &a[60]) - koalabear.Butterfly(&a[45], &a[61]) - koalabear.Butterfly(&a[46], &a[62]) - koalabear.Butterfly(&a[47], &a[63]) - a[8].Mul(&a[8], &twiddlesCoset[3]) - a[9].Mul(&a[9], &twiddlesCoset[3]) - a[10].Mul(&a[10], &twiddlesCoset[3]) - a[11].Mul(&a[11], &twiddlesCoset[3]) - a[12].Mul(&a[12], &twiddlesCoset[3]) - a[13].Mul(&a[13], &twiddlesCoset[3]) - a[14].Mul(&a[14], &twiddlesCoset[3]) - a[15].Mul(&a[15], &twiddlesCoset[3]) - a[24].Mul(&a[24], &twiddlesCoset[4]) - a[25].Mul(&a[25], &twiddlesCoset[4]) - a[26].Mul(&a[26], &twiddlesCoset[4]) - a[27].Mul(&a[27], &twiddlesCoset[4]) - a[28].Mul(&a[28], &twiddlesCoset[4]) - a[29].Mul(&a[29], &twiddlesCoset[4]) - a[30].Mul(&a[30], &twiddlesCoset[4]) - a[31].Mul(&a[31], &twiddlesCoset[4]) - a[40].Mul(&a[40], &twiddlesCoset[5]) - a[41].Mul(&a[41], &twiddlesCoset[5]) - a[42].Mul(&a[42], &twiddlesCoset[5]) - a[43].Mul(&a[43], &twiddlesCoset[5]) - a[44].Mul(&a[44], &twiddlesCoset[5]) - a[45].Mul(&a[45], &twiddlesCoset[5]) - a[46].Mul(&a[46], &twiddlesCoset[5]) - a[47].Mul(&a[47], &twiddlesCoset[5]) - a[56].Mul(&a[56], &twiddlesCoset[6]) - a[57].Mul(&a[57], &twiddlesCoset[6]) - a[58].Mul(&a[58], &twiddlesCoset[6]) - a[59].Mul(&a[59], &twiddlesCoset[6]) - a[60].Mul(&a[60], &twiddlesCoset[6]) - a[61].Mul(&a[61], &twiddlesCoset[6]) - a[62].Mul(&a[62], &twiddlesCoset[6]) - a[63].Mul(&a[63], &twiddlesCoset[6]) - koalabear.Butterfly(&a[0], &a[8]) - koalabear.Butterfly(&a[1], &a[9]) - koalabear.Butterfly(&a[2], &a[10]) - koalabear.Butterfly(&a[3], &a[11]) - koalabear.Butterfly(&a[4], &a[12]) - koalabear.Butterfly(&a[5], &a[13]) - koalabear.Butterfly(&a[6], &a[14]) - koalabear.Butterfly(&a[7], &a[15]) - koalabear.Butterfly(&a[16], &a[24]) - koalabear.Butterfly(&a[17], &a[25]) - koalabear.Butterfly(&a[18], &a[26]) - koalabear.Butterfly(&a[19], &a[27]) - koalabear.Butterfly(&a[20], &a[28]) - 
koalabear.Butterfly(&a[21], &a[29]) - koalabear.Butterfly(&a[22], &a[30]) - koalabear.Butterfly(&a[23], &a[31]) - koalabear.Butterfly(&a[32], &a[40]) - koalabear.Butterfly(&a[33], &a[41]) - koalabear.Butterfly(&a[34], &a[42]) - koalabear.Butterfly(&a[35], &a[43]) - koalabear.Butterfly(&a[36], &a[44]) - koalabear.Butterfly(&a[37], &a[45]) - koalabear.Butterfly(&a[38], &a[46]) - koalabear.Butterfly(&a[39], &a[47]) - koalabear.Butterfly(&a[48], &a[56]) - koalabear.Butterfly(&a[49], &a[57]) - koalabear.Butterfly(&a[50], &a[58]) - koalabear.Butterfly(&a[51], &a[59]) - koalabear.Butterfly(&a[52], &a[60]) - koalabear.Butterfly(&a[53], &a[61]) - koalabear.Butterfly(&a[54], &a[62]) - koalabear.Butterfly(&a[55], &a[63]) - a[4].Mul(&a[4], &twiddlesCoset[7]) - a[5].Mul(&a[5], &twiddlesCoset[7]) - a[6].Mul(&a[6], &twiddlesCoset[7]) - a[7].Mul(&a[7], &twiddlesCoset[7]) - a[12].Mul(&a[12], &twiddlesCoset[8]) - a[13].Mul(&a[13], &twiddlesCoset[8]) - a[14].Mul(&a[14], &twiddlesCoset[8]) - a[15].Mul(&a[15], &twiddlesCoset[8]) - a[20].Mul(&a[20], &twiddlesCoset[9]) - a[21].Mul(&a[21], &twiddlesCoset[9]) - a[22].Mul(&a[22], &twiddlesCoset[9]) - a[23].Mul(&a[23], &twiddlesCoset[9]) - a[28].Mul(&a[28], &twiddlesCoset[10]) - a[29].Mul(&a[29], &twiddlesCoset[10]) - a[30].Mul(&a[30], &twiddlesCoset[10]) - a[31].Mul(&a[31], &twiddlesCoset[10]) - a[36].Mul(&a[36], &twiddlesCoset[11]) - a[37].Mul(&a[37], &twiddlesCoset[11]) - a[38].Mul(&a[38], &twiddlesCoset[11]) - a[39].Mul(&a[39], &twiddlesCoset[11]) - a[44].Mul(&a[44], &twiddlesCoset[12]) - a[45].Mul(&a[45], &twiddlesCoset[12]) - a[46].Mul(&a[46], &twiddlesCoset[12]) - a[47].Mul(&a[47], &twiddlesCoset[12]) - a[52].Mul(&a[52], &twiddlesCoset[13]) - a[53].Mul(&a[53], &twiddlesCoset[13]) - a[54].Mul(&a[54], &twiddlesCoset[13]) - a[55].Mul(&a[55], &twiddlesCoset[13]) - a[60].Mul(&a[60], &twiddlesCoset[14]) - a[61].Mul(&a[61], &twiddlesCoset[14]) - a[62].Mul(&a[62], &twiddlesCoset[14]) - a[63].Mul(&a[63], &twiddlesCoset[14]) - 
koalabear.Butterfly(&a[0], &a[4]) - koalabear.Butterfly(&a[1], &a[5]) - koalabear.Butterfly(&a[2], &a[6]) - koalabear.Butterfly(&a[3], &a[7]) - koalabear.Butterfly(&a[8], &a[12]) - koalabear.Butterfly(&a[9], &a[13]) - koalabear.Butterfly(&a[10], &a[14]) - koalabear.Butterfly(&a[11], &a[15]) - koalabear.Butterfly(&a[16], &a[20]) - koalabear.Butterfly(&a[17], &a[21]) - koalabear.Butterfly(&a[18], &a[22]) - koalabear.Butterfly(&a[19], &a[23]) - koalabear.Butterfly(&a[24], &a[28]) - koalabear.Butterfly(&a[25], &a[29]) - koalabear.Butterfly(&a[26], &a[30]) - koalabear.Butterfly(&a[27], &a[31]) - koalabear.Butterfly(&a[32], &a[36]) - koalabear.Butterfly(&a[33], &a[37]) - koalabear.Butterfly(&a[34], &a[38]) - koalabear.Butterfly(&a[35], &a[39]) - koalabear.Butterfly(&a[40], &a[44]) - koalabear.Butterfly(&a[41], &a[45]) - koalabear.Butterfly(&a[42], &a[46]) - koalabear.Butterfly(&a[43], &a[47]) - koalabear.Butterfly(&a[48], &a[52]) - koalabear.Butterfly(&a[49], &a[53]) - koalabear.Butterfly(&a[50], &a[54]) - koalabear.Butterfly(&a[51], &a[55]) - koalabear.Butterfly(&a[56], &a[60]) - koalabear.Butterfly(&a[57], &a[61]) - koalabear.Butterfly(&a[58], &a[62]) - koalabear.Butterfly(&a[59], &a[63]) - a[2].Mul(&a[2], &twiddlesCoset[15]) - a[3].Mul(&a[3], &twiddlesCoset[15]) - a[6].Mul(&a[6], &twiddlesCoset[16]) - a[7].Mul(&a[7], &twiddlesCoset[16]) - a[10].Mul(&a[10], &twiddlesCoset[17]) - a[11].Mul(&a[11], &twiddlesCoset[17]) - a[14].Mul(&a[14], &twiddlesCoset[18]) - a[15].Mul(&a[15], &twiddlesCoset[18]) - a[18].Mul(&a[18], &twiddlesCoset[19]) - a[19].Mul(&a[19], &twiddlesCoset[19]) - a[22].Mul(&a[22], &twiddlesCoset[20]) - a[23].Mul(&a[23], &twiddlesCoset[20]) - a[26].Mul(&a[26], &twiddlesCoset[21]) - a[27].Mul(&a[27], &twiddlesCoset[21]) - a[30].Mul(&a[30], &twiddlesCoset[22]) - a[31].Mul(&a[31], &twiddlesCoset[22]) - a[34].Mul(&a[34], &twiddlesCoset[23]) - a[35].Mul(&a[35], &twiddlesCoset[23]) - a[38].Mul(&a[38], &twiddlesCoset[24]) - a[39].Mul(&a[39], &twiddlesCoset[24]) - 
a[42].Mul(&a[42], &twiddlesCoset[25]) - a[43].Mul(&a[43], &twiddlesCoset[25]) - a[46].Mul(&a[46], &twiddlesCoset[26]) - a[47].Mul(&a[47], &twiddlesCoset[26]) - a[50].Mul(&a[50], &twiddlesCoset[27]) - a[51].Mul(&a[51], &twiddlesCoset[27]) - a[54].Mul(&a[54], &twiddlesCoset[28]) - a[55].Mul(&a[55], &twiddlesCoset[28]) - a[58].Mul(&a[58], &twiddlesCoset[29]) - a[59].Mul(&a[59], &twiddlesCoset[29]) - a[62].Mul(&a[62], &twiddlesCoset[30]) - a[63].Mul(&a[63], &twiddlesCoset[30]) - koalabear.Butterfly(&a[0], &a[2]) - koalabear.Butterfly(&a[1], &a[3]) - koalabear.Butterfly(&a[4], &a[6]) - koalabear.Butterfly(&a[5], &a[7]) - koalabear.Butterfly(&a[8], &a[10]) - koalabear.Butterfly(&a[9], &a[11]) - koalabear.Butterfly(&a[12], &a[14]) - koalabear.Butterfly(&a[13], &a[15]) - koalabear.Butterfly(&a[16], &a[18]) - koalabear.Butterfly(&a[17], &a[19]) - koalabear.Butterfly(&a[20], &a[22]) - koalabear.Butterfly(&a[21], &a[23]) - koalabear.Butterfly(&a[24], &a[26]) - koalabear.Butterfly(&a[25], &a[27]) - koalabear.Butterfly(&a[28], &a[30]) - koalabear.Butterfly(&a[29], &a[31]) - koalabear.Butterfly(&a[32], &a[34]) - koalabear.Butterfly(&a[33], &a[35]) - koalabear.Butterfly(&a[36], &a[38]) - koalabear.Butterfly(&a[37], &a[39]) - koalabear.Butterfly(&a[40], &a[42]) - koalabear.Butterfly(&a[41], &a[43]) - koalabear.Butterfly(&a[44], &a[46]) - koalabear.Butterfly(&a[45], &a[47]) - koalabear.Butterfly(&a[48], &a[50]) - koalabear.Butterfly(&a[49], &a[51]) - koalabear.Butterfly(&a[52], &a[54]) - koalabear.Butterfly(&a[53], &a[55]) - koalabear.Butterfly(&a[56], &a[58]) - koalabear.Butterfly(&a[57], &a[59]) - koalabear.Butterfly(&a[60], &a[62]) - koalabear.Butterfly(&a[61], &a[63]) - a[1].Mul(&a[1], &twiddlesCoset[31]) - a[3].Mul(&a[3], &twiddlesCoset[32]) - a[5].Mul(&a[5], &twiddlesCoset[33]) - a[7].Mul(&a[7], &twiddlesCoset[34]) - a[9].Mul(&a[9], &twiddlesCoset[35]) - a[11].Mul(&a[11], &twiddlesCoset[36]) - a[13].Mul(&a[13], &twiddlesCoset[37]) - a[15].Mul(&a[15], &twiddlesCoset[38]) - 
a[17].Mul(&a[17], &twiddlesCoset[39]) - a[19].Mul(&a[19], &twiddlesCoset[40]) - a[21].Mul(&a[21], &twiddlesCoset[41]) - a[23].Mul(&a[23], &twiddlesCoset[42]) - a[25].Mul(&a[25], &twiddlesCoset[43]) - a[27].Mul(&a[27], &twiddlesCoset[44]) - a[29].Mul(&a[29], &twiddlesCoset[45]) - a[31].Mul(&a[31], &twiddlesCoset[46]) - a[33].Mul(&a[33], &twiddlesCoset[47]) - a[35].Mul(&a[35], &twiddlesCoset[48]) - a[37].Mul(&a[37], &twiddlesCoset[49]) - a[39].Mul(&a[39], &twiddlesCoset[50]) - a[41].Mul(&a[41], &twiddlesCoset[51]) - a[43].Mul(&a[43], &twiddlesCoset[52]) - a[45].Mul(&a[45], &twiddlesCoset[53]) - a[47].Mul(&a[47], &twiddlesCoset[54]) - a[49].Mul(&a[49], &twiddlesCoset[55]) - a[51].Mul(&a[51], &twiddlesCoset[56]) - a[53].Mul(&a[53], &twiddlesCoset[57]) - a[55].Mul(&a[55], &twiddlesCoset[58]) - a[57].Mul(&a[57], &twiddlesCoset[59]) - a[59].Mul(&a[59], &twiddlesCoset[60]) - a[61].Mul(&a[61], &twiddlesCoset[61]) - a[63].Mul(&a[63], &twiddlesCoset[62]) - koalabear.Butterfly(&a[0], &a[1]) - koalabear.Butterfly(&a[2], &a[3]) - koalabear.Butterfly(&a[4], &a[5]) - koalabear.Butterfly(&a[6], &a[7]) - koalabear.Butterfly(&a[8], &a[9]) - koalabear.Butterfly(&a[10], &a[11]) - koalabear.Butterfly(&a[12], &a[13]) - koalabear.Butterfly(&a[14], &a[15]) - koalabear.Butterfly(&a[16], &a[17]) - koalabear.Butterfly(&a[18], &a[19]) - koalabear.Butterfly(&a[20], &a[21]) - koalabear.Butterfly(&a[22], &a[23]) - koalabear.Butterfly(&a[24], &a[25]) - koalabear.Butterfly(&a[26], &a[27]) - koalabear.Butterfly(&a[28], &a[29]) - koalabear.Butterfly(&a[30], &a[31]) - koalabear.Butterfly(&a[32], &a[33]) - koalabear.Butterfly(&a[34], &a[35]) - koalabear.Butterfly(&a[36], &a[37]) - koalabear.Butterfly(&a[38], &a[39]) - koalabear.Butterfly(&a[40], &a[41]) - koalabear.Butterfly(&a[42], &a[43]) - koalabear.Butterfly(&a[44], &a[45]) - koalabear.Butterfly(&a[46], &a[47]) - koalabear.Butterfly(&a[48], &a[49]) - koalabear.Butterfly(&a[50], &a[51]) - koalabear.Butterfly(&a[52], &a[53]) - 
koalabear.Butterfly(&a[54], &a[55]) - koalabear.Butterfly(&a[56], &a[57]) - koalabear.Butterfly(&a[58], &a[59]) - koalabear.Butterfly(&a[60], &a[61]) - koalabear.Butterfly(&a[62], &a[63]) -} - -// precomputeTwiddlesCoset precomputes twiddlesCoset from twiddles and coset table -// it then return all elements in the correct order for the unrolled FFT. -func precomputeTwiddlesCoset(generator, shifter koalabear.Element) []koalabear.Element { - toReturn := make([]koalabear.Element, 63) - var r, s koalabear.Element - e := new(big.Int) - - s = shifter - for k := 0; k < 5; k++ { - s.Square(&s) - } - toReturn[0] = s - s = shifter - for k := 0; k < 4; k++ { - s.Square(&s) - } - toReturn[1] = s - r.Exp(generator, e.SetUint64(uint64(1<<4*1))) - toReturn[2].Mul(&r, &s) - s = shifter - for k := 0; k < 3; k++ { - s.Square(&s) - } - toReturn[3] = s - r.Exp(generator, e.SetUint64(uint64(1<<3*2))) - toReturn[4].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*1))) - toReturn[5].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<3*3))) - toReturn[6].Mul(&r, &s) - s = shifter - for k := 0; k < 2; k++ { - s.Square(&s) - } - toReturn[7] = s - r.Exp(generator, e.SetUint64(uint64(1<<2*4))) - toReturn[8].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*2))) - toReturn[9].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*6))) - toReturn[10].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*1))) - toReturn[11].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*5))) - toReturn[12].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*3))) - toReturn[13].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<2*7))) - toReturn[14].Mul(&r, &s) - s = shifter - for k := 0; k < 1; k++ { - s.Square(&s) - } - toReturn[15] = s - r.Exp(generator, e.SetUint64(uint64(1<<1*8))) - toReturn[16].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*4))) - toReturn[17].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*12))) - toReturn[18].Mul(&r, &s) - r.Exp(generator, 
e.SetUint64(uint64(1<<1*2))) - toReturn[19].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*10))) - toReturn[20].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*6))) - toReturn[21].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*14))) - toReturn[22].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*1))) - toReturn[23].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*9))) - toReturn[24].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*5))) - toReturn[25].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*13))) - toReturn[26].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*3))) - toReturn[27].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*11))) - toReturn[28].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*7))) - toReturn[29].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<1*15))) - toReturn[30].Mul(&r, &s) - s = shifter - for k := 0; k < 0; k++ { - s.Square(&s) - } - toReturn[31] = s - r.Exp(generator, e.SetUint64(uint64(1<<0*16))) - toReturn[32].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*8))) - toReturn[33].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*24))) - toReturn[34].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*4))) - toReturn[35].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*20))) - toReturn[36].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*12))) - toReturn[37].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*28))) - toReturn[38].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*2))) - toReturn[39].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*18))) - toReturn[40].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*10))) - toReturn[41].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*26))) - toReturn[42].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*6))) - toReturn[43].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*22))) - toReturn[44].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*14))) - 
toReturn[45].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*30))) - toReturn[46].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*1))) - toReturn[47].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*17))) - toReturn[48].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*9))) - toReturn[49].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*25))) - toReturn[50].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*5))) - toReturn[51].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*21))) - toReturn[52].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*13))) - toReturn[53].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*29))) - toReturn[54].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*3))) - toReturn[55].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*19))) - toReturn[56].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*11))) - toReturn[57].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*27))) - toReturn[58].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*7))) - toReturn[59].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*23))) - toReturn[60].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*15))) - toReturn[61].Mul(&r, &s) - r.Exp(generator, e.SetUint64(uint64(1<<0*31))) - toReturn[62].Mul(&r, &s) - return toReturn -} diff --git a/field/koalabear/sis/sis_test.go b/field/koalabear/sis/sis_test.go index 33c8fb1d48..d05364dea6 100644 --- a/field/koalabear/sis/sis_test.go +++ b/field/koalabear/sis/sis_test.go @@ -265,32 +265,3 @@ func benchmarkSIS(b *testing.B, input []koalabear.Element, sparse bool, logTwoBo }) } - -func TestUnrolledFFT(t *testing.T) { - assert := require.New(t) - - var shift koalabear.Element - shift.SetRandom() - - const size = 64 - domain := fft.NewDomain(size, fft.WithShift(shift)) - - k1 := make([]koalabear.Element, size) - for i := 0; i < size; i++ { - k1[i].SetRandom() - } - k2 := make([]koalabear.Element, size) - copy(k2, k1) - - // default FFT - 
domain.FFT(k1, fft.DIF, fft.OnCoset(), fft.WithNbTasks(1)) - - // unrolled FFT - twiddlesCoset := precomputeTwiddlesCoset(domain.Generator, domain.FrMultiplicativeGen) - fft64(k2, twiddlesCoset) - - // compare results - for i := 0; i < size; i++ { - assert.True(k1[i].Equal(&k2[i]), "i = %d", i) - } -} From ccd53f9008627ce31e586312dade18eebe528c84 Mon Sep 17 00:00:00 2001 From: Gautam Botrel Date: Tue, 14 Jan 2025 02:55:40 +0000 Subject: [PATCH 25/25] style: code comment --- ecc/bls12-377/fr/sis/sis.go | 1 + field/generator/generator_sis.go | 1 - field/generator/internal/templates/sis/sis.go.tmpl | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ecc/bls12-377/fr/sis/sis.go b/ecc/bls12-377/fr/sis/sis.go index 36b62a9294..b808d3c17d 100644 --- a/ecc/bls12-377/fr/sis/sis.go +++ b/ecc/bls12-377/fr/sis/sis.go @@ -100,6 +100,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R kz: make(fr.Vector, degree), maxNbElementsToHash: maxNbElementsToHash, } + // for degree == 64 we have a special fast path with a set of unrolled FFTs. 
if r.Degree == 64 { // precompute twiddles for the unrolled FFT twiddlesCoset := precomputeTwiddlesCoset(r.Domain.Generator, shift) diff --git a/field/generator/generator_sis.go b/field/generator/generator_sis.go index 4a12b19379..78d320040d 100644 --- a/field/generator/generator_sis.go +++ b/field/generator/generator_sis.go @@ -26,7 +26,6 @@ func generateSIS(F *config.Field, outputDir string) error { // only on field byte size == 32, we unroll a 64-wide FFT (used in linea for bls12-377) if F.NbBytes == 32 { entries = append(entries, bavard.Entry{File: filepath.Join(outputDir, "sis_fft.go"), Templates: []string{"fft.go.tmpl"}}) - // TODO @gbotrel : add test for fft } type sisTemplateData struct { diff --git a/field/generator/internal/templates/sis/sis.go.tmpl b/field/generator/internal/templates/sis/sis.go.tmpl index ad9c7529eb..3004a5dc87 100644 --- a/field/generator/internal/templates/sis/sis.go.tmpl +++ b/field/generator/internal/templates/sis/sis.go.tmpl @@ -106,6 +106,7 @@ func NewRSis(seed int64, logTwoDegree, logTwoBound, maxNbElementsToHash int) (*R {{- if .HasUnrolledFFT}} + // for degree == 64 we have a special fast path with a set of unrolled FFTs. if r.Degree == 64 { // precompute twiddles for the unrolled FFT twiddlesCoset := precomputeTwiddlesCoset(r.Domain.Generator, shift)