diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 02826c211..79bdd3c8b 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -5,7 +5,8 @@ on: branches: [ main ] pull_request: branches: [ main, debugger ] - + workflow_dispatch: + env: CARGO_TERM_COLOR: always @@ -45,12 +46,18 @@ jobs: # We do everything in release mode so tests run quickly and steps cache each other. # Check the submitted change meets style guidelines + - name: Print cargo version + run: cargo --version + + - name: Print clang version + run: clang --version + - name: Cargo Format run: cargo fmt --check # Check that common feature permutations compile - name: Core compile check - run: cargo check --release + run: cargo check --release -vv - name: Full compile check run: cargo check --release --features deterministic,linkedproofs,logproof @@ -129,9 +136,9 @@ jobs: ~/.cargo/registry/cache/ ~/.cargo/git/db/ target/ - key: ${{ runner.os }}-cargo-doc-${{ hashFiles('**/Cargo.lock') }} + key: ${{ runner.os }}-cargo-emscripten-${{ hashFiles('**/Cargo.lock') }} restore-keys: | - ${{ runner.os }}-cargo-doc- + ${{ runner.os }}-cargo-emscripten- ${{ runner.os }}-cargo- - name: Install gcc-multilib (32-bit headers) run: | diff --git a/Cargo.lock b/Cargo.lock index d28aa6aa3..b89e48452 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -286,25 +286,22 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.66.1" +version = "0.71.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b84e06fc203107bfbad243f4aba2af864eb7db3b1cf46ea0a023b0b433d2a7" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" dependencies = [ "bitflags 2.4.2", "cexpr", "clang-sys", - "lazy_static", - "lazycell", + "itertools 0.10.5", "log", - "peeking_take_while", "prettyplease", "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 2.1.0", "shlex", "syn 2.0.49", - "which", ] [[package]] @@ -848,7 +845,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "e16e44ab292b1dddfdaf7be62cfd8877df52f2f3fde5858d95bab606be259f20" dependencies = [ "bitflags 2.4.2", - "libloading 0.8.1", + "libloading 0.7.4", "winapi", ] @@ -1387,15 +1384,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" -[[package]] -name = "home" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" -dependencies = [ - "windows-sys 0.52.0", -] - [[package]] name = "http" version = "0.2.11" @@ -1585,12 +1573,6 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "libc" version = "0.2.153" @@ -1793,7 +1775,7 @@ dependencies = [ "indexmap 1.9.3", "log", "num-traits 0.2.18", - "rustc-hash", + "rustc-hash 1.1.0", "spirv", "termcolor", "thiserror", @@ -2133,12 +2115,6 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" -[[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" - [[package]] name = "percent-encoding" version = "2.3.1" @@ -2252,9 +2228,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.78" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +checksum = 
"60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ "unicode-ident", ] @@ -2527,6 +2503,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" + [[package]] name = "rustc-hex" version = "2.1.0" @@ -3574,7 +3556,7 @@ dependencies = [ "parking_lot", "profiling", "raw-window-handle", - "rustc-hash", + "rustc-hash 1.1.0", "smallvec", "thiserror", "web-sys", @@ -3604,7 +3586,7 @@ dependencies = [ "js-sys", "khronos-egl", "libc", - "libloading 0.8.1", + "libloading 0.7.4", "log", "metal", "naga", @@ -3614,7 +3596,7 @@ dependencies = [ "range-alloc", "raw-window-handle", "renderdoc-sys", - "rustc-hash", + "rustc-hash 1.1.0", "smallvec", "thiserror", "wasm-bindgen", @@ -3634,18 +3616,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] - [[package]] name = "wide" version = "0.7.15" diff --git a/Cargo.toml b/Cargo.toml index 3d261aaf6..9ee7a820e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,7 +59,7 @@ wgpu-core = { version = "0.17.0", features = ["vulkan", "wgsl"] } find_cuda_helper = "0.2.0" criterion = { version = "0.5.1", default-features = false } darling = "0.20.3" -proc-macro2 = "1.0.66" +proc-macro2 = "1.0" quote = "1.0.32" syn = { version = "2.0.28", features = ["full"] } petgraph = { version = "0.6.0", features = ["serde-1"] } @@ -83,7 +83,7 @@ sha3 = "0.10.5" digest = "0.10.5" link-cplusplus = "1.0.9" cmake = "0.1.46" -bindgen = "0.66.1" +bindgen = "0.71" once_cell = "1.18.0" rlp = 
"0.5.2" fs_extra = "1.2.0" diff --git a/purge-github-caches.sh b/purge-github-caches.sh new file mode 100755 index 000000000..aa42937dc --- /dev/null +++ b/purge-github-caches.sh @@ -0,0 +1,15 @@ +#! /bin/bash + +gh auth status + +if [ $? -ne 0 ]; then + gh auth login +fi + +actions=$(gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" /repos/sunscreen-tech/sunscreen/actions/caches | jq '.actions_caches | .[] | .id') + +for i in $actions; do + echo "Deleting cache id " $i + gh api --method DELETE -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" /repos/sunscreen-tech/sunscreen/actions/caches/$i +done + diff --git a/seal_fhe/build.rs b/seal_fhe/build.rs index 7f6915132..66982fb98 100644 --- a/seal_fhe/build.rs +++ b/seal_fhe/build.rs @@ -136,7 +136,7 @@ fn main() { } let mut builder = bindgen::builder() - .clang_arg(format!("-I{}", out_path.join("include/SEAL-3.7").display())) + .clang_arg(format!("-I{}", out_path.join("include/SEAL-4.0").display())) .clang_arg("-ISEAL/native/src") .clang_arg("-xc++") .clang_arg("-std=c++17"); @@ -153,7 +153,7 @@ fn main() { let builder = builder .detect_include_paths(true) .header("bindgen_wrapper.h") - .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) .allowlist_function("BatchEncoder_.*") .allowlist_function("Ciphertext_.*") .allowlist_function("CKKSEncoder_.*") @@ -181,6 +181,8 @@ fn main() { let bindings = builder.generate().unwrap(); + println!("{}", bindings); + bindings .write_to_file(out_path.join("bindings.rs")) .expect("Failed to write bindings"); diff --git a/sunscreen_tfhe/notes/glwe_scheme_switching.md b/sunscreen_tfhe/notes/glwe_scheme_switching.md new file mode 100644 index 000000000..5d6dbae8b --- /dev/null +++ b/sunscreen_tfhe/notes/glwe_scheme_switching.md @@ -0,0 +1,60 @@ +This document describes how to generalize scheme switching presented in Micheli et al. to the GLWE setting. 
In their paper, the authors describe an efficient algorithm for taking a GadgetRLWE (i.e. RLEV) ciphertext and producing an RGSW ciphertext. We generalize the algorithm to allow taking a GLEV ciphertext and producing a GGSW ciphertext. + +# Background +Let $\mathcal{R}=\mathbb{Z}_q[X]/(X^N+1)$ for a power-of-two $N$. + +Recall that $\mathsf{GLEV}_{\vec{s}}(m)=[ \mathsf{GLWE}(\frac{q}{\beta^1}m), \mathsf{GLWE}(\frac{q}{\beta^2}m), ..., \mathsf{GLWE}(\frac{q}{\beta^\ell}m) ]$ where $\beta$ and $\ell$ are scheme parameters that define a radix decomposition. + +Furthermore, recall the gadget product $\odot$ between $a \in \mathcal{R}$ and $\mathsf{GLEV}(m)$: + +$$ +a \odot \mathsf{GLEV}(m):=\sum_{i=0}^{\ell-1}\mathsf{Decomp}_{i, \beta}(a)\times\mathsf{GLWE}(\frac{q}{\beta^{i+1}}m) +$$ +$$ +\approx\mathsf{GLWE}(am) +$$ + +# Scheme switching +## Keygen +Given a GLWE scheme with polynomial degree $N$, GLWE size $k$, and secret key $\vec{s}$, define a scheme switching key as follows: + +* Let $\mathbf{sk} = \vec{s} \otimes \vec{s}$ +* Compute scheme switching key $\mathbf{s_{ss}}$ where $\mathbf{s_{ss}}^{i,j}=\mathsf{GLEV}_{\vec{s}}(\mathbf{sk}_{i,j})$ for $i, j\in [0, k)$. +* Observe that since $\mathbf{sk}_{i,j}=\mathbf{sk}_{j,i}$, we can reduce our key size by roughly half. Simply store $\mathbf{s_{ss}}^{i,j}$ using standard symmetric matrix compression. + +## Algorithm +### First, an observation +Suppose we have $(\vec{a}, b) = \mathsf{GLWE}(m)$. Construct a trivial GLWE ciphertext $t_p(b)$ by placing $b$ in the $p$'th place in the basis coefficients and 0 elsewhere: $t_p(b)=((0, ..., b, ..., 0), 0)$. Observe what happens if we decrypt $t_p(b)$ under any key $\vec{s}$ to obtain a message $m'$: + +$$ +m' = (\sum_{i \ne p}^{[0, k)}0\cdot s_i + b \cdot s_p) - 0 +$$ + +$$ += b \cdot s_p +$$ + +Since the error is 0 as well, we can elide the rounding step. Thus, $t_p(b)$ is a $\mathsf{GLWE}$ encryption of $b \cdot s_p$ under $\vec{s}$. 
+ +### Our regularly scheduled program +Given $x=\mathsf{GLEV}(m)$, we have $x_i=\mathsf{GLWE}(\frac{q}{\beta^{i+1}}m)=(\vec{a}^{(i)}, b^{(i)}), i\in[0,\ell_{ggsw})$. + +For each $i \in [0, \ell_{ggsw}), j \in [0, k)$ compute the following using $\mathbf{s_{ss}}^{j,r}$ for $r \in [0, k)$: + +$$ +y_{i, j}=t_j(b^{(i)}) + \sum_{r=0}^{k-1} a^{(i)}_r \odot \mathsf{GLEV}_{\vec{s}}(s_j \cdot s_r)=\mathsf{GLWE}_{\vec{s}}(\sum_{r=0}^{k-1}a^{(i)}_r \cdot s_r \cdot s_j + b^{(i)}\cdot s_j) +$$ +$$ +=\mathsf{GLWE}_{\vec{s}}((\sum_{r=0}^{k-1}a^{(i)}_r \cdot s_r + b^{(i)})\cdot s_j) +$$ +$$ +=\mathsf{GLWE}_{\vec{s}}(\frac{q}{\beta^{i+1}}\cdot m \cdot s_j + e_i \cdot s_j) +$$ + +Note the $e_i$ term is small if $s_j$ is small (i.e. binary), and thus we are left with encryptions of $s_j \cdot m$. + +Further note, the radix decomposition in the above $\odot$ is $(\beta_{ss}, \ell_{ss})$, which may be distinct from $(\beta_{ggsw}, \ell_{ggsw})$. + +Let $z_j=\mathsf{GLEV}_{\vec{s}}(m \cdot s_j)=(y_{0,j}, y_{1,j}, ..., y_{\ell_{ggsw}-1, j})$ + +Output $\mathsf{GGSW}_{\vec{s}}(m)=(z_0, z_1, ..., z_{k-1}, x)$ \ No newline at end of file diff --git a/sunscreen_tfhe/notes/leveled_computation.md b/sunscreen_tfhe/notes/leveled_computation.md new file mode 100644 index 000000000..a1e895bb3 --- /dev/null +++ b/sunscreen_tfhe/notes/leveled_computation.md @@ -0,0 +1,79 @@ +# Background +## Ciphertexts +We denote $Z_m$ to be the message space (which is often binary in our setting). + +* LWE: encrypts $m \in Z_m$. +* GLWE: encrypts $m \in Z_m[X]/(X^N+1)$ where $N$ is a power of 2. +* GLEV: encrypts $m \in Z_m[X]/(X^N+1)$ where $N$ is a power of 2. Internally, the message is encrypted multiple times in different GLWE ciphertexts, but multiplied by different gadget coefficients in each one. +* GGSW: generally encrypts a polynomial in $Z_m[X]/(X^N+1)$, but in our setting always encrypts the 0 or 1 polynomial. Internally, it is a collection of GLEV ciphertexts encrypting $-S * m$ and one GLEV encrypting $m$. 
This ciphertext exists to cause clever algebraic cancellation that allows for an outer product between GGSW and GLWE ciphertexts. + +## CMUX +At the heart of TFHE is the CMux operation, which takes 2 GLWE ciphertexts `a, b` and a `sel` GGSW ciphertext encrypting 0 or 1. CMux results in a new GLWE ciphertext encrypting `a` when `sel` is 0 and `b` when `sel` is 1. + +We compute CMux using the GGSW and GLWE outer product and GLWE's additive homomorphism: + +$$(b - a) * sel + a$$ + +We remark that CMux is very fast (roughly 40us) and is the ideal primitive for computation. + +## Computing with CMUX +Given an input set of GGSW ciphertexts each encrypting a value in {0,1}, one can compute any function by passing the input GGSWs to the select lines of a CMux tree. Building such a mux tree is a trick taught in intro EE courses. Given a lookup table that produces output bit y, we do the following: + +* For n inputs, create the canonical 2^n mux tree. +* For the ith input to the mux tree, we feed the constant found in the ith row of the output column of our lookup table. + +We can then apply the following rules to optimize the circuit: +* If both inputs are the same, replace the mux with a wire to the next level. On the first level, this just means replacing the mux with a constant. +* Deduplicate any redundant muxes taking the same inputs. +* Repeat for each layer of the mux tree. + +We can then optimize across truth tables using common subexpression elimination to remove redundant multiplexers across functions. + +When applying this technique to TFHE, we can use trivial encryptions of the 0/1 polynomials as GLWE inputs to the MUX tree while the encrypted user input comes in as GGSW. + +Published literature indicates one can compute CMux trees with a depth greater than 20,000, indicating an ample noise ceiling. For reference, a 32-bit addition circuit requires a mere depth of 64. + +# A small problem +CMux takes user inputs as GGSW ciphertexts, but outputs GLWE ciphertexts 😑. 
This means you can't directly use the result of a MUX tree as an input to another and chain computation. Working around this requires a Rube Goldberg sequence of cryptographic operations. + +* Sample extract the GLWE result to produce an LWE ciphertext under the compute noise parameters. +* Keyswitch to a high-noise LWE ciphertext. This makes the next step cheaper. +* Circuit bootstrap (CBS) to GGSW. + +The result of the CBS operation can now be used in another MUX tree. Unfortunately, keyswitching is fairly expensive (a couple of ms per bit) and CBS is quite expensive (tens of ms per bit). Despite an ample noise budget to perform more computation, we're obligated to bootstrap only because we have to switch ciphertext types. + +# A new approach: GLEVCMux +In an ideal world, we could convert directly from GLWE back to GGSW, skipping circuit bootstrapping. With a cheap and clever detour through GLEV, we can accomplish this. + +Scheme switching as originally proposed allows one to take an RLEV ciphertext and a scheme switching key and produce an RGSW ciphertext. We've extended this to [work in the GLWE setting as well](./glwe_scheme_switching.md). This isn't immediately useful, but with a slight modification to CMux, it is exactly the missing piece we need. Unlike the CBS regime, scheme switching is basically as fast as a CMux operation. + +We remark that a GLEVCMux requires $\ell$ CMux operations. + +## GLEVCMux +We propose a new GLEVCMux algorithm that takes `a` and `b` as GLEV ciphertexts instead of GLWE. The algorithm is quite simple: for each gadget-multiplied GLWE ciphertext in `a` and `b`, compute the standard GLWE CMux against `sel`. This results in a new GLEV encrypting the same message as `a` when `sel` is 0 and `b` when `sel` is 1. + +We can then use GLEVCMux trees to do our computation, scheme switch, and feed the resulting GGSWs into another GLEVCMux tree. 
+ +This incurs $\ell$ times the overhead when computing the CMux tree, but these trees are linear in the input length for many operations (comparisons, add, sub, bitshift). For other operations such as integer multiplication and division, one could reformulate the computation as a series of smaller GLEVCMux trees. Or, you could use a different trick. + +## PackedCMux +Let's return to our standard CMux for a minute. Under our original regime, we use trivial encryptions of the 0 and 1 polynomials. However, instead of the 1 polynomial, let's use a trivial encryption of a polynomial whose first $\ell$ coefficients are the gadget decomposed 1. Now when we run these polynomials through a mux tree, the resultant GLWE's first $\ell$ message coefficients contain the equivalent GLEV message. + +We can use existing coefficient extraction techniques (e.g. homomorphic trace) to produce $\ell$ GLWE ciphertexts each containing a single gadget decomposed 0 (which is just zero) or $1/\beta^j$. These $\ell$ GLWE ciphertexts together form a GLEV ciphertext! We can now scheme switch and continue computation. + +This technique does not incur the $\ell$ factor overhead during CMux computation, but requires extracting a few coefficients. Extrapolating results from *Circuit Bootstrapping: Faster and Smaller* by Wang et al., we estimate this to take a millisecond or two, which is still an order of magnitude faster than the KS + CBS process. + +# Another new approach: GGSWCMux +We can extend GLEVCMux to implement a GGSWCMux. However, this is going to require a new server key. Furthermore, this trick only works when we would otherwise be able to use a CMux tree with trivially encrypted `a` and `b`, which is fortunately always the case when using CMux-based computation. + +## GGSWCMux server key +A GGSWCMux server key $sk_{ggswcmux}$ consists of $k+1$ ($k$ being the GLWE size) GLEV encryptions encrypting 0 and $-s_i$ for $0 \le i \lt k$ respectively. 
The GLEV encryption of zero can collapse to a single GLWE encryption of zero (as an optimization) since each gadget decomposition of $-sk_i * 0$ is zero and thus we can reuse the same ciphertext. + +We informally remark that these two values should encrypt exactly the same messages as a standard GGSW's first $k$ GLEV rows, so the exact same security analysis holds. + +## GGSWCMux +Let GLEV $(s_0^i, s_1^i) = sk_{ggswcmux}$. If using the zero GLWE optimization, we can just repeat the singular $s_0$ GLWE for each of the $s_0^i$ GLEV GLWE entries. + +We now perform $k+1$ GLEV CMux operations for each entry in the CMux tree. The last one is a standard GLEV CMux. For the first $k$ GLEV CMux operations, in the first layer of the CMux tree, whenever we would pass a trivial GLEV encryption of 1 we instead pass $s_1^i$, and whenever we would pass a trivial GLEV encryption of 0 we instead pass $s_0^i$. + +After the CMux tree completes, we remark that the first $k$ resulting GLEV ciphertexts encrypt $-s * m$ and the last ciphertext encrypts $m$, thus forming a GGSW that can be used in subsequent CMux operations. \ No newline at end of file