diff --git a/docs/avx2-notes.md b/docs/avx2-notes.md index 87992b3ce..ccb502233 100644 --- a/docs/avx2-notes.md +++ b/docs/avx2-notes.md @@ -12,7 +12,7 @@ representation (which uses radix \\(2^{51}\\)) amounts to regrouping digits. The field element representation is oriented around the AVX2 -`vpmuluqdq` instruction, which multiplies the low 32 bits of each +`vpmuludq` instruction, which multiplies the low 32 bits of each 64-bit lane of each operand to produce a 64-bit result. ```text,no_run diff --git a/docs/parallel-formulas.md b/docs/parallel-formulas.md index 22f59cd43..7f1e1c1a8 100644 --- a/docs/parallel-formulas.md +++ b/docs/parallel-formulas.md @@ -327,7 +327,7 @@ There are several directions for future improvement: [sandy2x]: https://eprint.iacr.org/2015/943.pdf [avx2trac]: https://trac.torproject.org/projects/tor/ticket/8897#comment:28 [hwcd08]: https://www.iacr.org/archive/asiacrypt2008/53500329/53500329.pdf -[curve_models]: https://doc-internal.dalek.rs/curve25519_dalek/curve_models/index.html +[curve_models]: https://doc-internal.dalek.rs/curve25519_dalek/backend/serial/curve_models/index.html [bbjlp08]: https://eprint.iacr.org/2008/013 [cmo98]: https://link.springer.com/content/pdf/10.1007%2F3-540-49649-1_6.pdf [intel]: https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf diff --git a/src/backend/vector/avx2/edwards.rs b/src/backend/vector/avx2/edwards.rs index 846572090..b25a557b9 100644 --- a/src/backend/vector/avx2/edwards.rs +++ b/src/backend/vector/avx2/edwards.rs @@ -153,7 +153,7 @@ impl ExtendedPoint { // Set tmp1 = ( S_9, S_6, S_6, S_9) // b < ( 1.6, 1.6, 1.6, 1.6) tmp1 = tmp0.shuffle(Shuffle::DBBD); - // Set tmp1 = ( S_8, S_5, S_8, S_5) + // Set tmp0 = ( S_8, S_5, S_8, S_5) // b < (2.33, 1.01, 2.33, 1.01) tmp0 = tmp0.shuffle(Shuffle::CACA); @@ -188,7 +188,7 @@ impl From for CachedPoint { let mut x = P.0; x = x.blend(x.diff_sum(), Lanes::AB); - // x = (X1 - Y1, X2 + Y2, Z2, T2) = (S2 S3 
Z2 T2) + // x = (Y2 - X2, Y2 + X2, Z2, T2) = (S2 S3 Z2 T2) x = x * (121666, 121666, 2 * 121666, 2 * 121665); // x = (121666*S2 121666*S3 2*121666*Z2 2*121665*T2) @@ -521,4 +521,24 @@ mod test { let P = &constants::ED25519_BASEPOINT_TABLE * &Scalar::from(8475983829u64); doubling_test_helper(P); } + + #[test] + fn basepoint_odd_lookup_table_verify() { + use constants; + use backend::vector::avx2::constants::{BASEPOINT_ODD_LOOKUP_TABLE}; + + let basepoint_odd_table = NafLookupTable8::<CachedPoint>::from(&constants::ED25519_BASEPOINT_POINT); + println!("basepoint_odd_lookup_table = {:?}", basepoint_odd_table); + + let table_B = &BASEPOINT_ODD_LOOKUP_TABLE; + for (b_vec, base_vec) in table_B.0.iter().zip(basepoint_odd_table.0.iter()) { + let b_splits = b_vec.0.split(); + let base_splits = base_vec.0.split(); + + assert_eq!(base_splits[0], b_splits[0]); + assert_eq!(base_splits[1], b_splits[1]); + assert_eq!(base_splits[2], b_splits[2]); + assert_eq!(base_splits[3], b_splits[3]); + } + } }