diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index e0d43101ed..175d55d00d 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -56,6 +56,9 @@ ### Bug fixes +* Switch most L-Qubit default kernels to `LM`. Add `LM::multiQubitOp` tests, failing when targeting out-of-order wires clustered close to `num_qubits-1`. Fix the `LM::multiQubitOp` kernel implementation by introducing a generic `revWireParity` routine and replacing the `bitswap`-based implementation. Mimic the changes fixing the corresponding `multiQubitOp` and `expval` functors in L-Kokkos. + [(#511)](https://github.com/PennyLaneAI/pennylane-lightning/pull/511) + * Fix RTD builds by removing unsupported `sytem_packages` configuration option. [(#491)](https://github.com/PennyLaneAI/pennylane-lightning/pull/491) diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py index a946116c88..49d70f2d90 100644 --- a/pennylane_lightning/core/_version.py +++ b/pennylane_lightning/core/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.33.0-dev16" +__version__ = "0.33.0-dev17" diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/gates/GateFunctorsParam.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/gates/GateFunctorsParam.hpp index 317a3e7bbf..b1e99d8946 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/gates/GateFunctorsParam.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/gates/GateFunctorsParam.hpp @@ -17,11 +17,14 @@ #include #include "BitUtil.hpp" +#include "BitUtilKokkos.hpp" /// @cond DEV namespace { using namespace Pennylane::Util; using Kokkos::Experimental::swap; +using Pennylane::LightningKokkos::Util::one; +using Pennylane::LightningKokkos::Util::wires2Parity; using std::size_t; } // namespace /// @endcond @@ -44,6 +47,8 @@ template struct multiQubitOpFunctor { KokkosComplexVector arr; KokkosComplexVector matrix; KokkosIntVector wires; + KokkosIntVector parity; + KokkosIntVector rev_wire_shifts; std::size_t dim; std::size_t num_qubits; @@ -55,41 +60,43 @@ template struct multiQubitOpFunctor { wires_host(wires_.data(), wires_.size()); Kokkos::resize(wires, wires_host.size()); Kokkos::deep_copy(wires, wires_host); - dim = static_cast(1U) << wires_.size(); + dim = one << wires_.size(); num_qubits = num_qubits_; arr = arr_; matrix = matrix_; + std::tie(parity, rev_wire_shifts) = wires2Parity(num_qubits_, wires_); } KOKKOS_INLINE_FUNCTION void operator()(const MemberType &teamMember) const { - const std::size_t k = teamMember.league_rank() * dim; + const std::size_t k = teamMember.league_rank(); ScratchViewComplex coeffs_in(teamMember.team_scratch(0), dim); ScratchViewSizeT indices(teamMember.team_scratch(0), dim); if (teamMember.team_rank() == 0) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(teamMember, dim), - [&](const std::size_t inner_idx) { - std::size_t idx = k | inner_idx; - const std::size_t n_wires = wires.size(); - - for (std::size_t pos = 0; pos < n_wires; pos++) { - std::size_t x = - ((idx >> (n_wires - pos - 1)) ^ - (idx >> (num_qubits - wires(pos) - 1))) & - 1U; - idx = idx ^ ((x << (n_wires - pos - 1)) | - (x << (num_qubits - wires(pos) - 1))); - } - - indices(inner_idx) = idx; - coeffs_in(inner_idx) = arr(idx); - }); + std::size_t idx = (k & parity(0)); + for (std::size_t i = 1; i < parity.size(); i++) { + idx |= ((k << i) & parity(i)); + } + indices(0) = idx; + coeffs_in(0) = arr(idx); + + Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, 1, dim), + [&](const std::size_t inner_idx) { + std::size_t index = indices(0); + for (std::size_t i = 0; i < wires.size(); + i++) { + if ((inner_idx & (one << i)) != 0) { + index |= rev_wire_shifts(i); + } + } + indices(inner_idx) = index; + coeffs_in(inner_idx) = arr(index); + }); } teamMember.team_barrier(); Kokkos::parallel_for( Kokkos::TeamThreadRange(teamMember, dim), [&](const std::size_t i) { - const auto idx = indices[i]; + const auto idx = indices(i); arr(idx) = 0.0; const std::size_t base_idx = i * dim; @@ -1491,7 +1498,7 @@ template struct apply1QubitOpFunctor { KokkosComplexVector arr; KokkosComplexVector matrix; const std::size_t n_wires = 1; - const std::size_t dim = static_cast(1U) << n_wires; + const std::size_t dim = one << n_wires; std::size_t num_qubits; size_t rev_wire; size_t rev_wire_shift; @@ -1532,7 +1539,7 @@ template struct apply2QubitOpFunctor { KokkosComplexVector arr; KokkosComplexVector matrix; const std::size_t n_wires = 2; - const std::size_t dim = static_cast(1U) << n_wires; + const std::size_t dim = one << n_wires; std::size_t num_qubits; std::size_t rev_wire0; std::size_t rev_wire1; @@ -1595,15 +1602,6 @@ template struct apply2QubitOpFunctor { GATETERM3(xx, 0B010, v010) + GATETERM3(xx, 0B011, v011) + \ GATETERM3(xx, 0B100, v100) + GATETERM3(xx, 0B101, v101) + \ GATETERM3(xx, 0B110, v110) + GATETERM3(xx, 0B111, v111) -#define INDEX(ivar, xx) \ - kdim | xx; \ - for (std::size_t pos = 0; pos < n_wires; pos++) { \ - std::size_t x = ((ivar >> (n_wires - pos - 1)) ^ \ - (ivar >> (num_qubits - wires(pos) - 1))) & \ - 1U; \ - ivar = ivar ^ ((x << (n_wires - pos - 1)) | \ - (x << (num_qubits - wires(pos) - 1))); \ - } template struct apply3QubitOpFunctor { using ComplexT = Kokkos::complex; @@ -1613,8 +1611,10 @@ template struct apply3QubitOpFunctor { KokkosComplexVector arr; KokkosComplexVector matrix; KokkosIntVector wires; + KokkosIntVector parity; + KokkosIntVector rev_wire_shifts; const std::size_t n_wires = 3; - const std::size_t dim = static_cast(1U) << n_wires; + const std::size_t dim = one << n_wires; std::size_t num_qubits; apply3QubitOpFunctor(KokkosComplexVector &arr_, std::size_t num_qubits_, @@ -1628,28 +1628,32 @@ template struct apply3QubitOpFunctor { arr = arr_; matrix = matrix_; num_qubits = num_qubits_; + std::tie(parity, rev_wire_shifts) = wires2Parity(num_qubits_, wires_); } KOKKOS_INLINE_FUNCTION void operator()(const std::size_t k) const { - const std::size_t kdim = k * dim; - std::size_t i000 = INDEX(i000, 0B000); + std::size_t i000 = (k & parity(0)); + for (std::size_t i = 1; i < parity.size(); i++) { + i000 |= ((k << i) & parity(i)); + } ComplexT v000 = arr(i000); - std::size_t i001 = INDEX(i001, 0B001); + + std::size_t i001 = i000 | rev_wire_shifts(0); ComplexT v001 = arr(i001); - std::size_t i010 = INDEX(i010, 0B010); + std::size_t i010 = i000 | rev_wire_shifts(1); ComplexT v010 = arr(i010); - std::size_t i011 = INDEX(i011, 0B011); + std::size_t i011 = i000 | rev_wire_shifts(0) | rev_wire_shifts(1); ComplexT v011 = arr(i011); - std::size_t i100 = INDEX(i100, 0B100); + std::size_t i100 = i000 | rev_wire_shifts(2); ComplexT v100 = arr(i100); - std::size_t i101 = INDEX(i101, 0B101); + std::size_t i101 = i000 | rev_wire_shifts(0) | rev_wire_shifts(2); ComplexT v101 = arr(i101); - std::size_t i110 = INDEX(i110, 0B110); + std::size_t i110 = i000 | rev_wire_shifts(1) | rev_wire_shifts(2); ComplexT v110 = arr(i110); - std::size_t i111 = INDEX(i111, 0B111); + std::size_t i111 = + i000 | rev_wire_shifts(0) | rev_wire_shifts(1) | rev_wire_shifts(2); ComplexT v111 = arr(i111); - arr(i000) = GATESUM3(0B000); arr(i001) = GATESUM3(0B001); arr(i010) = GATESUM3(0B010); @@ -1681,8 +1685,10 @@ template struct apply4QubitOpFunctor { KokkosComplexVector arr; KokkosComplexVector matrix; KokkosIntVector wires; + KokkosIntVector parity; + KokkosIntVector rev_wire_shifts; const std::size_t n_wires = 4; - const std::size_t dim = static_cast(1U) << n_wires; + const std::size_t dim = one << n_wires; std::size_t num_qubits; apply4QubitOpFunctor(KokkosComplexVector &arr_, std::size_t num_qubits_, @@ -1696,42 +1702,51 @@ template struct apply4QubitOpFunctor { arr = arr_; matrix = matrix_; num_qubits = num_qubits_; + std::tie(parity, rev_wire_shifts) = wires2Parity(num_qubits_, wires_); } KOKKOS_INLINE_FUNCTION void operator()(const std::size_t k) const { - const std::size_t kdim = k * dim; - std::size_t i0000 = INDEX(i0000, 0B0000); + std::size_t i0000 = (k & parity(0)); + for (std::size_t i = 1; i < parity.size(); i++) { + i0000 |= ((k << i) & parity(i)); + } ComplexT v0000 = arr(i0000); - std::size_t i0001 = INDEX(i0001, 0B0001); + + std::size_t i0001 = i0000 | rev_wire_shifts(0); ComplexT v0001 = arr(i0001); - std::size_t i0010 = INDEX(i0010, 0B0010); + std::size_t i0010 = i0000 | rev_wire_shifts(1); ComplexT v0010 = arr(i0010); - std::size_t i0011 = INDEX(i0011, 0B0011); + std::size_t i0011 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(1); ComplexT v0011 = arr(i0011); - std::size_t i0100 = INDEX(i0100, 0B0100); + std::size_t i0100 = i0000 | rev_wire_shifts(2); ComplexT v0100 = arr(i0100); - std::size_t i0101 = INDEX(i0101, 0B0101); + std::size_t i0101 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(2); ComplexT v0101 = arr(i0101); - std::size_t i0110 = INDEX(i0110, 0B0110); + std::size_t i0110 = i0000 | rev_wire_shifts(1) | rev_wire_shifts(2); ComplexT v0110 = arr(i0110); - std::size_t i0111 = INDEX(i0111, 0B0111); + std::size_t i0111 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(2); ComplexT v0111 = arr(i0111); - std::size_t i1000 = INDEX(i1000, 0B1000); + std::size_t i1000 = i0000 | rev_wire_shifts(3); ComplexT v1000 = arr(i1000); - std::size_t i1001 = INDEX(i1001, 0B1001); + std::size_t i1001 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(3); ComplexT v1001 = arr(i1001); - std::size_t i1010 = INDEX(i1010, 0B1010); + std::size_t i1010 = i0000 | rev_wire_shifts(1) | rev_wire_shifts(3); ComplexT v1010 = arr(i1010); - std::size_t i1011 = INDEX(i1011, 0B1011); + std::size_t i1011 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(3); ComplexT v1011 = arr(i1011); - std::size_t i1100 = INDEX(i1100, 0B1100); + std::size_t i1100 = i0000 | rev_wire_shifts(2) | rev_wire_shifts(3); ComplexT v1100 = arr(i1100); - std::size_t i1101 = INDEX(i1101, 0B1101); + std::size_t i1101 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(2) | + rev_wire_shifts(3); ComplexT v1101 = arr(i1101); - std::size_t i1110 = INDEX(i1110, 0B1110); + std::size_t i1110 = i0000 | rev_wire_shifts(1) | rev_wire_shifts(2) | + rev_wire_shifts(3); ComplexT v1110 = arr(i1110); - std::size_t i1111 = INDEX(i1111, 0B1111); + std::size_t i1111 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(2) | rev_wire_shifts(3); ComplexT v1111 = arr(i1111); arr(i0000) = GATESUM4(0B0000); @@ -1780,8 +1795,10 @@ template struct apply5QubitOpFunctor { KokkosComplexVector arr; KokkosComplexVector matrix; KokkosIntVector wires; + KokkosIntVector parity; + KokkosIntVector rev_wire_shifts; const std::size_t n_wires = 5; - const std::size_t dim = static_cast(1U) << n_wires; + const std::size_t dim = one << n_wires; std::size_t num_qubits; apply5QubitOpFunctor(KokkosComplexVector &arr_, std::size_t num_qubits_, @@ -1795,74 +1812,95 @@ template struct apply5QubitOpFunctor { arr = arr_; matrix = matrix_; num_qubits = num_qubits_; + std::tie(parity, rev_wire_shifts) = wires2Parity(num_qubits_, wires_); } KOKKOS_INLINE_FUNCTION void operator()(const std::size_t k) const { - const std::size_t kdim = k * dim; - std::size_t i00000 = INDEX(i00000, 0B00000); + std::size_t i00000 = (k & parity(0)); + for (std::size_t i = 1; i < parity.size(); i++) { + i00000 |= ((k << i) & parity(i)); + } ComplexT v00000 = arr(i00000); - std::size_t i00001 = INDEX(i00001, 0B00001); + + std::size_t i00001 = i00000 | rev_wire_shifts(0); ComplexT v00001 = arr(i00001); - std::size_t i00010 = INDEX(i00010, 0B00010); + std::size_t i00010 = i00000 | rev_wire_shifts(1); ComplexT v00010 = arr(i00010); - std::size_t i00011 = INDEX(i00011, 0B00011); + std::size_t i00011 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1); ComplexT v00011 = arr(i00011); - std::size_t i00100 = INDEX(i00100, 0B00100); + std::size_t i00100 = i00000 | rev_wire_shifts(2); ComplexT v00100 = arr(i00100); - std::size_t i00101 = INDEX(i00101, 0B00101); + std::size_t i00101 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(2); ComplexT v00101 = arr(i00101); - std::size_t i00110 = INDEX(i00110, 0B00110); + std::size_t i00110 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(2); ComplexT v00110 = arr(i00110); - std::size_t i00111 = INDEX(i00111, 0B00111); + std::size_t i00111 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(2); ComplexT v00111 = arr(i00111); - std::size_t i01000 = INDEX(i01000, 0B01000); + std::size_t i01000 = i00000 | rev_wire_shifts(3); ComplexT v01000 = arr(i01000); - std::size_t i01001 = INDEX(i01001, 0B01001); + std::size_t i01001 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(3); ComplexT v01001 = arr(i01001); - std::size_t i01010 = INDEX(i01010, 0B01010); + std::size_t i01010 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(3); ComplexT v01010 = arr(i01010); - std::size_t i01011 = INDEX(i01011, 0B01011); + std::size_t i01011 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(3); ComplexT v01011 = arr(i01011); - std::size_t i01100 = INDEX(i01100, 0B01100); + std::size_t i01100 = i00000 | rev_wire_shifts(2) | rev_wire_shifts(3); ComplexT v01100 = arr(i01100); - std::size_t i01101 = INDEX(i01101, 0B01101); + std::size_t i01101 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(2) | + rev_wire_shifts(3); ComplexT v01101 = arr(i01101); - std::size_t i01110 = INDEX(i01110, 0B01110); + std::size_t i01110 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(2) | + rev_wire_shifts(3); ComplexT v01110 = arr(i01110); - std::size_t i01111 = INDEX(i01111, 0B01111); + std::size_t i01111 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(2) | rev_wire_shifts(3); ComplexT v01111 = arr(i01111); - std::size_t i10000 = INDEX(i10000, 0B10000); + std::size_t i10000 = i00000 | rev_wire_shifts(4); ComplexT v10000 = arr(i10000); - std::size_t i10001 = INDEX(i10001, 0B10001); + std::size_t i10001 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(4); ComplexT v10001 = arr(i10001); - std::size_t i10010 = INDEX(i10010, 0B10010); + std::size_t i10010 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(4); ComplexT v10010 = arr(i10010); - std::size_t i10011 = INDEX(i10011, 0B10011); + std::size_t i10011 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(4); ComplexT v10011 = arr(i10011); - std::size_t i10100 = INDEX(i10100, 0B10100); + std::size_t i10100 = i00000 | rev_wire_shifts(2) | rev_wire_shifts(4); ComplexT v10100 = arr(i10100); - std::size_t i10101 = INDEX(i10101, 0B10101); + std::size_t i10101 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(2) | + rev_wire_shifts(4); ComplexT v10101 = arr(i10101); - std::size_t i10110 = INDEX(i10110, 0B10110); + std::size_t i10110 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(2) | + rev_wire_shifts(4); ComplexT v10110 = arr(i10110); - std::size_t i10111 = INDEX(i10111, 0B10111); + std::size_t i10111 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(2) | rev_wire_shifts(4); ComplexT v10111 = arr(i10111); - std::size_t i11000 = INDEX(i11000, 0B11000); + std::size_t i11000 = i00000 | rev_wire_shifts(3) | rev_wire_shifts(4); ComplexT v11000 = arr(i11000); - std::size_t i11001 = INDEX(i11001, 0B11001); + std::size_t i11001 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(3) | + rev_wire_shifts(4); ComplexT v11001 = arr(i11001); - std::size_t i11010 = INDEX(i11010, 0B11010); + std::size_t i11010 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(3) | + rev_wire_shifts(4); ComplexT v11010 = arr(i11010); - std::size_t i11011 = INDEX(i11011, 0B11011); + std::size_t i11011 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(3) | rev_wire_shifts(4); ComplexT v11011 = arr(i11011); - std::size_t i11100 = INDEX(i11100, 0B11100); + std::size_t i11100 = i00000 | rev_wire_shifts(2) | rev_wire_shifts(3) | + rev_wire_shifts(4); ComplexT v11100 = arr(i11100); - std::size_t i11101 = INDEX(i11101, 0B11101); + std::size_t i11101 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(2) | + rev_wire_shifts(3) | rev_wire_shifts(4); ComplexT v11101 = arr(i11101); - std::size_t i11110 = INDEX(i11110, 0B11110); + std::size_t i11110 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(2) | + rev_wire_shifts(3) | rev_wire_shifts(4); ComplexT v11110 = arr(i11110); - std::size_t i11111 = INDEX(i11111, 0B11111); + std::size_t i11111 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(2) | rev_wire_shifts(3) | + rev_wire_shifts(4); ComplexT v11111 = arr(i11111); arr(i00000) = GATESUM5(0B00000); diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/gates/tests/CMakeLists.txt b/pennylane_lightning/core/src/simulators/lightning_kokkos/gates/tests/CMakeLists.txt index 067c89b0d5..51ea141c06 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/gates/tests/CMakeLists.txt +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/gates/tests/CMakeLists.txt @@ -20,6 +20,7 @@ target_link_libraries(lightning_kokkos_gates_tests INTERFACE Catch2::Catch2 lightning_kokkos_measurements lightning_kokkos_observables lightning_kokkos + lightning_kokkos_utils ) ProcessTestOptions(lightning_kokkos_gates_tests) diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/ExpValFunctors.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/ExpValFunctors.hpp index c9f2b0b0e7..fdf8ec124a 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/ExpValFunctors.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/measurements/ExpValFunctors.hpp @@ -12,18 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once - #include #include "BitUtil.hpp" +#include "BitUtilKokkos.hpp" /// @cond DEV namespace { using namespace Pennylane::Util; +using Pennylane::LightningKokkos::Util::one; +using Pennylane::LightningKokkos::Util::wires2Parity; } // namespace /// @endcond namespace Pennylane::LightningKokkos::Functors { + template struct getExpectationValueIdentityFunctor { Kokkos::View *> arr; @@ -174,6 +177,8 @@ template struct getExpValMultiQubitOpFunctor { KokkosComplexVector arr; KokkosComplexVector matrix; KokkosIntVector wires; + KokkosIntVector parity; + KokkosIntVector rev_wire_shifts; std::size_t dim; std::size_t num_qubits; @@ -187,33 +192,36 @@ template struct getExpValMultiQubitOpFunctor { Kokkos::resize(wires, wires_.size()); Kokkos::deep_copy(wires, wires_host); - dim = static_cast(1U) << wires_.size(); + dim = one << wires_.size(); num_qubits = num_qubits_; arr = arr_; matrix = matrix_; + std::tie(parity, rev_wire_shifts) = wires2Parity(num_qubits_, wires_); } KOKKOS_INLINE_FUNCTION void operator()(const MemberType &teamMember, PrecisionT &expval) const { - const std::size_t k = teamMember.league_rank() * dim; + const std::size_t k = teamMember.league_rank(); PrecisionT tempExpVal = 0.0; ScratchViewComplex coeffs_in(teamMember.team_scratch(0), dim); if (teamMember.team_rank() == 0) { - Kokkos::parallel_for( - Kokkos::ThreadVectorRange(teamMember, dim), - [&](const std::size_t inner_idx) { - std::size_t idx = k | inner_idx; - const std::size_t n_wires = wires.size(); - for (std::size_t pos = 0; pos < n_wires; pos++) { - std::size_t x = - ((idx >> (n_wires - pos - 1)) ^ - (idx >> (num_qubits - wires(pos) - 1))) & - 1U; - idx = idx ^ ((x << (n_wires - pos - 1)) | - (x << (num_qubits - wires(pos) - 1))); - } - coeffs_in(inner_idx) = arr(idx); - }); + std::size_t idx = (k & parity(0)); + for (std::size_t i = 1; i < parity.size(); i++) { + idx |= ((k << i) & parity(i)); + } + coeffs_in(0) = arr(idx); + + Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember, 1, dim), + [&](const std::size_t inner_idx) { + std::size_t index = idx; + for (std::size_t i = 0; i < wires.size(); + i++) { + if ((inner_idx & (one << i)) != 0) { + index |= rev_wire_shifts(i); + } + } + coeffs_in(inner_idx) = arr(index); + }); } teamMember.team_barrier(); Kokkos::parallel_reduce( @@ -273,7 +281,8 @@ template struct getExpVal1QubitOpFunctor { KokkosComplexVector arr; KokkosComplexVector matrix; const std::size_t n_wires = 1; - const std::size_t dim = static_cast(1U) << n_wires; + + const std::size_t dim = one << n_wires; std::size_t num_qubits; std::size_t rev_wire; std::size_t rev_wire_shift; @@ -321,7 +330,8 @@ template struct getExpVal2QubitOpFunctor { KokkosComplexVector arr; KokkosComplexVector matrix; const std::size_t n_wires = 2; - const std::size_t dim = static_cast(1U) << n_wires; + + const std::size_t dim = one << n_wires; std::size_t num_qubits; std::size_t rev_wire0; std::size_t rev_wire1; @@ -376,15 +386,6 @@ template struct getExpVal2QubitOpFunctor { EXPVALTERM3(xx, 0B010, i010) + EXPVALTERM3(xx, 0B011, i011) + \ EXPVALTERM3(xx, 0B100, i100) + EXPVALTERM3(xx, 0B101, i101) + \ EXPVALTERM3(xx, 0B110, i110) + EXPVALTERM3(xx, 0B111, i111)) -#define INDEX(ivar, xx) \ - kdim | xx; \ - for (std::size_t pos = 0; pos < n_wires; pos++) { \ - std::size_t x = ((ivar >> (n_wires - pos - 1)) ^ \ - (ivar >> (num_qubits - wires(pos) - 1))) & \ - 1U; \ - ivar = ivar ^ ((x << (n_wires - pos - 1)) | \ - (x << (num_qubits - wires(pos) - 1))); \ - } template struct getExpVal3QubitOpFunctor { using ComplexT = Kokkos::complex; @@ -394,8 +395,12 @@ template struct getExpVal3QubitOpFunctor { KokkosComplexVector arr; KokkosComplexVector matrix; KokkosIntVector wires; + KokkosIntVector parity; + KokkosIntVector rev_wire_shifts; + const std::size_t n_wires = 3; - const std::size_t dim = static_cast(1U) << n_wires; + + const std::size_t dim = one << n_wires; std::size_t num_qubits; getExpVal3QubitOpFunctor(const KokkosComplexVector &arr_, @@ -411,20 +416,24 @@ template struct getExpVal3QubitOpFunctor { arr = arr_; matrix = matrix_; num_qubits = num_qubits_; + std::tie(parity, rev_wire_shifts) = wires2Parity(num_qubits_, wires_); } KOKKOS_INLINE_FUNCTION void operator()(const std::size_t k, PrecisionT &expval) const { - const std::size_t kdim = k * dim; - std::size_t i000 = INDEX(i000, 0B000); - std::size_t i001 = INDEX(i001, 0B001); - std::size_t i010 = INDEX(i010, 0B010); - std::size_t i011 = INDEX(i011, 0B011); - std::size_t i100 = INDEX(i100, 0B100); - std::size_t i101 = INDEX(i101, 0B101); - std::size_t i110 = INDEX(i110, 0B110); - std::size_t i111 = INDEX(i111, 0B111); + std::size_t i000 = (k & parity(0)); + for (std::size_t i = 1; i < parity.size(); i++) { + i000 |= ((k << i) & parity(i)); + } + std::size_t i001 = i000 | rev_wire_shifts(0); + std::size_t i010 = i000 | rev_wire_shifts(1); + std::size_t i011 = i000 | rev_wire_shifts(0) | rev_wire_shifts(1); + std::size_t i100 = i000 | rev_wire_shifts(2); + std::size_t i101 = i000 | rev_wire_shifts(0) | rev_wire_shifts(2); + std::size_t i110 = i000 | rev_wire_shifts(1) | rev_wire_shifts(2); + std::size_t i111 = + i000 | rev_wire_shifts(0) | rev_wire_shifts(1) | rev_wire_shifts(2); expval += real(EXPVAL3(i000, 0B000)); expval += real(EXPVAL3(i001, 0B001)); expval += real(EXPVAL3(i010, 0B010)); @@ -457,8 +466,12 @@ template struct getExpVal4QubitOpFunctor { KokkosComplexVector arr; KokkosComplexVector matrix; KokkosIntVector wires; + KokkosIntVector parity; + KokkosIntVector rev_wire_shifts; + const std::size_t n_wires = 4; - const std::size_t dim = static_cast(1U) << n_wires; + + const std::size_t dim = one << n_wires; std::size_t num_qubits; getExpVal4QubitOpFunctor(const KokkosComplexVector &arr_, @@ -473,28 +486,36 @@ template struct getExpVal4QubitOpFunctor { arr = arr_; matrix = matrix_; num_qubits = num_qubits_; + std::tie(parity, rev_wire_shifts) = wires2Parity(num_qubits_, wires_); } KOKKOS_INLINE_FUNCTION void operator()(const std::size_t k, PrecisionT &expval) const { - const std::size_t kdim = k * dim; - - std::size_t i0000 = INDEX(i0000, 0B0000); - std::size_t i0001 = INDEX(i0001, 0B0001); - std::size_t i0010 = INDEX(i0010, 0B0010); - std::size_t i0011 = INDEX(i0011, 0B0011); - std::size_t i0100 = INDEX(i0100, 0B0100); - std::size_t i0101 = INDEX(i0101, 0B0101); - std::size_t i0110 = INDEX(i0110, 0B0110); - std::size_t i0111 = INDEX(i0111, 0B0111); - std::size_t i1000 = INDEX(i1000, 0B1000); - std::size_t i1001 = INDEX(i1001, 0B1001); - std::size_t i1010 = INDEX(i1010, 0B1010); - std::size_t i1011 = INDEX(i1011, 0B1011); - std::size_t i1100 = INDEX(i1100, 0B1100); - std::size_t i1101 = INDEX(i1101, 0B1101); - std::size_t i1110 = INDEX(i1110, 0B1110); - std::size_t i1111 = INDEX(i1111, 0B1111); + std::size_t i0000 = (k & parity(0)); + for (std::size_t i = 1; i < parity.size(); i++) { + i0000 |= ((k << i) & parity(i)); + } + + std::size_t i0001 = i0000 | rev_wire_shifts(0); + std::size_t i0010 = i0000 | rev_wire_shifts(1); + std::size_t i0011 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(1); + std::size_t i0100 = i0000 | rev_wire_shifts(2); + std::size_t i0101 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(2); + std::size_t i0110 = i0000 | rev_wire_shifts(1) | rev_wire_shifts(2); + std::size_t i0111 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(2); + std::size_t i1000 = i0000 | rev_wire_shifts(3); + std::size_t i1001 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(3); + std::size_t i1010 = i0000 | rev_wire_shifts(1) | rev_wire_shifts(3); + std::size_t i1011 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(3); + std::size_t i1100 = i0000 | rev_wire_shifts(2) | rev_wire_shifts(3); + std::size_t i1101 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(2) | + rev_wire_shifts(3); + std::size_t i1110 = i0000 | rev_wire_shifts(1) | rev_wire_shifts(2) | + rev_wire_shifts(3); + std::size_t i1111 = i0000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(2) | rev_wire_shifts(3); expval += real(EXPVAL4(i0000, 0B0000)); expval += real(EXPVAL4(i0001, 0B0001)); @@ -544,8 +565,12 @@ template struct getExpVal5QubitOpFunctor { KokkosComplexVector arr; KokkosComplexVector matrix; KokkosIntVector wires; + KokkosIntVector parity; + KokkosIntVector rev_wire_shifts; + const std::size_t n_wires = 5; - const std::size_t dim = static_cast(1U) << n_wires; + + const std::size_t dim = one << n_wires; std::size_t num_qubits; getExpVal5QubitOpFunctor(const KokkosComplexVector &arr_, @@ -560,44 +585,64 @@ template struct getExpVal5QubitOpFunctor { arr = arr_; matrix = matrix_; num_qubits = num_qubits_; + std::tie(parity, rev_wire_shifts) = wires2Parity(num_qubits_, wires_); } KOKKOS_INLINE_FUNCTION void operator()(const std::size_t k, PrecisionT &expval) const { - const std::size_t kdim = k * dim; - - std::size_t i00000 = INDEX(i00000, 0B00000); - std::size_t i00001 = INDEX(i00001, 0B00001); - std::size_t i00010 = INDEX(i00010, 0B00010); - std::size_t i00011 = INDEX(i00011, 0B00011); - std::size_t i00100 = INDEX(i00100, 0B00100); - std::size_t i00101 = INDEX(i00101, 0B00101); - std::size_t i00110 = INDEX(i00110, 0B00110); - std::size_t i00111 = INDEX(i00111, 0B00111); - std::size_t i01000 = INDEX(i01000, 0B01000); - std::size_t i01001 = INDEX(i01001, 0B01001); - std::size_t i01010 = INDEX(i01010, 0B01010); - std::size_t i01011 = INDEX(i01011, 0B01011); - std::size_t i01100 = INDEX(i01100, 0B01100); - std::size_t i01101 = INDEX(i01101, 0B01101); - std::size_t i01110 = INDEX(i01110, 0B01110); - std::size_t i01111 = INDEX(i01111, 0B01111); - std::size_t i10000 = INDEX(i10000, 0B10000); - std::size_t i10001 = INDEX(i10001, 0B10001); - std::size_t i10010 = INDEX(i10010, 0B10010); - std::size_t i10011 = INDEX(i10011, 0B10011); - std::size_t i10100 = INDEX(i10100, 0B10100); - std::size_t i10101 = INDEX(i10101, 0B10101); - std::size_t i10110 = INDEX(i10110, 0B10110); - std::size_t i10111 = INDEX(i10111, 0B10111); - std::size_t i11000 = INDEX(i11000, 0B11000); - std::size_t i11001 = INDEX(i11001, 0B11001); - std::size_t i11010 = INDEX(i11010, 0B11010); - std::size_t i11011 = INDEX(i11011, 0B11011); - std::size_t i11100 = INDEX(i11100, 0B11100); - std::size_t i11101 = INDEX(i11101, 0B11101); - std::size_t i11110 = INDEX(i11110, 0B11110); - std::size_t i11111 = INDEX(i11111, 0B11111); + std::size_t i00000 = (k & parity(0)); + for (std::size_t i = 1; i < parity.size(); i++) { + i00000 |= ((k << i) & parity(i)); + } + + std::size_t i00001 = i00000 | rev_wire_shifts(0); + std::size_t i00010 = i00000 | rev_wire_shifts(1); + std::size_t i00011 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1); + std::size_t i00100 = i00000 | rev_wire_shifts(2); + std::size_t i00101 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(2); + std::size_t i00110 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(2); + std::size_t i00111 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(2); + std::size_t i01000 = i00000 | rev_wire_shifts(3); + std::size_t i01001 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(3); + std::size_t i01010 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(3); + std::size_t i01011 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(3); + std::size_t i01100 = i00000 | rev_wire_shifts(2) | rev_wire_shifts(3); + std::size_t i01101 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(2) | + rev_wire_shifts(3); + std::size_t i01110 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(2) | + rev_wire_shifts(3); + std::size_t i01111 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(2) | rev_wire_shifts(3); + std::size_t i10000 = i00000 | rev_wire_shifts(4); + std::size_t i10001 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(4); + std::size_t i10010 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(4); + std::size_t i10011 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(4); + std::size_t i10100 = i00000 | rev_wire_shifts(2) | rev_wire_shifts(4); + std::size_t i10101 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(2) | + rev_wire_shifts(4); + std::size_t i10110 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(2) | + rev_wire_shifts(4); + std::size_t i10111 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(2) | rev_wire_shifts(4); + std::size_t i11000 = i00000 | rev_wire_shifts(3) | rev_wire_shifts(4); + std::size_t i11001 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(3) | + rev_wire_shifts(4); + std::size_t i11010 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(3) | + rev_wire_shifts(4); + std::size_t i11011 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(3) | rev_wire_shifts(4); + std::size_t i11100 = i00000 | rev_wire_shifts(2) | rev_wire_shifts(3) | + rev_wire_shifts(4); + std::size_t i11101 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(2) | + rev_wire_shifts(3) | rev_wire_shifts(4); + std::size_t i11110 = i00000 | rev_wire_shifts(1) | rev_wire_shifts(2) | + rev_wire_shifts(3) | rev_wire_shifts(4); + std::size_t i11111 = i00000 | rev_wire_shifts(0) | rev_wire_shifts(1) | + rev_wire_shifts(2) | rev_wire_shifts(3) | + rev_wire_shifts(4); expval += real(EXPVAL5(i00000, 0B00000)); expval += real(EXPVAL5(i00001, 0B00001)); diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/utils/BitUtilKokkos.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/utils/BitUtilKokkos.hpp new file mode 100644 index 0000000000..1721c1cb3e --- /dev/null +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/utils/BitUtilKokkos.hpp @@ -0,0 +1,72 @@ +// Copyright 2018-2023 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * @file + * Defines utility functions for Bitwise operations. + */ +#pragma once +#include + +#include "BitUtil.hpp" + +/// @cond DEV +namespace { +using namespace Pennylane::Util; +using KokkosIntVector = Kokkos::View; +} // namespace +/// @endcond + +namespace Pennylane::LightningKokkos::Util { + +constexpr std::size_t one{1}; + +/** + * @brief Compute the parities and shifts for multi-qubit operations. + * + * @param num_qubits Number of qubits in the state vector. + * @param wires List of target wires. + * @return std::pair Parities and shifts for + * multi-qubit operations. + */ +inline auto wires2Parity(const std::size_t num_qubits, + const std::vector &wires) + -> std::pair { + KokkosIntVector parity; + KokkosIntVector rev_wire_shifts; + + std::vector rev_wires_(wires.size()); + std::vector rev_wire_shifts_(wires.size()); + for (std::size_t k = 0; k < wires.size(); k++) { + rev_wires_[k] = (num_qubits - 1) - wires[(wires.size() - 1) - k]; + rev_wire_shifts_[k] = (one << rev_wires_[k]); + } + const std::vector parity_ = revWireParity(rev_wires_); + + Kokkos::View> + rev_wire_shifts_host(rev_wire_shifts_.data(), rev_wire_shifts_.size()); + Kokkos::resize(rev_wire_shifts, rev_wire_shifts_host.size()); + Kokkos::deep_copy(rev_wire_shifts, rev_wire_shifts_host); + + Kokkos::View> + parity_host(parity_.data(), parity_.size()); + Kokkos::resize(parity, parity_host.size()); + Kokkos::deep_copy(parity, parity_host); + + return {parity, rev_wire_shifts}; +} + +} // namespace Pennylane::LightningKokkos::Util \ No newline at end of file diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/AssignKernelMap_Default.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/AssignKernelMap_Default.cpp index b26e4062e4..4e404d4d76 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/AssignKernelMap_Default.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/AssignKernelMap_Default.cpp @@ -121,10 +121,10 @@ void assignKernelsForGateOp_Default() { /* Three-qubit gates */ instance.assignKernelForOp(GateOperation::Toffoli, all_threading, all_memory_model, all_qubit_numbers, - KernelType::PI); + KernelType::LM); instance.assignKernelForOp(GateOperation::CSWAP, all_threading, all_memory_model, all_qubit_numbers, - KernelType::PI); + KernelType::LM); /* QChem gates */ instance.assignKernelForOp(GateOperation::SingleExcitation, all_threading, @@ -138,13 +138,13 @@ void assignKernelsForGateOp_Default() { all_qubit_numbers, KernelType::LM); instance.assignKernelForOp(GateOperation::DoubleExcitation, all_threading, all_memory_model, all_qubit_numbers, - KernelType::PI); + KernelType::LM); instance.assignKernelForOp(GateOperation::DoubleExcitationPlus, all_threading, all_memory_model, - all_qubit_numbers, KernelType::PI); + all_qubit_numbers, KernelType::LM); instance.assignKernelForOp(GateOperation::DoubleExcitationMinus, all_threading, all_memory_model, - all_qubit_numbers, KernelType::PI); + all_qubit_numbers, KernelType::LM); /* Multi-qubit gates */ instance.assignKernelForOp(GateOperation::MultiRZ, all_threading, @@ -226,6 +226,6 @@ void assignKernelsForMatrixOp_Default() { KernelType::LM); instance.assignKernelForOp(MatrixOperation::MultiQubitOp, all_threading, all_memory_model, all_qubit_numbers, - KernelType::PI); + KernelType::LM); } } // namespace Pennylane::LightningQubit::KernelMap::Internal diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/GateImplementationsLM.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/GateImplementationsLM.hpp index 58cd4a98f8..4df61f2c91 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/GateImplementationsLM.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/GateImplementationsLM.hpp @@ -51,79 +51,31 @@ class GateImplementationsLM : public PauliGenerator { private: /* Alias utility functions */ static std::pair revWireParity(size_t rev_wire) { - using Pennylane::Util::fillLeadingOnes; - using Pennylane::Util::fillTrailingOnes; - - const size_t parity_low = fillTrailingOnes(rev_wire); - const size_t parity_high = fillLeadingOnes(rev_wire + 1); - return {parity_high, parity_low}; + const auto parity = Pennylane::Util::revWireParity( + std::array{rev_wire}); + return {parity[1], parity[0]}; } - static std::tuple revWireParity(size_t rev_wire0, size_t rev_wire1) { - using Pennylane::Util::fillLeadingOnes; - using Pennylane::Util::fillTrailingOnes; - - const size_t rev_wire_min = std::min(rev_wire0, rev_wire1); - const size_t rev_wire_max = std::max(rev_wire0, rev_wire1); - - const size_t parity_low = fillTrailingOnes(rev_wire_min); - const size_t parity_high = fillLeadingOnes(rev_wire_max + 1); - const size_t parity_middle = - fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max); - - return {parity_high, parity_middle, parity_low}; + const auto parity = Pennylane::Util::revWireParity( + std::array{rev_wire0, rev_wire1}); + return {parity[2], parity[1], parity[0]}; } - template static constexpr auto revWireParity(size_t rev_wire0, size_t rev_wire1, size_t rev_wire2) -> std::array { - using Pennylane::Util::fillLeadingOnes; - using Pennylane::Util::fillTrailingOnes; - - std::array rev_wire{rev_wire0, rev_wire1, rev_wire2}; - - std::sort(rev_wire.begin(), rev_wire.end()); - - const size_t parity_size = rev_wire.size() + 1; - - std::array parity{}; - - parity[0] = fillTrailingOnes(rev_wire[0]); - parity[1] = - fillLeadingOnes(rev_wire[0] + 1) & fillTrailingOnes(rev_wire[1]); - parity[2] = - fillLeadingOnes(rev_wire[1] + 1) & fillTrailingOnes(rev_wire[2]); - parity[3] = fillLeadingOnes(rev_wire[2] + 1); - - return parity; + return Pennylane::Util::revWireParity( + std::array{rev_wire0, rev_wire1, + rev_wire2}); } template static constexpr auto revWireParity(size_t rev_wire0, size_t rev_wire1, size_t rev_wire2, size_t rev_wire3) -> std::array { - using Pennylane::Util::fillLeadingOnes; - using Pennylane::Util::fillTrailingOnes; - - std::array rev_wire{rev_wire0, rev_wire1, rev_wire2, - rev_wire3}; - - std::sort(rev_wire.begin(), rev_wire.end()); - - const size_t parity_size = rev_wire.size() + 1; - std::array parity{}; - - parity[0] = fillTrailingOnes(rev_wire[0]); - parity[1] = - fillLeadingOnes(rev_wire[0] + 1) & fillTrailingOnes(rev_wire[1]); - parity[2] = - fillLeadingOnes(rev_wire[1] + 1) & fillTrailingOnes(rev_wire[2]); - parity[3] = - fillLeadingOnes(rev_wire[2] + 1) & fillTrailingOnes(rev_wire[3]); - parity[4] = fillLeadingOnes(rev_wire[3] + 1); - - return parity; + return Pennylane::Util::revWireParity( + std::array{rev_wire0, rev_wire1, rev_wire2, + rev_wire3}); } public: @@ -241,6 +193,7 @@ class GateImplementationsLM : public PauliGenerator { } } } + /** * @brief Apply a two qubit gate to the statevector. * @@ -328,29 +281,45 @@ class GateImplementationsLM : public PauliGenerator { template static void - applyMultiQubitOp(std::complex *arr, size_t num_qubits, + applyMultiQubitOp(std::complex *arr, std::size_t num_qubits, const std::complex *matrix, - const std::vector &wires, bool inverse) { + const std::vector &wires, bool inverse) { using Pennylane::Util::bitswap; + constexpr std::size_t one{1}; PL_ASSERT(num_qubits >= wires.size()); - const size_t dim = static_cast(1U) << wires.size(); - std::vector indices(dim); + const std::size_t dim = one << wires.size(); + std::vector indices(dim); std::vector> coeffs_in(dim, 0.0); + std::vector rev_wires(wires.size()); + std::vector rev_wire_shifts(wires.size()); + for (std::size_t k = 0; k < wires.size(); k++) { + rev_wires[k] = (num_qubits - 1) - wires[(wires.size() - 1) - k]; + rev_wire_shifts[k] = (one << rev_wires[k]); + } + const std::vector parity = + Pennylane::Util::revWireParity(rev_wires); + PL_ASSERT(wires.size() == parity.size() - 1); + if (inverse) { - for (size_t k = 0; k < exp2(num_qubits); k += dim) { - for (size_t inner_idx = 0; inner_idx < dim; inner_idx++) { - size_t idx = k | inner_idx; - const size_t n_wires = wires.size(); - for (size_t pos = 0; pos < n_wires; pos++) { - idx = bitswap(idx, n_wires - pos - 1, - num_qubits - wires[pos] - 1); + for (std::size_t k = 0; k < exp2(num_qubits - wires.size()); k++) { + std::size_t idx = (k & parity[0]); + for (std::size_t i = 1; i < parity.size(); i++) { + idx |= ((k << i) & parity[i]); + } + indices[0] = idx; + coeffs_in[0] = arr[idx]; + for (std::size_t inner_idx = 1; inner_idx < dim; inner_idx++) { + idx = indices[0]; + for (std::size_t i = 0; i < wires.size(); i++) { + if ((inner_idx & (one << i)) != 0) { + idx |= rev_wire_shifts[i]; + } } indices[inner_idx] = idx; coeffs_in[inner_idx] = arr[idx]; } - for (size_t i = 0; i < dim; i++) { const auto idx = indices[i]; arr[idx] = 0.0; @@ -363,25 +332,29 @@ class GateImplementationsLM : public PauliGenerator { } } } else { - for (size_t k = 0; k < exp2(num_qubits); k += dim) { - for (size_t inner_idx = 0; inner_idx < dim; inner_idx++) { - size_t idx = k | inner_idx; - const size_t n_wires = wires.size(); - for (size_t pos = 0; pos < n_wires; pos++) { - idx = bitswap(idx, n_wires - pos - 1, - num_qubits - wires[pos] - 1); + for (std::size_t k = 0; k < exp2(num_qubits - wires.size()); k++) { + std::size_t idx = (k & parity[0]); + for (std::size_t i = 1; i < parity.size(); i++) { + idx |= ((k << i) & parity[i]); + } + indices[0] = idx; + coeffs_in[0] = arr[idx]; + for (std::size_t inner_idx = 1; inner_idx < dim; inner_idx++) { + idx = indices[0]; + for (std::size_t i = 0; i < wires.size(); i++) { + if ((inner_idx & (one << i)) != 0) { + idx |= rev_wire_shifts[i]; + } } indices[inner_idx] = idx; coeffs_in[inner_idx] = arr[idx]; } - - for (size_t i = 0; i < dim; i++) { - const auto idx = indices[i]; - arr[idx] = 0.0; - const size_t base_idx = i * dim; - - for (size_t j = 0; j < dim; j++) { - arr[idx] += matrix[base_idx + j] * coeffs_in[j]; + for (std::size_t i = 0; i < dim; i++) { + const auto index = indices[i]; + arr[index] = 0.0; + const std::size_t base_idx = i * dim; + for (std::size_t j = 0; j < dim; j++) { + arr[index] += matrix[base_idx + j] * coeffs_in[j]; } } } diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_KernelMap.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_KernelMap.cpp index ed8b104d94..b0226ce28e 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_KernelMap.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_KernelMap.cpp @@ -110,7 +110,7 @@ TEST_CASE("Test several limiting cases of default kernels", "[KernelMap]") { OperationKernelMap::getInstance(); SECTION("Single thread, large number of qubits") { // For large N, single thread calls "LM" for all single- and two-qubit - // gates. For k-qubit gates with k >= 3, we use PI. + // gates. auto gate_map = instance.getKernelMap(28, Threading::SingleThread, CPUMemoryModel::Unaligned); for_each_enum( @@ -123,9 +123,6 @@ TEST_CASE("Test several limiting cases of default kernels", "[KernelMap]") { gate_op) <= 2) { REQUIRE(gate_map[gate_op] == Pennylane::Gates::KernelType::LM); - } else { - REQUIRE(gate_map[gate_op] == - Pennylane::Gates::KernelType::PI); } }); } diff --git a/pennylane_lightning/core/src/utils/BitUtil.hpp b/pennylane_lightning/core/src/utils/BitUtil.hpp index 4486916d9d..15f0c470eb 100644 --- a/pennylane_lightning/core/src/utils/BitUtil.hpp +++ b/pennylane_lightning/core/src/utils/BitUtil.hpp @@ -17,9 +17,12 @@ * Defines utility functions for Bitwise operations. */ #pragma once +#include // sort +#include #include // countr_zero, popcount, has_single_bit #include // CHAR_BIT #include // size_t +#include namespace Pennylane::Util { /** @@ -83,4 +86,40 @@ inline auto constexpr bitswap(size_t bits, const size_t i, const size_t j) return bits ^ ((x << i) | (x << j)); } +/** + * @brief Return integers with leading/trailing ones at positions specified by a + * list of target wires. + * + * @param wire_list Target wires. + */ +inline auto revWireParity(const std::vector &wire_list) + -> std::vector { + const std::size_t wire_size = wire_list.size(); + auto rev_wire = wire_list; + std::sort(rev_wire.begin(), rev_wire.end()); + std::vector parity(wire_size + 1); + parity[0] = fillTrailingOnes(rev_wire[0]); + for (std::size_t i = 1; i < wire_size; i++) { + parity[i] = fillLeadingOnes(rev_wire[i - 1] + 1) & + fillTrailingOnes(rev_wire[i]); + } + parity[wire_size] = fillLeadingOnes(rev_wire[wire_size - 1] + 1); + return parity; +} + +template +inline auto revWireParity(const std::array &wire_list) + -> std::array { + auto rev_wire = wire_list; + std::sort(rev_wire.begin(), rev_wire.end()); + std::array parity{}; + parity[0] = fillTrailingOnes(rev_wire[0]); + for (std::size_t i = 1; i < wire_size; i++) { + parity[i] = fillLeadingOnes(rev_wire[i - 1] + 1) & + fillTrailingOnes(rev_wire[i]); + } + parity[wire_size] = fillLeadingOnes(rev_wire[wire_size - 1] + 1); + return parity; +} + } // namespace Pennylane::Util \ No newline at end of file diff --git a/setup.py b/setup.py index 2f76a3a047..137113c641 100644 --- a/setup.py +++ b/setup.py @@ -164,6 +164,7 @@ def build_extension(self, ext: CMakeExtension): env=os.environ, ) + with open(os.path.join("pennylane_lightning", "core", "_version.py"), encoding="utf-8") as f: version = f.readlines()[-1].split()[-1].strip("\"'") @@ -171,19 +172,19 @@ def build_extension(self, ext: CMakeExtension): "pennylane>=0.32", ] -packages_list = ['pennylane_lightning.'+backend] +packages_list = ["pennylane_lightning." + backend] if backend == "lightning_qubit": - packages_list += ['pennylane_lightning.core'] + packages_list += ["pennylane_lightning.core"] else: - requirements += ["pennylane_lightning=="+version] + requirements += ["pennylane_lightning==" + version] suffix = backend.replace("lightning_", "") suffix = suffix[0].upper() + suffix[1:] pennylane_plugins = [device_name + " = pennylane_lightning." + backend + ":Lightning" + suffix] -pkg_suffix = "" if suffix == "Qubit" else "_"+suffix +pkg_suffix = "" if suffix == "Qubit" else "_" + suffix info = { "name": f"PennyLane_Lightning{pkg_suffix}", @@ -207,13 +208,16 @@ def build_extension(self, ext: CMakeExtension): } if backend == "lightning_qubit": - info = info | { - "package_data": { - 'pennylane_lightning.core': [ - os.path.join("src", "*"), - os.path.join("src", "**", "*"), - ] - },} + info.update( + { + "package_data": { + "pennylane_lightning.core": [ + os.path.join("src", "*"), + os.path.join("src", "**", "*"), + ] + }, + } + ) classifiers = [ "Development Status :: 4 - Beta", diff --git a/tests/test_expval.py b/tests/test_expval.py index 9041eb363a..60dab62e04 100644 --- a/tests/test_expval.py +++ b/tests/test_expval.py @@ -14,6 +14,7 @@ """ Unit tests for the expval method of Lightning devices. """ +import itertools import pytest from conftest import THETA, PHI, VARPHI @@ -110,20 +111,25 @@ def test_hermitian_expectation(self, n_wires, theta, phi, qubit_device, tol): m = 2**n_wires U = np.random.rand(m, m) + 1j * np.random.rand(m, m) U = U + np.conj(U.T) - obs = qml.Hermitian(U, wires=range(n_wires)) + wires = list(range((n_qubits - n_wires), (n_qubits - n_wires) + n_wires)) + perms = list(itertools.permutations(wires)) init_state = np.random.rand(2**n_qubits) + 1j * np.random.rand(2**n_qubits) init_state /= np.sqrt(np.dot(np.conj(init_state), init_state)) - - def circuit(): - qml.StatePrep(init_state, wires=range(n_qubits)) - qml.RY(theta, wires=[0]) - qml.RY(phi, wires=[1]) - qml.CNOT(wires=[0, 1]) - return qml.expval(obs) - - circ = qml.QNode(circuit, dev) - circ_def = qml.QNode(circuit, dev_def) - assert np.allclose(circ(), circ_def(), tol) + if n_wires > 4: + perms = perms[0::30] + for perm in perms: + obs = qml.Hermitian(U, wires=perm) + + def circuit(): + qml.StatePrep(init_state, wires=range(n_qubits)) + qml.RY(theta, wires=[0]) + qml.RY(phi, wires=[1]) + qml.CNOT(wires=[0, 1]) + return qml.expval(obs) + + circ = qml.QNode(circuit, dev) + circ_def = qml.QNode(circuit, dev_def) + assert np.allclose(circ(), circ_def(), tol) @pytest.mark.parametrize("diff_method", ("parameter-shift", "adjoint")) diff --git a/tests/test_gates.py b/tests/test_gates.py index d3d33b8ade..8839b1667d 100644 --- a/tests/test_gates.py +++ b/tests/test_gates.py @@ -240,15 +240,11 @@ def test_get_diagonalizing_gates(obs, has_rotation): @pytest.mark.parametrize("theta,phi", list(zip(THETA, PHI))) -@pytest.mark.parametrize("n_wires", range(1, 7)) -def test_qubit_unitary(n_wires, theta, phi, tol): +def test_qubit_RY(theta, phi, tol): """Test that Hadamard expectation value is correct""" - n_qubits = 10 + n_qubits = 4 dev_def = qml.device("default.qubit", wires=n_qubits) dev = qml.device(device_name, wires=n_qubits) - m = 2**n_wires - U = np.random.rand(m, m) + 1j * np.random.rand(m, m) - U, _ = np.linalg.qr(U) init_state = np.random.rand(2**n_qubits) + 1j * np.random.rand(2**n_qubits) init_state /= np.sqrt(np.dot(np.conj(init_state), init_state)) @@ -256,10 +252,39 @@ def circuit(): qml.StatePrep(init_state, wires=range(n_qubits)) qml.RY(theta, wires=[0]) qml.RY(phi, wires=[1]) - qml.CNOT(wires=[0, 1]) - qml.QubitUnitary(U, wires=range(2, 2 + n_wires)) return qml.state() circ = qml.QNode(circuit, dev) circ_def = qml.QNode(circuit, dev_def) assert np.allclose(circ(), circ_def(), tol) + + +@pytest.mark.parametrize("theta,phi", list(zip(THETA, PHI))) +@pytest.mark.parametrize("n_wires", range(1, 7)) +def test_qubit_unitary(n_wires, theta, phi, tol): + """Test that Hadamard expectation value is correct""" + n_qubits = 10 + dev_def = qml.device("default.qubit", wires=n_qubits) + dev = qml.device(device_name, wires=n_qubits) + m = 2**n_wires + U = np.random.rand(m, m) + 1j * np.random.rand(m, m) + U, _ = np.linalg.qr(U) + init_state = np.random.rand(2**n_qubits) + 1j * np.random.rand(2**n_qubits) + init_state /= np.sqrt(np.dot(np.conj(init_state), init_state)) + wires = list(range((n_qubits - n_wires), (n_qubits - n_wires) + n_wires)) + perms = list(itertools.permutations(wires)) + if n_wires > 4: + perms = perms[0::30] + for perm in perms: + + def circuit(): + qml.StatePrep(init_state, wires=range(n_qubits)) + qml.RY(theta, wires=[0]) + qml.RY(phi, wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.QubitUnitary(U, wires=perm) + return qml.state() + + circ = qml.QNode(circuit, dev) + circ_def = qml.QNode(circuit, dev_def) + assert np.allclose(circ(), circ_def(), tol)