From 9fc12fde366d8f4c6541af47492ed9d8a0238cdb Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 17 Jul 2024 09:17:21 +0800 Subject: [PATCH 1/3] perf(python): specify tune-cpu --- .github/workflows/release-python.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release-python.yml b/.github/workflows/release-python.yml index 80a5e6fc3e63..1f527df34307 100644 --- a/.github/workflows/release-python.yml +++ b/.github/workflows/release-python.yml @@ -132,20 +132,22 @@ jobs: # IMPORTANT: All features enabled here should also be included in py-polars/polars/_cpu_check.py run: | if [[ "$IS_LTS_CPU" = true ]]; then + TUNE_CPU=x86-64-v2 FEATURES=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt - elif [[ "$IS_MACOS" = true ]]; then - FEATURES=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma,+pclmulqdq else + TUNE_CPU=skylake FEATURES=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+avx2,+fma,+bmi1,+bmi2,+lzcnt,+pclmulqdq fi echo "features=$FEATURES" >> $GITHUB_OUTPUT + echo "tune_cpu=$TUNE_CPU" >> $GITHUB_OUTPUT - name: Set RUSTFLAGS for x86-64 if: matrix.architecture == 'x86-64' env: FEATURES: ${{ steps.features.outputs.features }} + TUNE_CPU: ${{ steps.features.outputs.tune_cpu }} CFG: ${{ matrix.package == 'polars-lts-cpu' && '--cfg allocator="default"' || '' }} - run: echo "RUSTFLAGS=-C target-feature=${{ steps.features.outputs.features }} $CFG" >> $GITHUB_ENV + run: echo "RUSTFLAGS=-C target-feature=$FEATURES -Z tune-cpu=$TUNE_CPU $CFG" >> $GITHUB_ENV - name: Set variables in CPU check module run: | From d7f828c22ea1d89b365b8456dc364bfc5acf5f68 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 17 Jul 2024 09:28:13 +0800 Subject: [PATCH 2/3] perf(python): add feature +movbe --- .github/workflows/release-python.yml | 2 +- py-polars/polars/_cpu_check.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release-python.yml b/.github/workflows/release-python.yml index 1f527df34307..9bec31846c29 100644 --- a/.github/workflows/release-python.yml +++ b/.github/workflows/release-python.yml @@ -136,7 +136,7 @@ jobs: FEATURES=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt else TUNE_CPU=skylake - FEATURES=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+avx2,+fma,+bmi1,+bmi2,+lzcnt,+pclmulqdq + FEATURES=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+avx2,+fma,+bmi1,+bmi2,+lzcnt,+pclmulqdq,+movbe fi echo "features=$FEATURES" >> $GITHUB_OUTPUT echo "tune_cpu=$TUNE_CPU" >> $GITHUB_OUTPUT diff --git a/py-polars/polars/_cpu_check.py b/py-polars/polars/_cpu_check.py index a857653e4cac..192ca298ecbd 100644 --- a/py-polars/polars/_cpu_check.py +++ b/py-polars/polars/_cpu_check.py @@ -221,6 +221,7 @@ def _read_cpu_flags() -> dict[str, bool]: "fma": bool(cpuid1.ecx & (1 << 12)), "sse4.1": bool(cpuid1.ecx & (1 << 19)), "sse4.2": bool(cpuid1.ecx & (1 << 20)), + "movbe": bool(cpuid1.ecx & (1 << 22)), "popcnt": bool(cpuid1.ecx & (1 << 23)), "pclmulqdq": bool(cpuid1.ecx & (1 << 1)), "avx": bool(cpuid1.ecx & (1 << 28)), From 624a6b2575aba0e266d3a20407db4e7cd21a29e0 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 17 Jul 2024 09:33:45 +0800 Subject: [PATCH 3/3] perf(python): add feature +cmpxchg16b --- .github/workflows/release-python.yml | 4 ++-- py-polars/polars/_cpu_check.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release-python.yml b/.github/workflows/release-python.yml index 9bec31846c29..190708e859ad 100644 --- a/.github/workflows/release-python.yml +++ b/.github/workflows/release-python.yml @@ -133,10 +133,10 @@ jobs: run: | if [[ "$IS_LTS_CPU" = true ]]; then TUNE_CPU=x86-64-v2 - FEATURES=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt + FEATURES=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+cmpxchg16b else TUNE_CPU=skylake - FEATURES=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+avx2,+fma,+bmi1,+bmi2,+lzcnt,+pclmulqdq,+movbe + FEATURES=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+cmpxchg16b,+avx,+avx2,+fma,+bmi1,+bmi2,+lzcnt,+pclmulqdq,+movbe fi echo "features=$FEATURES" >> $GITHUB_OUTPUT echo "tune_cpu=$TUNE_CPU" >> $GITHUB_OUTPUT diff --git a/py-polars/polars/_cpu_check.py b/py-polars/polars/_cpu_check.py index 192ca298ecbd..c71029c303f7 100644 --- a/py-polars/polars/_cpu_check.py +++ b/py-polars/polars/_cpu_check.py @@ -219,6 +219,7 @@ def _read_cpu_flags() -> dict[str, bool]: "sse3": bool(cpuid1.ecx & (1 << 0)), "ssse3": bool(cpuid1.ecx & (1 << 9)), "fma": bool(cpuid1.ecx & (1 << 12)), + "cmpxchg16b": bool(cpuid1.ecx & (1 << 13)), "sse4.1": bool(cpuid1.ecx & (1 << 19)), "sse4.2": bool(cpuid1.ecx & (1 << 20)), "movbe": bool(cpuid1.ecx & (1 << 22)),