From 6bb9987dff205a592070b81407919210957822f2 Mon Sep 17 00:00:00 2001 From: Hajime Senuma Date: Wed, 18 Sep 2024 07:24:08 +0900 Subject: [PATCH] Improve basic hash by METH_FASTCALL (#87) --- .github/workflows/benchmark-base-hash.yml | 91 +++++++++++++++++ CHANGELOG.md | 11 ++- README.md | 11 ++- benchmark/benchmark.py | 17 +++- benchmark/plot_graph.py | 1 + src/mmh3/mmh3module.c | 115 ++++++++++++++++++++-- tests/test_invalid_inputs.py | 3 + 7 files changed, 228 insertions(+), 21 deletions(-) create mode 100644 .github/workflows/benchmark-base-hash.yml diff --git a/.github/workflows/benchmark-base-hash.yml b/.github/workflows/benchmark-base-hash.yml new file mode 100644 index 0000000..354ae12 --- /dev/null +++ b/.github/workflows/benchmark-base-hash.yml @@ -0,0 +1,91 @@ +--- +name: Benchmark Base Hash + +on: + workflow_dispatch: + +permissions: {} + +jobs: + benchmark: + permissions: + contents: read + packages: read + runs-on: ubuntu-22.04 + env: + BENCHMARK_MAX_SIZE: 65536 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install dependencies + run: | + pip install --upgrade pip + pip install . + pip install ".[benchmark]" + - name: Tune the system for benchmarking + run: | + echo "Running \"lscpu -a -e\"..." + lscpu -a -e + + echo -n "Checking randomize_va_space: " + cat /proc/sys/kernel/randomize_va_space + echo "randomize_va_space should be 2, meaning ASLR is fully enabled." + + systemctl status irqbalance + echo "Stopping irqbalance..." + sudo systemctl stop irqbalance + + echo -n "Checking default_smp_affinity: " + cat /proc/irq/default_smp_affinity + echo 3 | sudo tee /proc/irq/default_smp_affinity > /dev/null + echo -n "Updated default_smp_affinity to: " + cat /proc/irq/default_smp_affinity + + echo -n "Checking perf_event_max_sample_rate: " + cat /proc/sys/kernel/perf_event_max_sample_rate + echo 1 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate > /dev/null + echo -n "Updated perf_event_max_sample_rate to: " + cat /proc/sys/kernel/perf_event_max_sample_rate + - name: Benchmark hash functions + run: | + mkdir var + taskset -c 2,3 python benchmark/benchmark.py \ + -o var/mmh3_base_hash_500.json \ + --test-hash mmh3_base_hash \ + --test-buffer-size-max "$BENCHMARK_MAX_SIZE" + taskset -c 2,3 python benchmark/benchmark.py \ + -o var/mmh3_32_500.json \ + --test-hash mmh3_32 \ + --test-buffer-size-max "$BENCHMARK_MAX_SIZE" + pip uninstall -y mmh3 + pip install mmh3==4.1.0 + taskset -c 2,3 python benchmark/benchmark.py \ + -o var/mmh3_base_hash_410.json \ + --test-hash mmh3_base_hash \ + --test-buffer-size-max "$BENCHMARK_MAX_SIZE" + - name: Reset the system from benchmarking + run: | + echo -n "Checking perf_event_max_sample_rate: " + cat /proc/sys/kernel/perf_event_max_sample_rate + echo 100000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate > /dev/null + echo -n "Updated perf_event_max_sample_rate to: " + cat /proc/sys/kernel/perf_event_max_sample_rate + + echo -n "Checking default_smp_affinity: " + cat /proc/irq/default_smp_affinity + echo f | sudo tee /proc/irq/default_smp_affinity > /dev/null + echo -n "Updated default_smp_affinity to: " + cat /proc/irq/default_smp_affinity + + echo "Restarting irqbalance..." + sudo systemctl restart irqbalance + systemctl status irqbalance + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: benchmark-results + path: var diff --git a/CHANGELOG.md b/CHANGELOG.md index aacd218..81cfd4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,13 +15,16 @@ since version 3.0.0. ### Added - Add support for Python 3.13. +- Improve the performance of the `hash()` function with + [METH_FASTCALL](https://docs.python.org/3/c-api/structures.html#c.METH_FASTCALL), + reducing the overhead of function calls. For data sizes between 1–2 KB + (e.g., 48x48 favicons), performance is 10%–20% faster. For smaller data + (~500 bytes, like 16x16 favicons), performance increases by approximately 30%. - Add `digest` functions that support the new buffer protocol ([PEP 688](https://peps.python.org/pep-0688/)) as input ([#75](https://github.com/hajimes/mmh3/pull/75)). - These functions are implemented with - [METH_FASTCALL](https://docs.python.org/3/c-api/structures.html#c.METH_FASTCALL), - offering improved performance over legacy functions - ([#84](https://github.com/hajimes/mmh3/pull/84)). + These functions are implemented with `METH_FASTCALL` too, offering improved + performance ([#84](https://github.com/hajimes/mmh3/pull/84)). - Slightly improve the performance of the `hash_bytes()` function. - Add Read the Docs documentation ([#54](https://github.com/hajimes/mmh3/issues/54)). diff --git a/README.md b/README.md index d67e50f..3ff2f1f 100644 --- a/README.md +++ b/README.md @@ -136,13 +136,16 @@ complete changelog. #### Added - Add support for Python 3.13. +- Improve the performance of the `hash()` function with + [METH_FASTCALL](https://docs.python.org/3/c-api/structures.html#c.METH_FASTCALL), + reducing the overhead of function calls. For data sizes between 1–2 KB + (e.g., 48x48 favicons), performance is 10%–20% faster. For smaller data + (~500 bytes, like 16x16 favicons), performance increases by approximately 30%. - Add `digest` functions that support the new buffer protocol ([PEP 688](https://peps.python.org/pep-0688/)) as input ([#75](https://github.com/hajimes/mmh3/pull/75)). - These functions are implemented with - [METH_FASTCALL](https://docs.python.org/3/c-api/structures.html#c.METH_FASTCALL), - offering improved performance over legacy functions - ([#84](https://github.com/hajimes/mmh3/pull/84)). + These functions are implemented with `METH_FASTCALL` too, offering improved + performance ([#84](https://github.com/hajimes/mmh3/pull/84)). - Slightly improve the performance of the `hash_bytes()` function. - Add Read the Docs documentation ([#54](https://github.com/hajimes/mmh3/issues/54)). diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index f8d9087..dd94048 100644 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -223,9 +223,20 @@ def add_cmdline_args(cmd: list, args) -> None: cmd.extend(("--test-buffer-size-max", str(args.test_buffer_size_max))) +# "if hasattr" is used to check for the existence of the function in the +# module, to compare the performance of the current implementation with the +# old one (version 4.1.0), which does not implement the new functions. +# These conditions should be removed in the future. HASHES = { - "mmh3_32": mmh3.mmh3_32_digest, - "mmh3_128": mmh3.mmh3_x64_128_digest, + "mmh3_base_hash": mmh3.hash, + "mmh3_32": ( + mmh3.mmh3_32_digest if hasattr(mmh3, "mmh3_32_digest") else mmh3.hash_bytes + ), + "mmh3_128": ( + mmh3.mmh3_x64_128_digest + if hasattr(mmh3, "mmh3_x64_128_digest") + else mmh3.hash128 + ), "xxh_32": xxhash.xxh32_digest, "xxh_64": xxhash.xxh64_digest, "xxh3_64": xxhash.xxh3_64_digest, @@ -257,7 +268,7 @@ def add_cmdline_args(cmd: list, args) -> None: runner.argparser.add_argument( "--test-type", type=str, - help="Type of benchmarking to perform", + help="Type of benchmarking to perform (experimental)", choices=BENCHMARKING_TYPES.keys(), default="random", ) diff --git a/benchmark/plot_graph.py b/benchmark/plot_graph.py index bd02ef0..669735c 100644 --- a/benchmark/plot_graph.py +++ b/benchmark/plot_graph.py @@ -46,6 +46,7 @@ def ordered_intersection(list1: list[T], list2: list[T]) -> list[T]: DIGEST_SIZES = { + "mmh3_base_hash": mmh3.mmh3_32().digest_size, "mmh3_32": mmh3.mmh3_32().digest_size, "mmh3_128": mmh3.mmh3_x64_128().digest_size, "xxh_32": xxhash.xxh32().digest_size, diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index 5e0f20c..3efc6ea 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -45,6 +45,59 @@ typedef unsigned __int64 uint64_t; return -1; \ } +// obj: PyObject* +// target_str: const char * +// len: Py_ssize_t +#define MMH3_HASH_VALIDATE_AND_SET_BYTES(obj, target_str, len) \ + if (PyBytes_Check(obj)) { \ + target_str_len = PyBytes_Size(obj); \ + target_str = PyBytes_AS_STRING(obj); \ + } \ + else if (PyUnicode_Check(obj)) { \ + target_str_len = PyUnicode_GET_LENGTH(obj); \ + target_str = PyUnicode_AsUTF8AndSize(obj, &target_str_len); \ + } \ + else { \ + PyErr_Format(PyExc_TypeError, \ + "argument 1 must be read-only bytes-like object, " \ + "not '%s'", \ + Py_TYPE(obj)->tp_name); \ + return NULL; \ + } + +// obj: PyObject* +// seed: unsigned long +#define MMH3_HASH_VALIDATE_AND_SET_SEED(obj, seed) \ + if (!PyLong_Check(obj)) { \ + PyErr_Format(PyExc_TypeError, \ + "'%s' object cannot be interpreted as an integer", \ + Py_TYPE(obj)->tp_name); \ + return NULL; \ + } \ + seed = PyLong_AsUnsignedLong(obj); \ + if (seed == (unsigned long)-1 && PyErr_Occurred()) { \ + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { \ + PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ + return NULL; \ + } \ + } \ + if (seed > 0xFFFFFFFF) { \ + PyErr_SetString(PyExc_ValueError, "seed is out of range"); \ + return NULL; \ + } + +// nargs: Py_ssize_t +// name: const char * +// pos: int +#define MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, name, pos) \ + if (nargs >= pos) { \ + PyErr_Format(PyExc_TypeError, \ + "argument for function given by name " \ + "('%s') and position (%d)", \ + name, pos); \ + return NULL; \ + } + #define MMH3_VALIDATE_ARGS_AND_SET_SEED(nargs, args, seed) \ if (nargs < 1) { \ PyErr_SetString(PyExc_TypeError, \ @@ -102,33 +155,75 @@ PyDoc_STRVAR( ".. versionchanged:: 5.0.0\n" " The ``seed`` argument is now strictly checked for valid range.\n" " The type of the ``signed`` argument has been changed from\n" - " ``bool`` to ``Any``.\n"); + " ``bool`` to ``Any``. Performance improvements have been made.\n"); static PyObject * -mmh3_hash(PyObject *self, PyObject *args, PyObject *keywds) +mmh3_hash(PyObject *self, PyObject *const *args, Py_ssize_t nargs, + PyObject *kwnames) { const char *target_str; Py_ssize_t target_str_len; - long long seed = 0; + unsigned long seed = 0; int32_t result[1]; long long_result = 0; int is_signed = 1; - static char *kwlist[] = {"key", "seed", "signed", NULL}; - #ifndef _MSC_VER #if __LONG_WIDTH__ == 64 || defined(__APPLE__) static uint64_t mask[] = {0x0ffffffff, 0xffffffffffffffff}; #endif #endif - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|Lp", kwlist, - &target_str, &target_str_len, &seed, - &is_signed)) { + if ((nargs < 1) && kwnames == NULL) { + PyErr_SetString(PyExc_TypeError, + "function missing required argument 'key' (pos 1)"); return NULL; } - MMH3_VALIDATE_SEED_RETURN_NULL(seed); + if (nargs > 3) { + PyErr_Format(PyExc_TypeError, + "function takes at most 3 arguments (%d given)", + (int)nargs); + return NULL; + } + + if (nargs >= 1) { + MMH3_HASH_VALIDATE_AND_SET_BYTES(args[0], target_str, target_str_len); + } + + if (nargs >= 2) { + MMH3_HASH_VALIDATE_AND_SET_SEED(args[1], seed); + } + + if (nargs >= 3) { + is_signed = PyObject_IsTrue(args[2]); + } + + if (kwnames) { + for (Py_ssize_t i = 0; i < PyTuple_Size(kwnames); i++) { + const char *kwname = PyUnicode_AsUTF8(PyTuple_GetItem(kwnames, i)); + if (strcmp(kwname, "key") == 0) { + MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "key", 1); + MMH3_HASH_VALIDATE_AND_SET_BYTES(args[nargs + i], target_str, + target_str_len); + } + else if (strcmp(kwname, "seed") == 0) { + MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "seed", 2); + MMH3_HASH_VALIDATE_AND_SET_SEED(args[nargs + i], seed); + } + else if (strcmp(kwname, "signed") == 0) { + MMH3_HASH_VALIDATE_ARG_DUPLICATION(nargs, "signed", 3); + is_signed = PyObject_IsTrue(args[nargs + i]); + } + else { + PyErr_Format( + PyExc_TypeError, + "'%s' is an invalid keyword argument for this function", + kwname); + return NULL; + } + } + } murmurhash3_x86_32(target_str, target_str_len, (uint32_t)seed, result); @@ -986,7 +1081,7 @@ mmh3_mmh3_x86_128_utupledigest(PyObject *self, PyObject *const *args, // See // https://docs.python.org/3/extending/extending.html#keyword-parameters-for-extension-functions static PyMethodDef Mmh3Methods[] = { - {"hash", (PyCFunction)mmh3_hash, METH_VARARGS | METH_KEYWORDS, + {"hash", (PyCFunction)mmh3_hash, METH_FASTCALL | METH_KEYWORDS, mmh3_hash_doc}, {"hash_from_buffer", (PyCFunction)mmh3_hash_from_buffer, METH_VARARGS | METH_KEYWORDS, mmh3_hash_from_buffer_doc}, diff --git a/tests/test_invalid_inputs.py b/tests/test_invalid_inputs.py index e4d8bbf..4fd68c9 100644 --- a/tests/test_invalid_inputs.py +++ b/tests/test_invalid_inputs.py @@ -18,6 +18,9 @@ def test_hash_raises_typeerror() -> None: mmh3.hash(b"hello, world", seed="42") with pytest.raises(TypeError): mmh3.hash([1, 2, 3], 42) + # pylint: disable=redundant-keyword-arg + with pytest.raises(TypeError): + mmh3.hash(b"hello, world", key=b"42") @no_type_check