From c430d27c64bf32a5eb7b5465f9040396dc1ece61 Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Sun, 29 Dec 2024 11:13:45 +0800 Subject: [PATCH] Check incoming values in yajl2_c backend When events are sent to the yajl2_c backend, we assumed the value sent by the user was a tuple of the correct length. This is usually correct, but it's wrong to rely on it blindly. This commit adds the necessary checks on input values sent to the send() method of the *_basecoro generators of the yajl2_c backend. The python-based backends already perform these checks by using the target list assignment syntax (e.g., `a, b = c`) to receive any values sent to them. The checks in the yajl2_c backend are performed using a new ijson_unpack utility function, which mimics the behaviour of target list assignments, performing the same cardinality checks, and raising the same exceptions when things go wrong. Note that the Python C API doesn't offer an equivalent utility function, and the closest alternative (ensuring a PyTuple, then invoking PyArg_UnpackTuple) doesn't yield the same error messages and error types (TypeError v/s ValueError). With this behaviour in place, we can now reliably test across all backends that incorrect inputs raise certain exceptions. Signed-off-by: Rodrigo Tobar --- src/ijson/backends/ext/_yajl2/common.h | 3 ++ .../backends/ext/_yajl2/items_basecoro.c | 15 ++++++--- .../backends/ext/_yajl2/kvitems_basecoro.c | 15 ++++++--- src/ijson/backends/ext/_yajl2/module.c | 33 +++++++++++++++++++ .../backends/ext/_yajl2/parse_basecoro.c | 12 +++++-- tests/test_items.py | 15 +++++++++ tests/test_kvitems.py | 15 +++++++++ tests/test_parse.py | 16 +++++++++ 8 files changed, 113 insertions(+), 11 deletions(-) diff --git a/src/ijson/backends/ext/_yajl2/common.h b/src/ijson/backends/ext/_yajl2/common.h index dcd75d6..2f1977f 100644 --- a/src/ijson/backends/ext/_yajl2/common.h +++ b/src/ijson/backends/ext/_yajl2/common.h @@ -54,4 +54,7 @@ PyObject* ijson_return_self(PyObject *self); /* Common function used by empty methods in coroutines/generators */ PyObject* ijson_return_none(PyObject *self); +/* Unpacks an iterable into multiple values, returns 0 on success, -1 on error */ +int ijson_unpack(PyObject *o, Py_ssize_t expected, ...); + #endif /* COMMON_H */ \ No newline at end of file diff --git a/src/ijson/backends/ext/_yajl2/items_basecoro.c b/src/ijson/backends/ext/_yajl2/items_basecoro.c index 422d792..a557d96 100644 --- a/src/ijson/backends/ext/_yajl2/items_basecoro.c +++ b/src/ijson/backends/ext/_yajl2/items_basecoro.c @@ -76,10 +76,17 @@ PyObject* items_basecoro_send_impl(PyObject *self, PyObject *path, PyObject *eve static PyObject* items_basecoro_send(PyObject *self, PyObject *tuple) { - PyObject *path = PyTuple_GetItem(tuple, 0); - PyObject *event = PyTuple_GetItem(tuple, 1); - PyObject *value = PyTuple_GetItem(tuple, 2); - return items_basecoro_send_impl(self, path, event, value); + PyObject *path = NULL; + PyObject *event = NULL; + PyObject *value = NULL; + PyObject *result = NULL; + if(!ijson_unpack(tuple, 3, &path, &event, &value)) { + result = items_basecoro_send_impl(self, path, event, value); + } + Py_XDECREF(value); + Py_XDECREF(event); + Py_XDECREF(path); + return result; } static PyMethodDef items_basecoro_methods[] = { diff --git a/src/ijson/backends/ext/_yajl2/kvitems_basecoro.c b/src/ijson/backends/ext/_yajl2/kvitems_basecoro.c index 504d98b..e1cb96f 100644 --- a/src/ijson/backends/ext/_yajl2/kvitems_basecoro.c +++ b/src/ijson/backends/ext/_yajl2/kvitems_basecoro.c @@ -99,10 +99,17 @@ PyObject* kvitems_basecoro_send_impl(PyObject *self, PyObject *path, PyObject *e static PyObject* kvitems_basecoro_send(PyObject *self, PyObject *tuple) { - PyObject *path = PyTuple_GetItem(tuple, 0); - PyObject *event = PyTuple_GetItem(tuple, 1); - PyObject *value = PyTuple_GetItem(tuple, 2); - return kvitems_basecoro_send_impl(self, path, event, value); + PyObject *path = NULL; + PyObject *event = NULL; + PyObject *value = NULL; + PyObject *result = NULL; + if(!ijson_unpack(tuple, 3, &path, &event, &value)) { + result = kvitems_basecoro_send_impl(self, path, event, value); + } + Py_XDECREF(value); + Py_XDECREF(event); + Py_XDECREF(path); + return result; } static PyMethodDef kvitems_basecoro_methods[] = { diff --git a/src/ijson/backends/ext/_yajl2/module.c b/src/ijson/backends/ext/_yajl2/module.c index 6e71ccc..02f3216 100644 --- a/src/ijson/backends/ext/_yajl2/module.c +++ b/src/ijson/backends/ext/_yajl2/module.c @@ -8,6 +8,7 @@ * Copyright by UWA (in the framework of the ICRAR) */ #include +#include #include "common.h" #include "async_reading_generator.h" @@ -54,6 +55,38 @@ PyObject* ijson_return_none(PyObject *self) Py_RETURN_NONE; } +int ijson_unpack(PyObject *o, Py_ssize_t expected, ...) +{ + va_list args; + va_start(args, expected); + PyObject *iter = PyObject_GetIter(o); + if (!iter) { + PyErr_Format(PyExc_TypeError, "cannot unpack non-iterable %s object", Py_TYPE(o)->tp_name); + return -1; + } + Py_ssize_t count = 0; + for (PyObject *o; (o = PyIter_Next(iter)); count++) { + if (count >= expected) { + continue; + } + PyObject **target = va_arg(args, PyObject **); + *target = o; + } + Py_DECREF(iter); + if (PyErr_Occurred()) { + return -1; + } + if (count > expected) { + PyErr_Format(PyExc_ValueError, "too many values to unpack (excepted %d, got %zd)", expected, count); + return -1; + } + else if (count < expected) { + PyErr_Format(PyExc_ValueError, "not enough values to unpack (excepted %d, got %zd)", expected, count); + return -1; + } + return 0; +} + /* Module initialization */ /* Support for Python 2/3 */ diff --git a/src/ijson/backends/ext/_yajl2/parse_basecoro.c b/src/ijson/backends/ext/_yajl2/parse_basecoro.c index bd3bcf7..ba573cd 100644 --- a/src/ijson/backends/ext/_yajl2/parse_basecoro.c +++ b/src/ijson/backends/ext/_yajl2/parse_basecoro.c @@ -127,9 +127,15 @@ PyObject* parse_basecoro_send_impl(PyObject *self, PyObject *event, PyObject *va static PyObject* parse_basecoro_send(PyObject *self, PyObject *tuple) { - PyObject *event = PyTuple_GetItem(tuple, 0); - PyObject *value = PyTuple_GetItem(tuple, 1); - return parse_basecoro_send_impl(self, event, value); + PyObject *event = NULL; + PyObject *value = NULL; + PyObject *result = NULL; + if(!ijson_unpack(tuple, 2, &event, &value)) { + result = parse_basecoro_send_impl(self, event, value); + } + Py_XDECREF(value); + Py_XDECREF(event); + return result; } static PyMethodDef parse_basecoro_methods[] = { diff --git a/tests/test_items.py b/tests/test_items.py index 76e43a5..b4de142 100644 --- a/tests/test_items.py +++ b/tests/test_items.py @@ -67,3 +67,18 @@ def test_multiple_values(adaptor): adaptor.items(multiple_json, "", multiple_values=False) result = adaptor.items(multiple_json, "", multiple_values=True) assert [JSON_OBJECT, JSON_OBJECT, JSON_OBJECT] == result + + +def test_coro_needs_input_with_three_elements(backend): + int_element_parse_events = list(backend.parse(b'0')) + # all good + assert [0] == list(backend.items(int_element_parse_events, '')) + # one more element in event + with pytest.raises(ValueError, match="too many values"): + next(backend.items((event + ('extra dummy',) for event in int_element_parse_events), '')) + # one less + with pytest.raises(ValueError, match="not enough values"): + next(backend.items((event[:-1] for event in int_element_parse_events), '')) + # not an iterable + with pytest.raises(TypeError, match="cannot unpack"): + next(backend.items([None], '')) diff --git a/tests/test_kvitems.py b/tests/test_kvitems.py index 564b33b..4e89e70 100644 --- a/tests/test_kvitems.py +++ b/tests/test_kvitems.py @@ -43,3 +43,18 @@ def test_kvitems_array(adaptor): ]) def test_kvitems_empty_member(adaptor, test_case): assert test_case.kvitems == adaptor.kvitems(test_case.json, test_case.prefix) + + +def test_coro_needs_input_with_three_elements(backend): + int_element_parse_events = list(backend.parse(b'{"a": 0}')) + # all good + assert [('a', 0)] == list(backend.kvitems(int_element_parse_events, '')) + # one more element in event + with pytest.raises(ValueError, match="too many values"): + next(backend.kvitems((event + ('extra dummy',) for event in int_element_parse_events), '')) + # one less + with pytest.raises(ValueError, match="not enough values"): + next(backend.kvitems((event[:-1] for event in int_element_parse_events), '')) + # not an iterable + with pytest.raises(TypeError, match="cannot unpack"): + next(backend.kvitems([None], '')) diff --git a/tests/test_parse.py b/tests/test_parse.py index 373a6cf..61cae3b 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1,5 +1,7 @@ """Tests for the ijson.parse method""" +import pytest + from .test_base import ARRAY_JSON, ARRAY_JSON_PARSE_EVENTS, JSON, JSON_PARSE_EVENTS def test_parse(adaptor): @@ -7,3 +9,17 @@ def test_parse(adaptor): def test_parse_array(adaptor): assert ARRAY_JSON_PARSE_EVENTS == adaptor.parse(ARRAY_JSON) + +def test_coro_needs_input_with_two_elements(backend): + int_element_basic_parse_events = list(backend.basic_parse(b'0', use_float=True)) + # all good + assert [('', 'number', 0)] == list(backend.parse(int_element_basic_parse_events)) + # one more element in event + with pytest.raises(ValueError, match="too many values"): + next(backend.parse(event + ('extra dummy',) for event in int_element_basic_parse_events)) + # one less + with pytest.raises(ValueError, match="not enough values"): + next(backend.parse(event[:-1] for event in int_element_basic_parse_events)) + # not an iterable + with pytest.raises(TypeError, match="cannot unpack"): + next(backend.parse([None]))