diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst
index a6ae8fdaa4991c..c897ec9e47b7ca 100644
--- a/Doc/library/pyexpat.rst
+++ b/Doc/library/pyexpat.rst
@@ -196,6 +196,37 @@ XMLParser Objects
:exc:`ExpatError` to be raised with the :attr:`code` attribute set to
``errors.codes[errors.XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING]``.
+.. method:: xmlparser.SetReparseDeferralEnabled(enabled)
+
+ .. warning::
+
+ Calling ``SetReparseDeferralEnabled(False)`` has security implications,
+ as detailed below; please make sure to understand these consequences
+ prior to using the ``SetReparseDeferralEnabled`` method.
+
+ Expat 2.6.0 introduced a security mechanism called "reparse deferral"
+ where instead of causing denial of service through quadratic runtime
+ from reparsing large tokens, reparsing of unfinished tokens is now delayed
+ by default until a sufficient amount of input is reached.
+ Due to this delay, registered handlers may — depending on the sizing of
+ input chunks pushed to Expat — no longer be called right after pushing new
+ input to the parser. Where immediate feedback and taking over responsibility
+ of protecting against denial of service from large tokens are both wanted,
+ calling ``SetReparseDeferralEnabled(False)`` disables reparse deferral
+ for the current Expat parser instance, temporarily or altogether.
+ Calling ``SetReparseDeferralEnabled(True)`` allows re-enabling reparse
+ deferral.
+
+ .. versionadded:: 3.13
+
+.. method:: xmlparser.GetReparseDeferralEnabled()
+
+ Returns whether reparse deferral is currently enabled for the given
+ Expat parser instance.
+
+ .. versionadded:: 3.13
+
+
:class:`xmlparser` objects have the following attributes:
diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
index 75a7915c15240d..19c7af452e2b71 100644
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -166,6 +166,11 @@ data but would still like to have incremental parsing capabilities, take a look
at :func:`iterparse`. It can be useful when you're reading a large XML document
and don't want to hold it wholly in memory.
+Where *immediate* feedback through events is wanted, calling method
+:meth:`XMLPullParser.flush` can help reduce delay;
+please make sure to study the related security notes.
+
+
Finding interesting elements
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1387,6 +1392,19 @@ XMLParser Objects
Feeds data to the parser. *data* is encoded data.
+
+ .. method:: flush()
+
+ Triggers parsing of any previously fed unparsed data, which can be
+ used to ensure more immediate feedback, in particular with Expat >=2.6.0.
+ The implementation of :meth:`flush` temporarily disables reparse deferral
+ with Expat (if currently enabled) and triggers a reparse.
+ Disabling reparse deferral has security consequences; please see
+ :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` for details.
+
+ .. versionadded:: 3.13
+
+
:meth:`XMLParser.feed` calls *target*\'s ``start(tag, attrs_dict)`` method
for each opening tag, its ``end(tag)`` method for each closing tag, and data
is processed by method ``data(data)``. For further supported callback
@@ -1448,6 +1466,17 @@ XMLPullParser Objects
Feed the given bytes data to the parser.
+ .. method:: flush()
+
+ Triggers parsing of any previously fed unparsed data, which can be
+ used to ensure more immediate feedback, in particular with Expat >=2.6.0.
+ The implementation of :meth:`flush` temporarily disables reparse deferral
+ with Expat (if currently enabled) and triggers a reparse.
+ Disabling reparse deferral has security consequences; please see
+ :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` for details.
+
+ .. versionadded:: 3.13
+
.. method:: close()
Signal the parser that the data stream is terminated. Unlike
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 3a277d7ce1585f..d08c63e7b2c2c5 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -174,6 +174,17 @@ Other Language Changes
(Contributed by Victor Stinner in :gh:`114570`.)
+* Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425)
+ by adding five new methods:
+
+ * :meth:`xml.etree.ElementTree.XMLParser.flush`
+ * :meth:`xml.etree.ElementTree.XMLPullParser.flush`
+ * :meth:`xml.parsers.expat.xmlparser.GetReparseDeferralEnabled`
+ * :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled`
+ * :meth:`!xml.sax.expatreader.ExpatParser.flush`
+
+ (Contributed by Sebastian Pipping in :gh:`115623`.)
+
New Modules
===========
diff --git a/Include/pyexpat.h b/Include/pyexpat.h
index 07020b5dc964cb..9824d099c3df7d 100644
--- a/Include/pyexpat.h
+++ b/Include/pyexpat.h
@@ -48,8 +48,10 @@ struct PyExpat_CAPI
enum XML_Status (*SetEncoding)(XML_Parser parser, const XML_Char *encoding);
int (*DefaultUnknownEncodingHandler)(
void *encodingHandlerData, const XML_Char *name, XML_Encoding *info);
- /* might be none for expat < 2.1.0 */
+ /* might be NULL for expat < 2.1.0 */
int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt);
+ /* might be NULL for expat < 2.6.0 */
+ XML_Bool (*SetReparseDeferralEnabled)(XML_Parser parser, XML_Bool enabled);
/* always add new stuff to the end! */
};
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index d941a1a8f9ebc6..1d56ccd71cf962 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -755,5 +755,59 @@ def resolve_entity(context, base, system_id, public_id):
self.assertEqual(handler_call_args, [("bar", "baz")])
+class ReparseDeferralTest(unittest.TestCase):
+    # Covers the pyexpat-level getter/setter pair and the observable effect
+    # of reparse deferral on when StartElementHandler fires.
+
+    def test_getter_setter_round_trip(self):
+        parser = expat.ParserCreate()
+        enabled = (expat.version_info >= (2, 6, 0))
+
+        self.assertIs(parser.GetReparseDeferralEnabled(), enabled)
+        parser.SetReparseDeferralEnabled(False)
+        self.assertIs(parser.GetReparseDeferralEnabled(), False)
+        parser.SetReparseDeferralEnabled(True)
+        self.assertIs(parser.GetReparseDeferralEnabled(), enabled)
+
+    def test_reparse_deferral_enabled(self):
+        if expat.version_info < (2, 6, 0):
+            self.skipTest(f'Expat {expat.version_info} does not '
+                          'support reparse deferral')
+
+        started = []
+
+        def start_element(name, _):
+            started.append(name)
+
+        parser = expat.ParserCreate()
+        parser.StartElementHandler = start_element
+        self.assertTrue(parser.GetReparseDeferralEnabled())
+
+        # One element split across two chunks: the first chunk ends inside
+        # an unfinished token, which is what reparse deferral acts on.
+        for chunk in (b'<doc', b'/>'):
+            parser.Parse(chunk, False)
+
+        # The key test: Have handlers already fired? Expecting: no.
+        self.assertEqual(started, [])
+
+        parser.Parse(b'', True)
+
+        self.assertEqual(started, ['doc'])
+
+    def test_reparse_deferral_disabled(self):
+        started = []
+
+        def start_element(name, _):
+            started.append(name)
+
+        parser = expat.ParserCreate()
+        parser.StartElementHandler = start_element
+        if expat.version_info >= (2, 6, 0):
+            parser.SetReparseDeferralEnabled(False)
+        self.assertFalse(parser.GetReparseDeferralEnabled())
+
+        # Same split element as in the enabled test above.
+        for chunk in (b'<doc', b'/>'):
+            parser.Parse(chunk, False)
+
+        # The key test: Have handlers already fired? Expecting: yes.
+        self.assertEqual(started, ['doc'])
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
index eda4e6a46df437..97e96668f85c8a 100644
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -19,6 +19,7 @@
from io import BytesIO, StringIO
import codecs
import os.path
+import pyexpat
import shutil
import sys
from urllib.error import URLError
@@ -1214,6 +1215,56 @@ def test_expat_incremental_reset(self):
self.assertEqual(result.getvalue(), start + b"text")
+    def test_flush_reparse_deferral_enabled(self):
+        if pyexpat.version_info < (2, 6, 0):
+            self.skipTest(f'Expat {pyexpat.version_info} does not support reparse deferral')
+
+        result = BytesIO()
+        xmlgen = XMLGenerator(result)
+        parser = create_parser()
+        parser.setContentHandler(xmlgen)
+
+        # Start tag split across two feeds, so its handler is held back
+        # until flush() is called.
+        for chunk in ("<doc", ">"):
+            parser.feed(chunk)
+
+        self.assertEqual(result.getvalue(), start) # i.e. no elements started
+        self.assertTrue(parser._parser.GetReparseDeferralEnabled())
+
+        parser.flush()
+
+        # flush() must leave the deferral setting as it found it.
+        self.assertTrue(parser._parser.GetReparseDeferralEnabled())
+        self.assertEqual(result.getvalue(), start + b"<doc>")
+
+        parser.feed("</doc>")
+        parser.close()
+
+        self.assertEqual(result.getvalue(), start + b"<doc></doc>")
+
+    def test_flush_reparse_deferral_disabled(self):
+        result = BytesIO()
+        xmlgen = XMLGenerator(result)
+        parser = create_parser()
+        parser.setContentHandler(xmlgen)
+
+        # Start tag split across two feeds.
+        for chunk in ("<doc", ">"):
+            parser.feed(chunk)
+
+        if pyexpat.version_info >= (2, 6, 0):
+            parser._parser.SetReparseDeferralEnabled(False)
+
+        self.assertEqual(result.getvalue(), start) # i.e. no elements started
+        self.assertFalse(parser._parser.GetReparseDeferralEnabled())
+
+        parser.flush()
+
+        # flush() must not switch deferral back on behind the caller's back.
+        self.assertFalse(parser._parser.GetReparseDeferralEnabled())
+        self.assertEqual(result.getvalue(), start + b"<doc>")
+
+        parser.feed("</doc>")
+        parser.close()
+
+        self.assertEqual(result.getvalue(), start + b"<doc></doc>")
+
# ===== Locator support
def test_expat_locator_noinfo(self):
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index c535d631bb646f..14df482ba6c207 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -121,10 +121,6 @@
"""
-fails_with_expat_2_6_0 = (unittest.expectedFailure
- if pyexpat.version_info >= (2, 6, 0) else
- lambda test: test)
-
def checkwarnings(*filters, quiet=False):
def decorator(test):
def newtest(*args, **kwargs):
@@ -1462,12 +1458,14 @@ def test_attlist_default(self):
class XMLPullParserTest(unittest.TestCase):
- def _feed(self, parser, data, chunk_size=None):
+ def _feed(self, parser, data, chunk_size=None, flush=False):
if chunk_size is None:
parser.feed(data)
else:
for i in range(0, len(data), chunk_size):
parser.feed(data[i:i+chunk_size])
+ if flush:
+ parser.flush()
def assert_events(self, parser, expected, max_events=None):
self.assertEqual(
@@ -1485,34 +1483,32 @@ def assert_event_tags(self, parser, expected, max_events=None):
self.assertEqual([(action, elem.tag) for action, elem in events],
expected)
- def test_simple_xml(self, chunk_size=None):
+ def test_simple_xml(self, chunk_size=None, flush=False):
parser = ET.XMLPullParser()
self.assert_event_tags(parser, [])
- self._feed(parser, "\n", chunk_size)
+ self._feed(parser, "\n", chunk_size, flush)
self.assert_event_tags(parser, [])
self._feed(parser,
"\n text\n", chunk_size)
+ self._feed(parser, ">\n", chunk_size, flush)
self.assert_event_tags(parser, [('end', 'element')])
- self._feed(parser, "texttail\n", chunk_size)
- self._feed(parser, "\n", chunk_size)
+ self._feed(parser, "texttail\n", chunk_size, flush)
+ self._feed(parser, "\n", chunk_size, flush)
self.assert_event_tags(parser, [
('end', 'element'),
('end', 'empty-element'),
])
- self._feed(parser, "\n", chunk_size)
+ self._feed(parser, "\n", chunk_size, flush)
self.assert_event_tags(parser, [('end', 'root')])
self.assertIsNone(parser.close())
- @fails_with_expat_2_6_0
def test_simple_xml_chunk_1(self):
- self.test_simple_xml(chunk_size=1)
+ self.test_simple_xml(chunk_size=1, flush=True)
- @fails_with_expat_2_6_0
def test_simple_xml_chunk_5(self):
- self.test_simple_xml(chunk_size=5)
+ self.test_simple_xml(chunk_size=5, flush=True)
def test_simple_xml_chunk_22(self):
self.test_simple_xml(chunk_size=22)
@@ -1711,6 +1707,57 @@ def test_unknown_event(self):
with self.assertRaises(ValueError):
ET.XMLPullParser(events=('start', 'end', 'bogus'))
+    def test_flush_reparse_deferral_enabled(self):
+        if pyexpat.version_info < (2, 6, 0):
+            self.skipTest(f'Expat {pyexpat.version_info} does not '
+                          'support reparse deferral')
+
+        parser = ET.XMLPullParser(events=('start', 'end'))
+
+        # Start tag split across two feeds, so no event is produced yet.
+        for chunk in ("<doc", ">"):
+            parser.feed(chunk)
+
+        self.assert_event_tags(parser, []) # i.e. no elements started
+        # Only the pure-Python implementation exposes the pyexpat parser.
+        if ET is pyET:
+            self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled())
+
+        parser.flush()
+
+        self.assert_event_tags(parser, [('start', 'doc')])
+        if ET is pyET:
+            self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled())
+
+        parser.feed("</doc>")
+        parser.close()
+
+        self.assert_event_tags(parser, [('end', 'doc')])
+
+    def test_flush_reparse_deferral_disabled(self):
+        parser = ET.XMLPullParser(events=('start', 'end'))
+
+        # Start tag split across two feeds.
+        for chunk in ("<doc", ">"):
+            parser.feed(chunk)
+
+        if pyexpat.version_info >= (2, 6, 0):
+            # Deferral can only be switched off through the pyexpat parser,
+            # which is reachable from the pure-Python implementation only.
+            if ET is not pyET:
+                self.skipTest('XMLParser.(Get|Set)ReparseDeferralEnabled '
+                              'methods not available in C')
+            parser._parser._parser.SetReparseDeferralEnabled(False)
+
+        self.assert_event_tags(parser, []) # i.e. no elements started
+        if ET is pyET:
+            self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled())
+
+        parser.flush()
+
+        self.assert_event_tags(parser, [('start', 'doc')])
+        if ET is pyET:
+            self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled())
+
+        parser.feed("</doc>")
+        parser.close()
+
+        self.assert_event_tags(parser, [('end', 'doc')])
#
# xinclude tests (samples from appendix C of the xinclude specification)
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index a37fead41b750e..9e15d34d22aa6c 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1320,6 +1320,11 @@ def read_events(self):
else:
yield event
+ def flush(self):
+ # Delegates to the wrapped parser's flush(); raises ValueError once the
+ # wrapped parser reference has been cleared (self._parser is None).
+ if self._parser is None:
+ raise ValueError("flush() called after end of stream")
+ self._parser.flush()
+
def XML(text, parser=None):
"""Parse XML document from string constant.
@@ -1726,6 +1731,15 @@ def close(self):
del self.parser, self._parser
del self.target, self._target
+ def flush(self):
+ # Remember the current deferral setting so it can be restored below.
+ was_enabled = self.parser.GetReparseDeferralEnabled()
+ try:
+ # With deferral off, parsing an empty non-final chunk makes Expat
+ # process the input it has already been given.
+ self.parser.SetReparseDeferralEnabled(False)
+ self.parser.Parse(b"", False)
+ except self._error as v:
+ self._raiseerror(v)
+ finally:
+ # Runs on success and on error alike: put the setting back.
+ self.parser.SetReparseDeferralEnabled(was_enabled)
# --------------------------------------------------------------------
# C14N 2.0
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
index b9ad52692db8dd..ba3c1e98517429 100644
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -214,6 +214,20 @@ def feed(self, data, isFinal=False):
# FIXME: when to invoke error()?
self._err_handler.fatalError(exc)
+ def flush(self):
+ # Does nothing when there is no underlying Expat parser.
+ if self._parser is None:
+ return
+
+ # Remember the current deferral setting so it can be restored below.
+ was_enabled = self._parser.GetReparseDeferralEnabled()
+ try:
+ self._parser.SetReparseDeferralEnabled(False)
+ self._parser.Parse(b"", False)
+ except expat.error as e:
+ # Expat errors are wrapped and handed to the error handler's
+ # fatalError().
+ exc = SAXParseException(expat.ErrorString(e.code), e, self)
+ self._err_handler.fatalError(exc)
+ finally:
+ # Runs on success and on error alike: put the setting back.
+ self._parser.SetReparseDeferralEnabled(was_enabled)
+
def _close_source(self):
source = self._source
try:
diff --git a/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst b/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst
new file mode 100644
index 00000000000000..97b23936928d91
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst
@@ -0,0 +1,8 @@
+Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) by adding
+five new methods:
+
+* ``xml.etree.ElementTree.XMLParser.flush``
+* ``xml.etree.ElementTree.XMLPullParser.flush``
+* ``xml.parsers.expat.xmlparser.GetReparseDeferralEnabled``
+* ``xml.parsers.expat.xmlparser.SetReparseDeferralEnabled``
+* ``xml.sax.expatreader.ExpatParser.flush``
diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json
index e28eaea81d6aae..27e6742292ac6d 100644
--- a/Misc/sbom.spdx.json
+++ b/Misc/sbom.spdx.json
@@ -132,11 +132,11 @@
"checksums": [
{
"algorithm": "SHA1",
- "checksumValue": "baa44fe4581895d42e8d5e83d8ce6a69b1c34dbe"
+ "checksumValue": "f50c899172acd93fc539007bfb43315b83d407e4"
},
{
"algorithm": "SHA256",
- "checksumValue": "33a7b9ac8bf4571e23272cdf644c6f9808bd44c66b149e3c41ab3870d1888609"
+ "checksumValue": "d571b8258cfaa067a20adef553e5fcedd6671ca4a8841483496de031bd904567"
}
],
"fileName": "Modules/expat/pyexpatns.h"
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index 54451081211654..edd2f88a4881c3 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -3894,6 +3894,40 @@ _elementtree_XMLParser_close_impl(XMLParserObject *self)
}
}
+/*[clinic input]
+_elementtree.XMLParser.flush
+
+[clinic start generated code]*/
+
+static PyObject *
+_elementtree_XMLParser_flush_impl(XMLParserObject *self)
+/*[clinic end generated code: output=42fdb8795ca24509 input=effbecdb28715949]*/
+{
+ if (!_check_xmlparser(self)) {
+ return NULL;
+ }
+
+ elementtreestate *st = self->state;
+
+ // The capsule entry may be NULL for expat < 2.6.0; in that case there is
+ // no reparse deferral to work around and flush() returns None right away.
+ if (EXPAT(st, SetReparseDeferralEnabled) == NULL) {
+ Py_RETURN_NONE;
+ }
+
+ // NOTE: The Expat parser in the C implementation of ElementTree is not
+ // exposed to the outside; as a result we know that reparse deferral
+ // is currently enabled, or we would not even have access to function
+ // XML_SetReparseDeferralEnabled in the first place (which we checked
+ // for, a few lines up).
+
+ EXPAT(st, SetReparseDeferralEnabled)(self->parser, XML_FALSE);
+
+ // Empty input (length 0) while deferral is off.
+ PyObject *res = expat_parse(st, self, "", 0, XML_FALSE);
+
+ // Deferral is switched back on before returning, whatever res holds.
+ EXPAT(st, SetReparseDeferralEnabled)(self->parser, XML_TRUE);
+
+ return res;
+}
+
/*[clinic input]
_elementtree.XMLParser.feed
@@ -4288,6 +4322,7 @@ static PyType_Spec treebuilder_spec = {
static PyMethodDef xmlparser_methods[] = {
_ELEMENTTREE_XMLPARSER_FEED_METHODDEF
_ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
+ _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF
_ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
_ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
{NULL, NULL}
diff --git a/Modules/clinic/_elementtree.c.h b/Modules/clinic/_elementtree.c.h
index 9622591a1aa855..10b2dd1c15f7fd 100644
--- a/Modules/clinic/_elementtree.c.h
+++ b/Modules/clinic/_elementtree.c.h
@@ -1169,6 +1169,23 @@ _elementtree_XMLParser_close(XMLParserObject *self, PyObject *Py_UNUSED(ignored)
return _elementtree_XMLParser_close_impl(self);
}
+PyDoc_STRVAR(_elementtree_XMLParser_flush__doc__,
+"flush($self, /)\n"
+"--\n"
+"\n");
+
+#define _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF \
+ {"flush", (PyCFunction)_elementtree_XMLParser_flush, METH_NOARGS, _elementtree_XMLParser_flush__doc__},
+
+static PyObject *
+_elementtree_XMLParser_flush_impl(XMLParserObject *self);
+
+static PyObject *
+_elementtree_XMLParser_flush(XMLParserObject *self, PyObject *Py_UNUSED(ignored))
+{
+ return _elementtree_XMLParser_flush_impl(self);
+}
+
PyDoc_STRVAR(_elementtree_XMLParser_feed__doc__,
"feed($self, data, /)\n"
"--\n"
@@ -1219,4 +1236,4 @@ _elementtree_XMLParser__setevents(XMLParserObject *self, PyObject *const *args,
exit:
return return_value;
}
-/*[clinic end generated code: output=218ec9e6a889f796 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=aed9f53eeb0404e0 input=a9049054013a1b77]*/
diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h
index a5b93e68598204..343cb91b975038 100644
--- a/Modules/clinic/pyexpat.c.h
+++ b/Modules/clinic/pyexpat.c.h
@@ -8,6 +8,53 @@ preserve
#endif
#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
+PyDoc_STRVAR(pyexpat_xmlparser_SetReparseDeferralEnabled__doc__,
+"SetReparseDeferralEnabled($self, enabled, /)\n"
+"--\n"
+"\n"
+"Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0.");
+
+#define PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF \
+ {"SetReparseDeferralEnabled", (PyCFunction)pyexpat_xmlparser_SetReparseDeferralEnabled, METH_O, pyexpat_xmlparser_SetReparseDeferralEnabled__doc__},
+
+static PyObject *
+pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self,
+ int enabled);
+
+static PyObject *
+pyexpat_xmlparser_SetReparseDeferralEnabled(xmlparseobject *self, PyObject *arg)
+{
+ PyObject *return_value = NULL;
+ int enabled;
+
+ enabled = PyObject_IsTrue(arg);
+ if (enabled < 0) {
+ goto exit;
+ }
+ return_value = pyexpat_xmlparser_SetReparseDeferralEnabled_impl(self, enabled);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(pyexpat_xmlparser_GetReparseDeferralEnabled__doc__,
+"GetReparseDeferralEnabled($self, /)\n"
+"--\n"
+"\n"
+"Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0.");
+
+#define PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF \
+ {"GetReparseDeferralEnabled", (PyCFunction)pyexpat_xmlparser_GetReparseDeferralEnabled, METH_NOARGS, pyexpat_xmlparser_GetReparseDeferralEnabled__doc__},
+
+static PyObject *
+pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self);
+
+static PyObject *
+pyexpat_xmlparser_GetReparseDeferralEnabled(xmlparseobject *self, PyObject *Py_UNUSED(ignored))
+{
+ return pyexpat_xmlparser_GetReparseDeferralEnabled_impl(self);
+}
+
PyDoc_STRVAR(pyexpat_xmlparser_Parse__doc__,
"Parse($self, data, isfinal=False, /)\n"
"--\n"
@@ -498,4 +545,4 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg)
#ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */
-/*[clinic end generated code: output=48c4296e43777df4 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=892e48e41f9b6e4b input=a9049054013a1b77]*/
diff --git a/Modules/expat/pyexpatns.h b/Modules/expat/pyexpatns.h
index d45d9b6c457159..8ee03ef0792815 100644
--- a/Modules/expat/pyexpatns.h
+++ b/Modules/expat/pyexpatns.h
@@ -108,6 +108,7 @@
#define XML_SetNotStandaloneHandler PyExpat_XML_SetNotStandaloneHandler
#define XML_SetParamEntityParsing PyExpat_XML_SetParamEntityParsing
#define XML_SetProcessingInstructionHandler PyExpat_XML_SetProcessingInstructionHandler
+#define XML_SetReparseDeferralEnabled PyExpat_XML_SetReparseDeferralEnabled
#define XML_SetReturnNSTriplet PyExpat_XML_SetReturnNSTriplet
#define XML_SetSkippedEntityHandler PyExpat_XML_SetSkippedEntityHandler
#define XML_SetStartCdataSectionHandler PyExpat_XML_SetStartCdataSectionHandler
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 62cd262a7885e9..f04f96bc2f7601 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -7,6 +7,7 @@
#include "pycore_pyhash.h" // _Py_HashSecret
#include "pycore_traceback.h" // _PyTraceback_Add()
+#include <stdbool.h>
#include <stddef.h> // offsetof()
#include "expat.h"
#include "pyexpat.h"
@@ -81,6 +82,12 @@ typedef struct {
/* NULL if not enabled */
int buffer_size; /* Size of buffer, in XML_Char units */
int buffer_used; /* Buffer units in use */
+ bool reparse_deferral_enabled; /* Whether to defer reparsing of
+ unfinished XML tokens; a de-facto cache of
+ what Expat has the authority on, for lack
+ of a getter API function
+ "XML_GetReparseDeferralEnabled" in Expat
+ 2.6.0 */
PyObject *intern; /* Dictionary to intern strings */
PyObject **handlers;
} xmlparseobject;
@@ -703,6 +710,40 @@ get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
#define MAX_CHUNK_SIZE (1 << 20)
+/*[clinic input]
+pyexpat.xmlparser.SetReparseDeferralEnabled
+
+ enabled: bool
+ /
+
+Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0.
+[clinic start generated code]*/
+
+static PyObject *
+pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self,
+ int enabled)
+/*[clinic end generated code: output=5ec539e3b63c8c49 input=021eb9e0bafc32c5]*/
+{
+ /* When compiled against Expat < 2.6.0 nothing below is built and the call
+ only returns None. */
+#if XML_COMBINED_VERSION >= 20600
+ XML_SetReparseDeferralEnabled(self->itself, enabled ? XML_TRUE : XML_FALSE);
+ /* Keep the cached flag on the parser object in step with Expat. */
+ self->reparse_deferral_enabled = (bool)enabled;
+#endif
+ Py_RETURN_NONE;
+}
+
+/*[clinic input]
+pyexpat.xmlparser.GetReparseDeferralEnabled
+
+Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0.
+[clinic start generated code]*/
+
+static PyObject *
+pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self)
+/*[clinic end generated code: output=4e91312e88a595a8 input=54b5f11d32b20f3e]*/
+{
+ /* Returns the flag stored on the parser object as a Python bool; Expat
+ itself is not queried here. */
+ return PyBool_FromLong(self->reparse_deferral_enabled);
+}
+
/*[clinic input]
pyexpat.xmlparser.Parse
@@ -1063,6 +1104,8 @@ static struct PyMethodDef xmlparse_methods[] = {
#if XML_COMBINED_VERSION >= 19505
PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif
+ PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF
+ PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF
{NULL, NULL} /* sentinel */
};
@@ -1158,6 +1201,11 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
self->ns_prefixes = 0;
self->handlers = NULL;
self->intern = Py_XNewRef(intern);
+#if XML_COMBINED_VERSION >= 20600
+ self->reparse_deferral_enabled = true;
+#else
+ self->reparse_deferral_enabled = false;
+#endif
/* namespace_separator is either NULL or contains one char + \0 */
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
@@ -2019,6 +2067,11 @@ pyexpat_exec(PyObject *mod)
#else
capi->SetHashSalt = NULL;
#endif
+#if XML_COMBINED_VERSION >= 20600
+ capi->SetReparseDeferralEnabled = XML_SetReparseDeferralEnabled;
+#else
+ capi->SetReparseDeferralEnabled = NULL;
+#endif
/* export using capsule */
PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME,