From d17848da98b630b39c7afb304054e81df0be9806 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 17 Nov 2023 11:54:46 +0100 Subject: [PATCH] gh-103477: Write gzip trailer with zlib MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RHEL, SLES and Ubuntu for IBM zSystems (aka s390x) ship with a zlib optimization [1] that significantly improves deflate performance by using a specialized CPU instruction. This instruction not only compresses the data, but also computes a checksum. At the moment Python's gzip support performs compression and checksum calculation separately, which creates unnecessary overhead. The reason is that Python needs to write specific values into the gzip header, so it uses a raw stream instead of a gzip stream, and zlib does not compute a checksum for raw streams. The challenge with using gzip streams instead of raw streams is dealing with the zlib-generated gzip header, which we instead need to generate manually. Implement the method proposed by @rhpvorderman: use Z_BLOCK on the first deflate() call in order to stop before the first deflate block is emitted. The data that is emitted up until this point is the zlib-generated gzip header, which should be discarded. Expose this new functionality by adding a boolean gzip_trailer argument to zlib.compress() and zlib.compressobj(). Make use of it in gzip.compress(), GzipFile and TarFile. The performance improvement varies depending on data being compressed, but it's in the ballpark of 40%. An alternative approach is to use the deflateSetHeader() function, introduced in zlib v1.2.2.1 (2011). This also works, but the change was deemed too intrusive [2]. 📜🤖 Added by blurb_it. 
[1] https://github.com/madler/zlib/pull/410 [2] https://github.com/python/cpython/pull/103478 --- Lib/gzip.py | 19 ++--- Lib/tarfile.py | 9 +- ...-11-17-12-26-47.gh-issue-103477._7cTsK.rst | 1 + Modules/clinic/zlibmodule.c.h | 71 +++++++++++----- Modules/zlibmodule.c | 84 +++++++++++++++++-- 5 files changed, 135 insertions(+), 49 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-11-17-12-26-47.gh-issue-103477._7cTsK.rst diff --git a/Lib/gzip.py b/Lib/gzip.py index 177f9080dc5af8b..39a46ed0d573c69 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -221,7 +221,8 @@ def __init__(self, filename=None, mode=None, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, - 0) + 0, + gzip_trailer=True) self._write_mtime = mtime self._buffer_size = _WRITE_BUFFER_SIZE self._buffer = io.BufferedWriter(_WriteBufferStream(self), @@ -245,8 +246,6 @@ def __repr__(self): def _init_write(self, filename): self.name = filename - self.crc = zlib.crc32(b"") - self.size = 0 self.writebuf = [] self.bufsize = 0 self.offset = 0 # Current file offset for seek(), tell(), etc @@ -310,8 +309,6 @@ def _write_raw(self, data): if length > 0: self.fileobj.write(self.compress.compress(data)) - self.size += length - self.crc = zlib.crc32(data, self.crc) self.offset += length return length @@ -355,9 +352,6 @@ def close(self): if self.mode == WRITE: self._buffer.flush() fileobj.write(self.compress.flush()) - write32u(fileobj, self.crc) - # self.size may exceed 2 GiB, or even 4 GiB - write32u(fileobj, self.size & 0xffffffff) elif self.mode == READ: self._buffer.close() finally: @@ -611,10 +605,11 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None): # This is faster and with less overhead. return zlib.compress(data, level=compresslevel, wbits=31) header = _create_simple_gzip_header(compresslevel, mtime) - trailer = struct.pack("= -15 && *wbits <= -9) { + /* Ask zlib to emit gzip header and gzip trailer. 
We need only the + trailer, but it's not possible to request that, so we will have to + skip the header manually. */ + *wbits = 16 - *wbits; + } else { + /* Ignore gzip_trailer. */ + *gzip_trailer = 0; + } +} + +static int +skip_gzip_header(z_stream *zst) +{ + /* Emit gzip header into a throw-away buffer by compressing an empty + buffer with Z_BLOCK. The header should fully fit into the buffer, so + one deflate() call should be enough, but use a loop anyway just in + case. */ + uInt saved_avail_in = zst->avail_in, saved_avail_out = zst->avail_out; + Bytef *saved_next_in = zst->next_in, *saved_next_out = zst->next_out; + int flush = Z_BLOCK; + Bytef tmp[32]; + int err; + + while (true) { + zst->next_in = NULL; + zst->avail_in = 0; + zst->next_out = tmp; + zst->avail_out = sizeof(tmp); + err = deflate(zst, flush); + if (err != Z_OK) { + return err; + } + if (zst->avail_out != 0) { + break; + } + flush = Z_NO_FLUSH; + } + zst->next_in = saved_next_in; + zst->avail_in = saved_avail_in; + zst->next_out = saved_next_out; + zst->avail_out = saved_avail_out; + + return Z_OK; +} + static void arrange_input_buffer(z_stream *zst, Py_ssize_t *remains) { @@ -319,13 +368,16 @@ zlib.compress Compression level, in 0-9 or -1. wbits: int(c_default="MAX_WBITS") = MAX_WBITS The window buffer size and container format. + gzip_trailer: bool = False + Whether to append a gzip trailer to a raw stream. Returns a bytes object containing compressed data. 
[clinic start generated code]*/ static PyObject * -zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits) -/*[clinic end generated code: output=46bd152fadd66df2 input=c4d06ee5782a7e3f]*/ +zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits, + int gzip_trailer) +/*[clinic end generated code: output=feb20f80fe7e4848 input=c17ae8b22942f857]*/ { PyObject *return_value; int flush; @@ -341,6 +393,8 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits) goto error; } + arrange_gzip_trailer(&gzip_trailer, &wbits); + zst.opaque = NULL; zst.zalloc = PyZlib_Malloc; zst.zfree = PyZlib_Free; @@ -364,6 +418,12 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits) goto error; } + if (gzip_trailer && (err = skip_gzip_header(&zst)) != Z_OK) { + deflateEnd(&zst); + zlib_error(state, zst, err, "while skipping gzip header"); + goto error; + } + do { arrange_input_buffer(&zst, &ibuflen); flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH; @@ -555,14 +615,17 @@ zlib.compressobj zdict: Py_buffer = None The predefined compression dictionary - a sequence of bytes containing subsequences that are likely to occur in the input data. + gzip_trailer: bool = False + Whether to append a gzip trailer to a raw stream. Return a compressor object. 
[clinic start generated code]*/ static PyObject * zlib_compressobj_impl(PyObject *module, int level, int method, int wbits, - int memLevel, int strategy, Py_buffer *zdict) -/*[clinic end generated code: output=8b5bed9c8fc3814d input=2fa3d026f90ab8d5]*/ + int memLevel, int strategy, Py_buffer *zdict, + int gzip_trailer) +/*[clinic end generated code: output=fb4c37ba07d34e28 input=8de44294b8fe50f4]*/ { zlibstate *state = get_zlib_state(module); if (zdict->buf != NULL && (size_t)zdict->len > UINT_MAX) { @@ -571,6 +634,8 @@ zlib_compressobj_impl(PyObject *module, int level, int method, int wbits, return NULL; } + arrange_gzip_trailer(&gzip_trailer, &wbits); + compobject *self = newcompobject(state->Comptype); if (self == NULL) goto error; @@ -583,14 +648,12 @@ zlib_compressobj_impl(PyObject *module, int level, int method, int wbits, switch (err) { case Z_OK: self->is_initialised = 1; - if (zdict->buf == NULL) { - goto success; - } else { + if (zdict->buf != NULL) { err = deflateSetDictionary(&self->zst, zdict->buf, (unsigned int)zdict->len); switch (err) { case Z_OK: - goto success; + break; case Z_STREAM_ERROR: PyErr_SetString(PyExc_ValueError, "Invalid dictionary"); goto error; @@ -599,6 +662,11 @@ zlib_compressobj_impl(PyObject *module, int level, int method, int wbits, goto error; } } + if (gzip_trailer && (err = skip_gzip_header(&self->zst)) != Z_OK) { + zlib_error(state, self->zst, err, "while skipping gzip header"); + goto error; + } + goto success; case Z_MEM_ERROR: PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for compression object");