From 0fb4f465b2b0862d972ebbd8483500eaaf357e56 Mon Sep 17 00:00:00 2001 From: Lukasz Anforowicz Date: Wed, 15 Nov 2023 23:53:50 +0000 Subject: [PATCH] Cap buffer sizes via `ZlibStream::set_max_total_output`. Before this commit, `ZlibStream::new` would always allocate and zero out 64kB of data via `out_buffer: vec![0; 2 * CHUNCK_BUFFER_SIZE]`. After this commit, `out_buffer` is capped by the actual size of the decompressed image data. `StreamingDecoder::parse_ihdr` estimates the image size and notifies `ZlibStream` via the new `set_max_total_output` method. Impact on the runtime of the `decode/generated-png:noncompressed-8x8.png` benchmark (3 measurements): * [-24.028% -23.693% -23.460%] (p = 0.00 < 0.05) * [-23.830% -23.480% -23.200%] (p = 0.00 < 0.05) * [-21.259% -20.893% -20.618%] (p = 0.00 < 0.05) --- src/decoder/stream.rs | 41 +++++++++++++++++++++++++++++++++++- src/decoder/zlib.rs | 48 +++++++++++++++++++++++++++++++++---------- src/test_utils.rs | 8 ++++---- 3 files changed, 81 insertions(+), 16 deletions(-) diff --git a/src/decoder/stream.rs b/src/decoder/stream.rs index 9e805c62..f75f7a63 100644 --- a/src/decoder/stream.rs +++ b/src/decoder/stream.rs @@ -1313,6 +1313,16 @@ impl StreamingDecoder { } }; + if let Some(mut raw_row_len) = color_type.checked_raw_row_length(bit_depth, width) { + if interlaced { + // This overshoots, but overestimating should be fine. + // TODO: Calculate **exact** IDAT size for interlaced images. + raw_row_len = raw_row_len.saturating_mul(2); + } + self.inflater + .set_max_total_output((height as usize).saturating_mul(raw_row_len)); + } + self.info = Some(Info { width, height, @@ -1841,7 +1851,7 @@ mod tests { let png = { let mut png = Vec::new(); write_fdat_prefix(&mut png, 2, 8); - let image_data = generate_rgba8_with_width(8); + let image_data = generate_rgba8_with_width_and_height(8, 8); write_fdat(&mut png, 2, &image_data[..30]); write_fdat(&mut png, 3, &image_data[30..]); write_iend(&mut png); @@ -1892,4 +1902,33 @@ mod tests { assert_eq!(first_frame, second_frame); } + + #[test] + fn test_idat_bigger_than_image_size_from_ihdr() { + let png = { + let mut png = Vec::new(); + write_png_sig(&mut png); + write_rgba8_ihdr_with_width(&mut png, 8); + + // Here we want to test an invalid image where the `IDAT` chunk contains more data + // (data for 8 image) than declared in the `IHDR` chunk (which only describes an 8x8 + // image). + write_chunk( + &mut png, + b"IDAT", + &generate_rgba8_with_width_and_height(8, 256), + ); + + write_iend(&mut png); + png + }; + let decoder = Decoder::new(png.as_slice()); + let mut reader = decoder.read_info().unwrap(); + let mut buf = vec![0; reader.output_buffer_size()]; + + // TODO: Should this return an error instead? For now let's just have test assertions for + // the current behavior. + reader.next_frame(&mut buf).unwrap(); + assert_eq!(3093270825, crc32fast::hash(&buf)); + } } diff --git a/src/decoder/zlib.rs b/src/decoder/zlib.rs index e83c8e8c..2e183bee 100644 --- a/src/decoder/zlib.rs +++ b/src/decoder/zlib.rs @@ -14,6 +14,10 @@ pub(super) struct ZlibStream { out_buffer: Vec, /// The cursor position in the output stream as a buffer index. out_pos: usize, + /// Limit on how many bytes can be decompressed in total. This field is mostly used for + /// performance optimizations (e.g. to avoid allocating and zeroing out large buffers when only + /// a small image is being decoded). + max_total_output: usize, /// Ignore and do not calculate the Adler-32 checksum. Defaults to `true`. /// /// This flag overrides `TINFL_FLAG_COMPUTE_ADLER32`. @@ -27,8 +31,9 @@ impl ZlibStream { ZlibStream { state: Box::new(Decompressor::new()), started: false, - out_buffer: vec![0; 2 * CHUNCK_BUFFER_SIZE], + out_buffer: Vec::new(), out_pos: 0, + max_total_output: usize::MAX, ignore_adler32: true, } } @@ -37,9 +42,14 @@ impl ZlibStream { self.started = false; self.out_buffer.clear(); self.out_pos = 0; + self.max_total_output = usize::MAX; *self.state = Decompressor::new(); } + pub(crate) fn set_max_total_output(&mut self, n: usize) { + self.max_total_output = n; + } + /// Set the `ignore_adler32` flag and return `true` if the flag was /// successfully set. /// @@ -101,10 +111,9 @@ impl ZlibStream { return Ok(()); } - loop { + while !self.state.is_done() { self.prepare_vec_for_appending(); - - let (in_consumed, out_consumed) = self + let (_in_consumed, out_consumed) = self .state .read(&[], self.out_buffer.as_mut_slice(), self.out_pos, true) .map_err(|err| { @@ -113,23 +122,38 @@ impl ZlibStream { self.out_pos += out_consumed; - if self.state.is_done() { - self.out_buffer.truncate(self.out_pos); - image_data.append(&mut self.out_buffer); - return Ok(()); - } else { + if !self.state.is_done() { let transferred = self.transfer_finished_data(image_data); assert!( - transferred > 0 || in_consumed > 0 || out_consumed > 0, + transferred > 0 || out_consumed > 0, "No more forward progress made in stream decoding." ); } } + + self.out_buffer.truncate(self.out_pos); + image_data.append(&mut self.out_buffer); + Ok(()) } /// Resize the vector to allow allocation of more data. fn prepare_vec_for_appending(&mut self) { - if self.out_buffer.len().saturating_sub(self.out_pos) >= CHUNCK_BUFFER_SIZE { + // The `debug_assert` below explains why we can use `>=` instead of `>` in the condition + // that compares `self.out_post >= self.max_total_output` in the next `if` statement. + debug_assert!(!self.state.is_done()); + if self.out_pos >= self.max_total_output { + // This can happen when the `max_total_output` was miscalculated (e.g. + // because the `IHDR` chunk was malformed and didn't match the `IDAT` chunk). In + // this case, let's reset `self.max_total_output` before further calculations. + self.max_total_output = usize::MAX; + } + + let current_len = self.out_buffer.len(); + let desired_len = self + .out_pos + .saturating_add(CHUNCK_BUFFER_SIZE) + .min(self.max_total_output); + if current_len >= desired_len { return; } @@ -150,6 +174,8 @@ impl ZlibStream { // Ensure the allocation request is valid. // TODO: maximum allocation limits? .min(isize::max_value() as usize) + // Don't unnecessarily allocate more than `max_total_output`. + .min(self.max_total_output) } fn transfer_finished_data(&mut self, image_data: &mut Vec) -> usize { diff --git a/src/test_utils.rs b/src/test_utils.rs index 5395bd7c..bad30366 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -73,8 +73,8 @@ pub fn write_rgba8_ihdr_with_width(w: &mut impl Write, width: u32) { write_chunk(w, b"IHDR", &data); } -/// Generates RGBA8 `width` x `width` image and wraps it in a store-only zlib container. -pub fn generate_rgba8_with_width(width: u32) -> Vec { +/// Generates RGBA8 `width` x `height` image and wraps it in a store-only zlib container. +pub fn generate_rgba8_with_width_and_height(width: u32, height: u32) -> Vec { // Generate arbitrary test pixels. let image_pixels = { let mut row = Vec::new(); @@ -88,7 +88,7 @@ pub fn generate_rgba8_with_width(width: u32) -> Vec { row.extend(row_pixels); std::iter::repeat(row) - .take(width as usize) + .take(height as usize) .flatten() .collect::>() }; @@ -104,7 +104,7 @@ pub fn generate_rgba8_with_width(width: u32) -> Vec { /// Writes an IDAT chunk. pub fn write_rgba8_idats(w: &mut impl Write, size: u32, idat_bytes: usize) { - let data = generate_rgba8_with_width(size); + let data = generate_rgba8_with_width_and_height(size, size); for chunk in data.chunks(idat_bytes) { write_chunk(w, b"IDAT", chunk);