From 0fb4f465b2b0862d972ebbd8483500eaaf357e56 Mon Sep 17 00:00:00 2001
From: Lukasz Anforowicz <lukasza@chromium.org>
Date: Wed, 15 Nov 2023 23:53:50 +0000
Subject: [PATCH] Cap buffer sizes via `ZlibStream::set_max_total_output`.

Before this commit, `ZlibStream::new` would always allocate and zero out
64kB of data via `out_buffer: vec![0; 2 * CHUNCK_BUFFER_SIZE]`.  After
this commit, `out_buffer` is capped by the actual size of the
decompressed image data.  `StreamingDecoder::parse_ihdr` estimates the
image size and notifies `ZlibStream` via the new `set_max_total_output`
method.

Impact on the runtime of the
`decode/generated-png:noncompressed-8x8.png` benchmark (3 measurements):

* [-24.028% -23.693% -23.460%] (p = 0.00 < 0.05)
* [-23.830% -23.480% -23.200%] (p = 0.00 < 0.05)
* [-21.259% -20.893% -20.618%] (p = 0.00 < 0.05)
---
 src/decoder/stream.rs | 41 +++++++++++++++++++++++++++++++++++-
 src/decoder/zlib.rs   | 48 +++++++++++++++++++++++++++++++++----------
 src/test_utils.rs     |  8 ++++----
 3 files changed, 81 insertions(+), 16 deletions(-)
diff --git a/src/decoder/stream.rs b/src/decoder/stream.rs
index 9e805c62..f75f7a63 100644
--- a/src/decoder/stream.rs
+++ b/src/decoder/stream.rs
@@ -1313,6 +1313,16 @@ impl StreamingDecoder {
             }
         };
 
+        if let Some(mut raw_row_len) = color_type.checked_raw_row_length(bit_depth, width) {
+            if interlaced {
+                // This overshoots, but overestimating should be fine.
+                // TODO: Calculate **exact** IDAT size for interlaced images.
+                raw_row_len = raw_row_len.saturating_mul(2);
+            }
+            self.inflater
+                .set_max_total_output((height as usize).saturating_mul(raw_row_len));
+        }
+
         self.info = Some(Info {
             width,
             height,
@@ -1841,7 +1851,7 @@ mod tests {
         let png = {
             let mut png = Vec::new();
             write_fdat_prefix(&mut png, 2, 8);
-            let image_data = generate_rgba8_with_width(8);
+            let image_data = generate_rgba8_with_width_and_height(8, 8);
             write_fdat(&mut png, 2, &image_data[..30]);
             write_fdat(&mut png, 3, &image_data[30..]);
             write_iend(&mut png);
@@ -1892,4 +1902,33 @@ mod tests {
 
         assert_eq!(first_frame, second_frame);
     }
+
+    #[test]
+    fn test_idat_bigger_than_image_size_from_ihdr() {
+        let png = {
+            let mut png = Vec::new();
+            write_png_sig(&mut png);
+            write_rgba8_ihdr_with_width(&mut png, 8);
+
+            // Here we want to test an invalid image where the `IDAT` chunk contains more data
+            // (data for 8 image) than declared in the `IHDR` chunk (which only describes an 8x8
+            // image).
+            write_chunk(
+                &mut png,
+                b"IDAT",
+                &generate_rgba8_with_width_and_height(8, 256),
+            );
+
+            write_iend(&mut png);
+            png
+        };
+        let decoder = Decoder::new(png.as_slice());
+        let mut reader = decoder.read_info().unwrap();
+        let mut buf = vec![0; reader.output_buffer_size()];
+
+        // TODO: Should this return an error instead?  For now let's just have test assertions for
+        // the current behavior.
+        reader.next_frame(&mut buf).unwrap();
+        assert_eq!(3093270825, crc32fast::hash(&buf));
+    }
 }
diff --git a/src/decoder/zlib.rs b/src/decoder/zlib.rs
index e83c8e8c..2e183bee 100644
--- a/src/decoder/zlib.rs
+++ b/src/decoder/zlib.rs
@@ -14,6 +14,10 @@ pub(super) struct ZlibStream {
     out_buffer: Vec<u8>,
     /// The cursor position in the output stream as a buffer index.
     out_pos: usize,
+    /// Limit on how many bytes can be decompressed in total.  This field is mostly used for
+    /// performance optimizations (e.g. to avoid allocating and zeroing out large buffers when only
+    /// a small image is being decoded).
+    max_total_output: usize,
     /// Ignore and do not calculate the Adler-32 checksum. Defaults to `true`.
     ///
     /// This flag overrides `TINFL_FLAG_COMPUTE_ADLER32`.
@@ -27,8 +31,9 @@ impl ZlibStream {
         ZlibStream {
             state: Box::new(Decompressor::new()),
             started: false,
-            out_buffer: vec![0; 2 * CHUNCK_BUFFER_SIZE],
+            out_buffer: Vec::new(),
             out_pos: 0,
+            max_total_output: usize::MAX,
             ignore_adler32: true,
         }
     }
@@ -37,9 +42,14 @@ impl ZlibStream {
         self.started = false;
         self.out_buffer.clear();
         self.out_pos = 0;
+        self.max_total_output = usize::MAX;
         *self.state = Decompressor::new();
     }
 
+    pub(crate) fn set_max_total_output(&mut self, n: usize) {
+        self.max_total_output = n;
+    }
+
     /// Set the `ignore_adler32` flag and return `true` if the flag was
     /// successfully set.
     ///
@@ -101,10 +111,9 @@ impl ZlibStream {
             return Ok(());
         }
 
-        loop {
+        while !self.state.is_done() {
             self.prepare_vec_for_appending();
-
-            let (in_consumed, out_consumed) = self
+            let (_in_consumed, out_consumed) = self
                 .state
                 .read(&[], self.out_buffer.as_mut_slice(), self.out_pos, true)
                 .map_err(|err| {
@@ -113,23 +122,38 @@ impl ZlibStream {
 
             self.out_pos += out_consumed;
 
-            if self.state.is_done() {
-                self.out_buffer.truncate(self.out_pos);
-                image_data.append(&mut self.out_buffer);
-                return Ok(());
-            } else {
+            if !self.state.is_done() {
                 let transferred = self.transfer_finished_data(image_data);
                 assert!(
-                    transferred > 0 || in_consumed > 0 || out_consumed > 0,
+                    transferred > 0 || out_consumed > 0,
                     "No more forward progress made in stream decoding."
                 );
             }
         }
+
+        self.out_buffer.truncate(self.out_pos);
+        image_data.append(&mut self.out_buffer);
+        Ok(())
     }
 
     /// Resize the vector to allow allocation of more data.
     fn prepare_vec_for_appending(&mut self) {
-        if self.out_buffer.len().saturating_sub(self.out_pos) >= CHUNCK_BUFFER_SIZE {
+        // The `debug_assert` below explains why we can use `>=` instead of `>` in the condition
+        // that compares `self.out_post >= self.max_total_output` in the next `if` statement.
+        debug_assert!(!self.state.is_done());
+        if self.out_pos >= self.max_total_output {
+            // This can happen when the `max_total_output` was miscalculated (e.g.
+            // because the `IHDR` chunk was malformed and didn't match the `IDAT` chunk).  In
+            // this case, let's reset `self.max_total_output` before further calculations.
+            self.max_total_output = usize::MAX;
+        }
+
+        let current_len = self.out_buffer.len();
+        let desired_len = self
+            .out_pos
+            .saturating_add(CHUNCK_BUFFER_SIZE)
+            .min(self.max_total_output);
+        if current_len >= desired_len {
             return;
         }
 
@@ -150,6 +174,8 @@ impl ZlibStream {
             // Ensure the allocation request is valid.
             // TODO: maximum allocation limits?
             .min(isize::max_value() as usize)
+            // Don't unnecessarily allocate more than `max_total_output`.
+            .min(self.max_total_output)
     }
 
     fn transfer_finished_data(&mut self, image_data: &mut Vec<u8>) -> usize {
diff --git a/src/test_utils.rs b/src/test_utils.rs
index 5395bd7c..bad30366 100644
--- a/src/test_utils.rs
+++ b/src/test_utils.rs
@@ -73,8 +73,8 @@ pub fn write_rgba8_ihdr_with_width(w: &mut impl Write, width: u32) {
     write_chunk(w, b"IHDR", &data);
 }
 
-/// Generates RGBA8 `width` x `width` image and wraps it in a store-only zlib container.
-pub fn generate_rgba8_with_width(width: u32) -> Vec<u8> {
+/// Generates RGBA8 `width` x `height` image and wraps it in a store-only zlib container.
+pub fn generate_rgba8_with_width_and_height(width: u32, height: u32) -> Vec<u8> {
     // Generate arbitrary test pixels.
     let image_pixels = {
         let mut row = Vec::new();
@@ -88,7 +88,7 @@ pub fn generate_rgba8_with_width(width: u32) -> Vec<u8> {
         row.extend(row_pixels);
 
         std::iter::repeat(row)
-            .take(width as usize)
+            .take(height as usize)
             .flatten()
             .collect::<Vec<_>>()
     };
@@ -104,7 +104,7 @@ pub fn generate_rgba8_with_width(width: u32) -> Vec<u8> {
 
 /// Writes an IDAT chunk.
 pub fn write_rgba8_idats(w: &mut impl Write, size: u32, idat_bytes: usize) {
-    let data = generate_rgba8_with_width(size);
+    let data = generate_rgba8_with_width_and_height(size, size);
 
     for chunk in data.chunks(idat_bytes) {
         write_chunk(w, b"IDAT", chunk);