Skip to content

Commit

Permalink
Unfilter in place instead of copying data out of ZlibStream's buffers.
Browse files Browse the repository at this point in the history
Before this commit the decompressed image data would flow as follows:

1. `fdeflate` writes the raw data into `ZlibStream::out_buffer`
2. Each byte of (raw) image data is copied (i.e. `append`-ed) into
   `Reader::current`.  See
   [`append` here](https://github.com/image-rs/image-png/blob/f10238a1e886b228e7da5301e5c0f5011316f2d6/src/decoder/zlib.rs#L168)
   and
   [`extend` here](https://github.com/image-rs/image-png/blob/f10238a1e886b228e7da5301e5c0f5011316f2d6/src/decoder/zlib.rs#L208).
     A. Before this happens, `Reader::current` is compacted, which
       would require some additional copying of data.  See
        [`drain` here](https://github.com/image-rs/image-png/blob/f10238a1e886b228e7da5301e5c0f5011316f2d6/src/decoder/mod.rs#L733-L737)
     B. After this happens, the `ZlibStream::out_buffer` is **always**
        compacted - the consumed data is removed and the remaining data
        is copied within the buffer (i.e. shifted left, to position 0).
        See
        [`drain` here](https://github.com/image-rs/image-png/blob/f10238a1e886b228e7da5301e5c0f5011316f2d6/src/decoder/zlib.rs#L208)
3. The data is `unfilter`-ed in place, inside `Reader::current`
4. Each byte of (unfiltered) image data is copied into `Reader::prev`.
   See
   [`copy_from_slice` here](https://github.com/image-rs/image-png/blob/f10238a1e886b228e7da5301e5c0f5011316f2d6/src/decoder/mod.rs#L769)
5. The unfiltered data is used as input for `expand_...` calls.

After this commit we avoid copies in steps 2 and 4 and instead keep
the data in `ZlibStream::out_buffer` a bit longer.  In particular,
Unfiltering is done by keeping the previous and current row within
`ZlibStream::out_buffer` and mutating a portion of
`ZlibStream::out_buffer` in place.

Additionally, after this commit compaction of `ZlibStream::out_buffer`
(in step 2B above) is rate-limited, so that at most 1 byte goes through
`memcpy` (or `memset`) per 1 byte of decompressed output.  And after
this commit the compaction of `Reader::current` (in step 2A above)
doesn't happen at all (since this buffer has been removed entirely).

The changes in this commit should have minimal or no impact on the
cache/memory pressure.  On one hand, this commit *removes* some memory
pressure by removing the `Reader::current` and `Reader::prev` buffers.
OTOH, this commit *increases* the amount of data stored in the
`ZlibStream::out_buffer` (by more or less the same amount). Therefore
the overall amount of memory used during decoding should stay the same
(if we ignore change stemming from the tweaks to the frequency of
compaction of `ZlibStream::out_buffer`).

This commit includes a breaking API change and therefore increases the
crate version in `Cargo.toml`.  In particular, after this commit
the `png::StreamingDecoder::update` API no longer takes `&mut Vec<u8>`
and the stream of decompressed data has to be instead accessed and/or
manipulated via the following new methods: `decompressed_image_data`,
`decompressed_image_data_mut`, `discard_image_data`.
  • Loading branch information
anforowicz committed Oct 2, 2023
1 parent f10238a commit 87b44a8
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 102 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "png"
version = "0.17.10"
version = "0.18.0"
license = "MIT OR Apache-2.0"

description = "PNG decoding and encoding library in pure Rust"
Expand Down
2 changes: 1 addition & 1 deletion examples/pngcheck.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ fn check_image<P: AsRef<Path>>(c: Config, fname: P) -> io::Result<()> {
}
buf = &data[..n];
}
match decoder.update(buf, &mut Vec::new()) {
match decoder.update(buf) {
Ok((_, ImageEnd)) => {
if !have_idat {
// This isn't beautiful. But it works.
Expand Down
150 changes: 98 additions & 52 deletions src/decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use self::stream::{FormatErrorInner, CHUNCK_BUFFER_SIZE};

use std::io::{BufRead, BufReader, Read};
use std::mem;
use std::num::NonZeroUsize;
use std::ops::Range;

use crate::chunk;
Expand Down Expand Up @@ -188,15 +189,14 @@ impl<R: Read> Decoder<R> {
/// Most image metadata will not be read until `read_info` is called, so those fields will be
/// None or empty.
pub fn read_header_info(&mut self) -> Result<&Info, DecodingError> {
let mut buf = Vec::new();
while self.read_decoder.info().is_none() {
buf.clear();
if self.read_decoder.decode_next(&mut buf)?.is_none() {
if self.read_decoder.decode_next()?.is_none() {
return Err(DecodingError::Format(
FormatErrorInner::UnexpectedEof.into(),
));
}
}
debug_assert!(self.read_decoder.decompressed_image_data().is_empty());
Ok(self.read_decoder.info().unwrap())
}

Expand All @@ -210,9 +210,7 @@ impl<R: Read> Decoder<R> {
subframe: SubframeInfo::not_yet_init(),
fctl_read: 0,
next_frame: SubframeIdx::Initial,
prev: Vec::new(),
current: Vec::new(),
scan_start: 0,
prev_row_len: None,
transform: self.transform,
scratch_buffer: Vec::new(),
limits: self.limits,
Expand Down Expand Up @@ -281,9 +279,25 @@ struct ReadDecoder<R: Read> {
}

impl<R: Read> ReadDecoder<R> {
/// Returns the next decoded chunk. If the chunk is an ImageData chunk, its contents are written
/// into image_data.
fn decode_next(&mut self, image_data: &mut Vec<u8>) -> Result<Option<Decoded>, DecodingError> {
/// Returns the decompressed-but-not-yet-consumed image data held in the
/// inner decoder's output buffer.
fn decompressed_image_data(&self) -> &[u8] {
self.decoder.decompressed_image_data()
}

/// Mutable counterpart of `decompressed_image_data` — lets the caller
/// unfilter a row in place inside the decoder's output buffer.
fn decompressed_image_data_mut(&mut self) -> &mut [u8] {
self.decoder.decompressed_image_data_mut()
}

/// Discards the first `number_of_bytes_to_discard` bytes of the
/// decompressed image data (delegates to the inner decoder).
fn discard_image_data(&mut self, number_of_bytes_to_discard: usize) {
self.decoder.discard_image_data(number_of_bytes_to_discard);
}

/// Resets the inner decoder's inflater state (delegates to the inner
/// decoder; called e.g. when starting to decode a new frame).
fn reset_image_data_inflater(&mut self) {
self.decoder.reset_image_data_inflater();
}

/// Returns the next decoded chunk. If the chunk is an ImageData chunk, its contents can be
/// found at the end of `decompressed_image_data_mut()`.
fn decode_next(&mut self) -> Result<Option<Decoded>, DecodingError> {
while !self.at_eof {
let (consumed, result) = {
let buf = self.reader.fill_buf()?;
Expand All @@ -292,7 +306,7 @@ impl<R: Read> ReadDecoder<R> {
FormatErrorInner::UnexpectedEof.into(),
));
}
self.decoder.update(buf, image_data)?
self.decoder.update(buf)?
};
self.reader.consume(consumed);
match result {
Expand All @@ -312,14 +326,17 @@ impl<R: Read> ReadDecoder<R> {
FormatErrorInner::UnexpectedEof.into(),
));
}
let (consumed, event) = self.decoder.update(buf, &mut vec![])?;
let (consumed, event) = self.decoder.update(buf)?;
self.reader.consume(consumed);
match event {
Decoded::Nothing => (),
Decoded::ImageEnd => self.at_eof = true,
// ignore more data
Decoded::ChunkComplete(_, _) | Decoded::ChunkBegin(_, _) | Decoded::ImageData => {}
Decoded::ImageDataFlushed => return Ok(()),
Decoded::ImageDataFlushed => {
self.decoder.reset_image_data_inflater();
return Ok(());
}
Decoded::PartialChunk(_) => {}
new => unreachable!("{:?}", new),
}
Expand Down Expand Up @@ -347,12 +364,9 @@ pub struct Reader<R: Read> {
/// control chunk. The IDAT image _may_ have such a chunk applying to it.
fctl_read: u32,
next_frame: SubframeIdx,
/// Previous raw line
prev: Vec<u8>,
/// Current raw line
current: Vec<u8>,
/// Start index of the current scan line.
scan_start: usize,
/// The length of the previous row (at the beginning of
/// `self.decoder.decompressed_image_data_mut()`) or `None` if there is no previous row yet.
prev_row_len: Option<NonZeroUsize>,
/// Output transformations
transform: Transformations,
/// This buffer is only used so that `next_row` and `next_interlaced_row` can return reference
Expand Down Expand Up @@ -401,12 +415,8 @@ impl<R: Read> Reader<R> {
/// Requires IHDR before the IDAT and fcTL before fdAT.
fn read_until_image_data(&mut self) -> Result<(), DecodingError> {
loop {
// This is somewhat ugly. The API requires us to pass a buffer to decode_next but we
// know that we will stop before reading any image data from the stream. Thus pass an
// empty buffer and assert that remains empty.
let mut buf = Vec::new();
let state = self.decoder.decode_next(&mut buf)?;
assert!(buf.is_empty());
let state = self.decoder.decode_next()?;
assert!(self.decoder.decompressed_image_data().is_empty());

match state {
Some(Decoded::ChunkBegin(_, chunk::IDAT))
Expand Down Expand Up @@ -444,12 +454,18 @@ impl<R: Read> Reader<R> {
return Err(DecodingError::LimitsExceeded);
}

self.prev.clear();
self.prev.resize(self.subframe.rowlen, 0);
self.reset_prev_row();

Ok(())
}

/// Forgets the cached "previous row": discards its bytes from the front of
/// the decoder's decompressed-data buffer and clears `prev_row_len`, so the
/// next row will be unfiltered against an all-zeros previous row.
fn reset_prev_row(&mut self) {
if let Some(prev_row_len) = self.prev_row_len.as_ref() {
self.decoder.discard_image_data(prev_row_len.get());
self.prev_row_len = None;
}
}

/// Get information on the image.
///
/// The structure will change as new frames of an animated image are decoded.
Expand All @@ -471,6 +487,9 @@ impl<R: Read> Reader<R> {
/// Output lines will be written in row-major, packed matrix with width and height of the read
/// frame (or subframe), all samples are in big endian byte order where this matters.
pub fn next_frame(&mut self, buf: &mut [u8]) -> Result<OutputInfo, DecodingError> {
self.reset_prev_row();
self.decoder.reset_image_data_inflater();

let subframe_idx = match self.decoder.info().unwrap().frame_control() {
None => SubframeIdx::Initial,
Some(_) => SubframeIdx::Some(self.fctl_read - 1),
Expand Down Expand Up @@ -504,8 +523,6 @@ impl<R: Read> Reader<R> {
line_size: self.output_line_size(self.subframe.width),
};

self.current.clear();
self.scan_start = 0;
let width = self.info().width;
if self.info().interlaced {
while let Some(InterlacedRow {
Expand Down Expand Up @@ -597,7 +614,6 @@ impl<R: Read> Reader<R> {
output_buffer: &mut [u8],
) -> Result<(), DecodingError> {
self.next_raw_interlaced_row(rowlen)?;
let row = &self.prev[1..rowlen];

// Apply transformations and write resulting data to buffer.
let (color_type, bit_depth, trns) = {
Expand All @@ -617,6 +633,7 @@ impl<R: Read> Reader<R> {
} else {
None
};
let row = &self.decoder.decompressed_image_data()[1..rowlen];
match (color_type, trns) {
(ColorType::Indexed, _) if expand => {
expand_paletted(row, output_buffer, info, trns)?;
Expand Down Expand Up @@ -706,8 +723,7 @@ impl<R: Read> Reader<R> {
let (pass, line, width) = adam7.next()?;
let rowlen = self.info().raw_row_length_from_width(width);
if last_pass != pass {
self.prev.clear();
self.prev.resize(rowlen, 0u8);
self.reset_prev_row();
}
Some((rowlen, InterlaceInfo::Adam7 { pass, line, width }))
}
Expand All @@ -718,32 +734,33 @@ impl<R: Read> Reader<R> {
}
}

/// Write the next raw interlaced row into `self.prev`.
/// Write the next raw interlaced row into `self.decoder.decompressed_image_data_mut()`.
///
/// The scanline is filtered against the previous scanline according to the specification.
fn next_raw_interlaced_row(&mut self, rowlen: usize) -> Result<(), DecodingError> {
// Read image data until we have at least one full row (but possibly more than one).
while self.current.len() - self.scan_start < rowlen {
let required_len = match self.prev_row_len {
None => rowlen,
Some(prev_row_len) => {
debug_assert_eq!(prev_row_len.get(), rowlen);
rowlen * 2
}
};
while self.decoder.decompressed_image_data().len() < required_len {
if self.subframe.consumed_and_flushed {
return Err(DecodingError::Format(
FormatErrorInner::NoMoreImageData.into(),
));
}

// Clear the current buffer before appending more data.
if self.scan_start > 0 {
self.current.drain(..self.scan_start).for_each(drop);
self.scan_start = 0;
}

match self.decoder.decode_next(&mut self.current)? {
match self.decoder.decode_next()? {
Some(Decoded::ImageData) => {}
Some(Decoded::ImageDataFlushed) => {
self.subframe.consumed_and_flushed = true;
}
None => {
return Err(DecodingError::Format(
if self.current.is_empty() {
if self.decoder.decompressed_image_data().is_empty() {
FormatErrorInner::NoMoreImageData
} else {
FormatErrorInner::UnexpectedEndOfChunk
Expand All @@ -755,18 +772,47 @@ impl<R: Read> Reader<R> {
}
}

// Get a reference to the current row and point scan_start to the next one.
let row = &mut self.current[self.scan_start..];
self.scan_start += rowlen;

// Unfilter the row.
let filter = FilterType::from_u8(row[0]).ok_or(DecodingError::Format(
FormatErrorInner::UnknownFilterMethod(row[0]).into(),
))?;
unfilter(filter, self.bpp, &self.prev[1..rowlen], &mut row[1..rowlen]);
{
            // Calculate a reference to the previous row (or a buffer of zeros if there is no previous
            // row) and a mutable reference to the current row.
let mut zeros = Vec::new();
let (prev, row) = match self.prev_row_len {
None => {
zeros.resize(rowlen, 0);
(
zeros.as_slice(),
&mut self.decoder.decompressed_image_data_mut()[..rowlen],
)
}
Some(prev_row_len) => {
debug_assert_eq!(prev_row_len.get(), rowlen);
let (prev, row) = self
.decoder
.decompressed_image_data_mut()
.split_at_mut(rowlen);
(&prev[..], &mut row[..rowlen])
}
};
debug_assert_eq!(prev.len(), rowlen);
debug_assert_eq!(row.len(), rowlen);

// Unfilter the row.
let filter = FilterType::from_u8(row[0]).ok_or(DecodingError::Format(
FormatErrorInner::UnknownFilterMethod(row[0]).into(),
))?;
unfilter(filter, self.bpp, &prev[1..], &mut row[1..]);
}

// Save the current row for the next pass.
self.prev[..rowlen].copy_from_slice(&row[..rowlen]);
match self.prev_row_len {
None => {
debug_assert_ne!(0, rowlen);
self.prev_row_len = NonZeroUsize::new(rowlen);
}
Some(prev_row_len) => {
debug_assert_eq!(prev_row_len.get(), rowlen);
self.decoder.discard_image_data(rowlen);
}
}

Ok(())
}
Expand Down
Loading

0 comments on commit 87b44a8

Please sign in to comment.