Skip to content

Commit

Permalink
Unfilter in place instead of copying data out of ZlibStream's buffers.
Browse files Browse the repository at this point in the history
Before this commit the decompressed image data would flow as follows:

1. `fdeflate` writes the raw data into `ZlibStream::out_buffer`
2. Each byte of (raw) image data is copied (i.e. `append`-ed) into
   `Reader::current`.  See
   [`append` here](https://github.com/image-rs/image-png/blob/f10238a1e886b228e7da5301e5c0f5011316f2d6/src/decoder/zlib.rs#L168)
   and
   [`extend` here](https://github.com/image-rs/image-png/blob/f10238a1e886b228e7da5301e5c0f5011316f2d6/src/decoder/zlib.rs#L208).
     A. Before this happens, `Reader::current` is compacted, which
       would require some additional copying of data.  See
        [`drain` here](https://github.com/image-rs/image-png/blob/f10238a1e886b228e7da5301e5c0f5011316f2d6/src/decoder/mod.rs#L733-L737)
     B. After this happens, the `ZlibStream::out_buffer` is **always**
        compacted - the consumed data is removed and the remaining data
        is copied within the buffer (i.e. shifted left, to position 0).
        See
        [`drain` here](https://github.com/image-rs/image-png/blob/f10238a1e886b228e7da5301e5c0f5011316f2d6/src/decoder/zlib.rs#L208)
3. The data is `unfilter`-ed in place, inside `Reader::current`
4. Each byte of (unfiltered) image data is copied into `Reader::prev`.
   See
   [`copy_from_slice` here](https://github.com/image-rs/image-png/blob/f10238a1e886b228e7da5301e5c0f5011316f2d6/src/decoder/mod.rs#L769)
5. The unfiltered data is used as input for `expand_...` calls.

After this commit we avoid copies in steps 2 and 4 and instead keep
the data in `ZlibStream::out_buffer` a bit longer.  In particular,
Unfiltering is done by keeping the previous and current row within
`ZlibStream::out_buffer` and mutating a portion of
`ZlibStream::out_buffer` in place.

Additionally, after this commit compaction of `ZlibStream::out_buffer`
(in step 2B above) is rate-limited, so that at most 1 byte goes through
`memcpy` (or `memset`) per 1 byte of decompressed output.  And after
this commit the compaction of `Reader::current` (in step 2A above)
doesn't happen at all (since this buffer has been removed entirely).

The changes in this commit should have minimal or no impact on the
cache/memory pressure.  On one hand, this commit *removes* some memory
pressure by removing the `Reader::current` and `Reader::prev` buffers.
OTOH, this commit *increases* the amount of data stored in the
`ZlibStream::out_buffer` (by more or less the same amount). Therefore
the overall amount of memory used during decoding should stay the same
(if we ignore change stemming from the tweaks to the frequency of
compaction of `ZlibStream::out_buffer`).

This commit includes a breaking API change and therefore increases the
crate version in `Cargo.toml`.  In particular, after this commit
the `png::StreamingDecoder::update` API no longer takes `&mut Vec<u8>`
and the stream of decompressed data has to be instead accessed and/or
manipulated via the following new methods: `decompressed_image_data`,
`decompressed_image_data_mut`, `discard_image_data`.
  • Loading branch information
anforowicz committed Oct 2, 2023
1 parent f10238a commit 87b44a8
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 102 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "png"
version = "0.17.10"
version = "0.18.0"
license = "MIT OR Apache-2.0"

description = "PNG decoding and encoding library in pure Rust"
Expand Down
2 changes: 1 addition & 1 deletion examples/pngcheck.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ fn check_image<P: AsRef<Path>>(c: Config, fname: P) -> io::Result<()> {
}
buf = &data[..n];
}
match decoder.update(buf, &mut Vec::new()) {
match decoder.update(buf) {
Ok((_, ImageEnd)) => {
if !have_idat {
// This isn't beautiful. But it works.
Expand Down
150 changes: 98 additions & 52 deletions src/decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use self::stream::{FormatErrorInner, CHUNCK_BUFFER_SIZE};

use std::io::{BufRead, BufReader, Read};
use std::mem;
use std::num::NonZeroUsize;
use std::ops::Range;

use crate::chunk;
Expand Down Expand Up @@ -188,15 +189,14 @@ impl<R: Read> Decoder<R> {
/// Most image metadata will not be read until `read_info` is called, so those fields will be
/// None or empty.
pub fn read_header_info(&mut self) -> Result<&Info, DecodingError> {
let mut buf = Vec::new();
while self.read_decoder.info().is_none() {
buf.clear();
if self.read_decoder.decode_next(&mut buf)?.is_none() {
if self.read_decoder.decode_next()?.is_none() {
return Err(DecodingError::Format(
FormatErrorInner::UnexpectedEof.into(),
));
}
}
debug_assert!(self.read_decoder.decompressed_image_data().is_empty());
Ok(self.read_decoder.info().unwrap())
}

Expand All @@ -210,9 +210,7 @@ impl<R: Read> Decoder<R> {
subframe: SubframeInfo::not_yet_init(),
fctl_read: 0,
next_frame: SubframeIdx::Initial,
prev: Vec::new(),
current: Vec::new(),
scan_start: 0,
prev_row_len: None,
transform: self.transform,
scratch_buffer: Vec::new(),
limits: self.limits,
Expand Down Expand Up @@ -281,9 +279,25 @@ struct ReadDecoder<R: Read> {
}

impl<R: Read> ReadDecoder<R> {
/// Returns the next decoded chunk. If the chunk is an ImageData chunk, its contents are written
/// into image_data.
fn decode_next(&mut self, image_data: &mut Vec<u8>) -> Result<Option<Decoded>, DecodingError> {
/// Returns the decompressed-but-not-yet-consumed image data held in the
/// inner decoder's output buffer.
fn decompressed_image_data(&self) -> &[u8] {
self.decoder.decompressed_image_data()
}

/// Mutable counterpart of `decompressed_image_data` — lets the caller
/// unfilter a row in place inside the decoder's output buffer.
fn decompressed_image_data_mut(&mut self) -> &mut [u8] {
self.decoder.decompressed_image_data_mut()
}

/// Discards the first `number_of_bytes_to_discard` bytes of the
/// decompressed image data (delegates to the inner decoder).
fn discard_image_data(&mut self, number_of_bytes_to_discard: usize) {
self.decoder.discard_image_data(number_of_bytes_to_discard);
}

/// Resets the inner decoder's inflater state (delegates to the inner
/// decoder; called e.g. when starting to decode a new frame).
fn reset_image_data_inflater(&mut self) {
self.decoder.reset_image_data_inflater();
}

/// Returns the next decoded chunk. If the chunk is an ImageData chunk, its contents can be
/// found at the end of `decompressed_image_data_mut()`.
fn decode_next(&mut self) -> Result<Option<Decoded>, DecodingError> {
while !self.at_eof {
let (consumed, result) = {
let buf = self.reader.fill_buf()?;
Expand All @@ -292,7 +306,7 @@ impl<R: Read> ReadDecoder<R> {
FormatErrorInner::UnexpectedEof.into(),
));
}
self.decoder.update(buf, image_data)?
self.decoder.update(buf)?
};
self.reader.consume(consumed);
match result {
Expand All @@ -312,14 +326,17 @@ impl<R: Read> ReadDecoder<R> {
FormatErrorInner::UnexpectedEof.into(),
));
}
let (consumed, event) = self.decoder.update(buf, &mut vec![])?;
let (consumed, event) = self.decoder.update(buf)?;
self.reader.consume(consumed);
match event {
Decoded::Nothing => (),
Decoded::ImageEnd => self.at_eof = true,
// ignore more data
Decoded::ChunkComplete(_, _) | Decoded::ChunkBegin(_, _) | Decoded::ImageData => {}
Decoded::ImageDataFlushed => return Ok(()),
Decoded::ImageDataFlushed => {
self.decoder.reset_image_data_inflater();
return Ok(());
}
Decoded::PartialChunk(_) => {}
new => unreachable!("{:?}", new),
}
Expand Down Expand Up @@ -347,12 +364,9 @@ pub struct Reader<R: Read> {
/// control chunk. The IDAT image _may_ have such a chunk applying to it.
fctl_read: u32,
next_frame: SubframeIdx,
/// Previous raw line
prev: Vec<u8>,
/// Current raw line
current: Vec<u8>,
/// Start index of the current scan line.
scan_start: usize,
/// The length of the previous row (at the beginning of
/// `self.decoder.decompressed_image_data_mut()`) or `None` if there is no previous row yet.
prev_row_len: Option<NonZeroUsize>,
/// Output transformations
transform: Transformations,
/// This buffer is only used so that `next_row` and `next_interlaced_row` can return reference
Expand Down Expand Up @@ -401,12 +415,8 @@ impl<R: Read> Reader<R> {
/// Requires IHDR before the IDAT and fcTL before fdAT.
fn read_until_image_data(&mut self) -> Result<(), DecodingError> {
loop {
// This is somewhat ugly. The API requires us to pass a buffer to decode_next but we
// know that we will stop before reading any image data from the stream. Thus pass an
// empty buffer and assert that remains empty.
let mut buf = Vec::new();
let state = self.decoder.decode_next(&mut buf)?;
assert!(buf.is_empty());
let state = self.decoder.decode_next()?;
assert!(self.decoder.decompressed_image_data().is_empty());

match state {
Some(Decoded::ChunkBegin(_, chunk::IDAT))
Expand Down Expand Up @@ -444,12 +454,18 @@ impl<R: Read> Reader<R> {
return Err(DecodingError::LimitsExceeded);
}

self.prev.clear();
self.prev.resize(self.subframe.rowlen, 0);
self.reset_prev_row();

Ok(())
}

/// Forgets the cached "previous row": discards its bytes from the front of
/// the decoder's decompressed-data buffer and clears `prev_row_len`, so the
/// next row will be unfiltered against an all-zeros previous row.
fn reset_prev_row(&mut self) {
if let Some(prev_row_len) = self.prev_row_len.as_ref() {
self.decoder.discard_image_data(prev_row_len.get());
self.prev_row_len = None;
}
}

/// Get information on the image.
///
/// The structure will change as new frames of an animated image are decoded.
Expand All @@ -471,6 +487,9 @@ impl<R: Read> Reader<R> {
/// Output lines will be written in row-major, packed matrix with width and height of the read
/// frame (or subframe), all samples are in big endian byte order where this matters.
pub fn next_frame(&mut self, buf: &mut [u8]) -> Result<OutputInfo, DecodingError> {
self.reset_prev_row();
self.decoder.reset_image_data_inflater();

let subframe_idx = match self.decoder.info().unwrap().frame_control() {
None => SubframeIdx::Initial,
Some(_) => SubframeIdx::Some(self.fctl_read - 1),
Expand Down Expand Up @@ -504,8 +523,6 @@ impl<R: Read> Reader<R> {
line_size: self.output_line_size(self.subframe.width),
};

self.current.clear();
self.scan_start = 0;
let width = self.info().width;
if self.info().interlaced {
while let Some(InterlacedRow {
Expand Down Expand Up @@ -597,7 +614,6 @@ impl<R: Read> Reader<R> {
output_buffer: &mut [u8],
) -> Result<(), DecodingError> {
self.next_raw_interlaced_row(rowlen)?;
let row = &self.prev[1..rowlen];

// Apply transformations and write resulting data to buffer.
let (color_type, bit_depth, trns) = {
Expand All @@ -617,6 +633,7 @@ impl<R: Read> Reader<R> {
} else {
None
};
let row = &self.decoder.decompressed_image_data()[1..rowlen];
match (color_type, trns) {
(ColorType::Indexed, _) if expand => {
expand_paletted(row, output_buffer, info, trns)?;
Expand Down Expand Up @@ -706,8 +723,7 @@ impl<R: Read> Reader<R> {
let (pass, line, width) = adam7.next()?;
let rowlen = self.info().raw_row_length_from_width(width);
if last_pass != pass {
self.prev.clear();
self.prev.resize(rowlen, 0u8);
self.reset_prev_row();
}
Some((rowlen, InterlaceInfo::Adam7 { pass, line, width }))
}
Expand All @@ -718,32 +734,33 @@ impl<R: Read> Reader<R> {
}
}

/// Write the next raw interlaced row into `self.prev`.
/// Write the next raw interlaced row into `self.decoder.decompressed_image_data_mut()`.
///
/// The scanline is filtered against the previous scanline according to the specification.
fn next_raw_interlaced_row(&mut self, rowlen: usize) -> Result<(), DecodingError> {
// Read image data until we have at least one full row (but possibly more than one).
while self.current.len() - self.scan_start < rowlen {
let required_len = match self.prev_row_len {
None => rowlen,
Some(prev_row_len) => {
debug_assert_eq!(prev_row_len.get(), rowlen);
rowlen * 2
}
};
while self.decoder.decompressed_image_data().len() < required_len {
if self.subframe.consumed_and_flushed {
return Err(DecodingError::Format(
FormatErrorInner::NoMoreImageData.into(),
));
}

// Clear the current buffer before appending more data.
if self.scan_start > 0 {
self.current.drain(..self.scan_start).for_each(drop);
self.scan_start = 0;
}

match self.decoder.decode_next(&mut self.current)? {
match self.decoder.decode_next()? {
Some(Decoded::ImageData) => {}
Some(Decoded::ImageDataFlushed) => {
self.subframe.consumed_and_flushed = true;
}
None => {
return Err(DecodingError::Format(
if self.current.is_empty() {
if self.decoder.decompressed_image_data().is_empty() {
FormatErrorInner::NoMoreImageData
} else {
FormatErrorInner::UnexpectedEndOfChunk
Expand All @@ -755,18 +772,47 @@ impl<R: Read> Reader<R> {
}
}

// Get a reference to the current row and point scan_start to the next one.
let row = &mut self.current[self.scan_start..];
self.scan_start += rowlen;

// Unfilter the row.
let filter = FilterType::from_u8(row[0]).ok_or(DecodingError::Format(
FormatErrorInner::UnknownFilterMethod(row[0]).into(),
))?;
unfilter(filter, self.bpp, &self.prev[1..rowlen], &mut row[1..rowlen]);
{
            // Calculate a reference to the previous row (or a buffer of zeros if there is no previous
            // row) and a mutable reference to the current row.
let mut zeros = Vec::new();
let (prev, row) = match self.prev_row_len {
None => {
zeros.resize(rowlen, 0);
(
zeros.as_slice(),
&mut self.decoder.decompressed_image_data_mut()[..rowlen],
)
}
Some(prev_row_len) => {
debug_assert_eq!(prev_row_len.get(), rowlen);
let (prev, row) = self
.decoder
.decompressed_image_data_mut()
.split_at_mut(rowlen);
(&prev[..], &mut row[..rowlen])
}
};
debug_assert_eq!(prev.len(), rowlen);
debug_assert_eq!(row.len(), rowlen);

// Unfilter the row.
let filter = FilterType::from_u8(row[0]).ok_or(DecodingError::Format(
FormatErrorInner::UnknownFilterMethod(row[0]).into(),
))?;
unfilter(filter, self.bpp, &prev[1..], &mut row[1..]);
}

// Save the current row for the next pass.
self.prev[..rowlen].copy_from_slice(&row[..rowlen]);
match self.prev_row_len {
None => {
debug_assert_ne!(0, rowlen);
self.prev_row_len = NonZeroUsize::new(rowlen);
}
Some(prev_row_len) => {
debug_assert_eq!(prev_row_len.get(), rowlen);
self.decoder.discard_image_data(rowlen);
}
}

Ok(())
}
Expand Down
Loading

0 comments on commit 87b44a8

Please sign in to comment.