diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
index 09772fee..29e2fd07 100644
--- a/src/decoder/mod.rs
+++ b/src/decoder/mod.rs
@@ -619,12 +619,10 @@ impl Reader {
         };
         match (color_type, trns) {
             (ColorType::Indexed, _) if expand => {
-                output_buffer[..row.len()].copy_from_slice(row);
-                expand_paletted(output_buffer, info, trns)?;
+                expand_paletted(row, output_buffer, info, trns)?;
             }
             (ColorType::Grayscale | ColorType::GrayscaleAlpha, _) if bit_depth < 8 && expand => {
-                output_buffer[..row.len()].copy_from_slice(row);
-                expand_gray_u8(output_buffer, info, trns)
+                expand_gray_u8(row, output_buffer, info, trns)
             }
             (ColorType::Grayscale | ColorType::Rgb, Some(trns)) if expand => {
                 let channels = color_type.samples();
@@ -811,6 +809,7 @@ impl SubframeInfo {
 }
 
 fn expand_paletted(
+    row: &[u8],
     buffer: &mut [u8],
     info: &Info,
     trns: Option<Option<&[u8]>>,
@@ -842,7 +841,7 @@ fn expand_paletted(
                 &[]
             };
 
-            utils::unpack_bits(buffer, 4, info.bit_depth as u8, |i, chunk| {
+            utils::unpack_bits(row, buffer, 4, info.bit_depth as u8, |i, chunk| {
                 let (rgb, a) = (
                     palette
                         .get(3 * i as usize..3 * i as usize + 3)
@@ -855,7 +854,7 @@ fn expand_paletted(
                 chunk[3] = a;
             });
         } else {
-            utils::unpack_bits(buffer, 3, info.bit_depth as u8, |i, chunk| {
+            utils::unpack_bits(row, buffer, 3, info.bit_depth as u8, |i, chunk| {
                 let rgb = palette
                     .get(3 * i as usize..3 * i as usize + 3)
                     .unwrap_or(&black);
@@ -873,7 +872,7 @@ fn expand_paletted(
     }
 }
 
-fn expand_gray_u8(buffer: &mut [u8], info: &Info, trns: Option<Option<&[u8]>>) {
+fn expand_gray_u8(row: &[u8], buffer: &mut [u8], info: &Info, trns: Option<Option<&[u8]>>) {
     let rescale = true;
     let scaling_factor = if rescale {
         (255) / ((1u16 << info.bit_depth as u8) - 1) as u8
@@ -881,7 +880,7 @@ fn expand_gray_u8(buffer: &mut [u8], info: &Info, trns: Option<Option<&[u8]>>) {
         1
     };
     if let Some(trns) = trns {
-        utils::unpack_bits(buffer, 2, info.bit_depth as u8, |pixel, chunk| {
+        utils::unpack_bits(row, buffer, 2, info.bit_depth as u8, |pixel, chunk| {
             chunk[1] = if let Some(trns) = trns {
                 if pixel == trns[0] {
                     0
@@ -894,7 +893,7 @@ fn expand_gray_u8(buffer: &mut [u8], info: &Info, trns: Option<Option<&[u8]>>) {
             chunk[0] = pixel * scaling_factor
         })
     } else {
-        utils::unpack_bits(buffer, 1, info.bit_depth as u8, |val, chunk| {
+        utils::unpack_bits(row, buffer, 1, info.bit_depth as u8, |val, chunk| {
             chunk[0] = val * scaling_factor
         })
     }
diff --git a/src/utils.rs b/src/utils.rs
index d43753b1..11063700 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -1,40 +1,38 @@
 //! Utility functions
-use std::iter::{repeat, StepBy};
+use std::iter::StepBy;
 use std::ops::Range;
 
 #[inline(always)]
-pub fn unpack_bits<F>(buf: &mut [u8], channels: usize, bit_depth: u8, func: F)
+pub fn unpack_bits<F>(row: &[u8], buf: &mut [u8], channels: usize, bit_depth: u8, func: F)
 where
     F: Fn(u8, &mut [u8]),
 {
-    // Return early if empty. This enables to subtract `channels` later without overflow.
-    if buf.len() < channels {
-        return;
-    }
-
-    let bits = buf.len() / channels * bit_depth as usize;
-    let extra_bits = bits % 8;
-    let entries = bits / 8
-        + match extra_bits {
-            0 => 0,
-            _ => 1,
-        };
-    let skip = match extra_bits {
-        0 => 0,
-        n => (8 - n) / bit_depth as usize,
-    };
     let mask = ((1u16 << bit_depth) - 1) as u8;
-    let i = (0..entries)
-        .rev() // reverse iterator
-        .flat_map(|idx|
-            // this has to be reversed too
-            (0..8).step_by(bit_depth.into())
-            .zip(repeat(idx)))
-        .skip(skip);
-    let j = (0..=buf.len() - channels).rev().step_by(channels);
-    for ((shift, i), j) in i.zip(j) {
-        let pixel = (buf[i] & (mask << shift)) >> shift;
-        func(pixel, &mut buf[j..(j + channels)])
+
+    let mut buf_chunks = buf.chunks_exact_mut(channels);
+
+    // `shift` iterates through these ranges for each bit depth:
+    // 1 => &[7, 6, 5, 4, 3, 2, 1, 0],
+    // 2 => &[6, 4, 2, 0],
+    // 4 => &[4, 0],
+    // 8 => &[0],
+    //
+    // `(0..8).step_by(bit_depth.into()).rev()` doesn't always optimize well so
+    // shifts are calculated instead. (2023-08, Rust 1.71)
+
+    for &curr in row.iter() {
+        let mut shift = 8 - bit_depth as i32;
+
+        while shift >= 0 {
+            if let Some(chunk) = buf_chunks.next() {
+                let pixel = (curr >> shift) & mask;
+                func(pixel, chunk);
+            } else {
+                return;
+            }
+
+            shift -= bit_depth as i32;
+        }
     }
 }
 