Skip to content

Commit

Permalink
Use std::simd inside create_rgba_palette
Browse files Browse the repository at this point in the history
  • Loading branch information
anforowicz committed Jan 31, 2024
1 parent 57b5d54 commit c71f944
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ include = [

[dependencies]
bitflags = "1.0"
bytemuck = "1.14.1"
crc32fast = "1.2.0"
fdeflate = "0.3.3"
flate2 = "1.0"
Expand Down
20 changes: 20 additions & 0 deletions src/decoder/transform/palette.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,26 @@ fn create_rgba_palette(info: &Info) -> [[u8; 4]; 256] {
{
let mut palette_iter = palette;
let mut rgba_iter = &mut rgba_palette[..];

// TODO(https://github.com/rust-lang/rust/issues/86656): Stop gating this block behind the
// "unstable" feature of the `png` crate. This should be possible once the "portable_simd"
// feature of Rust gets stabilized.
#[cfg(feature = "unstable")]
{
use std::simd::simd_swizzle;
use std::simd::Simd;
while palette_iter.len() >= 16 {
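// Only the first 12 of the 16 loaded bytes (4 RGB triples) are consumed per iteration.
// Note that the `simd_swizzle!` below fills every alpha lane with input byte 0 - a
// garbage placeholder rather than a real alpha value.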
let simd = Simd::<u8, 16>::from_slice(&palette_iter[0..16]);
let simd = simd_swizzle!(simd, [0, 1, 2, 0, 3, 4, 5, 0, 6, 7, 8, 0, 9, 10, 11, 0]);
let simd: [[u8; 4]; 4] = bytemuck::cast(simd.to_array());
rgba_iter[0..4].copy_from_slice(&simd);

palette_iter = &palette_iter[12..];
rgba_iter = &mut rgba_iter[4..];
}
}

while palette_iter.len() >= 4 {
// Copying 4 bytes at a time is more efficient than copying 3.
// OTOH, this clobbers the alpha value in `rgba_iter[0][3]` - we
Expand Down

0 comments on commit c71f944

Please sign in to comment.