Skip to content

Commit

Permalink
Additional small SIMD-related improvements for Paeth unfilter.
Browse files Browse the repository at this point in the history
This commit implements 2 changes proposed by @okaneco:

* Changing how `load3` and `load6` work
* Using a manual `if_then_else` instead of `std::simd::Mask::select`
  • Loading branch information
anforowicz committed Oct 6, 2023
1 parent f5021fb commit 97d4f63
Showing 1 changed file with 26 additions and 6 deletions.
32 changes: 26 additions & 6 deletions src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,23 @@ use crate::common::BytesPerPixel;
#[cfg(feature = "unstable")]
mod simd {
use std::simd::{
u8x4, u8x8, LaneCount, Simd, SimdInt, SimdOrd, SimdPartialEq, SimdUint, SupportedLaneCount,
u8x4, u8x8, LaneCount, Mask, Simd, SimdInt, SimdOrd, SimdPartialEq, SimdUint, SupportedLaneCount,
};

/// Equivalent to `condition.select(true_value, false_value)` but generates more compact
/// and faster assembly - see https://rust.godbolt.org/z/cbYK5xchf
///
/// For every lane, the result holds the lane of `true_value` when the matching lane of
/// `condition` is set, and the lane of `false_value` otherwise.
fn if_then_else<const N: usize>(
    condition: Mask<i16, N>,
    true_value: Simd<i16, N>,
    false_value: Simd<i16, N>,
) -> Simd<i16, N>
where
    LaneCount<N>: SupportedLaneCount,
{
    // `to_int` maps `true` lanes to -1 (all bits set) and `false` lanes to 0, so the
    // integer vector can be used directly as a per-lane bit mask.
    let condition = condition.to_int();
    // Keep `true_value` where the mask is all-ones and `false_value` where the *inverted*
    // mask is all-ones. The negation must be applied to the mask, not to `false_value`.
    (condition & true_value) | (!condition & false_value)
}

/// This is an equivalent of the `PaethPredictor` function from
/// [the spec](http://www.libpng.org/pub/png/spec/1.2/PNG-Filters.html#Filter-type-4-Paeth)
/// except that it simultaneously calculates the predictor for all SIMD lanes.
Expand Down Expand Up @@ -48,9 +62,11 @@ mod simd {
// if smallest == pa
// then select a
// else select (if smallest == pb then select b else select c)
smallest
.simd_eq(pa)
.select(a, smallest.simd_eq(pb).select(b, c))
if_then_else(
smallest.simd_eq(pa),
a,
if_then_else(smallest.simd_eq(pb), b, c),
)
}

/// Memory of previous pixels (as needed to unfilter `FilterType::Paeth`).
Expand Down Expand Up @@ -88,7 +104,9 @@ mod simd {
}

fn load3(src: &[u8]) -> u8x4 {
u8x4::from_array([src[0], src[1], src[2], 0])
let mut temp = [0; 4];
temp[..3].copy_from_slice(&src[..3]);
u8x4::from_slice(&temp)
}

fn store3(src: u8x4, dest: &mut [u8]) {
Expand Down Expand Up @@ -127,7 +145,9 @@ mod simd {
}

fn load6(src: &[u8]) -> u8x8 {
u8x8::from_array([src[0], src[1], src[2], src[3], src[4], src[5], 0, 0])
let mut temp = [0; 8];
temp[..6].copy_from_slice(&src[..6]);
u8x8::from_slice(&temp)
}

fn store6(src: u8x8, dest: &mut [u8]) {
Expand Down

0 comments on commit 97d4f63

Please sign in to comment.