Skip to content

Commit

Permalink
Implement abstraction over mul_add (#22)
Browse files Browse the repository at this point in the history
* Implement abstraction over mul_add

Apparently, the codegen for mul_add is really bad
when FMA is disabled. Let's see if this is true

* Add attribute to allow non-FMA code once
  • Loading branch information
FreezyLemon authored Feb 29, 2024
1 parent 1850a99 commit f51dd7f
Showing 1 changed file with 16 additions and 5 deletions.
21 changes: 16 additions & 5 deletions src/math.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,27 +132,27 @@ fn log2(x: f32) -> f32 {

/// Returns `x * poly4(x, c1, c2, c3, c4, c5) + c0`.
///
/// `c0` is consumed at this level; the remaining coefficients are handed to
/// `poly4`, which repeats the same step one level down.
#[inline(always)]
fn poly5(x: f32, c0: f32, c1: f32, c2: f32, c3: f32, c4: f32, c5: f32) -> f32 {
// NOTE(review): the diff markers are not visible in this view; this appears to
// be the pre-change line (direct `f32::mul_add` call) that the commit removes.
x.mul_add(poly4(x, c1, c2, c3, c4, c5), c0)
// Appears to be the replacement line: same operands, routed through the
// `multiply_add` helper.
multiply_add(x, poly4(x, c1, c2, c3, c4, c5), c0)
}

/// Returns `x * poly3(x, c1, c2, c3, c4) + c0`.
///
/// `c0` is consumed at this level; the remaining coefficients are handed to
/// `poly3`, which repeats the same step one level down.
#[inline(always)]
fn poly4(x: f32, c0: f32, c1: f32, c2: f32, c3: f32, c4: f32) -> f32 {
// NOTE(review): the diff markers are not visible in this view; this appears to
// be the pre-change line (direct `f32::mul_add` call) that the commit removes.
x.mul_add(poly3(x, c1, c2, c3, c4), c0)
// Appears to be the replacement line: same operands, routed through the
// `multiply_add` helper.
multiply_add(x, poly3(x, c1, c2, c3, c4), c0)
}

/// Returns `x * poly2(x, c1, c2, c3) + c0`.
///
/// `c0` is consumed at this level; the remaining coefficients are handed to
/// `poly2`, which repeats the same step one level down.
#[inline(always)]
fn poly3(x: f32, c0: f32, c1: f32, c2: f32, c3: f32) -> f32 {
// NOTE(review): the diff markers are not visible in this view; this appears to
// be the pre-change line (direct `f32::mul_add` call) that the commit removes.
x.mul_add(poly2(x, c1, c2, c3), c0)
// Appears to be the replacement line: same operands, routed through the
// `multiply_add` helper.
multiply_add(x, poly2(x, c1, c2, c3), c0)
}

/// Returns `x * poly1(x, c1, c2) + c0`.
///
/// `c0` is consumed at this level; the remaining coefficients are handed to
/// `poly1`, which repeats the same step one level down.
#[inline(always)]
fn poly2(x: f32, c0: f32, c1: f32, c2: f32) -> f32 {
// NOTE(review): the diff markers are not visible in this view; this appears to
// be the pre-change line (direct `f32::mul_add` call) that the commit removes.
x.mul_add(poly1(x, c1, c2), c0)
// Appears to be the replacement line: same operands, routed through the
// `multiply_add` helper.
multiply_add(x, poly1(x, c1, c2), c0)
}

/// Returns `x * poly0(x, c1) + c0`.
///
/// `c0` is consumed at this level and `c1` is handed to `poly0`.
/// NOTE(review): `poly0` is defined outside this view; its result is treated
/// here as an opaque value.
#[inline(always)]
fn poly1(x: f32, c0: f32, c1: f32) -> f32 {
// NOTE(review): the diff markers are not visible in this view; this appears to
// be the pre-change line (direct `f32::mul_add` call) that the commit removes.
x.mul_add(poly0(x, c1), c0)
// Appears to be the replacement line: same operands, routed through the
// `multiply_add` helper.
multiply_add(x, poly0(x, c1), c0)
}

#[inline(always)]
Expand All @@ -174,3 +174,14 @@ pub fn expf(x: f32) -> f32 {
x.exp()
}
}

/// Computes `(a * b) + c`, using a fused multiply-add only when the build
/// target has the `fma` feature enabled.
///
/// With `fma` enabled this defers to [`f32::mul_add`]; otherwise it evaluates
/// the plain `a * b + c` expression. The choice is made at compile time via
/// `cfg!`, not at runtime.
#[inline]
// The unfused expression below is deliberate, so silence the lint that would
// otherwise suggest rewriting it as `mul_add`.
#[allow(clippy::suboptimal_flops)]
pub fn multiply_add(a: f32, b: f32, c: f32) -> f32 {
    // Compile-time constant: true only when the target advertises `fma`.
    const HAS_FMA: bool = cfg!(target_feature = "fma");

    if !HAS_FMA {
        return a * b + c;
    }

    a.mul_add(b, c)
}

0 comments on commit f51dd7f

Please sign in to comment.