-
Notifications
You must be signed in to change notification settings - Fork 13.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Don't aggregate homogeneous floats in the Rust ABI
- Loading branch information
Showing
4 changed files
with
143 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
// assembly-output: emit-asm | ||
// needs-llvm-components: x86 | ||
// compile-flags: --target x86_64-unknown-linux-gnu | ||
// compile-flags: -C llvm-args=--x86-asm-syntax=intel | ||
// compile-flags: -C opt-level=3 | ||
|
||
#![crate_type = "rlib"] | ||
#![no_std] | ||
|
||
// CHECK-LABEL: sum_f32: | ||
// CHECK: addss xmm0, xmm1 | ||
// CHECK-NEXT: ret | ||
/// Adds two `f32` scalars.
///
/// The body is deliberately a single scalar addition: the FileCheck
/// directives placed directly before this item match the assembly emitted for
/// it, so the expression must not be restructured.
#[no_mangle]
pub fn sum_f32(a: f32, b: f32) -> f32 {
    a + b
}
|
||
// CHECK-LABEL: sum_f32x2: | ||
// CHECK: addss xmm{{[0-9]}}, xmm{{[0-9]}} | ||
// CHECK-NEXT: addss xmm{{[0-9]}}, xmm{{[0-9]}} | ||
// CHECK-NEXT: ret | ||
/// Adds two `[f32; 2]` arrays lane by lane.
///
/// The FileCheck directives placed directly before this item expect two
/// scalar `addss` instructions followed by `ret`, so the element-wise form is
/// kept exactly as written.
#[no_mangle]
pub fn sum_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    [
        a[0] + b[0],
        a[1] + b[1],
    ]
}
|
||
// CHECK-LABEL: sum_f32x4: | ||
// CHECK: mov rax, [[PTR_IN:.*]] | ||
// CHECK-NEXT: movups [[XMMA:xmm[0-9]]], xmmword ptr [rsi] | ||
// CHECK-NEXT: movups [[XMMB:xmm[0-9]]], xmmword ptr [rdx] | ||
// CHECK-NEXT: addps [[XMMB]], [[XMMA]] | ||
// CHECK-NEXT: movups xmmword ptr [[[PTR_IN]]], [[XMMB]] | ||
// CHECK-NEXT: ret | ||
/// Adds two `[f32; 4]` arrays lane by lane.
///
/// The FileCheck directives placed directly before this item expect both
/// inputs to be loaded through pointers, combined with one packed `addps`,
/// and stored through the return pointer, so the element-wise form is kept
/// exactly as written.
#[no_mangle]
pub fn sum_f32x4(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
    [
        a[0] + b[0],
        a[1] + b[1],
        a[2] + b[2],
        a[3] + b[3],
    ]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
//! Check that homogeneous float aggregates are passed as an array when they are small | ||
//! (less than 128 bits on x86_64) and by pointer otherwise. | ||
// compile-flags: -C no-prepopulate-passes -O | ||
// only-x86_64 | ||
|
||
#![crate_type = "lib"] | ||
|
||
/// Homogeneous aggregate of four `f32` fields.
///
/// Used by `array_f32x4_nested` to check how a struct made only of floats is
/// passed and returned. The fields are never read; only the layout matters.
pub struct Foo {
    bar1: f32,
    bar2: f32,
    bar3: f32,
    bar4: f32,
}
|
||
// CHECK: define [2 x float] @array_f32x2([2 x float] %0, [2 x float] %1) | ||
/// Signature-only stub for the `[f32; 2]` case.
///
/// Only the LLVM function signature is inspected by the `CHECK` line placed
/// directly before this item, so the body is left unimplemented. The
/// parameters are intentionally unused and must keep their names.
///
/// # Panics
///
/// Always panics via `todo!()` if actually called.
#[no_mangle]
pub fn array_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    todo!()
}
|
||
// CHECK: define void @array_f32x4([4 x float]* {{.*}} sret([4 x float]) {{.*}} %0, [4 x float]* {{.*}} %a, [4 x float]* {{.*}} %b) | ||
/// Signature-only stub for the `[f32; 4]` case.
///
/// The `CHECK` line placed directly before this item matches the LLVM
/// signature, including the `%a` and `%b` parameter names, so the parameters
/// must keep their names even though they are unused.
///
/// # Panics
///
/// Always panics via `todo!()` if actually called.
#[no_mangle]
pub fn array_f32x4(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
    todo!()
}
|
||
// CHECK: define void @array_f32x4_nested(%Foo* {{.*}} sret(%Foo) {{.*}} %0, %Foo* {{.*}} %a, %Foo* {{.*}} %b) | ||
#[no_mangle] | ||
pub fn array_f32x4_nested(a: Foo, b: Foo) -> Foo { | ||
todo!() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
// This tests that, no matter the optimization level or the target features enabled, the | ||
// non-aggregation of homogeneous floats in the ABI is sound and still produces the right answer. | ||
|
||
// revisions: opt-0 opt-0-native opt-1 opt-1-native opt-2 opt-2-native opt-3 opt-3-native | ||
//[opt-0] compile-flags: -C opt-level=0 | ||
//[opt-1] compile-flags: -C opt-level=1 | ||
//[opt-2] compile-flags: -C opt-level=2 | ||
//[opt-3] compile-flags: -C opt-level=3 | ||
//[opt-0-native] compile-flags: -C opt-level=0 -C target-cpu=native | ||
//[opt-1-native] compile-flags: -C opt-level=1 -C target-cpu=native | ||
//[opt-2-native] compile-flags: -C opt-level=2 -C target-cpu=native | ||
//[opt-3-native] compile-flags: -C opt-level=3 -C target-cpu=native | ||
// run-pass | ||
|
||
#![feature(core_intrinsics)] | ||
|
||
use std::intrinsics::black_box; | ||
|
||
/// Adds two `f32` scalars; the baseline case for the run-pass checks in `main`.
pub fn sum_f32(a: f32, b: f32) -> f32 {
    a + b
}
|
||
/// Adds two `[f32; 2]` arrays lane by lane.
pub fn sum_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
    [a[0] + b[0], a[1] + b[1]]
}
|
||
/// Adds two `[f32; 3]` arrays lane by lane.
pub fn sum_f32x3(a: [f32; 3], b: [f32; 3]) -> [f32; 3] {
    [a[0] + b[0], a[1] + b[1], a[2] + b[2]]
}
|
||
/// Adds two `[f32; 4]` arrays lane by lane.
pub fn sum_f32x4(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
    [a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]]
}
|
||
fn main() { | ||
assert_eq!(1., black_box(sum_f32(black_box(0.), black_box(1.)))); | ||
assert_eq!([2., 2.], black_box(sum_f32x2(black_box([2., 0.]), black_box([0., 2.])))); | ||
assert_eq!( | ||
[3., 3., 3.], | ||
black_box(sum_f32x3(black_box([1., 2., 3.]), black_box([2., 1., 0.]))) | ||
); | ||
assert_eq!( | ||
[4., 4., 4., 4.], | ||
black_box(sum_f32x4(black_box([1., 2., 3., 4.]), black_box([3., 2., 1., 0.]))) | ||
); | ||
} |