Skip to content

Commit

Permalink
avx512bw (#1014)
Browse files Browse the repository at this point in the history
  • Loading branch information
minybot authored Feb 25, 2021
1 parent ecc7a73 commit 860494d
Show file tree
Hide file tree
Showing 2 changed files with 223 additions and 9 deletions.
18 changes: 9 additions & 9 deletions crates/core_arch/avx512bw.md
Original file line number Diff line number Diff line change
Expand Up @@ -296,9 +296,9 @@
* [x] [`_mm512_cvtepi16_epi8`]
* [x] [`_mm512_mask_cvtepi16_epi8`]
* [x] [`_mm512_maskz_cvtepi16_epi8`]
* [_] [`_mm512_mask_cvtepi16_storeu_epi8`]
* [_] [`_mm_mask_cvtepi16_storeu_epi8`]
* [_] [`_mm256_mask_cvtepi16_storeu_epi8`]
* [x] [`_mm512_mask_cvtepi16_storeu_epi8`]
* [x] [`_mm_mask_cvtepi16_storeu_epi8`]
* [x] [`_mm256_mask_cvtepi16_storeu_epi8`]
* [x] [`_mm_cvtepi16_epi8`]
* [x] [`_mm_mask_cvtepi16_epi8`]
* [x] [`_mm_maskz_cvtepi16_epi8`]
Expand All @@ -315,15 +315,15 @@
* [x] [`_mm512_cvtsepi16_epi8`]
* [x] [`_mm512_mask_cvtsepi16_epi8`]
* [x] [`_mm512_maskz_cvtsepi16_epi8`]
* [_] [`_mm512_mask_cvtsepi16_storeu_epi8`]
* [x] [`_mm_cvtsepi16_epi8`]
* [x] [`_mm_mask_cvtsepi16_epi8`]
* [x] [`_mm_maskz_cvtsepi16_epi8`]
* [x] [`_mm256_cvtsepi16_epi8`]
* [x] [`_mm256_mask_cvtsepi16_epi8`]
* [x] [`_mm256_maskz_cvtsepi16_epi8`]
* [_] [`_mm_mask_cvtsepi16_storeu_epi8`]
* [_] [`_mm256_mask_cvtsepi16_storeu_epi8`]
* [x] [`_mm512_mask_cvtsepi16_storeu_epi8`]
* [x] [`_mm_mask_cvtsepi16_storeu_epi8`]
* [x] [`_mm256_mask_cvtsepi16_storeu_epi8`]
* [x] [`_mm512_cvtepu8_epi16`]
* [x] [`_mm512_mask_cvtepu8_epi16`]
* [x] [`_mm512_maskz_cvtepu8_epi16`]
Expand All @@ -338,15 +338,15 @@
* [x] [`_mm512_cvtusepi16_epi8`]
* [x] [`_mm512_mask_cvtusepi16_epi8`]
* [x] [`_mm512_maskz_cvtusepi16_epi8`]
* [_] [`_mm512_mask_cvtusepi16_storeu_epi8`]
* [x] [`_mm_cvtusepi16_epi8`]
* [x] [`_mm_mask_cvtusepi16_epi8`]
* [x] [`_mm_maskz_cvtusepi16_epi8`]
* [x] [`_mm256_cvtusepi16_epi8`]
* [x] [`_mm256_mask_cvtusepi16_epi8`]
* [x] [`_mm256_maskz_cvtusepi16_epi8`]
* [_] [`_mm_mask_cvtusepi16_storeu_epi8`]
* [_] [`_mm256_mask_cvtusepi16_storeu_epi8`]
* [x] [`_mm512_mask_cvtusepi16_storeu_epi8`]
* [x] [`_mm_mask_cvtusepi16_storeu_epi8`]
* [x] [`_mm256_mask_cvtusepi16_storeu_epi8`]
* [x] [`_mm512_dbsad_epu8`]
* [x] [`_mm512_mask_dbsad_epu8`]
* [x] [`_mm512_maskz_dbsad_epu8`]
Expand Down
214 changes: 214 additions & 0 deletions crates/core_arch/src/x86/avx512bw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9388,6 +9388,96 @@ pub unsafe fn _mm_maskz_alignr_epi8(k: __mmask16, a: __m128i, b: __m128i, imm8:
transmute(simd_select_bitmask(k, r.as_i8x16(), zero))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
vpmovswbmem(mem_addr as *mut i8, a.as_i16x32(), k);
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
vpmovswbmem256(mem_addr as *mut i8, a.as_i16x16(), k);
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovswbmem128(mem_addr as *mut i8, a.as_i16x8(), k);
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
vpmovwbmem(mem_addr as *mut i8, a.as_i16x32(), k);
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
vpmovwbmem256(mem_addr as *mut i8, a.as_i16x16(), k);
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovwbmem128(mem_addr as *mut i8, a.as_i16x8(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
vpmovuswbmem(mem_addr as *mut i8, a.as_i16x32(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
vpmovuswbmem256(mem_addr as *mut i8, a.as_i16x16(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovuswbmem128(mem_addr as *mut i8, a.as_i16x8(), k);
}

#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx512.mask.paddus.w.512"]
Expand Down Expand Up @@ -9594,6 +9684,27 @@ extern "C" {
fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

#[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
#[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

#[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
#[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

#[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
#[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
}

#[cfg(test)]
Expand Down Expand Up @@ -17905,4 +18016,107 @@ mod tests {
let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
let a = _mm512_set1_epi16(i16::MAX);
let mut r = _mm256_undefined_si256();
_mm512_mask_cvtsepi16_storeu_epi8(
&mut r as *mut _ as *mut i8,
0b11111111_11111111_11111111_11111111,
a,
);
let e = _mm256_set1_epi8(i8::MAX);
assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
let a = _mm256_set1_epi16(i16::MAX);
let mut r = _mm_undefined_si128();
_mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
let e = _mm_set1_epi8(i8::MAX);
assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
let a = _mm_set1_epi16(i16::MAX);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0, 0, 0, 0, 0,
i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
);
assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
let a = _mm512_set1_epi16(8);
let mut r = _mm256_undefined_si256();
_mm512_mask_cvtepi16_storeu_epi8(
&mut r as *mut _ as *mut i8,
0b11111111_11111111_11111111_11111111,
a,
);
let e = _mm256_set1_epi8(8);
assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
let a = _mm256_set1_epi16(8);
let mut r = _mm_undefined_si128();
_mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
let e = _mm_set1_epi8(8);
assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
let a = _mm_set1_epi16(8);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
let a = _mm512_set1_epi16(i16::MAX);
let mut r = _mm256_undefined_si256();
_mm512_mask_cvtusepi16_storeu_epi8(
&mut r as *mut _ as *mut i8,
0b11111111_11111111_11111111_11111111,
a,
);
let e = _mm256_set1_epi8(u8::MAX as i8);
assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
let a = _mm256_set1_epi16(i16::MAX);
let mut r = _mm_undefined_si128();
_mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
let e = _mm_set1_epi8(u8::MAX as i8);
assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
let a = _mm_set1_epi16(i16::MAX);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
);
assert_eq_m128i(r, e);
}
}

0 comments on commit 860494d

Please sign in to comment.