Skip to content

Commit

Permalink
[CPU] Fixed Reduce kernel with bf16 destination precision
Browse files (browse the repository at this point in the history)
  • Loading branch information
dmitry-gorokhov committed Jan 29, 2025
1 parent ad44deb commit 895ec90
Showing 1 changed file with 18 additions and 6 deletions.
24 changes: 18 additions & 6 deletions src/plugins/intel_cpu/src/nodes/reduce.cpp
Original file line number | Diff line number | Diff line change
Expand Up
@@ -1006,9 +1006,15 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene…
uni_vmovups(op, vmm_dst);
break;
case memory::data_type::bf16:
- uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())},
- {static_cast<size_t>(ymm_dst.getIdx())});
- vmovdqu16(op, ymm_dst);
+ if (isa == x64::avx512_core) {
+ uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())},
+ {static_cast<size_t>(ymm_dst.getIdx())});
+ vmovdqu16(op, ymm_dst);
+ } else {
+ uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())},
+ {static_cast<size_t>(xmm_dst.getIdx())});
+ uni_vmovdqu(op, xmm_dst);
+ }
break;
case memory::data_type::f16:
vcvtps2ph(op, vmm_dst, 0x4);
Expand Down
Expand Up
@@ -1723,9 +1729,15 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi…
uni_vmovups(op, vmm_dst);
break;
case memory::data_type::bf16:
- uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())},
- {static_cast<size_t>(ymm_dst.getIdx())});
- vmovdqu16(op, ymm_dst);
+ if (isa == x64::avx512_core) {
+ uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())},
+ {static_cast<size_t>(ymm_dst.getIdx())});
+ vmovdqu16(op, ymm_dst);
+ } else {
+ uni_vcvtneps2bf16->emit_code({static_cast<size_t>(vmm_dst.getIdx())},
+ {static_cast<size_t>(xmm_dst.getIdx())});
+ uni_vmovdqu(op, xmm_dst);
+ }
break;
case memory::data_type::f16:
vcvtps2ph(op, vmm_dst, 0x4);
Expand Down

0 comments on commit 895ec90

Please sign in to comment.