From db986994e8bbb829058a38907a1af62173589a86 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Wed, 22 Jan 2025 14:23:33 -0700
Subject: [PATCH 1/2] Fix and/or intrinsics with non-int parameters (#7060)

And/or intrinsics were set to allow any parameters, which is consistent
with the behavior of the && and || operators they were meant to replace,
however this meant that if they were passed floating point values, those
values would be applied to the binary and/or operands, which isn't
allowed. Instead, they should be converted to booleans to be consistent
with the behavior of && and ||. This can be done simply by restricting
the parameters to booleans which forces the appropriate conversions.
Adds tests for using bools, ints, and floats in scalars, different
sized vectors, and matrices as parameters to or and and.

Fixes: #7057
Fixes: #6995
(cherry picked from commit 25faa88053437c30e599b9e5323c94addc2397a1)
---
 tools/clang/test/CodeGenHLSL/vector-and.hlsl  | 141 +++++++++++++++
 tools/clang/test/CodeGenHLSL/vector-or.hlsl   | 164 ++++++++++++++++++
 .../test/CodeGenSPIRV/intrinsics.and.hlsl     |  45 +++++
 .../test/CodeGenSPIRV/intrinsics.or.hlsl      |  34 ++++
 utils/hct/gen_intrin_main.txt                 |   4 +-
 5 files changed, 386 insertions(+), 2 deletions(-)
 create mode 100644 tools/clang/test/CodeGenHLSL/vector-and.hlsl
 create mode 100644 tools/clang/test/CodeGenHLSL/vector-or.hlsl
diff --git a/tools/clang/test/CodeGenHLSL/vector-and.hlsl b/tools/clang/test/CodeGenHLSL/vector-and.hlsl
new file mode 100644
index 0000000000..d6ae00af2f
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/vector-and.hlsl
@@ -0,0 +1,141 @@
+// RUN: %dxc -T ps_6_0 -HV 2021 -DTYPE=bool %s | FileCheck %s --check-prefixes=CHECK,I32
+// RUN: %dxc -T ps_6_0 -HV 2018 -DTYPE=bool %s | FileCheck %s --check-prefixes=CHECK,I32
+// RUN: %dxc -T ps_6_0 -HV 2021 -DTYPE=int %s | FileCheck %s --check-prefixes=CHECK,I32
+// RUN: %dxc -T ps_6_0 -HV 2018 -DTYPE=int %s | FileCheck %s --check-prefixes=CHECK,I32
+// RUN: %dxc -T ps_6_0 -HV 2021 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,F32
+// RUN: %dxc -T ps_6_0 -HV 2018 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,F32
+
+// I32: %dx.types.ResRet.[[TY:i32]] = type { [[TYPE:i32]]
+// F32: %dx.types.ResRet.[[TY:f32]] = type { [[TYPE:float]]
+
+// CHECK-LABEL: define void @main
+
+ByteAddressBuffer buf;
+
+float4 main() : SV_Target {
+  // Exercises and() on scalar, one- and three-element vector, and 2x3 matrix operands of TYPE.
+  // CHECK-DAG: [[SAR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 0
+  // CHECK-DAG: [[SAX:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[SAR]], 0
+  // I32-DAG: [[SA:%.*]] = icmp ne i32 [[SAX]], 0
+  // F32-DAG: [[SA:%.*]] = fcmp fast une float [[SAX]], 0.000000e+00
+
+  // CHECK-DAG: [[SBR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 8
+  // CHECK-DAG: [[SBX:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[SBR]], 0
+  // I32-DAG: [[SB:%.*]] = icmp ne i32 [[SBX]], 0
+  // F32-DAG: [[SB:%.*]] = fcmp fast une float [[SBX]], 0.000000e+00
+  // Scalar operands, read from byte offsets 8 (sb) and 0 (sa).
+  TYPE sb = buf.Load<TYPE>(8);
+  TYPE sa = buf.Load<TYPE>(0);
+
+  // CHECK: and i1 [[SB]], [[SA]]
+  TYPE res = and(sa, sb);
+
+  // CHECK-DAG: [[V1AR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 16
+  // CHECK-DAG: [[V1AX:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V1AR]], 0
+  // I32-DAG: [[V1A:%.*]] = icmp ne i32 [[V1AX]], 0
+  // F32-DAG: [[V1A:%.*]] = fcmp fast une float [[V1AX]], 0.000000e+00
+
+  // CHECK-DAG: [[V1BR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 24
+  // CHECK-DAG: [[V1BX:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V1BR]], 0
+  // I32-DAG: [[V1B:%.*]] = icmp ne i32 [[V1BX]], 0
+  // F32-DAG: [[V1B:%.*]] = fcmp fast une float [[V1BX]], 0.000000e+00
+  // One-element vector operands, read from byte offsets 24 (v1b) and 16 (v1a).
+  vector<TYPE, 1> v1b = buf.Load< vector<TYPE, 1> >(24);
+  vector<TYPE, 1> v1a = buf.Load< vector<TYPE, 1> >(16);
+
+  // CHECK: and i1 [[V1B]], [[V1A]]
+  vector<TYPE, 1> res1 = and(v1a, v1b);
+
+  // CHECK-DAG: [[V3AR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 32
+  // CHECK-DAG: [[V3AX0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3AR]], 0
+  // CHECK-DAG: [[V3AX1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3AR]], 1
+  // CHECK-DAG: [[V3AX2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3AR]], 2
+
+  // I32-DAG: [[V3A0:%.*]] = icmp ne i32 [[V3AX0]], 0
+  // I32-DAG: [[V3A1:%.*]] = icmp ne i32 [[V3AX1]], 0
+  // I32-DAG: [[V3A2:%.*]] = icmp ne i32 [[V3AX2]], 0
+
+  // F32-DAG: [[V3A0:%.*]] = fcmp fast une float [[V3AX0]], 0.000000e+00
+  // F32-DAG: [[V3A1:%.*]] = fcmp fast une float [[V3AX1]], 0.000000e+00
+  // F32-DAG: [[V3A2:%.*]] = fcmp fast une float [[V3AX2]], 0.000000e+00
+
+  // CHECK-DAG: [[V3BR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 56
+  // CHECK-DAG: [[V3BX0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3BR]], 0
+  // CHECK-DAG: [[V3BX1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3BR]], 1
+  // CHECK-DAG: [[V3BX2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3BR]], 2
+
+  // I32-DAG: [[V3B0:%.*]] = icmp ne i32 [[V3BX0]], 0
+  // I32-DAG: [[V3B1:%.*]] = icmp ne i32 [[V3BX1]], 0
+  // I32-DAG: [[V3B2:%.*]] = icmp ne i32 [[V3BX2]], 0
+
+  // F32-DAG: [[V3B0:%.*]] = fcmp fast une float [[V3BX0]], 0.000000e+00
+  // F32-DAG: [[V3B1:%.*]] = fcmp fast une float [[V3BX1]], 0.000000e+00
+  // F32-DAG: [[V3B2:%.*]] = fcmp fast une float [[V3BX2]], 0.000000e+00
+  // Three-element vector operands, read from byte offsets 56 (v3b) and 32 (v3a).
+  vector<TYPE, 3> v3b = buf.Load< vector<TYPE, 3> >(56);
+  vector<TYPE, 3> v3a = buf.Load< vector<TYPE, 3> >(32);
+
+  // CHECK: and i1 [[V3B0]], [[V3A0]]
+  // CHECK: and i1 [[V3B1]], [[V3A1]]
+  // CHECK: and i1 [[V3B2]], [[V3A2]]
+  vector<TYPE, 3> res3 = and(v3a, v3b);
+  // Each matrix operand is expected as two buffer loads, four components and then two more 16 bytes later.
+  // CHECK-DAG: [[MAR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 80
+  // CHECK-DAG: [[MAX0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 0
+  // CHECK-DAG: [[MAX1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 1
+  // CHECK-DAG: [[MAX2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 2
+  // CHECK-DAG: [[MAX3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 3
+  // CHECK-DAG: [[MAR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 96
+  // CHECK-DAG: [[MAX4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 0
+  // CHECK-DAG: [[MAX5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 1
+
+  // I32-DAG: [[MA0:%.*]] = icmp ne i32 [[MAX0]], 0
+  // I32-DAG: [[MA1:%.*]] = icmp ne i32 [[MAX1]], 0
+  // I32-DAG: [[MA2:%.*]] = icmp ne i32 [[MAX2]], 0
+  // I32-DAG: [[MA3:%.*]] = icmp ne i32 [[MAX3]], 0
+  // I32-DAG: [[MA4:%.*]] = icmp ne i32 [[MAX4]], 0
+  // I32-DAG: [[MA5:%.*]] = icmp ne i32 [[MAX5]], 0
+
+  // F32-DAG: [[MA0:%.*]] = fcmp fast une float [[MAX0]], 0.000000e+00
+  // F32-DAG: [[MA1:%.*]] = fcmp fast une float [[MAX1]], 0.000000e+00
+  // F32-DAG: [[MA2:%.*]] = fcmp fast une float [[MAX2]], 0.000000e+00
+  // F32-DAG: [[MA3:%.*]] = fcmp fast une float [[MAX3]], 0.000000e+00
+  // F32-DAG: [[MA4:%.*]] = fcmp fast une float [[MAX4]], 0.000000e+00
+  // F32-DAG: [[MA5:%.*]] = fcmp fast une float [[MAX5]], 0.000000e+00
+
+  // CHECK-DAG: [[MBR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 128
+  // CHECK-DAG: [[MBX0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 0
+  // CHECK-DAG: [[MBX1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 1
+  // CHECK-DAG: [[MBX2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 2
+  // CHECK-DAG: [[MBX3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 3
+  // CHECK-DAG: [[MBR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 144
+  // CHECK-DAG: [[MBX4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 0
+  // CHECK-DAG: [[MBX5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 1
+
+  // I32-DAG: [[MB0:%.*]] = icmp ne i32 [[MBX0]], 0
+  // I32-DAG: [[MB1:%.*]] = icmp ne i32 [[MBX1]], 0
+  // I32-DAG: [[MB2:%.*]] = icmp ne i32 [[MBX2]], 0
+  // I32-DAG: [[MB3:%.*]] = icmp ne i32 [[MBX3]], 0
+  // I32-DAG: [[MB4:%.*]] = icmp ne i32 [[MBX4]], 0
+  // I32-DAG: [[MB5:%.*]] = icmp ne i32 [[MBX5]], 0
+
+  // F32-DAG: [[MB0:%.*]] = fcmp fast une float [[MBX0]], 0.000000e+00
+  // F32-DAG: [[MB1:%.*]] = fcmp fast une float [[MBX1]], 0.000000e+00
+  // F32-DAG: [[MB2:%.*]] = fcmp fast une float [[MBX2]], 0.000000e+00
+  // F32-DAG: [[MB3:%.*]] = fcmp fast une float [[MBX3]], 0.000000e+00
+  // F32-DAG: [[MB4:%.*]] = fcmp fast une float [[MBX4]], 0.000000e+00
+  // F32-DAG: [[MB5:%.*]] = fcmp fast une float [[MBX5]], 0.000000e+00
+  // 2x3 matrix operands, read from byte offsets 128 (matb) and 80 (mata).
+  matrix<TYPE, 2, 3> matb = buf.Load< matrix<TYPE, 2, 3> >(128);
+  matrix<TYPE, 2, 3> mata = buf.Load< matrix<TYPE, 2, 3> >(80);
+
+  // CHECK: and i1 [[MB0]], [[MA0]]
+  // CHECK: and i1 [[MB1]], [[MA1]]
+  // CHECK: and i1 [[MB2]], [[MA2]]
+  // CHECK: and i1 [[MB3]], [[MA3]]
+  // CHECK: and i1 [[MB4]], [[MA4]]
+  // CHECK: and i1 [[MB5]], [[MA5]]
+  matrix<TYPE, 2, 3> resmat = and(mata, matb);
+  // Every and() result feeds the returned float4 (presumably so none is optimized away).
+  return float4(res3 + resmat[0] + resmat[1], res + res1.x);
+}
diff --git a/tools/clang/test/CodeGenHLSL/vector-or.hlsl b/tools/clang/test/CodeGenHLSL/vector-or.hlsl
new file mode 100644
index 0000000000..2fe6c72434
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/vector-or.hlsl
@@ -0,0 +1,164 @@
+// RUN: %dxc -T ps_6_0 -HV 2021 -DTYPE=bool %s | FileCheck %s --check-prefixes=CHECK,I32
+// RUN: %dxc -T ps_6_0 -HV 2018 -DTYPE=bool %s | FileCheck %s --check-prefixes=CHECK,I32
+// RUN: %dxc -T ps_6_0 -HV 2021 -DTYPE=int %s | FileCheck %s --check-prefixes=CHECK,I32
+// RUN: %dxc -T ps_6_0 -HV 2018 -DTYPE=int %s | FileCheck %s --check-prefixes=CHECK,I32
+// RUN: %dxc -T ps_6_0 -HV 2021 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,F32
+// RUN: %dxc -T ps_6_0 -HV 2018 -DTYPE=float %s | FileCheck %s --check-prefixes=CHECK,F32
+
+// I32: %dx.types.ResRet.[[TY:i32]] = type { [[TYPE:i32]]
+// F32: %dx.types.ResRet.[[TY:f32]] = type { [[TYPE:float]]
+
+// CHECK-LABEL: define void @main
+
+ByteAddressBuffer buf;
+
+float4 main() : SV_Target {
+  // Exercises or() on scalar, one- and three-element vector, and 2x3 matrix operands of TYPE.
+  // CHECK: [[SAR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 0
+  // F32: [[SAX:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[SAR]], 0
+  // I32: [[SA:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[SAR]], 0
+
+  // CHECK: [[SBR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 8
+  // F32: [[SBX:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[SBR]], 0
+  // I32: [[SB:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[SBR]], 0
+
+  // F32: [[SA:%.*]] = fcmp fast une float [[SAX]], 0.000000e+00
+  // F32: [[SB:%.*]] = fcmp fast une float [[SBX]], 0.000000e+00
+  // Scalar operands, read from byte offsets 0 (sa) and 8 (sb).
+  TYPE sa = buf.Load<TYPE>(0);
+  TYPE sb = buf.Load<TYPE>(8);
+  // Expected IR is an i32 or for the bool/int runs and an i1 or of the fcmp results for the float runs.
+  // I32: or i32 [[SB]], [[SA]]
+  // F32: or i1 [[SA]], [[SB]]
+  // NOTE(review): unlike the other or() calls in this function, sb is passed before sa here - confirm this is intended.
+  TYPE res = or(sb, sa);
+
+  // CHECK: [[V1AR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 16
+  // F32: [[V1AX:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V1AR]], 0
+  // I32: [[V1A:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V1AR]], 0
+
+  // CHECK: [[V1BR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 24
+  // F32: [[V1BX:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V1BR]], 0
+  // I32: [[V1B:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V1BR]], 0
+
+  // F32: [[V1B:%.*]] = fcmp fast une float [[V1BX]], 0.000000e+00
+  // F32: [[V1A:%.*]] = fcmp fast une float [[V1AX]], 0.000000e+00
+  // One-element vector operands, read from byte offsets 16 (v1a) and 24 (v1b).
+  vector<TYPE, 1> v1a = buf.Load< vector<TYPE, 1> >(16);
+  vector<TYPE, 1> v1b = buf.Load< vector<TYPE, 1> >(24);
+
+  // I32: or i32 [[V1B]], [[V1A]]
+  // F32: or i1 [[V1A]], [[V1B]]
+
+  vector<TYPE, 1> res1 = or(v1a, v1b);
+
+  // CHECK: [[V3AR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 32
+  // F32: [[V3AX0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3AR]], 0
+  // F32: [[V3AX1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3AR]], 1
+  // F32: [[V3AX2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3AR]], 2
+
+  // I32: [[V3A0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3AR]], 0
+  // I32: [[V3A1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3AR]], 1
+  // I32: [[V3A2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3AR]], 2
+
+  // CHECK: [[V3BR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 56
+  // F32: [[V3BX0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3BR]], 0
+  // F32: [[V3BX1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3BR]], 1
+  // F32: [[V3BX2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3BR]], 2
+
+  // I32: [[V3B0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3BR]], 0
+  // I32: [[V3B1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3BR]], 1
+  // I32: [[V3B2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[V3BR]], 2
+
+  // F32: [[V3B0:%.*]] = fcmp fast une float [[V3BX0]], 0.000000e+00
+  // F32: [[V3B1:%.*]] = fcmp fast une float [[V3BX1]], 0.000000e+00
+  // F32: [[V3B2:%.*]] = fcmp fast une float [[V3BX2]], 0.000000e+00
+
+  // F32: [[V3A0:%.*]] = fcmp fast une float [[V3AX0]], 0.000000e+00
+  // F32: [[V3A1:%.*]] = fcmp fast une float [[V3AX1]], 0.000000e+00
+  // F32: [[V3A2:%.*]] = fcmp fast une float [[V3AX2]], 0.000000e+00
+  // Three-element vector operands, read from byte offsets 32 (v3a) and 56 (v3b).
+  vector<TYPE, 3> v3a = buf.Load< vector<TYPE, 3> >(32);
+  vector<TYPE, 3> v3b = buf.Load< vector<TYPE, 3> >(56);
+
+  // I32: or i32 [[V3B0]], [[V3A0]]
+  // I32: or i32 [[V3B1]], [[V3A1]]
+  // I32: or i32 [[V3B2]], [[V3A2]]
+
+  // F32: or i1 [[V3A0]], [[V3B0]]
+  // F32: or i1 [[V3A1]], [[V3B1]]
+  // F32: or i1 [[V3A2]], [[V3B2]]
+
+  vector<TYPE, 3> res3 = or(v3a, v3b);
+  // Each matrix operand is expected as two buffer loads, four components and then two more 16 bytes later.
+  // CHECK: [[MAR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 80
+  // F32: [[MAX0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 0
+  // F32: [[MAX1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 1
+  // F32: [[MAX2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 2
+  // F32: [[MAX3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 3
+
+  // I32: [[MA0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 0
+  // I32: [[MA1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 1
+  // I32: [[MA2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 2
+  // I32: [[MA3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 3
+
+  // CHECK: [[MAR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 96
+  // F32: [[MAX4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 0
+  // F32: [[MAX5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 1
+
+  // I32: [[MA4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 0
+  // I32: [[MA5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MAR]], 1
+
+  // CHECK: [[MBR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 128
+  // F32: [[MBX0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 0
+  // F32: [[MBX1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 1
+  // F32: [[MBX2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 2
+  // F32: [[MBX3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 3
+
+  // I32: [[MB0:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 0
+  // I32: [[MB1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 1
+  // I32: [[MB2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 2
+  // I32: [[MB3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 3
+
+  // CHECK: [[MBR:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.bufferLoad.[[TY]](i32 68, %dx.types.Handle %{{.*}}, i32 144
+  // F32: [[MBX4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 0
+  // F32: [[MBX5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 1
+
+  // I32: [[MB4:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 0
+  // I32: [[MB5:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[MBR]], 1
+
+  // F32: [[MB0:%.*]] = fcmp fast une float [[MBX0]], 0.000000e+00
+  // F32: [[MB1:%.*]] = fcmp fast une float [[MBX1]], 0.000000e+00
+  // F32: [[MB2:%.*]] = fcmp fast une float [[MBX2]], 0.000000e+00
+  // F32: [[MB3:%.*]] = fcmp fast une float [[MBX3]], 0.000000e+00
+  // F32: [[MB4:%.*]] = fcmp fast une float [[MBX4]], 0.000000e+00
+  // F32: [[MB5:%.*]] = fcmp fast une float [[MBX5]], 0.000000e+00
+
+  // F32: [[MA0:%.*]] = fcmp fast une float [[MAX0]], 0.000000e+00
+  // F32: [[MA1:%.*]] = fcmp fast une float [[MAX1]], 0.000000e+00
+  // F32: [[MA2:%.*]] = fcmp fast une float [[MAX2]], 0.000000e+00
+  // F32: [[MA3:%.*]] = fcmp fast une float [[MAX3]], 0.000000e+00
+  // F32: [[MA4:%.*]] = fcmp fast une float [[MAX4]], 0.000000e+00
+  // F32: [[MA5:%.*]] = fcmp fast une float [[MAX5]], 0.000000e+00
+  // 2x3 matrix operands, read from byte offsets 80 (mata) and 128 (matb).
+  matrix<TYPE, 2, 3> mata = buf.Load< matrix<TYPE, 2, 3> >(80);
+  matrix<TYPE, 2, 3> matb = buf.Load< matrix<TYPE, 2, 3> >(128);
+
+  // I32: or i32 [[MB0]], [[MA0]]
+  // I32: or i32 [[MB1]], [[MA1]]
+  // I32: or i32 [[MB2]], [[MA2]]
+  // I32: or i32 [[MB3]], [[MA3]]
+  // I32: or i32 [[MB4]], [[MA4]]
+  // I32: or i32 [[MB5]], [[MA5]]
+
+  // F32: or i1 [[MA0]], [[MB0]]
+  // F32: or i1 [[MA1]], [[MB1]]
+  // F32: or i1 [[MA2]], [[MB2]]
+  // F32: or i1 [[MA3]], [[MB3]]
+  // F32: or i1 [[MA4]], [[MB4]]
+  // F32: or i1 [[MA5]], [[MB5]]
+
+  matrix<TYPE, 2, 3> resmat = or(mata, matb);
+  // Every or() result feeds the returned float4 (presumably so none is optimized away).
+  return float4(res3 + resmat[0] + resmat[1], res + res1.x);
+}
diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.and.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.and.hlsl
index a983e20c45..970faf6a8d 100644
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.and.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.and.hlsl
@@ -1,6 +1,9 @@
 // RUN: %dxc -T ps_6_0 -E main -HV 2021 -fcgl  %s -spirv | FileCheck %s
 // RUN: %dxc -T ps_6_0 -E main -HV 2018 -fcgl  %s -spirv | FileCheck %s
 
+// CHECK-DAG: [[v3_0:%[0-9]+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0
+// CHECK-DAG: [[v3_1:%[0-9]+]] = OpConstantComposite %v3int %int_1 %int_1 %int_1
+
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
 
@@ -33,4 +36,46 @@ void main() {
 // CHECK-NEXT: [[and3:%[0-9]+]] = OpLogicalAnd %bool [[a1]] [[b1]]
 // CHECK-NEXT:      {{%[0-9]+}} = OpCompositeConstruct %v2bool [[and3]] %true
     bool2 t = bool2(and(a, b), true);
+
+    int a_0, b_0, c_0;
+    // Plain assign (scalar)
+// CHECK:      [[a0_int:%[0-9]+]] = OpLoad %int %a_0
+// CHECK-NEXT: [[a0:%[0-9]+]] = OpINotEqual %bool [[a0_int]] %int_0
+// CHECK-NEXT: [[b0_int:%[0-9]+]] = OpLoad %int %b_0
+// CHECK-NEXT: [[b0:%[0-9]+]] = OpINotEqual %bool [[b0_int]] %int_0
+// CHECK-NEXT: [[and0:%[0-9]+]] = OpLogicalAnd %bool [[a0]] [[b0]]
+// CHECK-NEXT: [[sel:%[0-9]+]] = OpSelect %int [[and0]] %int_1 %int_0
+// CHECK-NEXT: OpStore %c_0 [[sel]]
+    c_0 = and(a_0, b_0);
+
+    int1 i_0, j_0, k_0;
+    int3 o_0, p_0, q_0;
+    // Plain assign (vector)
+// CHECK-NEXT: [[i0_int:%[0-9]+]] = OpLoad %int %i_0
+// CHECK-NEXT: [[i0:%[0-9]+]] = OpINotEqual %bool [[i0_int]] %int_0
+// CHECK-NEXT: [[j0_int:%[0-9]+]] = OpLoad %int %j_0
+// CHECK-NEXT: [[j0:%[0-9]+]] = OpINotEqual %bool [[j0_int]] %int_0
+// CHECK-NEXT: [[and1:%[0-9]+]] = OpLogicalAnd %bool [[i0]] [[j0]]
+// CHECK-NEXT: [[sel:%[0-9]+]] = OpSelect %int [[and1]] %int_1 %int_0
+// CHECK-NEXT: OpStore %k_0 [[sel]]
+    k_0 = and(i_0, j_0);
+
+// CHECK-NEXT: [[o0_int:%[0-9]+]] = OpLoad %v3int %o_0
+// CHECK-NEXT: [[o0:%[0-9]+]] = OpINotEqual %v3bool [[o0_int]] [[v3_0]]
+// CHECK-NEXT: [[p0_int:%[0-9]+]] = OpLoad %v3int %p_0
+// CHECK-NEXT: [[p0:%[0-9]+]] = OpINotEqual %v3bool [[p0_int]] [[v3_0]]
+// CHECK-NEXT: [[and2:%[0-9]+]] = OpLogicalAnd %v3bool [[o0]] [[p0]]
+// CHECK-NEXT: [[sel:%[0-9]+]] = OpSelect %v3int [[and2]] [[v3_1]] [[v3_0]]
+// CHECK-NEXT: OpStore %q_0 [[sel]]
+    q_0 = and(o_0, p_0);
+
+// The result of '&&' could be 'const bool'. In such cases, make sure
+// the result type is correct.
+// CHECK:      [[a0_int:%[0-9]+]] = OpLoad %int %a_0
+// CHECK-NEXT: [[a0:%[0-9]+]] = OpINotEqual %bool [[a0_int]] %int_0
+// CHECK-NEXT: [[b0_int:%[0-9]+]] = OpLoad %int %b_0
+// CHECK-NEXT: [[b0:%[0-9]+]] = OpINotEqual %bool [[b0_int]] %int_0
+// CHECK-NEXT: [[and0:%[0-9]+]] = OpLogicalAnd %bool [[a0]] [[b0]]
+// CHECK-NEXT:      {{%[0-9]+}} = OpCompositeConstruct %v2bool [[and0]] %true
+    t = bool2(and(a_0, b_0), true);
 }
diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.or.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.or.hlsl
index a61272211e..39a39062f1 100644
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.or.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.or.hlsl
@@ -1,6 +1,8 @@
 // RUN: %dxc -T ps_6_0 -E main -HV 2021 -fcgl  %s -spirv | FileCheck %s
 // RUN: %dxc -T ps_6_0 -E main -HV 2018 -fcgl  %s -spirv | FileCheck %s
 
+// CHECK: [[v3_0:%[0-9]+]] = OpConstantComposite %v3int %int_0 %int_0 %int_0
+
 void main() {
 // CHECK-LABEL: %bb_entry = OpLabel
 
@@ -25,4 +27,36 @@ void main() {
 // CHECK-NEXT: OpStore %q [[or2]]
     k = or(i, j);
     q = or(o, p);
+
+    int r, s;
+    bool t;
+    // Plain assign (scalar)
+// CHECK:      [[r0_int:%[0-9]+]] = OpLoad %int %r
+// CHECK-NEXT: [[r0:%[0-9]+]] = OpINotEqual %bool [[r0_int]] %int_0
+// CHECK-NEXT: [[s0_int:%[0-9]+]] = OpLoad %int %s
+// CHECK-NEXT: [[s0:%[0-9]+]] = OpINotEqual %bool [[s0_int]] %int_0
+// CHECK-NEXT: [[or0:%[0-9]+]] = OpLogicalOr %bool [[r0]] [[s0]]
+// CHECK-NEXT: OpStore %t [[or0]]
+    t = or(r, s);
+
+    int1 u, v;
+    bool1 w;
+    // Plain assign (vector)
+// CHECK-NEXT: [[u0_int:%[0-9]+]] = OpLoad %int %u
+// CHECK-NEXT: [[u0:%[0-9]+]] = OpINotEqual %bool [[u0_int]] %int_0
+// CHECK-NEXT: [[v0_int:%[0-9]+]] = OpLoad %int %v
+// CHECK-NEXT: [[v0:%[0-9]+]] = OpINotEqual %bool [[v0_int]] %int_0
+// CHECK-NEXT: [[or1:%[0-9]+]] = OpLogicalOr %bool [[u0]] [[v0]]
+// CHECK-NEXT: OpStore %w [[or1]]
+    w = or(u, v);
+
+    int3 x, y;
+    bool3 z;
+// CHECK-NEXT: [[x0_int:%[0-9]+]] = OpLoad %v3int %x
+// CHECK-NEXT: [[x0:%[0-9]+]] = OpINotEqual %v3bool [[x0_int]] [[v3_0]]
+// CHECK-NEXT: [[y0_int:%[0-9]+]] = OpLoad %v3int %y
+// CHECK-NEXT: [[y0:%[0-9]+]] = OpINotEqual %v3bool [[y0_int]] [[v3_0]]
+// CHECK-NEXT: [[or2:%[0-9]+]] = OpLogicalOr %v3bool [[x0]] [[y0]]
+// CHECK-NEXT: OpStore %z [[or2]]
+    z = or(x, y);
 }
diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt
index 40b5f6d96a..8ee8d0bc99 100644
--- a/utils/hct/gen_intrin_main.txt
+++ b/utils/hct/gen_intrin_main.txt
@@ -369,8 +369,8 @@ resource [[hidden]] CreateResourceFromHeap(in uint index);
 // Replacement for vector logical &&, ||, and ternary conditional operators,
 // For use when HLSL changes to support short-circuiting and only scalar
 // conditions to maintain clarity.
-$match<1, 0> bool<> [[rn]] and(in any<> x, in $type1 y);
-$match<1, 0> bool<> [[rn]] or(in any<> x, in $type1 y);
+$match<1, 0> bool<> [[rn]] and(in bool<> x, in $type1 y);
+$match<1, 0> bool<> [[rn]] or(in bool<> x, in $type1 y);
 $type2 [[rn]] select(in bool<> cond, in $match<1, 2> any<> t, in $type2 f);
 $type2 [[rn]] select(in bool cond, in any_sampler t, in $type2 f);
 

From c9e3339e9dbff6b0adf431a2cd816d699f0d9377 Mon Sep 17 00:00:00 2001
From: Greg Roth <grroth@microsoft.com>
Date: Mon, 27 Jan 2025 15:31:28 -0700
Subject: [PATCH 2/2] Add release note for and/or change

---
 docs/ReleaseNotes.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md
index b47f17b600..27853bfd30 100644
--- a/docs/ReleaseNotes.md
+++ b/docs/ReleaseNotes.md
@@ -22,6 +22,7 @@ The included licenses apply to the following files:
 - The incomplete WaveMatrix implementation has been removed.
 - DXIL Validator Hash is open sourced.
 - DXIL container validation for PSV0 part allows any content ordering inside string and semantic index tables.
+- The and() and or() intrinsics will now accept non-integer parameters by casting them to bools.
 
 ### Version 1.8.2407