# Review notes. This is free text ahead of the first "diff --git" header and is
# not part of any hunk.
#
# llvm/lib/Target/AMDGPU/SIISelLowering.cpp, isBoolSGPR():
#   * i1 SETCC and AMDGPUISD::FP_CLASS nodes are still accepted unconditionally.
#   * i1 AND / OR / XOR nodes used to be accepted unconditionally. With this
#     change they are accepted only when operand 0 and operand 1 both satisfy
#     isBoolSGPR() themselves (the check is now recursive).
#   * The header comment typo "v_cmdmask_b32" is corrected to "v_cndmask_b32",
#     and "default: break;" is split across two lines with no behavior change.
#
# llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll (new file):
#   * Six functions: combine_{add,sub}_zext_{xor,and,or}. Each builds an i1
#     xor/and/or from a phi that has an undef incoming value (xor pairs the phi
#     with the constant true; and/or pair two such phis), zero-extends the
#     result, and adds it to or subtracts it from a loop-carried i32.
#   * Each function's CHECK lines require S_XOR_B64 / S_AND_B64 / S_OR_B64,
#     then V_CNDMASK via CHECK-NEXT, then a later S_ADD or S_SUB.
#   * NOTE(review): "attributes #0" is declared at the end of the test, but no
#     function in the file references #0 -- confirm whether it is needed.
#   * NOTE(review): presumably the add/sub-of-zext combine that consults
#     isBoolSGPR() lives elsewhere in SIISelLowering.cpp; it is not visible in
#     this patch -- confirm against the full file.

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8814,18 +8814,20 @@
 }
 
 // Returns true if argument is a boolean value which is not serialized into
-// memory or argument and does not require v_cmdmask_b32 to be deserialized.
+// memory or argument and does not require v_cndmask_b32 to be deserialized.
 static bool isBoolSGPR(SDValue V) {
   if (V.getValueType() != MVT::i1)
     return false;
   switch (V.getOpcode()) {
-  default: break;
+  default:
+    break;
   case ISD::SETCC:
+  case AMDGPUISD::FP_CLASS:
+    return true;
   case ISD::AND:
   case ISD::OR:
   case ISD::XOR:
-  case AMDGPUISD::FP_CLASS:
-    return true;
+    return isBoolSGPR(V.getOperand(0)) && isBoolSGPR(V.getOperand(1));
   }
   return false;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -0,0 +1,183 @@
+; RUN: llc -march=amdgcn -start-before=amdgpu-isel -stop-after=amdgpu-isel -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck %s
+
+; Test that unused lanes in the s_xor result are masked out with v_cndmask.
+
+; CHECK-LABEL: name: combine_add_zext_xor
+; CHECK: S_XOR_B64
+; CHECK-NEXT: V_CNDMASK
+; CHECK: S_ADD
+
+define void @combine_add_zext_xor() {
+.entry:
+  br label %.exit, !amdgpu.uniform !0
+
+.exit:                                            ; preds = %bb7, %.entry
+  %t = phi i32 [ 1050, %.entry ], [ 0, %bb7 ]
+  %t1 = phi i32 [ 0, %.entry ], [ %t7, %bb7 ]
+  br i1 undef, label %bb7, label %bb, !amdgpu.uniform !0
+
+bb:                                               ; preds = %.exit
+  %t2 = add i32 %t, 2
+  %t3 = icmp eq i32 %t2, 0
+  br label %bb7, !amdgpu.uniform !0
+
+bb7:                                              ; preds = %bb, %.exit
+  %t4 = phi i1 [ %t3, %bb ], [ undef, %.exit ]
+  %t5 = xor i1 %t4, true
+  %t6 = zext i1 %t5 to i32
+  %t7 = add i32 %t1, %t6
+  br label %.exit, !amdgpu.uniform !0
+}
+
+; Test that unused lanes in the s_and result are masked out with v_cndmask.
+
+; CHECK-LABEL: name: combine_add_zext_and
+; CHECK: S_AND_B64
+; CHECK-NEXT: V_CNDMASK
+; CHECK: S_ADD
+
+define void @combine_add_zext_and() {
+.entry:
+  br label %.exit, !amdgpu.uniform !0
+
+.exit:                                            ; preds = %bb7, %.entry
+  %t = phi i32 [ 1050, %.entry ], [ 0, %bb7 ]
+  %t1 = phi i32 [ 0, %.entry ], [ %t7, %bb7 ]
+  br i1 undef, label %bb7, label %bb, !amdgpu.uniform !0
+
+bb:                                               ; preds = %.exit
+  %t2 = add i32 %t, 2
+  %t3 = icmp eq i32 %t2, 0
+  br label %bb7, !amdgpu.uniform !0
+
+bb7:                                              ; preds = %bb, %.exit
+  %t4 = phi i1 [ %t3, %bb ], [ undef, %.exit ]
+  %t4a = phi i1 [ %t3, %bb ], [ undef, %.exit ]
+  %t5 = and i1 %t4, %t4a
+  %t6 = zext i1 %t5 to i32
+  %t7 = add i32 %t1, %t6
+  br label %.exit, !amdgpu.uniform !0
+}
+
+; Test that unused lanes in the s_or result are masked out with v_cndmask.
+
+; CHECK-LABEL: name: combine_add_zext_or
+; CHECK: S_OR_B64
+; CHECK-NEXT: V_CNDMASK
+; CHECK: S_ADD
+
+define void @combine_add_zext_or() {
+.entry:
+  br label %.exit, !amdgpu.uniform !0
+
+.exit:                                            ; preds = %bb7, %.entry
+  %t = phi i32 [ 1050, %.entry ], [ 0, %bb7 ]
+  %t1 = phi i32 [ 0, %.entry ], [ %t7, %bb7 ]
+  br i1 undef, label %bb7, label %bb, !amdgpu.uniform !0
+
+bb:                                               ; preds = %.exit
+  %t2 = add i32 %t, 2
+  %t3 = icmp eq i32 %t2, 0
+  br label %bb7, !amdgpu.uniform !0
+
+bb7:                                              ; preds = %bb, %.exit
+  %t4 = phi i1 [ %t3, %bb ], [ undef, %.exit ]
+  %t4a = phi i1 [ %t3, %bb ], [ undef, %.exit ]
+  %t5 = or i1 %t4, %t4a
+  %t6 = zext i1 %t5 to i32
+  %t7 = add i32 %t1, %t6
+  br label %.exit, !amdgpu.uniform !0
+}
+
+; Test that unused lanes in the s_xor result are masked out with v_cndmask.
+
+; CHECK-LABEL: name: combine_sub_zext_xor
+; CHECK: S_XOR_B64
+; CHECK-NEXT: V_CNDMASK
+; CHECK: S_SUB
+
+define void @combine_sub_zext_xor() {
+.entry:
+  br label %.exit, !amdgpu.uniform !0
+
+.exit:                                            ; preds = %bb7, %.entry
+  %t = phi i32 [ 1050, %.entry ], [ 0, %bb7 ]
+  %t1 = phi i32 [ 0, %.entry ], [ %t7, %bb7 ]
+  br i1 undef, label %bb7, label %bb, !amdgpu.uniform !0
+
+bb:                                               ; preds = %.exit
+  %t2 = add i32 %t, 2
+  %t3 = icmp eq i32 %t2, 0
+  br label %bb7, !amdgpu.uniform !0
+
+bb7:                                              ; preds = %bb, %.exit
+  %t4 = phi i1 [ %t3, %bb ], [ undef, %.exit ]
+  %t5 = xor i1 %t4, true
+  %t6 = zext i1 %t5 to i32
+  %t7 = sub i32 %t1, %t6
+  br label %.exit, !amdgpu.uniform !0
+}
+
+; Test that unused lanes in the s_and result are masked out with v_cndmask.
+
+; CHECK-LABEL: name: combine_sub_zext_and
+; CHECK: S_AND_B64
+; CHECK-NEXT: V_CNDMASK
+; CHECK: S_SUB
+
+define void @combine_sub_zext_and() {
+.entry:
+  br label %.exit, !amdgpu.uniform !0
+
+.exit:                                            ; preds = %bb7, %.entry
+  %t = phi i32 [ 1050, %.entry ], [ 0, %bb7 ]
+  %t1 = phi i32 [ 0, %.entry ], [ %t7, %bb7 ]
+  br i1 undef, label %bb7, label %bb, !amdgpu.uniform !0
+
+bb:                                               ; preds = %.exit
+  %t2 = add i32 %t, 2
+  %t3 = icmp eq i32 %t2, 0
+  br label %bb7, !amdgpu.uniform !0
+
+bb7:                                              ; preds = %bb, %.exit
+  %t4 = phi i1 [ %t3, %bb ], [ undef, %.exit ]
+  %t4a = phi i1 [ %t3, %bb ], [ undef, %.exit ]
+  %t5 = and i1 %t4, %t4a
+  %t6 = zext i1 %t5 to i32
+  %t7 = sub i32 %t1, %t6
+  br label %.exit, !amdgpu.uniform !0
+}
+
+; Test that unused lanes in the s_or result are masked out with v_cndmask.
+
+; CHECK-LABEL: name: combine_sub_zext_or
+; CHECK: S_OR_B64
+; CHECK-NEXT: V_CNDMASK
+; CHECK: S_SUB
+
+define void @combine_sub_zext_or() {
+.entry:
+  br label %.exit, !amdgpu.uniform !0
+
+.exit:                                            ; preds = %bb7, %.entry
+  %t = phi i32 [ 1050, %.entry ], [ 0, %bb7 ]
+  %t1 = phi i32 [ 0, %.entry ], [ %t7, %bb7 ]
+  br i1 undef, label %bb7, label %bb, !amdgpu.uniform !0
+
+bb:                                               ; preds = %.exit
+  %t2 = add i32 %t, 2
+  %t3 = icmp eq i32 %t2, 0
+  br label %bb7, !amdgpu.uniform !0
+
+bb7:                                              ; preds = %bb, %.exit
+  %t4 = phi i1 [ %t3, %bb ], [ undef, %.exit ]
+  %t4a = phi i1 [ %t3, %bb ], [ undef, %.exit ]
+  %t5 = or i1 %t4, %t4a
+  %t6 = zext i1 %t5 to i32
+  %t7 = sub i32 %t1, %t6
+  br label %.exit, !amdgpu.uniform !0
+}
+
+attributes #0 = { nounwind readonly willreturn }
+
+!0 = !{}