AMDGPU: override shouldNormalizeToSelectSequence to return false.

The override keeps i1 flags around so that boolean logic on select conditions
is done with SALU instructions. The new test checks that an and/or of two
compares feeding a select is emitted as two v_cmp, one s_and_b64/s_or_b64 and
a single v_cndmask.

Index: lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.h
+++ lib/Target/AMDGPU/SIISelLowering.h
@@ -110,6 +110,8 @@
 
   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
+  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
+
   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                                bool isVarArg,
                                const SmallVectorImpl<ISD::InputArg> &Ins,
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1828,6 +1828,12 @@
          !shouldEmitGOTReloc(GA->getGlobal(), getTargetMachine());
 }
 
+bool SITargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
+                                                       EVT) const {
+  // Prefer to keep i1 flags around so that boolean logic is done with SALU.
+  return false;
+}
+
 static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
                                        SDLoc DL, unsigned Offset, EVT PtrVT,
                                        unsigned GAFlags = SIInstrInfo::MO_NONE) {
Index: test/CodeGen/AMDGPU/select-andor.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/select-andor.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}select_and:
+; CHECK: v_cmp_lt
+; CHECK-NEXT: v_cmp_lt
+; CHECK-NEXT: s_and_b64
+; CHECK-NEXT: v_cndmask
+define amdgpu_vs float @select_and(i32 %cond1, i32 %cond2, float %a, float %b) nounwind {
+  %cc1 = icmp ugt i32 %cond1, 5
+  %cc2 = icmp ugt i32 %cond2, 7
+  %cc = and i1 %cc1, %cc2
+  %sel = select i1 %cc, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: {{^}}select_or:
+; CHECK: v_cmp_lt
+; CHECK-NEXT: v_cmp_lt
+; CHECK-NEXT: s_or_b64
+; CHECK-NEXT: v_cndmask
+define amdgpu_vs float @select_or(i32 %cond1, i32 %cond2, float %a, float %b) nounwind {
+  %cc1 = icmp ugt i32 %cond1, 5
+  %cc2 = icmp ugt i32 %cond2, 7
+  %cc = or i1 %cc1, %cc2
+  %sel = select i1 %cc, float %a, float %b
+  ret float %sel
+}