Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -1090,6 +1090,18 @@
   virtual LoadInst *lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
     return nullptr;
   }
+
+  /// Returns true if we should normalize
+  /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
+  /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
+  /// that it saves us from materializing N0 and N1 in an integer register.
+  /// Targets that are able to perform and/or on flags should return false here.
+  virtual bool shouldNormalizeToSelectSequence(const SelectInst &SI) const {
+    (void)SI;
+    // If SELECTs are expensive then we should not do this.
+    return !isSelectExpensive();
+  }
+
   //===--------------------------------------------------------------------===//
   // TargetLowering Configuration Methods - These methods should be invoked by
   // the derived class constructor to configure this object for the target.
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -177,6 +177,7 @@
     bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT);
     bool MoveExtToFormExtLoad(Instruction *&I);
     bool OptimizeExtUses(Instruction *I);
+    bool OptimizeSelectAndOr(SelectInst &SI);
     bool OptimizeSelectInst(SelectInst *SI);
     bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
     bool OptimizeExtractElementInst(Instruction *Inst);
@@ -3692,15 +3693,85 @@
          (isa<CmpInst>(CmpOp1) && CmpOp1->hasOneUse()));
 }
 
+/// Test whether @p CondVal is a tree of And/Or/Compare instructions. If it is
+/// not then it is hard to decide whether breaking selects apart is helpful.
+static bool isTreeOfAndOrCompare(const Value &CondVal) {
+  // Be conservative and restrict ourselves to 1 user. The other uses may force
+  // putting the value into an int register.
+  if (!CondVal.hasOneUse())
+    return false;
+
+  // Additional And/Or instructions will all be broken down.
+  const Instruction *CondInstr = dyn_cast<Instruction>(&CondVal);
+  if (CondInstr == nullptr)
+    return false;
+  switch (CondInstr->getOpcode()) {
+  case Instruction::ICmp:
+  case Instruction::FCmp:
+    return true;
+  case Instruction::And:
+  case Instruction::Or: {
+    const Value *Op0 = CondInstr->getOperand(0);
+    const Value *Op1 = CondInstr->getOperand(1);
+    return isTreeOfAndOrCompare(*Op0) && isTreeOfAndOrCompare(*Op1);
+  }
+  default:
+    return false;
+  }
+}
+
+bool CodeGenPrepare::OptimizeSelectAndOr(SelectInst &SI) {
+  Value *Condition = SI.getCondition();
+  Instruction *CondI = dyn_cast<Instruction>(Condition);
+  if (CondI == nullptr)
+    return false;
+  unsigned Opcode = CondI->getOpcode();
+  if (Opcode != Instruction::And && Opcode != Instruction::Or)
+    return false;
+  if (!TLI->shouldNormalizeToSelectSequence(SI))
+    return false;
+
+  // Conservative estimation on whether we can save materializing the condition
+  // in an integer register when expanding the select.
+  // If condition only consists of and/or i1 or Compare predecessors and
+  // all other users are such and/ors/selects as well we are fine.
+  if (!isTreeOfAndOrCompare(*CondI))
+    return false;
+
+  // select((N0 & N1), a, b) => select(N0, select(N1, a, b), b)
+  // select((N0 | N1), a, b) => select(N0, a, select(N1, a, b))
+  IRBuilder<> Builder(SI.getContext());
+  Builder.SetInsertPoint(&SI);
+  Value *N0 = CondI->getOperand(0);
+  Value *N1 = CondI->getOperand(1);
+  Value *TrueVal = SI.getTrueValue();
+  Value *FalseVal = SI.getFalseValue();
+  Value *InnerSelect = Builder.CreateSelect(N1, TrueVal, FalseVal,
+                                            SI.getName()+".0");
+  SI.setOperand(0, N0);
+  unsigned SelectOp = Opcode == Instruction::And ? 1 : 2;
+  SI.setOperand(SelectOp, InnerSelect);
+
+  // SI was the only user of CondI, we can remove it.
+  CondI->eraseFromParent();
+
+  // Optimize the new/changed select instructions.
+  if (SelectInst *InnerSelectSI = dyn_cast<SelectInst>(InnerSelect))
+    OptimizeSelectAndOr(*InnerSelectSI);
+  OptimizeSelectAndOr(SI);
+  return true;
+}
 
 /// If we have a SelectInst that will likely profit from branch prediction,
 /// turn it into a branch.
 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
+  bool Changed = OptimizeSelectAndOr(*SI);
+
   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
 
   // Can we convert the 'select' to CF ?
   if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
-    return false;
+    return Changed;
 
   TargetLowering::SelectSupportKind SelectKind;
   if (VectorCond)
@@ -3716,7 +3787,7 @@
     // Check if it is profitable to keep this 'select'.
     if (!TLI->isPredictableSelectExpensive() ||
         !isFormingBranchFromSelectProfitable(SI))
-      return false;
+      return Changed;
   }
 
   ModifiedDT = true;
@@ -3749,7 +3820,8 @@
   // Instruct OptimizeBlock to skip to the next block.
   CurInstIterator = StartBlock->end();
   ++NumSelectsExpanded;
-  return true;
+  Changed = true;
+  return Changed;
 }
 
 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -591,6 +591,12 @@
       return Ty->isArrayTy();
     }
 
+    /// No gain in select sequences as we can natively perform and/or on flag
+    /// registers.
+    bool shouldNormalizeToSelectSequence(const SelectInst &SI) const override {
+      return false;
+    }
+
   private:
 
     struct ReuseLoadInfo {
Index: test/CodeGen/R600/or.ll
===================================================================
--- test/CodeGen/R600/or.ll
+++ test/CodeGen/R600/or.ll
@@ -156,14 +156,14 @@
 ; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
 
 ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
-define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
+define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
   %a = load float addrspace(1)* %in0
   %b = load float addrspace(1)* %in1
   %acmp = fcmp oge float %a, 0.000000e+00
   %bcmp = fcmp oge float %b, 0.000000e+00
   %or = or i1 %acmp, %bcmp
-  %result = select i1 %or, float %a, float %b
-  store float %result, float addrspace(1)* %out
+  %result = zext i1 %or to i32
+  store i32 %result, i32 addrspace(1)* %out
   ret void
 }
 
Index: test/CodeGen/X86/jump_sign.ll
===================================================================
--- test/CodeGen/X86/jump_sign.ll
+++ test/CodeGen/X86/jump_sign.ll
@@ -217,17 +217,15 @@
 ; PR13475
 ; If we have sub a, b and cmp b, a and the result of cmp is used
 ; by sbb, we should not optimize cmp away.
-define i32 @func_q(i32 %j.4, i32 %w, i32 %el) {
+define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
 ; CHECK-LABEL: func_q:
 ; CHECK: cmp
 ; CHECK-NEXT: sbb
-  %tmp532 = add i32 %j.4, %w
-  %tmp533 = icmp ugt i32 %tmp532, %el
-  %tmp534 = icmp ult i32 %w, %el
-  %or.cond = and i1 %tmp533, %tmp534
-  %tmp535 = sub i32 %el, %w
-  %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4
-  ret i32 %j.5
+  %1 = icmp ult i32 %a0, %a1
+  %2 = sub i32 %a1, %a0
+  %3 = select i1 %1, i32 -1, i32 0
+  %4 = xor i32 %2, %3
+  ret i32 %4
 }
 ; rdar://11873276
 define i8* @func_r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
Index: test/CodeGen/X86/zext-sext.ll
===================================================================
--- test/CodeGen/X86/zext-sext.ll
+++ test/CodeGen/X86/zext-sext.ll
@@ -34,11 +34,12 @@
   %tmp12 = add i64 %tmp11, 5089792279245435153
 
 ; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
-; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
 ; CHECK: cmpl $-8608074, %e[[REGISTER_zext]]
+; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
 ; CHECK-NOT: [[REGISTER_zext]]
-; CHECK-DAG: testl %e[[REGISTER_zext]]
-; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]]
+; CHECK-DAG: cmpl $2138875573, %e[[REGISTER_zext]]
+; CHECK: movq [[REGISTER_sext]], [[REGISTER_sext2:%[a-z0-9]+]]
+; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext2]]
 
   %tmp13 = sub i64 %tmp12, 2138875574
   %tmp14 = zext i32 %tmp4 to i64
Index: test/Transforms/CodeGenPrepare/select_and_or.ll
===================================================================
--- /dev/null
+++ test/Transforms/CodeGenPrepare/select_and_or.ll
@@ -0,0 +1,61 @@
+; RUN: opt -mtriple=x86_64-unknown-unknown -codegenprepare -S -o - %s | FileCheck %s
+; RUN: opt -mtriple=aarch64-unknown-unknown -codegenprepare -S -o - %s | FileCheck %s
+; RUN: opt -mtriple=powerpc-unknown-unknown -codegenprepare -S -o - %s | not FileCheck %s
+
+define i32 @select_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_or
+; CHECK-NOT: and
+; CHECK-NOT: or
+; CHECK: select
+; CHECK: select
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a1, %a2
+  %or = or i1 %cmp0, %cmp1
+  %res = select i1 %or, i32 %a3, i32 %a4
+  ret i32 %res
+}
+
+@var32 = global i32 0
+
+define i32 @select_or2(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_or2
+; CHECK: or
+; CHECK: select
+; CHECK-NOT: select
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a1, %a2
+  %or = or i1 %cmp0, %cmp1
+  %zero_one = zext i1 %or to i32
+  store volatile i32 %zero_one, i32* @var32
+  %res = select i1 %or, i32 %a3, i32 %a4
+  ret i32 %res
+}
+
+define i32 @select_and(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: and
+; CHECK-NOT: or
+; CHECK: select
+; CHECK: select
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = fcmp uge float %a2, %a3
+  %or = and i1 %cmp0, %cmp1
+  %res = select i1 %or, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+define i32 @select_multi(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_multi
+; CHECK-NOT: and
+; CHECK-NOT: or
+; CHECK: select
+; CHECK: select
+; CHECK: select
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp uge i32 %a4, %a1
+  %cmp2 = fcmp uge float %a2, %a3
+  %or = or i1 %cmp0, %cmp1
+  %and = and i1 %or, %cmp2
+  %res = select i1 %and, i32 %a4, i32 %a5
+  ret i32 %res
+}