Index: llvm/trunk/include/llvm/Target/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h
+++ llvm/trunk/include/llvm/Target/TargetLowering.h
@@ -1097,6 +1097,25 @@
   virtual LoadInst *lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
     return nullptr;
   }
+
+  /// Returns true if we should normalize
+  /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
+  /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
+  /// that it saves us from materializing N0 and N1 in an integer register.
+  /// Targets that are able to perform and/or on flags should return false here.
+  virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
+                                               EVT VT) const {
+    // If a target has multiple condition registers, then it likely has logical
+    // operations on those registers.
+    if (hasMultipleConditionRegisters())
+      return false;
+    // Only do the transform if the value won't be split into multiple
+    // registers.
+    LegalizeTypeAction Action = getTypeAction(Context, VT);
+    return Action != TypeExpandInteger && Action != TypeExpandFloat &&
+           Action != TypeSplitVector;
+  }
+
   //===--------------------------------------------------------------------===//
   // TargetLowering Configuration Methods - These methods should be invoked by
   // the derived class constructor to configure this object for the target.
Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4819,6 +4819,69 @@
     return SimplifySelect(SDLoc(N), N0, N1, N2);
   }
 
+  if (VT0 == MVT::i1) {
+    if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+      // select (and Cond0, Cond1), X, Y
+      //   -> select Cond0, (select Cond1, X, Y), Y
+      if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+        SDValue Cond0 = N0->getOperand(0);
+        SDValue Cond1 = N0->getOperand(1);
+        SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+                                          N1.getValueType(), Cond1, N1, N2);
+        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
+                           InnerSelect, N2);
+      }
+      // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+      if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+        SDValue Cond0 = N0->getOperand(0);
+        SDValue Cond1 = N0->getOperand(1);
+        SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+                                          N1.getValueType(), Cond1, N1, N2);
+        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
+                           InnerSelect);
+      }
+    }
+
+    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+    if (N1->getOpcode() == ISD::SELECT) {
+      SDValue N1_0 = N1->getOperand(0);
+      SDValue N1_1 = N1->getOperand(1);
+      SDValue N1_2 = N1->getOperand(2);
+      if (N1_2 == N2) {
+        // Create the actual and node if we can generate good code for it.
+        if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+          SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
+                                    N0, N1_0);
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
+                             N1_1, N2);
+        }
+        // Otherwise see if we can optimize the "and" to a better pattern.
+        if (SDValue Combined = visitANDLike(N0, N1_0, N))
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+                             N1_1, N2);
+      }
+    }
+    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+    if (N2->getOpcode() == ISD::SELECT) {
+      SDValue N2_0 = N2->getOperand(0);
+      SDValue N2_1 = N2->getOperand(1);
+      SDValue N2_2 = N2->getOperand(2);
+      if (N2_1 == N1) {
+        // Create the actual or node if we can generate good code for it.
+        if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+          SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
+                                   N0, N2_0);
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
+                             N1, N2_2);
+        }
+        // Otherwise see if we can optimize the "or" to a better pattern.
+        if (SDValue Combined = visitORLike(N0, N2_0, N))
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+                             N1, N2_2);
+      }
+    }
+  }
+
   return SDValue();
 }
 
Index: llvm/trunk/test/CodeGen/ARM/movcc-double.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/movcc-double.ll
+++ llvm/trunk/test/CodeGen/ARM/movcc-double.ll
@@ -0,0 +1,50 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "arm-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: tst
+; CHECK-NOT: movne
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a2, %a3
+  %and = and i1 %cmp0, %cmp1
+  %res = select i1 %and, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; select with or i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using or on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: orrs
+; CHECK-NOT: tst
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a2, %a3
+  %and = or i1 %cmp0, %cmp1
+  %res = select i1 %and, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: orrs
+; CHECK: movne
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a1, %a2
+  %or = or i1 %cmp0, %cmp1
+  %zero_one = zext i1 %or to i32
+  store volatile i32 %zero_one, i32* @var32
+  %res = select i1 %or, i32 %a3, i32 %a4
+  ret i32 %res
+}
Index: llvm/trunk/test/CodeGen/R600/or.ll
===================================================================
--- llvm/trunk/test/CodeGen/R600/or.ll
+++ llvm/trunk/test/CodeGen/R600/or.ll
@@ -156,14 +156,14 @@
 ; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
 
 ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
-define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
+define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
   %a = load float, float addrspace(1)* %in0
   %b = load float, float addrspace(1)* %in1
   %acmp = fcmp oge float %a, 0.000000e+00
   %bcmp = fcmp oge float %b, 0.000000e+00
   %or = or i1 %acmp, %bcmp
-  %result = select i1 %or, float %a, float %b
-  store float %result, float addrspace(1)* %out
+  %result = zext i1 %or to i32
+  store i32 %result, i32 addrspace(1)* %out
   ret void
 }
 
Index: llvm/trunk/test/CodeGen/X86/cmov-double.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/cmov-double.ll
+++ llvm/trunk/test/CodeGen/X86/cmov-double.ll
@@ -0,0 +1,52 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "x86_64-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: set
+; CHECK-NOT: and[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = fcmp olt float %a2, %a3
+  %and = and i1 %cmp0, %cmp1
+  %res = select i1 %and, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; select with or i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using or on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: set
+; CHECK-NOT: or[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = fcmp olt float %a2, %a3
+  %and = or i1 %cmp0, %cmp1
+  %res = select i1 %and, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: cmov
+; CHECK-NOT: cmov
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a1, %a2
+  %or = or i1 %cmp0, %cmp1
+  %zero_one = zext i1 %or to i32
+  store volatile i32 %zero_one, i32* @var32
+  %res = select i1 %or, i32 %a3, i32 %a4
+  ret i32 %res
+}
Index: llvm/trunk/test/CodeGen/X86/jump_sign.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/jump_sign.ll
+++ llvm/trunk/test/CodeGen/X86/jump_sign.ll
@@ -217,17 +217,15 @@
 ; PR13475
 ; If we have sub a, b and cmp b, a and the result of cmp is used
 ; by sbb, we should not optimize cmp away.
-define i32 @func_q(i32 %j.4, i32 %w, i32 %el) {
+define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
 ; CHECK-LABEL: func_q:
 ; CHECK: cmp
 ; CHECK-NEXT: sbb
-  %tmp532 = add i32 %j.4, %w
-  %tmp533 = icmp ugt i32 %tmp532, %el
-  %tmp534 = icmp ult i32 %w, %el
-  %or.cond = and i1 %tmp533, %tmp534
-  %tmp535 = sub i32 %el, %w
-  %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4
-  ret i32 %j.5
+  %1 = icmp ult i32 %a0, %a1
+  %2 = sub i32 %a1, %a0
+  %3 = select i1 %1, i32 -1, i32 0
+  %4 = xor i32 %2, %3
+  ret i32 %4
 }
 ; rdar://11873276
 define i8* @func_r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
Index: llvm/trunk/test/CodeGen/X86/zext-sext.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/zext-sext.ll
+++ llvm/trunk/test/CodeGen/X86/zext-sext.ll
@@ -34,11 +34,12 @@
   %tmp12 = add i64 %tmp11, 5089792279245435153
 
 ; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
-; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
 ; CHECK: cmpl $-8608074, %e[[REGISTER_zext]]
+; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
 ; CHECK-NOT: [[REGISTER_zext]]
-; CHECK-DAG: testl %e[[REGISTER_zext]]
-; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]]
+; CHECK-DAG: cmpl $2138875573, %e[[REGISTER_zext]]
+; CHECK: movq [[REGISTER_sext]], [[REGISTER_sext2:%[a-z0-9]+]]
+; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext2]]
 
   %tmp13 = sub i64 %tmp12, 2138875574
   %tmp14 = zext i32 %tmp4 to i64