Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -415,7 +415,8 @@
     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                               const SDLoc &DL);
     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
-                          const SDLoc &DL, bool foldBooleans = true);
+                          const SDLoc &DL, bool foldBooleans);
+    SDValue rebuildSetCC(SDValue N);
 
     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                            SDValue &CC) const;
@@ -7157,9 +7158,33 @@
 }
 
 SDValue DAGCombiner::visitSETCC(SDNode *N) {
-  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
-                       cast<CondCodeSDNode>(N->getOperand(2))->get(),
-                       SDLoc(N));
+  // setcc is very commonly used as an argument to brcond. This pattern
+  // also lends itself to numerous combines and, as a result, it is desirable
+  // that we keep the argument to a brcond as a setcc as much as possible.
+  bool PreferSetCC =
+      N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
+
+  SDValue Combined = SimplifySetCC(
+      N->getValueType(0), N->getOperand(0), N->getOperand(1),
+      cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
+
+  if (!Combined)
+    return SDValue();
+
+  // If we prefer to have a setcc, and we don't, we'll try our best to
+  // recreate one using rebuildSetCC.
+  if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
+    SDValue NewSetCC = rebuildSetCC(Combined);
+
+    // We don't have anything interesting to combine to.
+    if (NewSetCC.getNode() == N)
+      return SDValue();
+
+    if (NewSetCC)
+      return NewSetCC;
+  }
+
+  return Combined;
 }
 
 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
@@ -11151,16 +11176,22 @@
                        N1.getOperand(0), N1.getOperand(1), N2);
   }
 
-  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
-      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
-       (N1.getOperand(0).hasOneUse() &&
-        N1.getOperand(0).getOpcode() == ISD::SRL))) {
-    SDNode *Trunc = nullptr;
-    if (N1.getOpcode() == ISD::TRUNCATE) {
-      // Look pass the truncate.
-      Trunc = N1.getNode();
-      N1 = N1.getOperand(0);
-    }
+  if (N1.hasOneUse()) {
+    if (SDValue NewN1 = rebuildSetCC(N1))
+      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::rebuildSetCC(SDValue N) {
+  if (N.getOpcode() == ISD::SRL ||
+      (N.getOpcode() == ISD::TRUNCATE &&
+       (N.getOperand(0).hasOneUse() &&
+        N.getOperand(0).getOpcode() == ISD::SRL))) {
+    // Look past the truncate.
+    if (N.getOpcode() == ISD::TRUNCATE)
+      N = N.getOperand(0);
 
     // Match this pattern so that we can generate simpler code:
     //
@@ -11179,75 +11210,43 @@
     // This applies only when the AND constant value has one bit set and the
     // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
-    SDValue Op0 = N1.getOperand(0);
-    SDValue Op1 = N1.getOperand(1);
+    SDValue Op0 = N.getOperand(0);
+    SDValue Op1 = N.getOperand(1);
 
-    if (Op0.getOpcode() == ISD::AND &&
-        Op1.getOpcode() == ISD::Constant) {
+    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
       SDValue AndOp1 = Op0.getOperand(1);
 
       if (AndOp1.getOpcode() == ISD::Constant) {
         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
 
         if (AndConst.isPowerOf2() &&
-            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+            cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
           SDLoc DL(N);
-          SDValue SetCC =
-            DAG.getSetCC(DL,
-                         getSetCCResultType(Op0.getValueType()),
-                         Op0, DAG.getConstant(0, DL, Op0.getValueType()),
-                         ISD::SETNE);
-
-          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
-                                          MVT::Other, Chain, SetCC, N2);
-          // Don't add the new BRCond into the worklist or else SimplifySelectCC
-          // will convert it back to (X & C1) >> C2.
-          CombineTo(N, NewBRCond, false);
-          // Truncate is dead.
-          if (Trunc)
-            deleteAndRecombine(Trunc);
-          // Replace the uses of SRL with SETCC
-          WorklistRemover DeadNodes(*this);
-          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
-          deleteAndRecombine(N1.getNode());
-          return SDValue(N, 0);  // Return N so it doesn't get rechecked!
+          return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
+                              Op0, DAG.getConstant(0, DL, Op0.getValueType()),
+                              ISD::SETNE);
         }
       }
     }
-
-    if (Trunc)
-      // Restore N1 if the above transformation doesn't match.
-      N1 = N->getOperand(1);
   }
 
   // Transform br(xor(x, y)) -> br(x != y)
   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
-  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
-    SDNode *TheXor = N1.getNode();
-    SDValue Op0 = TheXor->getOperand(0);
-    SDValue Op1 = TheXor->getOperand(1);
-    if (Op0.getOpcode() == Op1.getOpcode()) {
-      // Avoid missing important xor optimizations.
-      if (SDValue Tmp = visitXOR(TheXor)) {
-        if (Tmp.getNode() != TheXor) {
-          DEBUG(dbgs() << "\nReplacing.8 ";
-                TheXor->dump(&DAG);
-                dbgs() << "\nWith: ";
-                Tmp.getNode()->dump(&DAG);
-                dbgs() << '\n');
-          WorklistRemover DeadNodes(*this);
-          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
-          deleteAndRecombine(TheXor);
-          return DAG.getNode(ISD::BRCOND, SDLoc(N),
-                             MVT::Other, Chain, Tmp, N2);
-        }
+  if (N.getOpcode() == ISD::XOR) {
+    SDNode *TheXor = N.getNode();
 
-        // visitXOR has changed XOR's operands or replaced the XOR completely,
-        // bail out.
-        return SDValue(N, 0);
-      }
+    // Avoid missing important xor optimizations.
+    while (SDValue Tmp = visitXOR(TheXor)) {
+      // We don't have a XOR anymore, bail.
+      if (Tmp.getOpcode() != ISD::XOR)
+        return Tmp;
+
+      TheXor = Tmp.getNode();
     }
 
+    SDValue Op0 = TheXor->getOperand(0);
+    SDValue Op1 = TheXor->getOperand(1);
+
     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
       bool Equal = false;
       if (isOneConstant(Op0) && Op0.hasOneUse() &&
@@ -11256,19 +11255,12 @@
         Equal = true;
       }
 
-      EVT SetCCVT = N1.getValueType();
+      EVT SetCCVT = N.getValueType();
       if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
-      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
-                                   SetCCVT,
-                                   Op0, Op1,
-                                   Equal ? ISD::SETEQ : ISD::SETNE);
       // Replace the uses of XOR with SETCC
-      WorklistRemover DeadNodes(*this);
-      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
-      deleteAndRecombine(N1.getNode());
-      return DAG.getNode(ISD::BRCOND, SDLoc(N),
-                         MVT::Other, Chain, SetCC, N2);
+      return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
+                          Equal ? ISD::SETEQ : ISD::SETNE);
     }
   }
Index: llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
===================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
+++ llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
@@ -2800,6 +2800,8 @@
          (J2_jumpf I1:$Pu, bb:$dst)>;
 def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst),
          (J2_jumpf I1:$Pu, bb:$dst)>;
+def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst),
+         (J2_jumpf I1:$Pu, bb:$dst)>;
 def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst),
          (J2_jumpt I1:$Pu, bb:$dst)>;
Index: llvm/trunk/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
@@ -7,7 +7,6 @@
 ; GCN-LABEL: {{^}}vcc_shrink_vcc_def:
 ; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
-; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) {
 bb0:
   %tmp = icmp sgt i32 %arg1, 4
@@ -34,7 +33,6 @@
 ; GCN-LABEL: {{^}}preserve_condition_undef_flag:
 ; GCN-NOT: vcc
 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
-; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
 bb0:
   %tmp = icmp sgt i32 %arg1, 4
Index: llvm/trunk/test/CodeGen/AMDGPU/setcc.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/setcc.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/setcc.ll
@@ -397,9 +397,9 @@
 }
 
 ; FUNC-LABEL: setcc-i1-and-xor
-; GCN-DAG: v_cmp_ge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; GCN-DAG: v_cmp_le_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
-; GCN: s_and_b64 s[2:3], [[A]], [[B]]
+; GCN-DAG: v_cmp_nge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
+; GCN-DAG: v_cmp_nle_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
+; GCN: s_or_b64 s[2:3], [[A]], [[B]]
 define amdgpu_kernel void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 {
 bb0:
   %tmp5 = fcmp oge float %cond, 0.000000e+00
Index: llvm/trunk/test/CodeGen/X86/and-sink.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/and-sink.ll
+++ llvm/trunk/test/CodeGen/X86/and-sink.ll
@@ -14,8 +14,8 @@
 ; CHECK-NEXT:    je .LBB0_3
 ; CHECK-NEXT:  # %bb.1: # %bb0
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl $0, A
 ; CHECK-NEXT:    testb $4, %al
+; CHECK-NEXT:    movl $0, A
 ; CHECK-NEXT:    jne .LBB0_3
 ; CHECK-NEXT:  # %bb.2: # %bb1
 ; CHECK-NEXT:    movl $1, %eax
@@ -61,8 +61,8 @@
 ; CHECK-NEXT:    je .LBB1_5
 ; CHECK-NEXT:  # %bb.3: # %bb1
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    movl $0, C
 ; CHECK-NEXT:    testb $4, %cl
+; CHECK-NEXT:    movl $0, C
 ; CHECK-NEXT:    jne .LBB1_2
 ; CHECK-NEXT:  # %bb.4: # %bb2
 ; CHECK-NEXT:    movl $1, %eax
Index: llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll
+++ llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll
@@ -1146,12 +1146,9 @@
 define void @and32_imm_br() nounwind {
 ; CHECK-LABEL: and32_imm_br:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl $-2147483648, %eax # encoding: [0xb8,0x00,0x00,0x00,0x80]
+; CHECK-NEXT:    andl $-2147483648, {{.*}}(%rip) # encoding: [0x81,0x25,A,A,A,A,0x00,0x00,0x00,0x80]
+; CHECK-NEXT:    # fixup A - offset: 2, value: g32-8, kind: reloc_riprel_4byte
 ; CHECK-NEXT:    # imm = 0x80000000
-; CHECK-NEXT:    andl {{.*}}(%rip), %eax # encoding: [0x23,0x05,A,A,A,A]
-; CHECK-NEXT:    # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte
-; CHECK-NEXT:    movl %eax, {{.*}}(%rip) # encoding: [0x89,0x05,A,A,A,A]
-; CHECK-NEXT:    # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte
 ; CHECK-NEXT:    jne .LBB35_2 # encoding: [0x75,A]
 ; CHECK-NEXT:    # fixup A - offset: 1, value: .LBB35_2-1, kind: FK_PCRel_1
 ; CHECK-NEXT:  # %bb.1: # %a
@@ -1244,13 +1241,9 @@
 define void @and16_imm_br() nounwind {
 ; CHECK-LABEL: and16_imm_br:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %eax # encoding: [0x0f,0xb7,0x05,A,A,A,A]
-; CHECK-NEXT:    # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte
-; CHECK-NEXT:    andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
+; CHECK-NEXT:    andw $-32768, {{.*}}(%rip) # encoding: [0x66,0x81,0x25,A,A,A,A,0x00,0x80]
+; CHECK-NEXT:    # fixup A - offset: 3, value: g16-6, kind: reloc_riprel_4byte
 ; CHECK-NEXT:    # imm = 0x8000
-; CHECK-NEXT:    movw %ax, {{.*}}(%rip) # encoding: [0x66,0x89,0x05,A,A,A,A]
-; CHECK-NEXT:    # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte
-; CHECK-NEXT:    testw %ax, %ax # encoding: [0x66,0x85,0xc0]
 ; CHECK-NEXT:    jne .LBB38_2 # encoding: [0x75,A]
 ; CHECK-NEXT:    # fixup A - offset: 1, value: .LBB38_2-1, kind: FK_PCRel_1
 ; CHECK-NEXT:  # %bb.1: # %a
Index: llvm/trunk/test/CodeGen/X86/or-branch.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/or-branch.ll
+++ llvm/trunk/test/CodeGen/X86/or-branch.ll
@@ -19,11 +19,10 @@
 ; JUMP1-LABEL: foo:
 ; JUMP1:       # %bb.0: # %entry
 ; JUMP1-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; JUMP1-NEXT:    sete %al
-; JUMP1-NEXT:    cmpl $5, {{[0-9]+}}(%esp)
-; JUMP1-NEXT:    setl %cl
-; JUMP1-NEXT:    orb %al, %cl
-; JUMP1-NEXT:    cmpb $1, %cl
+; JUMP1-NEXT:    setne %al
+; JUMP1-NEXT:    cmpl $4, {{[0-9]+}}(%esp)
+; JUMP1-NEXT:    setg %cl
+; JUMP1-NEXT:    testb %al, %cl
 ; JUMP1-NEXT:    jne .LBB0_1
 ; JUMP1-NEXT:  # %bb.2: # %cond_true
 ; JUMP1-NEXT:    jmp bar # TAILCALL
@@ -50,11 +49,10 @@
 ; JUMP2-LABEL: unpredictable:
 ; JUMP2:       # %bb.0: # %entry
 ; JUMP2-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; JUMP2-NEXT:    sete %al
-; JUMP2-NEXT:    cmpl $5, {{[0-9]+}}(%esp)
-; JUMP2-NEXT:    setl %cl
-; JUMP2-NEXT:    orb %al, %cl
-; JUMP2-NEXT:    cmpb $1, %cl
+; JUMP2-NEXT:    setne %al
+; JUMP2-NEXT:    cmpl $4, {{[0-9]+}}(%esp)
+; JUMP2-NEXT:    setg %cl
+; JUMP2-NEXT:    testb %al, %cl
 ; JUMP2-NEXT:    jne .LBB1_1
 ; JUMP2-NEXT:  # %bb.2: # %cond_true
 ; JUMP2-NEXT:    jmp bar # TAILCALL
@@ -64,11 +62,10 @@
 ; JUMP1-LABEL: unpredictable:
 ; JUMP1:       # %bb.0: # %entry
 ; JUMP1-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; JUMP1-NEXT:    sete %al
-; JUMP1-NEXT:    cmpl $5, {{[0-9]+}}(%esp)
-; JUMP1-NEXT:    setl %cl
-; JUMP1-NEXT:    orb %al, %cl
-; JUMP1-NEXT:    cmpb $1, %cl
+; JUMP1-NEXT:    setne %al
+; JUMP1-NEXT:    cmpl $4, {{[0-9]+}}(%esp)
+; JUMP1-NEXT:    setg %cl
+; JUMP1-NEXT:    testb %al, %cl
 ; JUMP1-NEXT:    jne .LBB1_1
 ; JUMP1-NEXT:  # %bb.2: # %cond_true
 ; JUMP1-NEXT:    jmp bar # TAILCALL
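
Note on the DAGCombiner change above: rebuildSetCC turns the (X & C) >> log2(C) operand of a brcond back into setcc ne (X & C), 0 when C has a single bit set. The standalone C++ sketch below is not part of the patch; the mask constant and input range are arbitrary choices used only to illustrate the scalar identity the combine relies on.

// Standalone sketch (illustration only): for a power-of-two mask C,
// (X & C) >> log2(C) produces exactly the same 0/1 value as ((X & C) != 0),
// which is the setcc form rebuildSetCC recreates for the brcond operand.
#include <cassert>
#include <cstdint>

static bool isPowerOfTwo(uint32_t C) { return C != 0 && (C & (C - 1)) == 0; }

static uint32_t log2u(uint32_t C) {
  uint32_t L = 0;
  while (C >>= 1)
    ++L;
  return L;
}

int main() {
  const uint32_t C = 0x8; // arbitrary single-bit mask for the check
  assert(isPowerOfTwo(C));
  for (uint32_t X = 0; X < 4096; ++X) {
    uint32_t SrlForm = (X & C) >> log2u(C);    // pattern matched by rebuildSetCC
    uint32_t SetCCForm = (X & C) != 0 ? 1 : 0; // equivalent setcc-ne-zero form
    assert(SrlForm == SetCCForm);
  }
  return 0;
}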