Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -84,6 +84,9 @@ "Number of sign extensions for compare inputs added."); STATISTIC(ZeroExtensionsAdded, "Number of zero extensions for compare inputs added."); +STATISTIC(NumLogicOpsOnComparison, + "Number of logical ops on i1 values calculated in GPR."); + // FIXME: Remove this once the bug has been fixed! cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); @@ -2551,6 +2554,10 @@ return true; } +static bool isLogicOp(unsigned Opc) { + return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; +} + /// If this node is a sign/zero extension of an integer comparison, /// it can usually be computed in GPR's rather than using comparison /// instructions and ISEL. @@ -2560,12 +2567,21 @@ assert((N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"); - if (N->getOperand(0).getOpcode() != ISD::SETCC) + + SDValue WideRes; + // If we are zero-extending the result of a logical operation on i1 + // values, we can keep the values in GPRs. 
+ if (isLogicOp(N->getOperand(0).getOpcode()) && + N->getOperand(0).getValueType() == MVT::i1 && + N->getOpcode() == ISD::ZERO_EXTEND) + WideRes = getLogicalOpInGPR(N->getOperand(0), true); + else if (N->getOperand(0).getOpcode() != ISD::SETCC) return false; + else + WideRes = getSETCCInGPR(N->getOperand(0), + N->getOpcode() == ISD::SIGN_EXTEND); SDLoc dl(N); - SDValue WideRes = getSETCCInGPR(N->getOperand(0), - N->getOpcode() == ISD::SIGN_EXTEND); bool Inputs32Bit = N->getOperand(0).getOperand(0).getValueType() == MVT::i32; bool Output32Bit = N->getValueType(0) == MVT::i32; @@ -2723,10 +2739,6 @@ return true; } -static bool isLogicOp(unsigned Opc) { - return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; -} - // Lower a logical operation on i1 values into a GPR sequence if possible. // The result can be kept in a GPR if requested. // Three types of inputs can be handled: @@ -2736,11 +2748,104 @@ // There is also a special case that is handled (namely a complement operation // achieved with xor %a, -1). SDValue PPCDAGToDAGISel::getLogicalOpInGPR(SDValue LogicOp, bool KeepInGPR) { - return SDValue(); + unsigned Op1Opc = LogicOp.getOperand(0).getOpcode(); + unsigned Op2Opc = LogicOp.getOperand(1).getOpcode(); + SDLoc dl(LogicOp); + SDValue Op1, Op2; + + // Special case: xor %a, -1 + ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(LogicOp.getOperand(1)); + bool IsNot = InputConst && InputConst->isAllOnesValue() && + LogicOp.getOpcode() == ISD::XOR; + + auto getLogicOperand = [&] (SDValue Operand) -> SDValue { + unsigned OperandOpcode = Operand.getOpcode(); + if (OperandOpcode == ISD::SETCC) + return getSETCCInGPR(Operand, false); + else if (OperandOpcode == ISD::TRUNCATE) { + SDValue InputOp = Operand.getOperand(0); + EVT InVT = InputOp.getValueType(); + // If this is a (xor (trunc to i1), -1), no need to truncate twice. + if (IsNot && !KeepInGPR) + return InVT == MVT::i64 ? 
InputOp : addExtOrTrunc(InputOp, true, false); + return + SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : + PPC::RLDICL, dl, InVT, InputOp, + getI64Imm(0, dl), getI64Imm(63, dl)), 0); + } else if (isLogicOp(OperandOpcode)) + return getLogicalOpInGPR(Operand, true); + return SDValue(); + }; + Op1 = getLogicOperand(LogicOp.getOperand(0)); + Op2 = getLogicOperand(LogicOp.getOperand(1)); + + if (!Op1 || (!Op2 && !IsNot)) + return SDValue(); + + NumLogicOpsOnComparison++; + + if (Op1.getValueType() == MVT::i32) + Op1 = addExtOrTrunc(Op1, true, false); + if (!IsNot && Op2.getValueType() == MVT::i32) + Op2 = addExtOrTrunc(Op2, true, false); + + unsigned NewOpc; + switch (LogicOp.getOpcode()) { + default: llvm_unreachable("Unknown logical operation."); + case ISD::AND: NewOpc = KeepInGPR ? PPC::AND8 : PPC::AND8o; break; + case ISD::OR: NewOpc = KeepInGPR ? PPC::OR8 : PPC::OR8o; break; + case ISD::XOR: NewOpc = KeepInGPR ? PPC::XOR8 : PPC::XOR8o; break; + } + + SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); + // We can negate an i1 value (in a GPR) by clearing the top 63 bits and using + // the EQ bit. If we had a zero in the least significant bit, the result is + // zero and the EQ bit is set. Otherwise, it isn't set. + if (IsNot) { + if (!KeepInGPR) { + SDValue WideOp = + SDValue(CurDAG->getMachineNode(PPC::RLDICLo, dl, MVT::i64, MVT::Glue, + Op1, getI64Imm(0, dl), + getI64Imm(63, dl)), 0); + SDValue SRIdxVal = + CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32); + return SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + MVT::i1, CR0Reg, SRIdxVal, + WideOp.getValue(1)), 0); + } + Op2 = getI64Imm(1, dl); + NewOpc = PPC::XORI8; + } + + SDValue WideOp = KeepInGPR ? 
+ SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, Op1, Op2), 0) : + SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, MVT::Glue, Op1, + Op2), 0); + if (KeepInGPR) { + return WideOp; + } + + SDValue SRIdxVal = + CurDAG->getTargetConstant(PPC::sub_gt, dl, MVT::i32); + return SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + MVT::i1, CR0Reg, SRIdxVal, + WideOp.getValue(1)), 0); } bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) { - return false; + if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + SDLoc dl(N); + if (N->getValueType(0) != MVT::i1) + return false; + assert(isLogicOp(N->getOpcode()) && + "Expected a logical operation on setcc results."); + SDValue LoweredLogical = getLogicalOpInGPR(SDValue(N, 0), false); + if (!LoweredLogical) + return false; + + ReplaceNode(N, LoweredLogical.getNode()); + return true; } /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. Index: test/CodeGen/PowerPC/crbits.ll =================================================================== --- test/CodeGen/PowerPC/crbits.ll +++ test/CodeGen/PowerPC/crbits.ll @@ -94,13 +94,15 @@ ret i1 %or7 ; CHECK-LABEL: @test5 +; CHECK-DAG: li [[NEG2:[0-9]+]], -2 ; CHECK-DAG: and [[REG1:[0-9]+]], 3, 4 -; CHECK-DAG: cmpwi {{[0-9]+}}, 5, -2 -; CHECK-DAG: li [[REG3:[0-9]+]], 1 -; CHECK-DAG: andi. {{[0-9]+}}, [[REG1]], 1 -; CHECK-DAG: crandc [[REG5:[0-9]+]], -; CHECK: isel 3, 0, [[REG3]], [[REG5]] -; CHECK: blr +; CHECK-DAG: xor [[NE1:[0-9]+]], 5, [[NEG2]] +; CHECK-DAG: clrldi [[TRUNC:[0-9]+]], [[REG1]], 63 +; CHECK-DAG: cntlzw [[NE2:[0-9]+]], [[NE1]] +; CHECK: srwi [[NE3:[0-9]+]], [[NE2]], 5 +; CHECK: xori [[NE4:[0-9]+]], [[NE3]], 1 +; CHECK: or 3, [[TRUNC]], [[NE4]] +; CHECK-NEXT: blr } ; Function Attrs: nounwind readnone @@ -112,15 +114,16 @@ ret i1 %and7 ; CHECK-LABEL: @test6 -; CHECK-DAG: andi. {{[0-9]+}}, 3, 1 -; CHECK-DAG: cmpwi {{[0-9]+}}, 5, -2 -; CHECK-DAG: crmove [[REG1:[0-9]+]], 1 -; CHECK-DAG: andi. 
{{[0-9]+}}, 4, 1 -; CHECK-DAG: li [[REG2:[0-9]+]], 1 -; CHECK-DAG: crorc [[REG4:[0-9]+]], 1, -; CHECK-DAG: crnand [[REG5:[0-9]+]], [[REG4]], [[REG1]] -; CHECK: isel 3, 0, [[REG2]], [[REG5]] -; CHECK: blr +; CHECK-DAG: li [[NEG2:[0-9]+]], -2 +; CHECK-DAG: clrldi [[CLR1:[0-9]+]], 4, 63 +; CHECK-DAG: clrldi [[CLR2:[0-9]+]], 3, 63 +; CHECK-DAG: xor [[NE1:[0-9]+]], 5, [[NEG2]] +; CHECK-DAG: cntlzw [[NE2:[0-9]+]], [[NE1]] +; CHECK: srwi [[NE3:[0-9]+]], [[NE2]], 5 +; CHECK: xori [[NE4:[0-9]+]], [[NE3]], 1 +; CHECK: or [[OR:[0-9]+]], [[NE4]], [[CLR1]] +; CHECK: and 3, [[OR]], [[CLR2]] +; CHECK-NEXT: blr } ; Function Attrs: nounwind readnone @@ -187,12 +190,13 @@ ret i32 %and ; CHECK-LABEL: @test10 -; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 0 -; CHECK-DAG: cmpwi {{[0-9]+}}, 4, 0 -; CHECK-DAG: li [[REG2:[0-9]+]], 1 -; CHECK-DAG: crorc [[REG3:[0-9]+]], -; CHECK: isel 3, 0, [[REG2]], [[REG3]] -; CHECK: blr +; CHECK-DAG: cntlzw 3, 3 +; CHECK-DAG: cntlzw 4, 4 +; CHECK-DAG: srwi 3, 3, 5 +; CHECK-DAG: srwi 4, 4, 5 +; CHECK: xori 3, 3, 1 +; CHECK: and 3, 3, 4 +; CHECK-NEXT: blr } attributes #0 = { nounwind readnone } Index: test/CodeGen/PowerPC/extend-i1-logicals.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/extend-i1-logicals.ll @@ -0,0 +1,415 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl + +@gi = local_unnamed_addr global i32 0, align 4 +@gl = local_unnamed_addr global i64 0, align 8 + +; Function Attrs: norecurse nounwind readnone +define 
zeroext i1 @_Z18chainedLogicalOps1ii(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: _Z18chainedLogicalOps1ii: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: subf r3, r3, r4 +; CHECK-NEXT: srwi r4, r4, 31 +; CHECK-NEXT: rldicl r3, r3, 1, 63 +; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: blr +entry: + %cmp = icmp sgt i32 %a, %b + %cmp1 = icmp slt i32 %b, 0 + %0 = and i1 %cmp, %cmp1 + ret i1 %0 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i1 @_Z18chainedLogicalOps2iib(i32 signext %a, i32 signext %b, i1 zeroext %c) { +; CHECK-LABEL: _Z18chainedLogicalOps2iib: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: subf r3, r3, r4 +; CHECK-NEXT: srwi r4, r4, 31 +; CHECK-NEXT: clrldi r12, r5, 63 +; CHECK-NEXT: rldicl r3, r3, 1, 63 +; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: or r3, r3, r12 +; CHECK-NEXT: blr +entry: + %cmp = icmp sgt i32 %a, %b + %cmp1 = icmp slt i32 %b, 0 + %or.cond = and i1 %cmp, %cmp1 + %.c = or i1 %or.cond, %c + ret i1 %.c +} + +; Function Attrs: norecurse nounwind readnone +; FIXME: This includes a compare as SDAG puts the truncate into entry rather +; than where it's needed. +define zeroext i1 @_Z18chainedLogicalOps3iiib(i32 signext %a, i32 signext %b, i32 signext %c, i1 zeroext %d) { +; CHECK-LABEL: _Z18chainedLogicalOps3iiib: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: andi. r6, r6, 1 +; CHECK-NEXT: subf r3, r3, r4 +; CHECK-NEXT: rldicl r3, r3, 1, 63 +; CHECK-NEXT: srwi r6, r4, 31 +; CHECK-NEXT: crmove 20, 1 +; CHECK-NEXT: and. 
r3, r3, r6 +; CHECK-NEXT: bc 4, 1, .LBB2_2 +; CHECK-NEXT: # BB#1: # %land.rhs +; CHECK-NEXT: cmpw cr0, r4, r5 +; CHECK-NEXT: crorc 20, 20, 2 +; CHECK-NEXT: b .LBB2_3 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: .LBB2_3: # %land.end +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: li r4, 1 +; CHECK-NEXT: isel r3, r4, r3, 20 +; CHECK-NEXT: blr +entry: + %cmp = icmp sgt i32 %a, %b + %cmp1 = icmp slt i32 %b, 0 + %or.cond = and i1 %cmp, %cmp1 + br i1 %or.cond, label %land.rhs, label %land.end + +land.rhs: ; preds = %entry + %cmp2 = icmp ne i32 %b, %c + %0 = or i1 %cmp2, %d + br label %land.end + +land.end: ; preds = %land.rhs, %entry + %1 = phi i1 [ false, %entry ], [ %0, %land.rhs ] + ret i1 %1 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i1 @_Z19chainedLogicalOps1Lxx(i64 %a, i64 %b) { +; CHECK-LABEL: _Z19chainedLogicalOps1Lxx: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: sradi r5, r4, 63 +; CHECK-NEXT: rldicl r6, r3, 1, 63 +; CHECK-NEXT: subfc r12, r3, r4 +; CHECK-NEXT: rldicl r4, r4, 1, 63 +; CHECK-NEXT: adde r3, r6, r5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: blr +entry: + %cmp = icmp sgt i64 %a, %b + %cmp1 = icmp slt i64 %b, 0 + %0 = and i1 %cmp, %cmp1 + ret i1 %0 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i1 @_Z19chainedLogicalOps2Lxxb(i64 %a, i64 %b, i1 zeroext %c) { +; CHECK-LABEL: _Z19chainedLogicalOps2Lxxb: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: sradi r6, r4, 63 +; CHECK-NEXT: rldicl r7, r3, 1, 63 +; CHECK-NEXT: clrldi r12, r5, 63 +; CHECK-NEXT: subfc r11, r3, r4 +; CHECK-NEXT: rldicl r4, r4, 1, 63 +; CHECK-NEXT: adde r3, r7, r6 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: or r3, r3, r12 +; CHECK-NEXT: blr +entry: + %cmp = icmp sgt i64 %a, %b + %cmp1 = icmp slt i64 %b, 0 + %or.cond = and i1 %cmp, %cmp1 + %.c = or i1 %or.cond, %c + ret i1 %.c +} + +; Function Attrs: norecurse nounwind readnone +; FIXME: This includes a compare as SDAG 
puts the truncate into entry rather +; than where it's needed. +define zeroext i1 @_Z19chainedLogicalOps3Lxxxb(i64 %a, i64 %b, i64 %c, i1 zeroext %d) { +; CHECK-LABEL: _Z19chainedLogicalOps3Lxxxb: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: sradi r7, r4, 63 +; CHECK-NEXT: andi. r6, r6, 1 +; CHECK-NEXT: rldicl r12, r3, 1, 63 +; CHECK-NEXT: subfc r8, r3, r4 +; CHECK-NEXT: rldicl r6, r4, 1, 63 +; CHECK-NEXT: crmove 20, 1 +; CHECK-NEXT: adde r3, r12, r7 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: and. r3, r3, r6 +; CHECK-NEXT: bc 4, 1, .LBB5_2 +; CHECK-NEXT: # BB#1: # %land.rhs +; CHECK-NEXT: cmpd r4, r5 +; CHECK-NEXT: crorc 20, 20, 2 +; CHECK-NEXT: b .LBB5_3 +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: .LBB5_3: # %land.end +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: li r4, 1 +; CHECK-NEXT: isel r3, r4, r3, 20 +; CHECK-NEXT: blr +entry: + %cmp = icmp sgt i64 %a, %b + %cmp1 = icmp slt i64 %b, 0 + %or.cond = and i1 %cmp, %cmp1 + br i1 %or.cond, label %land.rhs, label %land.end + +land.rhs: ; preds = %entry + %cmp2 = icmp ne i64 %b, %c + %0 = or i1 %cmp2, %d + br label %land.end + +land.end: ; preds = %land.rhs, %entry + %1 = phi i1 [ false, %entry ], [ %0, %land.rhs ] + ret i1 %1 +} + +; Function Attrs: norecurse nounwind +define void @_Z19chainedLogicalOps1Sii(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: _Z19chainedLogicalOps1Sii: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: addis r6, r2, .LC1@toc@ha +; CHECK-NEXT: subf r3, r3, r4 +; CHECK-NEXT: srwi r4, r4, 31 +; CHECK-NEXT: ld r5, .LC0@toc@l(r5) +; CHECK-NEXT: ld r6, .LC1@toc@l(r6) +; CHECK-NEXT: rldicl r3, r3, 1, 63 +; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: stw r3, 0(r5) +; CHECK-NEXT: std r3, 0(r6) +; CHECK-NEXT: blr +entry: + %cmp = icmp sgt i32 %a, %b + %cmp1 = icmp slt i32 %b, 0 + %0 = and i1 %cmp, %cmp1 + %conv = zext i1 %0 to i32 + store i32 %conv, i32* @gi, align 4 + %conv6 = zext i1 %0 to i64 + store i64 %conv6, i64* @gl, align 8 + ret void +} 
+ +; Function Attrs: norecurse nounwind +define void @_Z19chainedLogicalOps2Siib(i32 signext %a, i32 signext %b, i1 zeroext %c) { +; CHECK-LABEL: _Z19chainedLogicalOps2Siib: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: subf r3, r3, r4 +; CHECK-NEXT: addis r6, r2, .LC0@toc@ha +; CHECK-NEXT: addis r7, r2, .LC1@toc@ha +; CHECK-NEXT: srwi r4, r4, 31 +; CHECK-NEXT: clrldi r12, r5, 63 +; CHECK-NEXT: rldicl r3, r3, 1, 63 +; CHECK-NEXT: ld r6, .LC0@toc@l(r6) +; CHECK-NEXT: ld r7, .LC1@toc@l(r7) +; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: or r3, r3, r12 +; CHECK-NEXT: stw r3, 0(r6) +; CHECK-NEXT: std r3, 0(r7) +; CHECK-NEXT: blr +entry: + %cmp = icmp sgt i32 %a, %b + %cmp1 = icmp slt i32 %b, 0 + %or.cond = and i1 %cmp, %cmp1 + %narrow = or i1 %or.cond, %c + %.phitmp = zext i1 %narrow to i32 + store i32 %.phitmp, i32* @gi, align 4 + %0 = zext i1 %narrow to i64 + store i64 %0, i64* @gl, align 8 + ret void +} + +; Function Attrs: norecurse nounwind +; FIXME: This includes a compare as SDAG puts the truncate into entry rather +; than where it's needed. +define void @_Z19chainedLogicalOps3Siiib(i32 signext %a, i32 signext %b, i32 signext %c, i1 zeroext %d) { +; CHECK-LABEL: _Z19chainedLogicalOps3Siiib: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: andi. r6, r6, 1 +; CHECK-NEXT: subf r3, r3, r4 +; CHECK-NEXT: rldicl r3, r3, 1, 63 +; CHECK-NEXT: srwi r6, r4, 31 +; CHECK-NEXT: crmove 20, 1 +; CHECK-NEXT: and. 
r3, r3, r6 +; CHECK-NEXT: bc 4, 1, .LBB8_2 +; CHECK-NEXT: # BB#1: # %land.rhs +; CHECK-NEXT: cmpw cr0, r4, r5 +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: li r12, 1 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: crandc 20, 2, 20 +; CHECK-NEXT: isel r4, 0, r12, 20 +; CHECK-NEXT: stw r4, 0(r3) +; CHECK-NEXT: isel r3, 0, r5, 20 +; CHECK-NEXT: b .LBB8_3 +; CHECK-NEXT: .LBB8_2: # %land.end11.critedge +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: stw r4, 0(r3) +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: .LBB8_3: # %land.end11 +; CHECK-NEXT: addis r4, r2, .LC1@toc@ha +; CHECK-NEXT: ld r4, .LC1@toc@l(r4) +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp sgt i32 %a, %b + %cmp1 = icmp slt i32 %b, 0 + %or.cond = and i1 %cmp, %cmp1 + br i1 %or.cond, label %land.rhs, label %land.end11.critedge + +land.rhs: ; preds = %entry + %cmp2 = icmp ne i32 %b, %c + %0 = or i1 %cmp2, %d + %phitmp = zext i1 %0 to i32 + store i32 %phitmp, i32* @gi, align 4 + %phitmp22 = zext i1 %0 to i64 + br label %land.end11 + +land.end11.critedge: ; preds = %entry + store i32 0, i32* @gi, align 4 + br label %land.end11 + +land.end11: ; preds = %land.end11.critedge, %land.rhs + %1 = phi i64 [ %phitmp22, %land.rhs ], [ 0, %land.end11.critedge ] + store i64 %1, i64* @gl, align 8 + ret void +} + +; Function Attrs: norecurse nounwind +define void @_Z20chainedLogicalOps1LSxx(i64 %a, i64 %b) { +; CHECK-LABEL: _Z20chainedLogicalOps1LSxx: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: sradi r6, r4, 63 +; CHECK-NEXT: rldicl r7, r3, 1, 63 +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: subfc r8, r3, r4 +; CHECK-NEXT: ld r5, .LC0@toc@l(r5) +; CHECK-NEXT: adde r8, r7, r6 +; CHECK-NEXT: subfc r3, r3, r4 +; CHECK-NEXT: rldicl r4, r4, 1, 63 +; CHECK-NEXT: addis r3, r2, .LC1@toc@ha +; CHECK-NEXT: adde r6, r7, r6 +; CHECK-NEXT: xori r7, r8, 1 +; CHECK-NEXT: ld r3, .LC1@toc@l(r3) +; CHECK-NEXT: 
xori r6, r6, 1 +; CHECK-NEXT: and r6, r6, r4 +; CHECK-NEXT: and r4, r7, r4 +; CHECK-NEXT: stw r4, 0(r5) +; CHECK-NEXT: std r6, 0(r3) +; CHECK-NEXT: blr +entry: + %cmp = icmp sgt i64 %a, %b + %cmp1 = icmp slt i64 %b, 0 + %0 = and i1 %cmp, %cmp1 + %conv = zext i1 %0 to i32 + store i32 %conv, i32* @gi, align 4 + %conv6 = zext i1 %0 to i64 + store i64 %conv6, i64* @gl, align 8 + ret void +} + +; Function Attrs: norecurse nounwind +define void @_Z20chainedLogicalOps2LSxxb(i64 %a, i64 %b, i1 zeroext %c) { +; CHECK-LABEL: _Z20chainedLogicalOps2LSxxb: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: sradi r7, r4, 63 +; CHECK-NEXT: rldicl r8, r3, 1, 63 +; CHECK-NEXT: addis r6, r2, .LC0@toc@ha +; CHECK-NEXT: clrldi r5, r5, 63 +; CHECK-NEXT: subfc r9, r3, r4 +; CHECK-NEXT: ld r6, .LC0@toc@l(r6) +; CHECK-NEXT: adde r9, r8, r7 +; CHECK-NEXT: subfc r3, r3, r4 +; CHECK-NEXT: rldicl r4, r4, 1, 63 +; CHECK-NEXT: adde r3, r8, r7 +; CHECK-NEXT: addis r7, r2, .LC1@toc@ha +; CHECK-NEXT: xori r8, r9, 1 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: ld r7, .LC1@toc@l(r7) +; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: and r4, r8, r4 +; CHECK-NEXT: or r3, r3, r5 +; CHECK-NEXT: or r4, r4, r5 +; CHECK-NEXT: stw r3, 0(r6) +; CHECK-NEXT: std r4, 0(r7) +; CHECK-NEXT: blr +entry: + %cmp = icmp sgt i64 %a, %b + %cmp1 = icmp slt i64 %b, 0 + %or.cond = and i1 %cmp, %cmp1 + %narrow = or i1 %or.cond, %c + %.phitmp = zext i1 %narrow to i32 + store i32 %.phitmp, i32* @gi, align 4 + %0 = zext i1 %narrow to i64 + store i64 %0, i64* @gl, align 8 + ret void +} + +; Function Attrs: norecurse nounwind +; FIXME: This includes a compare as SDAG puts the truncate into entry rather +; than where it's needed. +define void @_Z20chainedLogicalOps3LSxxxb(i64 %a, i64 %b, i64 %c, i1 zeroext %d) { +; CHECK-LABEL: _Z20chainedLogicalOps3LSxxxb: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: sradi r7, r4, 63 +; CHECK-NEXT: andi. 
r6, r6, 1 +; CHECK-NEXT: rldicl r12, r3, 1, 63 +; CHECK-NEXT: subfc r8, r3, r4 +; CHECK-NEXT: rldicl r6, r4, 1, 63 +; CHECK-NEXT: crmove 20, 1 +; CHECK-NEXT: adde r3, r12, r7 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: and. r3, r3, r6 +; CHECK-NEXT: bc 4, 1, .LBB11_2 +; CHECK-NEXT: # BB#1: # %land.rhs +; CHECK-NEXT: cmpd r4, r5 +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: li r12, 1 +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: crandc 20, 2, 20 +; CHECK-NEXT: isel r4, 0, r12, 20 +; CHECK-NEXT: stw r4, 0(r3) +; CHECK-NEXT: isel r3, 0, r5, 20 +; CHECK-NEXT: b .LBB11_3 +; CHECK-NEXT: .LBB11_2: # %land.end11.critedge +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: stw r4, 0(r3) +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: .LBB11_3: # %land.end11 +; CHECK-NEXT: addis r4, r2, .LC1@toc@ha +; CHECK-NEXT: ld r4, .LC1@toc@l(r4) +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp sgt i64 %a, %b + %cmp1 = icmp slt i64 %b, 0 + %or.cond = and i1 %cmp, %cmp1 + br i1 %or.cond, label %land.rhs, label %land.end11.critedge + +land.rhs: ; preds = %entry + %cmp2 = icmp ne i64 %b, %c + %0 = or i1 %cmp2, %d + %phitmp = zext i1 %0 to i32 + store i32 %phitmp, i32* @gi, align 4 + %phitmp22 = zext i1 %0 to i64 + br label %land.end11 + +land.end11.critedge: ; preds = %entry + store i32 0, i32* @gi, align 4 + br label %land.end11 + +land.end11: ; preds = %land.end11.critedge, %land.rhs + %1 = phi i64 [ %phitmp22, %land.rhs ], [ 0, %land.end11.critedge ] + store i64 %1, i64* @gl, align 8 + ret void +} Index: test/CodeGen/PowerPC/no-pref-jumps.ll =================================================================== --- test/CodeGen/PowerPC/no-pref-jumps.ll +++ test/CodeGen/PowerPC/no-pref-jumps.ll @@ -11,9 +11,11 @@ br i1 %or.cond, label %if.then, label %if.else ; CHECK-LABEL: @foo -; CHECK: cmpwi -; CHECK: cmpwi -; CHECK: cror +; CHECK: subf +; CHECK: subf +; CHECK: rldicl +; 
CHECK: rldicl +; CHECK: or. ; CHECK: blr if.then: ; preds = %entry Index: test/CodeGen/PowerPC/ppc-crbits-onoff.ll =================================================================== --- test/CodeGen/PowerPC/ppc-crbits-onoff.ll +++ test/CodeGen/PowerPC/ppc-crbits-onoff.ll @@ -1,5 +1,4 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -12,14 +11,14 @@ %and = zext i1 %and3 to i32 ret i32 %and -; CHECK-ALL-LABEL: @crbitsoff -; CHECK-ALL-DAG: cntlzw [[CNTv1:[0-9]+]], 3 -; CHECK-ALL-DAG: cntlzw [[CNTv2:[0-9]+]], 4 -; CHECK-ALL-DAG: srwi [[SH:[0-9]+]], [[CNTv1]], 5 -; CHECK-ALL-DAG: xori [[FLIP:[0-9]+]], [[SH]], 1 -; CHECK-ALL-DAG: rlwinm [[SH2:[0-9]+]], [[CNTv2]], 27, 5, 31 -; CHECK-ALL: and 3, [[FLIP]], [[SH2]] -; CHECK-ALL: blr +; CHECK-LABEL: @crbitsoff +; CHECK-DAG: cntlzw [[CNTv1:[0-9]+]], 3 +; CHECK-DAG: cntlzw [[CNTv2:[0-9]+]], 4 +; CHECK-DAG: srwi [[SH:[0-9]+]], [[CNTv1]], 5 +; CHECK-DAG: xori [[FLIP:[0-9]+]], [[SH]], 1 +; CHECK-DAG: rlwinm [[SH2:[0-9]+]], [[CNTv2]], 27, 5, 31 +; CHECK: and 3, [[FLIP]], [[SH2]] +; CHECK-NEXT: blr } define signext i32 @crbitson(i32 signext %v1, i32 signext %v2) #1 { @@ -32,17 +31,13 @@ ; CHECK-LABEL: @crbitson ; CHECK-NO-ISEL-LABEL: @crbitson -; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 0 -; CHECK-DAG: cmpwi {{[0-9]+}}, 4, 0 -; CHECK-DAG: li [[REG2:[0-9]+]], 1 -; CHECK-DAG: crorc [[REG3:[0-9]+]], -; CHECK: isel 3, 0, [[REG2]], [[REG3]] -; CHECK-NO-ISEL: bc 12, 20, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL: [[TRUE]] -; CHECK-NO-ISEL-NEXT: addi 3, 0, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK: blr +; CHECK-DAG: cntlzw [[REG1:[0-9]+]], 3 +; CHECK-DAG: cntlzw [[REG2:[0-9]+]], 4 +; CHECK: srwi [[REG3:[0-9]+]], [[REG1]], 5 +; CHECK: srwi [[REG4:[0-9]+]], [[REG2]], 5 +; CHECK: xori [[REG5:[0-9]+]], [[REG3]], 1 
+; CHECK: and 3, [[REG5]], [[REG4]] +; CHECK-NEXT: blr }