Index: lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp =================================================================== --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -276,6 +276,7 @@ bool tryEXTEND(SDNode *N); bool trySELECT_CC(SDNode *N); bool tryLogicOpOfCompares(SDNode *N); + SDValue getLogicalOpInGPR(SDValue LogicOp, bool KeepInGPR); SDValue signExtendInputIfNeeded(SDValue Input); SDValue zeroExtendInputIfNeeded(SDValue Input); SDValue addExtOrTrunc(SDValue NatWidthRes, bool From32Bit, @@ -2725,49 +2726,116 @@ return true; } -// Logical operatnions on comparison results end up being lowered to CR-logical -// instructions which tend to have a high issue-to-issue latency. -bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) { - if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) - return false; - SDLoc dl(N); - if (N->getValueType(0) != MVT::i1) - return false; - assert(N->getOperand(0).getOpcode() == ISD::SETCC && - N->getOperand(1).getOpcode() == ISD::SETCC && - (N->getOpcode() == ISD::AND || N->getOpcode() == ISD::OR || - N->getOpcode() == ISD::XOR) && - "Expected a logical operation on setcc results."); +static bool isLogicOp(unsigned Opc) { + return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; +} - SDValue SCC1InGPR = getSETCCInGPR(N->getOperand(0), false); - SDValue SCC2InGPR = getSETCCInGPR(N->getOperand(1), false); - if (!SCC1InGPR || !SCC2InGPR) - return false; +// Lower a logical operation on i1 values into a GPR sequence if possible. +// The result can be kept in a GPR if requested. +// Three types of inputs can be handled: +// - SETCC +// - TRUNCATE +// - Logical operation (AND/OR/XOR) +// There is also a special case that is handled (namely a complement operation +// achieved with xor %a, -1). 
+SDValue PPCDAGToDAGISel::getLogicalOpInGPR(SDValue LogicOp, bool KeepInGPR) { + unsigned Op1Opc = LogicOp.getOperand(0).getOpcode(); + unsigned Op2Opc = LogicOp.getOperand(1).getOpcode(); + SDLoc dl(LogicOp); + SDValue Op1, Op2; + + // Special case: xor %a, -1 + ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(LogicOp.getOperand(1)); + bool IsNot = InputConst && InputConst->isAllOnesValue() && + LogicOp.getOpcode() == ISD::XOR; + + auto getLogicOperand = [&] (SDValue Operand) -> SDValue { + unsigned OperandOpcode = Operand.getOpcode(); + if (OperandOpcode == ISD::SETCC) + return getSETCCInGPR(Operand, false); + else if (OperandOpcode == ISD::TRUNCATE) { + SDValue InputOp = Operand.getOperand(0); + EVT InVT = InputOp.getValueType(); + // If this is a (xor (trunc to i1), -1), no need to truncate twice. + if (IsNot && !KeepInGPR) + return InVT == MVT::i64 ? InputOp : addExtOrTrunc(InputOp, true, false); + return + SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : + PPC::RLDICL, dl, InVT, InputOp, + getI64Imm(0, dl), getI64Imm(63, dl)), 0); + } else if (isLogicOp(OperandOpcode)) + return getLogicalOpInGPR(Operand, true); + return SDValue(); + }; + Op1 = getLogicOperand(LogicOp.getOperand(0)); + Op2 = getLogicOperand(LogicOp.getOperand(1)); - EVT VT1 = SCC1InGPR.getValueType(); - EVT VT2 = SCC2InGPR.getValueType(); - unsigned Opc; - switch (N->getOpcode()) { + if (!Op1 || (!Op2 && !IsNot)) + return SDValue(); + + NumLogicOpsOnComparison++; + + if (Op1.getValueType() == MVT::i32) + Op1 = addExtOrTrunc(Op1, true, false); + if (!IsNot && Op2.getValueType() == MVT::i32) + Op2 = addExtOrTrunc(Op2, true, false); + + unsigned NewOpc; + switch (LogicOp.getOpcode()) { default: llvm_unreachable("Unknown logical operation."); - case ISD::AND: Opc = PPC::AND8o; break; - case ISD::OR: Opc = PPC::OR8o; break; - case ISD::XOR: Opc = PPC::XOR8o; break; + case ISD::AND: NewOpc = KeepInGPR ? PPC::AND8 : PPC::AND8o; break; + case ISD::OR: NewOpc = KeepInGPR ? 
PPC::OR8 : PPC::OR8o; break; + case ISD::XOR: NewOpc = KeepInGPR ? PPC::XOR8 : PPC::XOR8o; break; } - if (VT1 == MVT::i32) - SCC1InGPR = addExtOrTrunc(SCC1InGPR, true, false); - if (VT2 == MVT::i32) - SCC2InGPR = addExtOrTrunc(SCC2InGPR, true, false); - SDValue LogicOp = - SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i64, MVT::Glue, SCC1InGPR, - SCC2InGPR), 0); SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); + // We can negate an i1 value (in a GPR) by clearing the top 63 bits and using + // the EQ bit. If we had a zero in the least significant bit, the result is + // zero and the EQ bit is set. Otherwise, it isn't set. + if (IsNot) { + if (!KeepInGPR) { + SDValue WideOp = + SDValue(CurDAG->getMachineNode(PPC::RLDICLo, dl, MVT::i64, MVT::Glue, + Op1, getI64Imm(0, dl), + getI64Imm(63, dl)), 0); + SDValue SRIdxVal = + CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32); + return SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + MVT::i1, CR0Reg, SRIdxVal, + WideOp.getValue(1)), 0); + } + Op2 = getI64Imm(1, dl); + NewOpc = PPC::XORI8; + } + + SDValue WideOp = KeepInGPR ? 
+ SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, Op1, Op2), 0) : + SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, MVT::Glue, Op1, + Op2), 0); + if (KeepInGPR) { + return WideOp; + } + SDValue SRIdxVal = CurDAG->getTargetConstant(PPC::sub_gt, dl, MVT::i32); - CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, - SRIdxVal, LogicOp.getValue(1)); - NumLogicOpsOnComparison++; + return SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + MVT::i1, CR0Reg, SRIdxVal, + WideOp.getValue(1)), 0); +} + +bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) { + if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + SDLoc dl(N); + if (N->getValueType(0) != MVT::i1) + return false; + assert(isLogicOp(N->getOpcode()) && + "Expected a logical operation on setcc results."); + SDValue LoweredLogical = getLogicalOpInGPR(SDValue(N, 0), false); + if (!LoweredLogical) + return false; + ReplaceNode(N, LoweredLogical.getNode()); return true; } @@ -3579,9 +3647,7 @@ } case ISD::AND: { - // Handle (and (setcc %a, %b, CC), (setcc %c, %d, CC)) in a GPR if possible. - if (N->getOperand(0).getOpcode() == ISD::SETCC && - N->getOperand(1).getOpcode() == ISD::SETCC && tryLogicOpOfCompares(N)) + if (tryLogicOpOfCompares(N)) return; unsigned Imm, Imm2, SH, MB, ME; @@ -3703,9 +3769,7 @@ if (tryBitfieldInsert(N)) return; - // Handle (or (setcc %a, %b, CC), (setcc %c, %d, CC)) in a GPR if possible. - if (N->getOperand(0).getOpcode() == ISD::SETCC && - N->getOperand(1).getOpcode() == ISD::SETCC && tryLogicOpOfCompares(N)) + if (tryLogicOpOfCompares(N)) return; short Imm; @@ -3726,9 +3790,7 @@ break; } case ISD::XOR: { - // Handle (xor (setcc %a, %b, CC), (setcc %c, %d, CC)) in a GPR if possible. 
- if (N->getOperand(0).getOpcode() == ISD::SETCC && - N->getOperand(1).getOpcode() == ISD::SETCC && tryLogicOpOfCompares(N)) + if (tryLogicOpOfCompares(N)) return; } case ISD::ADD: { Index: test/CodeGen/PowerPC/chained-i1-logicals-with-complement.ll =================================================================== --- test/CodeGen/PowerPC/chained-i1-logicals-with-complement.ll +++ test/CodeGen/PowerPC/chained-i1-logicals-with-complement.ll @@ -0,0 +1,57 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; Function Attrs: nounwind +define signext i32 @test(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: @test +entry: + %cmp = icmp slt i32 %a, %b +; CHECK: cmpw +; CHECK: bge {{cr[0-9]+}}, [[IFEND:.LBB0_[0-9]+]] + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %cmp1 = icmp ne i32 %a, 0 + %conv = zext i1 %cmp1 to i32 + %cmp2 = icmp eq i32 %b, 0 + %conv3 = zext i1 %cmp2 to i32 + %call = tail call signext i32 @foo(i32 signext %conv, i32 signext %conv3) +; CHECK: cntlzw r3, +; CHECK: cntlzw r4, +; CHECK: srwi r3, r3, 5 +; CHECK: srwi r4, r4, 5 +; CHECK: xori r3, r3, 1 + br label %return + +if.end: ; preds = %entry +; CHECK: [[IFEND]] +; CHECK: xor [[EQ:r[0-9]+]] +; CHECK-DAG: cntlzw [[CTZ:r[0-9]+]], [[EQ]] +; CHECK-DAG: srwi [[SR:r[0-9]+]], [[CTZ]], 5 +; CHECK-DAG: neg [[SGZ:r[0-9]+]] +; CHECK-DAG: rldicl [[SRD:r[0-9]+]], [[SGZ]], 1, 63 +; CHECK-DAG: xori [[FLIP:r[0-9]+]], [[SRD]], 1 +; CHECK: and. 
{{r[0-9]+}}, [[SRD]], [[SR]] +; CHECK: bl foo + %cmp4 = icmp sgt i32 %b, 0 + %cmp6 = icmp eq i32 %a, %b + %narrow = and i1 %cmp4, %cmp6 + %0 = zext i1 %narrow to i32 + %call8 = tail call signext i32 @foo(i32 signext %a, i32 signext %0) + %lnot = xor i1 %cmp4, true + %narrow24 = or i1 %cmp6, %lnot +; CHECK: or. {{r[0-9]+}}, [[SR]], [[FLIP]] + %1 = zext i1 %narrow24 to i32 + %call13 = tail call signext i32 @foo(i32 signext %b, i32 signext %1) + %add = add nsw i32 %call13, %call8 + br label %return + +return: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ %add, %if.end ] + ret i32 %retval.0 +} + +declare signext i32 @foo(i32 signext, i32 signext) Index: test/CodeGen/PowerPC/chained-i1-logicals.ll =================================================================== --- test/CodeGen/PowerPC/chained-i1-logicals.ll +++ test/CodeGen/PowerPC/chained-i1-logicals.ll @@ -0,0 +1,157 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; Function Attrs: norecurse nounwind readnone +define zeroext i1 @_Z15combineLogicalsii(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: _Z15combineLogicalsii: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r5, r3, r4 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: srwi r4, r4, 31 +; CHECK-NEXT: cntlzw r5, r5 +; CHECK-NEXT: rldicl r3, r3, 1, 63 +; CHECK-NEXT: srwi r5, r5, 5 +; CHECK-NEXT: xori r5, r5, 1 +; CHECK-NEXT: and r3, r3, r5 +; CHECK-NEXT: and. 
r3, r4, r3 +; CHECK-NEXT: li r4, 1 +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: isel r3, r4, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, %b + %cmp1 = icmp sgt i32 %a, 0 + %or.cond = and i1 %cmp1, %cmp + %cmp3 = icmp slt i32 %b, 0 + %or.cond4 = and i1 %cmp3, %or.cond + ret i1 %or.cond4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i1 @_Z22combineLogicalAndTrunciib(i32 signext %a, i32 signext %b, i1 zeroext %c) { +; CHECK-LABEL: _Z22combineLogicalAndTrunciib: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r4, r3, r4 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: clrldi r12, r5, 63 +; CHECK-NEXT: cntlzw r4, r4 +; CHECK-NEXT: rldicl r3, r3, 1, 63 +; CHECK-NEXT: srwi r4, r4, 5 +; CHECK-NEXT: xori r4, r4, 1 +; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: li r4, 1 +; CHECK-NEXT: and. r3, r3, r12 +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: isel r3, r4, r3, 1 +; CHECK-NEXT: blr +entry: + %notlhs4 = icmp sgt i32 %a, 0 + %notrhs5 = icmp ne i32 %a, %b + %or.cond.not.not = and i1 %notlhs4, %notrhs5 + %not.brmerge = and i1 %or.cond.not.not, %c + ret i1 %not.brmerge +} + +define zeroext i1 @_Z23combineLogicalAndTrunc2iib(i32 signext %a, i32 signext %b, i1 zeroext %c) { +; CHECK-LABEL: _Z23combineLogicalAndTrunc2iib: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: .Lcfi0: +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .Lcfi1: +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: xor r6, r3, r4 +; CHECK-NEXT: nor r4, r4, r4 +; CHECK-NEXT: clrldi r5, r5, 63 +; CHECK-NEXT: clrldi r12, r3, 63 +; CHECK-NEXT: cntlzw r6, r6 +; CHECK-NEXT: srwi r4, r4, 31 +; CHECK-NEXT: xori r5, r5, 1 +; CHECK-NEXT: srwi r6, r6, 5 +; CHECK-NEXT: or r4, r4, r6 +; CHECK-NEXT: or r4, r4, r5 +; CHECK-NEXT: and. 
r4, r12, r4 +; CHECK-NEXT: li r4, 5 +; CHECK-NEXT: isel r3, r4, r3, 1 +; CHECK-NEXT: bl _Z3fooi +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %notlhs = icmp eq i32 %a, %b + %notrhs = icmp sgt i32 %b, -1 + %or.cond.not = or i1 %notrhs, %notlhs + %c.not = xor i1 %c, true + %brmerge = or i1 %or.cond.not, %c.not + %trunc = trunc i32 %a to i1 + %and = and i1 %trunc, %brmerge + %.sink = select i1 %and, i32 5, i32 %a + %call3 = tail call zeroext i1 @_Z3fooi(i32 signext %.sink) + ret i1 %call3 +} + + +define signext i32 @_Z31combineLogicalAndTruncNotInGPR2iib() { +; CHECK-LABEL: _Z31combineLogicalAndTruncNotInGPR2iib: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: mfocrf r12, 32 +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stw r12, 8(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: .Lcfi2: +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .Lcfi3: +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .Lcfi4: +; CHECK-NEXT: .cfi_offset cr2, 8 +; CHECK-NEXT: crxor 9, 9, 9 +; CHECK-NEXT: # implicit-def: %CR2LT +; CHECK-NEXT: b .LBB3_2 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB3_1: # %land.lhs.true +; CHECK-NEXT: +; CHECK-NEXT: bl fn1 +; CHECK-NEXT: nop +; CHECK-NEXT: rldicl. r3, r3, 0, 63 +; CHECK-NEXT: crmove 8, 2 +; CHECK-NEXT: .LBB3_2: # %for.cond +; CHECK-NEXT: +; CHECK-NEXT: bl fn3 +; CHECK-NEXT: nop +; CHECK-NEXT: cmpwi cr0, r3, 0 +; CHECK-NEXT: crorc 20, 8, 2 +; CHECK-NEXT: crmove 8, 9 +; CHECK-NEXT: bc 4, 20, .LBB3_2 +; CHECK-NEXT: b .LBB3_1 +entry: + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %entry + %a.0.off0 = phi i1 [ undef, %entry ], [ %a.0.off0.be, %for.cond.backedge ] + %call = tail call signext i32 bitcast (i32 (...)* @fn3 to i32 ()*)() + %not.tobool = icmp ne i32 %call, 0 + %a.0.off0. 
= or i1 %a.0.off0, %not.tobool + br i1 %a.0.off0., label %land.lhs.true, label %for.cond.backedge + +for.cond.backedge: ; preds = %for.cond, %land.lhs.true + %a.0.off0.be = phi i1 [ false, %for.cond ], [ %not.call2, %land.lhs.true ] + br label %for.cond + +land.lhs.true: ; preds = %for.cond + %call2 = tail call zeroext i1 bitcast (i1 (...)* @fn1 to i1 ()*)() + %not.call2 = xor i1 %call2, true + br label %for.cond.backedge +} + +declare signext i32 @fn3(...) +declare zeroext i1 @fn1(...) +declare zeroext i1 @_Z3fooi(i32 signext)