Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -75,8 +75,6 @@ "Number of (zext(setcc)) nodes expanded into GPR sequence."); STATISTIC(SignExtensionsAdded, "Number of sign extensions for compare inputs added."); -STATISTIC(ZeroExtensionsAdded, - "Number of zero extensions for compare inputs added."); STATISTIC(NumLogicOpsOnComparison, "Number of logical ops on i1 values calculated in GPR."); STATISTIC(OmittedForNonExtendUses, @@ -298,7 +296,6 @@ bool tryLogicOpOfCompares(SDNode *N); SDValue computeLogicOpInGPR(SDValue LogicOp); SDValue signExtendInputIfNeeded(SDValue Input); - SDValue zeroExtendInputIfNeeded(SDValue Input); SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, ZeroCompare CmpTy); @@ -2560,15 +2557,15 @@ return false; SDLoc dl(N); - bool Inputs32Bit = N->getOperand(0).getOperand(0).getValueType() == MVT::i32; + bool Input32Bit = WideRes.getValueType() == MVT::i32; bool Output32Bit = N->getValueType(0) == MVT::i32; NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; SDValue ConvOp = WideRes; - if (Inputs32Bit != Output32Bit) - ConvOp = addExtOrTrunc(WideRes, Inputs32Bit ? ExtOrTruncConversion::Ext : + if (Input32Bit != Output32Bit) + ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext : ExtOrTruncConversion::Trunc); ReplaceNode(N, ConvOp.getNode()); @@ -2729,6 +2726,7 @@ } /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. +/// Otherwise just reinterpret it as a 64-bit value. /// Useful when emitting comparison code for 32-bit values without using /// the compare instruction (which only considers the lower 32-bits). 
SDValue PPCDAGToDAGISel::signExtendInputIfNeeded(SDValue Input) { @@ -2741,51 +2739,23 @@ if (Opc == ISD::TRUNCATE && (Input.getOperand(0).getOpcode() == ISD::AssertSext || Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) - return Input; + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); // The input is a sign-extending load. No reason to sign-extend. if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) - return Input; + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); // We don't sign-extend constants and already sign-extended values. if (InputConst || Opc == ISD::AssertSext || Opc == ISD::SIGN_EXTEND_INREG || Opc == ISD::SIGN_EXTEND) - return Input; + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); SDLoc dl(Input); SignExtensionsAdded++; - return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32, dl, MVT::i32, Input), 0); -} - -/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. -/// Useful when emitting comparison code for 32-bit values without using -/// the compare instruction (which only considers the lower 32-bits). -SDValue PPCDAGToDAGISel::zeroExtendInputIfNeeded(SDValue Input) { - assert(Input.getValueType() == MVT::i32 && - "Can only zero-extend 32-bit values here."); - LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); - unsigned Opc = Input.getOpcode(); - - // No need to zero-extend loaded values (unless they're loaded with - // a sign-extending load). - if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) - return Input; - - ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); - bool InputZExtConst = InputConst && InputConst->getSExtValue() >= 0; - // An ISD::TRUNCATE will be lowered to an EXTRACT_SUBREG so we have - // to conservatively actually clear the high bits. We also don't need to - // zero-extend constants or values that are already zero-extended. 
- if (InputZExtConst || Opc == ISD::AssertZext || Opc == ISD::ZERO_EXTEND) - return Input; - - SDLoc dl(Input); - ZeroExtensionsAdded++; - return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32, dl, MVT::i32, Input, - getI64Imm(0, dl), getI64Imm(32, dl)), - 0); + return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl, + MVT::i64, Input), 0); } // Handle a 32-bit value in a 64-bit register and vice-versa. These are of @@ -2834,10 +2804,12 @@ case ZeroCompare::LEZExt: case ZeroCompare::LESExt: { if (Is32Bit) { + // Upper 32 bits cannot be undefined for this sequence. + LHS = signExtendInputIfNeeded(LHS); SDValue Neg = - SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, LHS), 0); + SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); ToExtend = - SDValue(CurDAG->getMachineNode(PPC::RLDICL_32, dl, MVT::i32, + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Neg, getI64Imm(1, dl), getI64Imm(63, dl)), 0); } else { @@ -2873,10 +2845,10 @@ return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend, getI32Imm(31, dl)), 0); case ZeroCompare::LEZExt: - return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, ToExtend, - getI32Imm(1, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend, + getI32Imm(1, dl)), 0); case ZeroCompare::LESExt: - return SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, ToExtend, + return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend, getI32Imm(-1, dl)), 0); } } @@ -2887,8 +2859,6 @@ ISD::CondCode CC, int64_t RHSValue, SDLoc dl) { bool IsRHSZero = RHSValue == 0; - bool IsRHSOne = RHSValue == 1; - bool IsRHSNegOne = RHSValue == -1LL; switch (CC) { default: return SDValue(); case ISD::SETEQ: { @@ -2932,13 +2902,18 @@ // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1) if(IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); + + // The upper 32-bits of the register can't be undefined for this sequence. 
+ LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); SDValue Sub = - SDValue(CurDAG->getMachineNode(PPC::SUBF, dl, MVT::i32, LHS, RHS), 0); + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); SDValue Shift = - SDValue(CurDAG->getMachineNode(PPC::RLDICL_32, dl, MVT::i32, Sub, + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub, getI64Imm(1, dl), getI64Imm(63, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, - MVT::i32, Shift, getI32Imm(1, dl)), 0); + return + SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, + MVT::i64, Shift, getI32Imm(1, dl)), 0); } } } @@ -2949,8 +2924,6 @@ ISD::CondCode CC, int64_t RHSValue, SDLoc dl) { bool IsRHSZero = RHSValue == 0; - bool IsRHSOne = RHSValue == 1; - bool IsRHSNegOne = RHSValue == -1LL; switch (CC) { default: return SDValue(); case ISD::SETEQ: { @@ -2962,11 +2935,11 @@ SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); SDValue Cntlzw = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); - SDValue SHLOps[] = { Cntlzw, getI32Imm(58, dl), getI32Imm(0, dl) }; - SDValue Sldi = - SDValue(CurDAG->getMachineNode(PPC::RLDICR_32, dl, MVT::i32, SHLOps), 0); - return SDValue(CurDAG->getMachineNode(PPC::SRADI_32, dl, MVT::i32, Sldi, - getI32Imm(63, dl)), 0); + SDValue SHLOps[] = { Cntlzw, getI32Imm(27, dl), + getI32Imm(5, dl), getI32Imm(31, dl) }; + SDValue Slwi = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0); } case ISD::SETNE: { // Bitwise xor the operands, count leading zeros, shift right by 5 bits and @@ -3004,14 +2977,18 @@ // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); + + // The upper 32-bits of the register can't be undefined for this sequence. 
+ LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); SDValue SUBFNode = - SDValue(CurDAG->getMachineNode(PPC::SUBF, dl, MVT::i32, MVT::Glue, + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue, LHS, RHS), 0); SDValue Srdi = - SDValue(CurDAG->getMachineNode(PPC::RLDICL_32, dl, MVT::i32, + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SUBFNode, getI64Imm(1, dl), getI64Imm(63, dl)), 0); - return SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Srdi, + return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi, getI32Imm(-1, dl)), 0); } } @@ -3144,13 +3121,6 @@ CC = ISD::getSetCCInverse(CC, true); bool Inputs32Bit = InputVT == MVT::i32; - if (ISD::isSignedIntSetCC(CC) && Inputs32Bit) { - LHS = signExtendInputIfNeeded(LHS); - RHS = signExtendInputIfNeeded(RHS); - } else if (ISD::isUnsignedIntSetCC(CC) && Inputs32Bit) { - LHS = zeroExtendInputIfNeeded(LHS); - RHS = zeroExtendInputIfNeeded(RHS); - } SDLoc dl(Compare); ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); Index: test/CodeGen/PowerPC/CompareEliminationSpillIssue.ll =================================================================== --- test/CodeGen/PowerPC/CompareEliminationSpillIssue.ll +++ test/CodeGen/PowerPC/CompareEliminationSpillIssue.ll @@ -0,0 +1,56 @@ +; The purpose of the test case is to ensure that a spill that happens during +; intermediate calculations for a comparison performed in a GPR spills the +; full register. Some i32 comparisons performed in GPRs use code that uses +; the full 64-bits of the register in intermediate stages. Spilling such a value +; as a 32-bit value is incorrect. 
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +@glob = common local_unnamed_addr global i64 0, align 8 +@.str = private unnamed_addr constant [12 x i8] c"Value = %d\0A\00", align 1 + +; Function Attrs: noinline nounwind +define void @call(i64 %a) local_unnamed_addr #0 { +entry: + store i64 %a, i64* @glob, align 8 + tail call void asm sideeffect "#Do Nothing", "~{memory}"() + ret void +} + +; Function Attrs: noinline nounwind +define signext i32 @test(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr #0 { +entry: + %add = add nsw i32 %b, %a + %sub = sub nsw i32 %add, %c + %conv = sext i32 %sub to i64 + tail call void @call(i64 %conv) + tail call void asm sideeffect "#Do Nothing", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() + %cmp = icmp sle i32 %add, %c + %conv1 = zext i1 %cmp to i32 + ret i32 %conv1 +; CHECK-LABEL: test +; CHECK: subf r3, +; CHECK: extsw r3, +; CHECK: bl call +; CHECK: sub r3, +; CHECK: rldicl r3, r3, 1, 63 +; CHECK: std r3, [[OFF:[0-9]+]](r1) +; CHECK: #APP +; CHECK: ld r3, [[OFF]](r1) +; CHECK: xori r3, r3, 1 +; CHECK: blr +} + +; Function Attrs: nounwind +define signext i32 @main() local_unnamed_addr #1 { +entry: + %call = tail call signext i32 @test(i32 signext 10, i32 signext -15, i32 signext 0) + %call1 = tail call signext i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0), i32 signext %call) + ret i32 0 +} + +; Function Attrs: nounwind +declare signext i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr #2