Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
@@ -1092,6 +1092,7 @@
     SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
 
     /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
     /// SETCC with integer subtraction when (1) there is a legal way of doing it
Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1055,6 +1055,7 @@
   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
 
   // We have target-specific dag combine patterns for the following nodes:
+  setTargetDAGCombine(ISD::ADD);
   setTargetDAGCombine(ISD::SHL);
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
@@ -12470,6 +12471,8 @@
   SDLoc dl(N);
   switch (N->getOpcode()) {
   default: break;
+  case ISD::ADD:
+    return combineADD(N, DCI);
   case ISD::SHL:
     return combineSHL(N, DCI);
   case ISD::SRA:
@@ -14176,6 +14179,95 @@
   return SDValue();
 }
 
+// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
+// Transform (add X, (zext(seteq Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
+// When C is zero, the equation (addi Z, -C) can be simplified to Z
+// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
+//
+// Returns the replacement ADDE node, or an empty SDValue when N does not match
+// (not PPC64, no single-use zext-of-setcc operand, constant out of range, or
+// a condition code other than SETNE/SETEQ).
+static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
+                                 const PPCSubtarget &Subtarget) {
+  if (!Subtarget.isPPC64())
+    return SDValue();
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  // Matches a single-use (zext i64 (setcc i64 Z, C)) whose constant C can be
+  // folded into an addi immediate after negation.
+  auto isZextOfCompareWithConstant = [](SDValue Op) {
+    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
+        Op.getValueType() != MVT::i64)
+      return false;
+
+    SDValue Cmp = Op.getOperand(0);
+    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
+        Cmp.getOperand(0).getValueType() != MVT::i64)
+      return false;
+
+    auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
+    if (!Constant)
+      return false;
+
+    // Due to the limitations of the addi instruction, -C is required to be
+    // in [-32768, 32767], i.e. C in [-32767, 32768]. Test C itself instead of
+    // 0 - C: negating INT64_MIN is signed overflow (undefined behavior).
+    int64_t C = Constant->getSExtValue();
+    return C >= -32767 && C <= 32768;
+  };
+
+  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
+  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
+
+  // If there is a pattern, canonicalize a zext operand to the RHS.
+  if (LHSHasPattern && !RHSHasPattern)
+    std::swap(LHS, RHS);
+  else if (!LHSHasPattern && !RHSHasPattern)
+    return SDValue();
+
+  SDValue Cmp = RHS.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+  if (CC != ISD::SETNE && CC != ISD::SETEQ)
+    return SDValue();
+
+  SDLoc DL(N);
+  // ADDC/SUBC/ADDE each produce (i64 result, carry flag); the flag is Glue.
+  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
+  SDValue Z = Cmp.getOperand(0);
+  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+
+  // RHS passed isZextOfCompareWithConstant, so operand 1 is a ConstantSDNode
+  // with C in [-32767, 32768]; cast<> and the negation below cannot fail.
+  int64_t NegConstant =
+      0 - cast<ConstantSDNode>(Cmp.getOperand(1))->getSExtValue();
+
+  // (addi Z, -C), or plain Z when C == 0 so that no dead ADD node is built.
+  SDValue AddOrZ = Z;
+  if (NegConstant != 0)
+    AddOrZ = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
+                         DAG.getConstant(NegConstant, DL, MVT::i64));
+
+  // setne: (addic T, -1) produces a carry exactly when T != 0.
+  // seteq: (subfic T, 0) produces a carry exactly when T == 0.
+  // Either way the carry equals the zext'ed compare, and addze adds it to X.
+  SDValue Carry =
+      CC == ISD::SETNE
+          ? DAG.getNode(ISD::ADDC, DL, VTs, AddOrZ,
+                        DAG.getConstant(-1ULL, DL, MVT::i64))
+          : DAG.getNode(ISD::SUBC, DL, VTs, Zero, AddOrZ);
+  return DAG.getNode(ISD::ADDE, DL, VTs, LHS, Zero,
+                     SDValue(Carry.getNode(), 1));
+}
+
+SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
+  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
+    return Value;
+
+  return SDValue();
+}
+
 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
Index: llvm/trunk/test/CodeGen/PowerPC/addze.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/addze.ll
+++ llvm/trunk/test/CodeGen/PowerPC/addze.ll
@@ -0,0 +1,172 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr9 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr9 < %s | FileCheck %s
+
+define i64 @addze1(i64 %X, i64 %Z) {
+; CHECK-LABEL: addze1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addic [[REG1:r[0-9]+]], [[REG1]], -1
+; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
+; CHECK-NEXT: blr
+  %cmp = icmp ne i64 %Z, 0
+  %conv1 = zext i1 %cmp to i64
+  %add = add nsw i64 %conv1, %X
+  ret i64 %add
+}
+
+define i64 @addze2(i64 %X, i64 %Z) {
+; CHECK-LABEL: addze2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subfic [[REG1:r[0-9]+]], [[REG1]], 0
+; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
+; CHECK-NEXT: blr
+  %cmp = icmp eq i64 %Z, 0
+  %conv1 = zext i1 %cmp to i64
+  %add = add nsw i64 %conv1, %X
+  ret i64 %add
+}
+
+define i64 @addze3(i64 %X, i64 %Z) {
+; CHECK-LABEL: addze3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], -32768
+; CHECK-NEXT: addic [[REG1]], [[REG1]], -1
+; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
+; CHECK-NEXT: blr
+  %cmp = icmp ne i64 %Z, 32768
+  %conv1 = zext i1 %cmp to i64
+  %add = add nsw i64 %conv1, %X
+  ret i64 %add
+}
+
+define i64 @addze4(i64 %X, i64 %Z) {
+; CHECK-LABEL: addze4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], -32768
+; CHECK-NEXT: subfic [[REG1]], [[REG1]], 0
+; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
+; CHECK-NEXT: blr
+  %cmp = icmp eq i64 %Z, 32768
+  %conv1 = zext i1 %cmp to i64
+  %add = add nsw i64 %conv1, %X
+  ret i64 %add
+}
+
+define i64 @addze5(i64 %X, i64 %Z) {
+; CHECK-LABEL: addze5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], 32767
+; CHECK-NEXT: addic [[REG1]], [[REG1]], -1
+; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
+; CHECK-NEXT: blr
+  %cmp = icmp ne i64 %Z, -32767
+  %conv1 = zext i1 %cmp to i64
+  %add = add nsw i64 %conv1, %X
+  ret i64 %add
+}
+
+define i64 @addze6(i64 %X, i64 %Z) {
+; CHECK-LABEL: addze6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], 32767
+; CHECK-NEXT: subfic [[REG1]], [[REG1]], 0
+; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
+; CHECK-NEXT: blr
+  %cmp = icmp eq i64 %Z, -32767
+  %conv1 = zext i1 %cmp to i64
+  %add = add nsw i64 %conv1, %X
+  ret i64 %add
+}
+
+; Negative tests (test1-test4): C is -32768 or 32769, so -C does not fit a signed 16-bit immediate; no addze is expected.
+define i64 @test1(i64 %X, i64 %Z) {
+; CHECK-LABEL: test1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li [[REG1:r[0-9]+]], -32768
+; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
+; CHECK-NEXT: addic [[REG1]], [[REG2]], -1
+; CHECK-NEXT: subfe [[REG2]], [[REG1]], [[REG2]]
+; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: blr
+  %cmp = icmp ne i64 %Z, -32768
+  %conv1 = zext i1 %cmp to i64
+  %add = add nsw i64 %conv1, %X
+  ret i64 %add
+}
+
+define i64 @test2(i64 %X, i64 %Z) {
+; CHECK-LABEL: test2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li [[REG1:r[0-9]+]], -32768
+; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
+; CHECK-NEXT: cntlzd [[REG2]], [[REG2]]
+; CHECK-NEXT: rldicl [[REG2]], [[REG2]], 58, 63
+; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: blr
+  %cmp = icmp eq i64 %Z, -32768
+  %conv1 = zext i1 %cmp to i64
+  %add = add nsw i64 %conv1, %X
+  ret i64 %add
+}
+
+define i64 @test3(i64 %X, i64 %Z) {
+; CHECK-LABEL: test3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li [[REG1:r[0-9]+]], 0
+; CHECK-NEXT: ori [[REG1]], [[REG1]], 32769
+; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
+; CHECK-NEXT: addic [[REG1]], [[REG2]], -1
+; CHECK-NEXT: subfe [[REG2]], [[REG1]], [[REG2]]
+; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: blr
+  %cmp = icmp ne i64 %Z, 32769
+  %conv1 = zext i1 %cmp to i64
+  %add = add nsw i64 %conv1, %X
+  ret i64 %add
+}
+
+define i64 @test4(i64 %X, i64 %Z) {
+; CHECK-LABEL: test4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li [[REG1:r[0-9]+]], 0
+; CHECK-NEXT: ori [[REG1]], [[REG1]], 32769
+; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
+; CHECK-NEXT: cntlzd [[REG2]], [[REG2]]
+; CHECK-NEXT: rldicl [[REG2]], [[REG2]], 58, 63
+; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: blr
+  %cmp = icmp eq i64 %Z, 32769
+  %conv1 = zext i1 %cmp to i64
+  %add = add nsw i64 %conv1, %X
+  ret i64 %add
+}
+
+; Negative tests (test5-test6): the compare is between two registers (%Y and %Z), not against a constant; no addze is expected.
+define i64 @test5(i64 %X, i64 %Y, i64 %Z) {
+; CHECK-LABEL: test5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1:r[0-9]+]]
+; CHECK-NEXT: addic [[REG1]], [[REG2]], -1
+; CHECK-NEXT: subfe [[REG2]], [[REG1]], [[REG2]]
+; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: blr
+  %cmp = icmp ne i64 %Y, %Z
+  %conv1 = zext i1 %cmp to i64
+  %add = add nsw i64 %conv1, %X
+  ret i64 %add
+}
+
+define i64 @test6(i64 %X, i64 %Y, i64 %Z) {
+; CHECK-LABEL: test6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1:r[0-9]+]]
+; CHECK-NEXT: cntlzd [[REG2]], [[REG2]]
+; CHECK-NEXT: rldicl [[REG2]], [[REG2]], 58, 63
+; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: blr
+  %cmp = icmp eq i64 %Y, %Z
+  %conv1 = zext i1 %cmp to i64
+  %add = add nsw i64 %conv1, %X
+  ret i64 %add
+}