Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h @@ -149,6 +149,10 @@ /// For vector types, only the last n bits are used. See vsld. SRL, SRA, SHL, + /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign + /// word and shift left immediate. + EXTSWSLI, + /// The combination of sra[wd]i and addze used to implemented signed /// integer division by a power of 2. The first operand is the dividend, /// and the second is the constant shift amount (representing the Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1351,6 +1351,7 @@ case PPCISD::QBFLT: return "PPCISD::QBFLT"; case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; + case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; } return nullptr; } @@ -14102,7 +14103,30 @@ if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) return Value; - return SDValue(); + SDValue N0 = N->getOperand(0); + ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!Subtarget.isISA3_0() || + N0.getOpcode() != ISD::SIGN_EXTEND || + N0.getOperand(0).getValueType() != MVT::i32 || + CN1 == nullptr) + return SDValue(); + + // We can't save an operation here if the value is already extended, and + // the existing shift is easier to combine. + SDValue ExtsSrc = N0.getOperand(0); + if (ExtsSrc.getOpcode() == ISD::TRUNCATE && + ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext) + return SDValue(); + + SDLoc DL(N0); + SDValue ShiftBy = SDValue(CN1, 0); + // We want the shift amount to be i32 on the extswsli, but the shift could + // have an i64. 
+ if (ShiftBy.getValueType() == MVT::i64) + ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32); + + return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0), + ShiftBy); } SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const { Index: llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td +++ llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td @@ -717,9 +717,10 @@ "sradi", "$rA, $rS, $SH", IIC_IntRotateDI, [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; -defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), +defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH), "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI, - []>, isPPC64; + [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>, + isPPC64, Requires<[IsISA3_0]>; // For fast-isel: let isCodeGenOnly = 1, Defs = [CARRY] in Index: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td @@ -114,6 +114,10 @@ SDTCisVec<0>, SDTCisPtrTy<1> ]>; +def SDT_PPCextswsli : SDTypeProfile<1, 2, [ // extswsli + SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2> +]>; + //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. 
// @@ -218,6 +222,8 @@ def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>; def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>; +def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>; + // Move 2 i64 values into a VSX register def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128", SDTypeProfile<1, 2, Index: llvm/trunk/llvm/test/CodeGen/PowerPC/extswsli.ll =================================================================== --- llvm/trunk/llvm/test/CodeGen/PowerPC/extswsli.ll +++ llvm/trunk/llvm/test/CodeGen/PowerPC/extswsli.ll @@ -0,0 +1,17 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck %s + +@z = external local_unnamed_addr global i32*, align 8 + +; Function Attrs: norecurse nounwind readonly +define signext i32 @_Z2tcii(i32 signext %x, i32 signext %y) local_unnamed_addr #0 { +entry: + %0 = load i32*, i32** @z, align 8 + %add = add nsw i32 %y, %x + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32, i32* %0, i64 %idxprom + %1 = load i32, i32* %arrayidx, align 4 + ret i32 %1 +; CHECK-LABEL: @_Z2tcii +; CHECK: extswsli {{r[0-9]+}}, {{r[0-9]+}}, 2 +} Index: llvm/trunk/test/CodeGen/PowerPC/extswsli.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/extswsli.ll +++ llvm/trunk/test/CodeGen/PowerPC/extswsli.ll @@ -0,0 +1,17 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck %s + +@z = external local_unnamed_addr global i32*, align 8 + +; Function Attrs: norecurse nounwind readonly +define signext i32 @_Z2tcii(i32 signext %x, i32 signext %y) local_unnamed_addr #0 { +entry: + %0 = load i32*, i32** @z, align 8 + %add = add nsw i32 %y, %x + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32, i32* %0, i64 %idxprom + %1 = load i32, i32* %arrayidx, align 4 + ret i32 %1 +; CHECK-LABEL: @_Z2tcii +; CHECK: 
extswsli {{r[0-9]+}}, {{r[0-9]+}}, 2 +}