Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -149,6 +149,10 @@
       /// For vector types, only the last n bits are used. See vsld.
       SRL, SRA, SHL,
 
+      /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
+      /// word and shift left immediate.
+      EXTSWSLI,
+
       /// The combination of sra[wd]i and addze used to implemented signed
       /// integer division by a power of 2. The first operand is the dividend,
       /// and the second is the constant shift amount (representing the
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1353,6 +1353,7 @@
   case PPCISD::QBFLT:           return "PPCISD::QBFLT";
   case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
+  case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
   }
   return nullptr;
 }
@@ -14104,6 +14105,31 @@
   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
     return Value;
 
+  // On ISA 3.0, fold (shl (sext i32 X), C) into a single extswsli. The node
+  // built below is always i64, so only i64 shifts by a scalar constant
+  // qualify.
+  SDValue N0 = N->getOperand(0);
+  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (Subtarget.isISA3_0() && CN1 && N->getValueType(0) == MVT::i64 &&
+      N0.getOpcode() == ISD::SIGN_EXTEND &&
+      N0.getOperand(0).getValueType() == MVT::i32) {
+    // We can't save an operation here if the value is already extended, and
+    // the existing shift is easier to combine.
+    SDValue ExtsSrc = N0.getOperand(0);
+    if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
+        ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
+      return SDValue();
+
+    SDLoc DL(N0);
+    SDValue ShiftBy = SDValue(CN1, 0);
+    // We want the shift amount to be i32 on the extswsli, but the shift could
+    // have an i64.
+    if (ShiftBy.getValueType() == MVT::i64)
+      ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
+
+    return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, ExtsSrc, ShiftBy);
+  }
+
   return SDValue();
 }
 
Index: llvm/lib/Target/PowerPC/PPCInstr64Bit.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -717,9 +717,10 @@
                          "sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
                          [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
 
-defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH),
                           "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
-                          []>, isPPC64;
+                          [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
+                          isPPC64, Requires<[IsISA3_0]>;
 
 // For fast-isel:
 let isCodeGenOnly = 1, Defs = [CARRY] in
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -114,6 +114,10 @@
   SDTCisVec<0>, SDTCisPtrTy<1>
 ]>;
 
+def SDT_PPCextswsli : SDTypeProfile<1, 2, [  // extswsli
+  SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2>
+]>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC specific DAG Nodes.
 //
@@ -218,6 +222,8 @@
 def PPCsra        : SDNode<"PPCISD::SRA"       , SDTIntShiftOp>;
 def PPCshl        : SDNode<"PPCISD::SHL"       , SDTIntShiftOp>;
 
+def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>;
+
 // Move 2 i64 values into a VSX register
 def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
                            SDTypeProfile<1, 2,
Index: llvm/test/CodeGen/PowerPC/extswsli.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/extswsli.ll
@@ -0,0 +1,17 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck %s
+
+@z = external local_unnamed_addr global i32*, align 8
+
+; Function Attrs: norecurse nounwind readonly
+define signext i32 @_Z2tcii(i32 signext %x, i32 signext %y) local_unnamed_addr #0 {
+entry:
+  %0 = load i32*, i32** @z, align 8
+  %add = add nsw i32 %y, %x
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %0, i64 %idxprom
+  %1 = load i32, i32* %arrayidx, align 4
+  ret i32 %1
+; CHECK-LABEL: @_Z2tcii
+; CHECK: extswsli {{r[0-9]+}}, {{r[0-9]+}}, 2
+}