diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1033,12 +1033,50 @@ return nullptr; } +// Try to materialize the 64-bit immediate Imm using prefixed instructions +// (currently only pli). Returns the SDNode that produces the constant, or +// nullptr when no prefixed form applies. InstCnt is set to the number of +// instructions selected: 1 when a node is returned, 0 when nullptr is +// returned. +static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, + uint64_t Imm, unsigned &InstCnt) { + // The following patterns use 1 instruction to materialize Imm. + InstCnt = 1; + + // The pli instruction can materialize any signed 34-bit value directly. + // It is defined in the TD file and so we just return the constant. + if (isInt<34>(Imm)) + return cast<ConstantSDNode>(CurDAG->getConstant(Imm, dl, MVT::i64)); + + InstCnt = 0; + return nullptr; +} + static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned *InstCnt = nullptr) { unsigned InstCntDirect = 0; // No more than 3 instructions is used if we can select the i64 immediate // directly. SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect); + + const PPCSubtarget &Subtarget = + CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>(); + + if (Subtarget.hasPrefixInstrs()) { + unsigned InstCntDirectP = 0; + SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP); + // Use the prefix result in either of two cases: + // 1) We have no result from the non-prefix case to use. + // 2) The non-prefix case uses more instructions than the prefix case. + // If the prefix and non-prefix cases use the same number of instructions + // we will prefer the non-prefix case.
+ if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) { + if (InstCnt) + *InstCnt = InstCntDirectP; + return ResultP; + } + } + if (Result) { if (InstCnt) *InstCnt = InstCntDirect; @@ -4728,8 +4766,11 @@ case ISD::Constant: if (N->getValueType(0) == MVT::i64) { - ReplaceNode(N, selectI64Imm(CurDAG, N)); - return; + SDNode *ResNode = selectI64Imm(CurDAG, N); + if (!isa<ConstantSDNode>(ResNode)) { + ReplaceNode(N, ResNode); + return; + } } break; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1096,6 +1096,8 @@ break; case PPC::LI: case PPC::LI8: + case PPC::PLI: + case PPC::PLI8: case PPC::LIS: case PPC::LIS8: case PPC::ADDIStocHA: diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -398,6 +398,14 @@ SDLoc(N), MVT::i32); }]>; +def imm34 : PatLeaf<(imm), [{ + return isInt<34>(N->getSExtValue()); +}]>; + +def getImmAs64BitInt : SDNodeXForm<imm, [{ + return getI64Imm(N->getSExtValue(), SDLoc(N)); +}]>; + def SHL32 : SDNodeXForm<imm, [{ // Transformation function: 31 - imm return getI32Imm(31 - N->getZExtValue(), SDLoc(N)); diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -2643,6 +2643,8 @@ } let Predicates = [PrefixInstrs] in { + def : Pat<(i32 imm34:$imm), (PLI (getImmAs64BitInt imm:$imm))>; + def : Pat<(i64 imm34:$imm), (PLI8 (getImmAs64BitInt imm:$imm))>; def : Pat<(v16i8 (int_ppc_vsx_xxpermx v16i8:$A, v16i8:$B, v16i8:$C, timm:$D)), (COPY_TO_REGCLASS (XXPERMX (COPY_TO_REGCLASS $A, VSRC), (COPY_TO_REGCLASS $B, VSRC), diff --git a/llvm/test/CodeGen/PowerPC/p10-constants.ll b/llvm/test/CodeGen/PowerPC/p10-constants.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/p10-constants.ll @@ -0,0 +1,290 @@ +; RUN: llc -verify-machineinstrs
-mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK32 + +; These test cases aim to test constant materialization using the pli instruction on Power10. + +define signext i32 @t_16BitsMinRequiring34Bits() { +; CHECK-LABEL: t_16BitsMinRequiring34Bits: +; CHECK: pli r3, 32768 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_16BitsMinRequiring34Bits: +; CHECK32: pli r3, 32768 +; CHECK32-NEXT: blr + +entry: + ret i32 32768 +} + +define signext i32 @t_16Bits() { +; CHECK-LABEL: t_16Bits: +; CHECK: pli r3, 62004 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_16Bits: +; CHECK32: pli r3, 62004 +; CHECK32-NEXT: blr + +entry: + ret i32 62004 +} + +define signext i32 @t_lt32gt16BitsNonShiftable() { +; CHECK-LABEL: t_lt32gt16BitsNonShiftable: +; CHECK: pli r3, 1193046 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_lt32gt16BitsNonShiftable: +; CHECK32: pli r3, 1193046 +; CHECK32-NEXT: blr + +entry: + ret i32 1193046 +} + +define signext i32 @t_32Bits() { +; CHECK-LABEL: t_32Bits: +; CHECK: pli r3, -231451016 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_32Bits: +; CHECK32: pli r3, -231451016 +; CHECK32-NEXT: blr + +entry: + ret i32 -231451016 +} + +define i64 @t_34BitsLargestPositive() { +; CHECK-LABEL: t_34BitsLargestPositive: +; CHECK: pli r3, 8589934591 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_34BitsLargestPositive: +; CHECK32: li r3, 1 +; CHECK32-NEXT: li r4, -1 +; CHECK32-NEXT: blr + +entry: + ret i64 8589934591 +} + +define i64 @t_neg34Bits() { +; CHECK-LABEL: t_neg34Bits: +; CHECK: pli r3, -8284514696 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_neg34Bits: +; CHECK32: li r3, -2 +; 
CHECK32-NEXT: pli r4, 305419896 +; CHECK32-NEXT: blr + +entry: + ret i64 -8284514696 +} + +define signext i32 @t_16BitsMinRequiring34BitsMinusOne() { +; CHECK-LABEL: t_16BitsMinRequiring34BitsMinusOne: +; CHECK: li r3, 32767 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_16BitsMinRequiring34BitsMinusOne: +; CHECK32: li r3, 32767 +; CHECK32-NEXT: blr + +entry: + ret i32 32767 +} + +define signext i32 @t_lt16Bits() { +; CHECK-LABEL: t_lt16Bits: +; CHECK: li r3, 291 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_lt16Bits: +; CHECK32: li r3, 291 +; CHECK32-NEXT: blr + +entry: + ret i32 291 +} + +define signext i32 @t_neglt16Bits() { +; CHECK-LABEL: t_neglt16Bits: +; CHECK: li r3, -3805 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_neglt16Bits: +; CHECK32: li r3, -3805 +; CHECK32-NEXT: blr + +entry: + ret i32 -3805 +} + +define signext i32 @t_neg16Bits() { +; CHECK-LABEL: t_neg16Bits: +; CHECK: li r3, -32204 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_neg16Bits: +; CHECK32: li r3, -32204 +; CHECK32-NEXT: blr + +entry: + ret i32 -32204 +} + +define signext i32 @t_lt32gt16BitsShiftable() { +; CHECK-LABEL: t_lt32gt16BitsShiftable: +; CHECK: lis r3, 18 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_lt32gt16BitsShiftable: +; CHECK32: lis r3, 18 +; CHECK32-NEXT: blr + +entry: + ret i32 1179648 +} + +define signext i32 @t_32gt16BitsShiftable() { +; CHECK-LABEL: t_32gt16BitsShiftable: +; CHECK: lis r3, -3532 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_32gt16BitsShiftable: +; CHECK32: lis r3, -3532 +; CHECK32-NEXT: blr + +entry: + ret i32 -231473152 +} + +define signext i32 @t_32BitsZero() { +; CHECK-LABEL: t_32BitsZero: +; CHECK: li r3, 0 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_32BitsZero: +; CHECK32: li r3, 0 +; CHECK32-NEXT: blr + +entry: + ret i32 0 +} + +define signext i32 @t_32BitsAllOnes() { +; CHECK-LABEL: t_32BitsAllOnes: +; CHECK: li r3, -1 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_32BitsAllOnes: +; CHECK32: li r3, -1 +; CHECK32-NEXT: blr + +entry: + ret i32 -1 +} + +define i64 @t_34BitsLargestPositivePlus() { +; 
CHECK-LABEL: t_34BitsLargestPositivePlus: +; CHECK: li r3, 1 +; CHECK-NEXT: rldic r3, r3, 33, 30 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_34BitsLargestPositivePlus: +; CHECK32: li r3, 2 +; CHECK32-NEXT: li r4, 0 +; CHECK32-NEXT: blr + +entry: + ret i64 8589934592 +} + +define i64 @t_34Bits() { +; CHECK-LABEL: t_34Bits: +; CHECK: lis r3, 25158 +; CHECK-NEXT: ori r3, r3, 35535 +; CHECK-NEXT: rldic r3, r3, 3, 30 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_34Bits: +; CHECK32: li r3, 3 +; CHECK32-NEXT: pli r4, 305419896 +; CHECK32-NEXT: blr + +entry: + ret i64 13190321784 +} + +define i64 @t_35Bits() { +; CHECK-LABEL: t_35Bits: +; CHECK: lis r3, -442 +; CHECK-NEXT: ori r3, r3, 35535 +; CHECK-NEXT: rldic r3, r3, 3, 29 +; CHECK-NEXT: blr +; CHECK32-LABEL: t_35Bits: +; CHECK32: li r3, 7 +; CHECK32-NEXT: pli r4, -231451016 +; CHECK32-NEXT: blr + +entry: + ret i64 34128287352 +} + +; The load immediates resulting from phi-nodes are needed to test whether +; li/lis is preferred to pli by the instruction selector. 
+define dso_local void @t_phiNode() { +; CHECK-LABEL: t_phiNode: +; CHECK: lis r6, 18 +; CHECK-NEXT: li r5, 291 +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: cmpwi r3, 1 +; CHECK-NEXT: li r3, -1 +; CHECK: pli r6, 2147483647 +; CHECK-NEXT: pli r5, 1193046 +; CHECK-NEXT: pli r4, 32768 +; CHECK-NEXT: pli r3, -231451016 +; CHECK32-LABEL: t_phiNode: +; CHECK32: lis r6, 18 +; CHECK32-NEXT: li r5, 291 +; CHECK32-NEXT: li r4, 0 +; CHECK32-NEXT: cmpwi r3, 1 +; CHECK32-NEXT: li r3, -1 +; CHECK32: pli r6, 2147483647 +; CHECK32-NEXT: pli r5, 1193046 +; CHECK32-NEXT: pli r4, 32768 +; CHECK32-NEXT: pli r3, -231451016 + +entry: + br label %while.body + +while.body: ; preds = %if.else.i, %entry + br label %while.body.i + +while.body.i: ; preds = %sw.epilog.i, %while.body + %a.1.i = phi i32 [ %a.2.i, %sw.epilog.i ], [ -1, %while.body ] + %b.1.i = phi i32 [ %b.2.i, %sw.epilog.i ], [ 0, %while.body ] + %c.1.i = phi i32 [ %c.2.i, %sw.epilog.i ], [ 291, %while.body ] + %d.1.i = phi i32 [ %d.2.i, %sw.epilog.i ], [ 1179648, %while.body ] + %0 = load i8, i8* null, align 1 + %cmp1.i = icmp eq i8 %0, 1 + br i1 %cmp1.i, label %if.then.i, label %if.else.i + +if.then.i: ; preds = %while.body.i + switch i8 undef, label %sw.default.i [ + i8 3, label %sw.epilog.i + i8 2, label %sw.bb1.i + ] + +sw.bb1.i: ; preds = %if.then.i + br label %sw.epilog.i + +sw.default.i: ; preds = %if.then.i + unreachable + +sw.epilog.i: ; preds = %sw.bb1.i, %if.then.i + %a.2.i = phi i32 [ -231451016, %sw.bb1.i ], [ %a.1.i, %if.then.i ] + %b.2.i = phi i32 [ 32768, %sw.bb1.i ], [ %b.1.i, %if.then.i ] + %c.2.i = phi i32 [ 1193046, %sw.bb1.i ], [ %c.1.i, %if.then.i ] + %d.2.i = phi i32 [ 2147483647, %sw.bb1.i ], [ %d.1.i, %if.then.i ] + br label %while.body.i + +if.else.i: ; preds = %while.body.i + call void @func2(i32 signext %a.1.i, i32 signext %b.1.i, i32 signext %c.1.i, i32 signext %d.1.i) + br label %while.body +} + +declare void @func2(i32, i32, i32, i32) diff --git
a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll @@ -317,32 +317,28 @@ ; test case is a constant that fits within 34-bits. ; CHECK-LABEL: test_ldst_7: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li r5, 0 -; CHECK-NEXT: ori r5, r5, 32799 +; CHECK-NEXT: pli r5, 32799 ; CHECK-NEXT: lxvpx vsp0, r3, r5 ; CHECK-NEXT: stxvpx vsp0, r4, r5 ; CHECK-NEXT: blr ; ; CHECK-NOMMA-LABEL: test_ldst_7: ; CHECK-NOMMA: # %bb.0: # %entry -; CHECK-NOMMA-NEXT: li r5, 0 -; CHECK-NOMMA-NEXT: ori r5, r5, 32799 +; CHECK-NOMMA-NEXT: pli r5, 32799 ; CHECK-NOMMA-NEXT: lxvpx vsp0, r3, r5 ; CHECK-NOMMA-NEXT: stxvpx vsp0, r4, r5 ; CHECK-NOMMA-NEXT: blr ; ; CHECK-BE-LABEL: test_ldst_7: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: li r5, 0 -; CHECK-BE-NEXT: ori r5, r5, 32799 +; CHECK-BE-NEXT: pli r5, 32799 ; CHECK-BE-NEXT: lxvpx vsp0, r3, r5 ; CHECK-BE-NEXT: stxvpx vsp0, r4, r5 ; CHECK-BE-NEXT: blr ; ; CHECK-BE-NOMMA-LABEL: test_ldst_7: ; CHECK-BE-NOMMA: # %bb.0: # %entry -; CHECK-BE-NOMMA-NEXT: li r5, 0 -; CHECK-BE-NOMMA-NEXT: ori r5, r5, 32799 +; CHECK-BE-NOMMA-NEXT: pli r5, 32799 ; CHECK-BE-NOMMA-NEXT: lxvpx vsp0, r3, r5 ; CHECK-BE-NOMMA-NEXT: stxvpx vsp0, r4, r5 ; CHECK-BE-NOMMA-NEXT: blr