diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1040,6 +1040,23 @@
 // were selected.
 static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,
                                         uint64_t Imm, unsigned &InstCnt) {
+  unsigned TZ = countTrailingZeros(Imm);
+  unsigned LZ = countLeadingZeros(Imm);
+  unsigned TO = countTrailingOnes(Imm);
+  // Imm << LZ is undefined when LZ == 64 (Imm == 0), so guard the shift; an
+  // all-zero value has no leading ones to follow.
+  unsigned FO = countLeadingOnes(LZ == 64 ? 0 : (Imm << LZ));
+  unsigned Hi32 = Hi_32(Imm);
+  unsigned Lo32 = Lo_32(Imm);
+
+  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
+    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+  };
+
+  auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
+    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
+  };
+
   // Following patterns use 1 instruction to materialize Imm.
   InstCnt = 1;
 
@@ -1048,8 +1065,100 @@
   if (isInt<34>(Imm))
     return cast(CurDAG->getConstant(Imm, dl, MVT::i64));
 
-  InstCnt = 0;
-  return nullptr;
+  // Require at least two instructions.
+  InstCnt = 2;
+  SDNode *Result = nullptr;
+  // Patterns : {zeros}{ones}{33-bit value}{zeros}
+  //            {zeros}{33-bit value}{zeros}
+  //            {zeros}{ones}{33-bit value}
+  //            {ones}{33-bit value}{zeros}
+  // We can take advantage of PLI's sign-extension semantics to generate leading
+  // ones, and then use RLDIC to mask off the ones on both sides after rotation.
+  if ((LZ + FO + TZ) > 30) {
+    APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
+    APInt Extended = SignedInt34.sext(64);
+    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
+                                    getI64Imm(*Extended.getRawData()));
+    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
+                                  getI32Imm(TZ), getI32Imm(LZ));
+  }
+  // Pattern : {zeros}{33-bit value}{ones}
+  // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
+  // therefore we can take advantage of PLI's sign-extension semantics, and then
+  // mask them off after rotation.
+  //
+  // +--LZ--||-33-bit-||--TO--+     +-------------|--34-bit--+
+  // |00000001bbbbbbbbb1111111| ->  |00000000000001bbbbbbbbb1|
+  // +------------------------+     +------------------------+
+  // 63                       0     63                       0
+  //
+  // +----sext-----|--34-bit--+     +clear-|-----------------+
+  // |11111111111111bbbbbbbbb1| ->  |00000001bbbbbbbbb1111111|
+  // +------------------------+     +------------------------+
+  // 63                       0     63                       0
+  if ((LZ + TO) > 30) {
+    APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
+    APInt Extended = SignedInt34.sext(64);
+    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
+                                    getI64Imm(*Extended.getRawData()));
+    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+                                  getI32Imm(30 - LZ), getI32Imm(LZ));
+  }
+  // Patterns : {zeros}{ones}{33-bit value}{ones}
+  //            {ones}{33-bit value}{ones}
+  // Similar to LI we can take advantage of PLI's sign-extension semantics to
+  // generate leading ones, and then use RLDICL to mask off the ones in left
+  // sides (if required) after rotation.
+  if ((LZ + FO + TO) > 30) {
+    APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
+    APInt Extended = SignedInt34.sext(64);
+    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
+                                    getI64Imm(*Extended.getRawData()));
+    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
+                                  getI32Imm(TO), getI32Imm(LZ));
+  }
+  // Patterns : {******}{31 zeros}{******}
+  //          : {******}{31 ones}{******}
+  // If Imm contains 31 consecutive zeros/ones then the remaining bit count
+  // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI
+  // for the int<33> value and then use RLDICL without a mask to rotate it back.
+  //
+  // +------|--ones--|------+     +---ones--||---33 bit--+
+  // |bbbbbb1111111111aaaaaa| ->  |1111111111aaaaaabbbbbb|
+  // +----------------------+     +----------------------+
+  // 63                     0     63                     0
+  for (unsigned Shift = 0; Shift < 63; ++Shift) {
+    // Use APInt::rotr rather than open-coded shifts: Imm << (64 - Shift) is
+    // undefined for Shift == 0.
+    uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
+    if (isInt<34>(RotImm)) {
+      Result =
+          CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
+      return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
+                                    SDValue(Result, 0), getI32Imm(Shift),
+                                    getI32Imm(0));
+    }
+  }
+
+  // Patterns : High word == Low word
+  // This is basically a splat of a 32 bit immediate.
+  if (Hi32 == Lo32) {
+    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
+    SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
+                     getI32Imm(0)};
+    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
+  }
+
+  InstCnt = 3;
+  // Catch-all
+  // This pattern can form any 64 bit immediate in 3 instructions.
+  SDNode *ResultHi =
+      CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
+  SDNode *ResultLo =
+      CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
+  SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
+                   getI32Imm(0)};
+  return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
 }
 
 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll b/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll
--- a/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll
@@ -22,11 +22,9 @@
 ; CHECK-NEXT: paddi r3, 0, .L.str@PCREL, 1
 ; CHECK-NEXT: bl printf@notoc
 ; CHECK-NEXT: ld r4, 104(r1)
-; CHECK-NEXT: lis r3, 8201
-; CHECK-NEXT: ori r3, r3, 64225
-; CHECK-NEXT: rldic r3, r3, 33, 1
-; CHECK-NEXT: oris r3, r3, 36700
-; CHECK-NEXT: ori r3, r3, 10486
+; CHECK-NEXT: pli r5, 1075049922
+; CHECK-NEXT: pli r3, 2405181686
+; CHECK-NEXT: rldimi r3, r5, 32, 0
 ; 
CHECK-NEXT: std r3, 0(r4) ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) diff --git a/llvm/test/CodeGen/PowerPC/p10-constants.ll b/llvm/test/CodeGen/PowerPC/p10-constants.ll --- a/llvm/test/CodeGen/PowerPC/p10-constants.ll +++ b/llvm/test/CodeGen/PowerPC/p10-constants.ll @@ -196,8 +196,7 @@ define i64 @t_34Bits() { ; CHECK-LABEL: t_34Bits: -; CHECK: lis r3, 25158 -; CHECK-NEXT: ori r3, r3, 35535 +; CHECK: pli r3, 1648790223 ; CHECK-NEXT: rldic r3, r3, 3, 30 ; CHECK-NEXT: blr ; CHECK32-LABEL: t_34Bits: @@ -211,8 +210,7 @@ define i64 @t_35Bits() { ; CHECK-LABEL: t_35Bits: -; CHECK: lis r3, -442 -; CHECK-NEXT: ori r3, r3, 35535 +; CHECK: pli r3, 4266035919 ; CHECK-NEXT: rldic r3, r3, 3, 29 ; CHECK-NEXT: blr ; CHECK32-LABEL: t_35Bits: @@ -224,6 +222,87 @@ ret i64 34128287352 } +; (Value >> Shift) can be expressed in 34 bits +define i64 @t_Shift() { +; CHECK-LABEL: t_Shift: +; CHECK: pli r3, 8522759166 +; CHECK-NEXT: rotldi r3, r3, 48 +; CHECK-NEXT: blr + +entry: + ; 0xFBFE00000001FBFE + ret i64 18157950747604548606 +} + +; Leading Zeros + Following Ones + Trailing Zeros > 30 +define i64 @t_LZFOTZ() { +; CHECK-LABEL: t_LZFOTZ: +; CHECK: pli r3, -349233 +; CHECK-NEXT: rldic r3, r3, 4, 12 +; CHECK-NEXT: blr + +entry: + ; 0x000FFFFFFFAABCF0 + ret i64 4503599621782768 +} + +; Leading Zeros + Trailing Ones > 30 +define i64 @t_LZTO() { +; CHECK-LABEL: t_LZTO: +; CHECK: pli r3, -2684406441 +; CHECK-NEXT: rldicl r3, r3, 11, 19 +; CHECK-NEXT: blr +entry: + ; 0x00001AFFF9AABFFF + ret i64 29686707699711 +} + +; Leading Zeros + Following Ones + Trailing Ones > 30 +define i64 @t_LZTOFO() { +; CHECK-LABEL: t_LZTOFO: +; CHECK: pli r3, -5720033968 +; CHECK-NEXT: rldicl r3, r3, 11, 12 +; CHECK-NEXT: blr +entry: + ; 0x000FF55879AA87FF + ret i64 4491884997806079 +} + +; Requires full expansion +define i64 @t_Full64Bits1() { +; CHECK-LABEL: t_Full64Bits1: +; CHECK: pli r4, 2146500607 +; CHECK-NEXT: pli r3, 4043305214 +; CHECK-NEXT: rldimi r3, r4, 32, 0 +; CHECK-NEXT: blr 
+entry: + ; 0x7FF0FFFFF0FFF0FE + ret i64 9219149911952453886 +} + +; Requires full expansion +define i64 @t_Ful64Bits2() { +; CHECK-LABEL: t_Ful64Bits2: +; CHECK: pli r4, 4042326015 +; CHECK-NEXT: pli r3, 4043305214 +; CHECK-NEXT: rldimi r3, r4, 32, 0 +; CHECK-NEXT: blr +entry: + ; 0xF0F0FFFFF0FFF0FE + ret i64 17361658038238310654 +} + +; A splat of 32 bits: 32 Bits Low == 32 Bits High +define i64 @t_Splat32Bits() { +; CHECK-LABEL: t_Splat32Bits: +; CHECK: pli r3, 262916796 +; CHECK-NEXT: rldimi r3, r3, 32, 0 +; CHECK-NEXT: blr +entry: + ; 0x0FABCABC0FABCABC + ret i64 1129219040652020412 +} + ; The load immediates resulting from phi-nodes are needed to test whether ; li/lis is preferred to pli by the instruction selector. define dso_local void @t_phiNode() {