diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1024,6 +1024,21 @@ static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt) { + unsigned TZ = countTrailingZeros(Imm); + unsigned LZ = countLeadingZeros(Imm); + unsigned TO = countTrailingOnes(Imm); + unsigned FO = countLeadingOnes(Imm << LZ); + unsigned Hi32 = Hi_32(Imm); + unsigned Lo32 = Lo_32(Imm); + + auto getI32Imm = [CurDAG, dl](unsigned Imm) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i32); + }; + + auto getI64Imm = [CurDAG, dl](uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i64); + }; + // Following patterns use 1 instruction to materialize Imm. InstCnt = 1; @@ -1032,8 +1047,98 @@ if (isInt<34>(Imm)) return cast(CurDAG->getConstant(Imm, dl, MVT::i64)); - InstCnt = 0; - return nullptr; + // Require at least two instructions. + InstCnt = 2; + SDNode *Result = nullptr; + // Patterns : {zeros}{ones}{33-bit value}{zeros} + // {zeros}{33-bit value}{zeros} + // {zeros}{ones}{33-bit value} + // {ones}{33-bit value}{zeros} + // We can take advantage of PLI's sign-extension semantics to generate leading + // ones, and then use RLDIC to mask off the ones on both sides after rotation. + if ((LZ + FO + TZ) > 30) { + APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff); + APInt Extended = SignedInt34.sext(64); + Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, + getI64Imm(*Extended.getRawData())); + return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TZ), getI32Imm(LZ)); + } + // Pattern : {zeros}{33-bit value}{ones} + // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value, + // therefore we can take advantage of PLI's sign-extension semantics, and then + // mask them off after rotation. 
+ // + // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+ + // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1| + // +------------------------+ +------------------------+ + // 63 0 63 0 + // + // +----sext-----|--34-bit--+ +clear-|-----------------+ + // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111| + // +------------------------+ +------------------------+ + // 63 0 63 0 + if ((LZ + TO) > 30) { + APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff); + APInt Extended = SignedInt34.sext(64); + Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, + getI64Imm(*Extended.getRawData())); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(30 - LZ), getI32Imm(LZ)); + } + // Patterns : {zeros}{ones}{33-bit value}{ones} + // {ones}{33-bit value}{ones} + // Similar to LI we can take advantage of PLI's sign-extension semantics to + // generate leading ones, and then use RLDICL to mask off the ones on the left + // side (if required) after rotation. + if ((LZ + FO + TO) > 30) { + APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff); + APInt Extended = SignedInt34.sext(64); + Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, + getI64Imm(*Extended.getRawData())); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TO), getI32Imm(LZ)); + } + // Patterns : {******}{31 zeros}{******} + // : {******}{31 ones}{******} + // If Imm contains 31 consecutive zeros/ones then the remaining bit count + // is 33. Rotate right the Imm to construct an int<33> value, we can use PLI + // for the int<33> value and then use RLDICL without a mask to rotate it back. 
+ // + // +------|--ones--|------+ +---ones--||---33 bit--+ + // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb| + // +----------------------+ +----------------------+ + // 63 0 63 0 + for (unsigned Shift = 0; Shift < 63; ++Shift) { + uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift)); + if (isInt<34>(RotImm)) { + Result = + CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm)); + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Shift), + getI32Imm(0)); + } + } + + // Patterns : High word == Low word + // This is basically a splat of a 32 bit immediate. + if (Hi32 == Lo32) { + Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32)); + SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), + getI32Imm(0)}; + return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); + } + + InstCnt = 3; + // Catch-all + // This pattern can form any 64 bit immediate in 3 instructions. + SDNode *ResultHi = + CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32)); + SDNode *ResultLo = + CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32)); + SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32), + getI32Imm(0)}; + return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); } static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll b/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll --- a/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll +++ b/llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll @@ -22,11 +22,9 @@ ; CHECK-NEXT: paddi r3, 0, .L.str@PCREL, 1 ; CHECK-NEXT: bl printf@notoc ; CHECK-NEXT: ld r4, 104(r1) -; CHECK-NEXT: lis r3, 8201 -; CHECK-NEXT: ori r3, r3, 64225 -; CHECK-NEXT: rldic r3, r3, 33, 1 -; CHECK-NEXT: oris r3, r3, 36700 -; CHECK-NEXT: ori r3, r3, 10486 +; CHECK-NEXT: pli r5, 1075049922 +; CHECK-NEXT: pli r3, 2405181686 +; CHECK-NEXT: rldimi r3, r5, 32, 0 ; 
CHECK-NEXT: std r3, 0(r4) ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) diff --git a/llvm/test/CodeGen/PowerPC/p10-constants.ll b/llvm/test/CodeGen/PowerPC/p10-constants.ll --- a/llvm/test/CodeGen/PowerPC/p10-constants.ll +++ b/llvm/test/CodeGen/PowerPC/p10-constants.ll @@ -10,8 +10,9 @@ define signext i32 @t_16BitsMinRequiring34Bits() { ; CHECK-LABEL: t_16BitsMinRequiring34Bits: -; CHECK: pli r3, 32768 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, 32768 +; CHECK-NEXT: blr entry: ret i32 32768 @@ -19,8 +20,9 @@ define signext i32 @t_16Bits() { ; CHECK-LABEL: t_16Bits: -; CHECK: pli r3, 62004 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, 62004 +; CHECK-NEXT: blr entry: ret i32 62004 @@ -28,8 +30,9 @@ define signext i32 @t_lt32gt16BitsNonShiftable() { ; CHECK-LABEL: t_lt32gt16BitsNonShiftable: -; CHECK: pli r3, 1193046 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, 1193046 +; CHECK-NEXT: blr entry: ret i32 1193046 @@ -37,8 +40,9 @@ define signext i32 @t_32Bits() { ; CHECK-LABEL: t_32Bits: -; CHECK: pli r3, -231451016 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, -231451016 +; CHECK-NEXT: blr entry: ret i32 -231451016 @@ -46,8 +50,9 @@ define i64 @t_34BitsLargestPositive() { ; CHECK-LABEL: t_34BitsLargestPositive: -; CHECK: pli r3, 8589934591 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, 8589934591 +; CHECK-NEXT: blr entry: ret i64 8589934591 @@ -55,8 +60,9 @@ define i64 @t_neg34Bits() { ; CHECK-LABEL: t_neg34Bits: -; CHECK: pli r3, -8284514696 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, -8284514696 +; CHECK-NEXT: blr entry: ret i64 -8284514696 @@ -64,8 +70,9 @@ define signext i32 @t_16BitsMinRequiring34BitsMinusOne() { ; CHECK-LABEL: t_16BitsMinRequiring34BitsMinusOne: -; CHECK: li r3, 32767 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r3, 32767 +; CHECK-NEXT: blr entry: ret i32 32767 @@ 
-73,8 +80,9 @@ define signext i32 @t_lt16Bits() { ; CHECK-LABEL: t_lt16Bits: -; CHECK: li r3, 291 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r3, 291 +; CHECK-NEXT: blr entry: ret i32 291 @@ -82,8 +90,9 @@ define signext i32 @t_neglt16Bits() { ; CHECK-LABEL: t_neglt16Bits: -; CHECK: li r3, -3805 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r3, -3805 +; CHECK-NEXT: blr entry: ret i32 -3805 @@ -91,8 +100,9 @@ define signext i32 @t_neg16Bits() { ; CHECK-LABEL: t_neg16Bits: -; CHECK: li r3, -32204 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r3, -32204 +; CHECK-NEXT: blr entry: ret i32 -32204 @@ -100,8 +110,9 @@ define signext i32 @t_lt32gt16BitsShiftable() { ; CHECK-LABEL: t_lt32gt16BitsShiftable: -; CHECK: lis r3, 18 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lis r3, 18 +; CHECK-NEXT: blr entry: ret i32 1179648 @@ -109,8 +120,9 @@ define signext i32 @t_32gt16BitsShiftable() { ; CHECK-LABEL: t_32gt16BitsShiftable: -; CHECK: lis r3, -3532 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lis r3, -3532 +; CHECK-NEXT: blr entry: ret i32 -231473152 @@ -118,8 +130,9 @@ define signext i32 @t_32BitsZero() { ; CHECK-LABEL: t_32BitsZero: -; CHECK: li r3, 0 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: blr entry: ret i32 0 @@ -127,8 +140,9 @@ define signext i32 @t_32BitsAllOnes() { ; CHECK-LABEL: t_32BitsAllOnes: -; CHECK: li r3, -1 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r3, -1 +; CHECK-NEXT: blr entry: ret i32 -1 @@ -136,9 +150,10 @@ define i64 @t_34BitsLargestPositivePlus() { ; CHECK-LABEL: t_34BitsLargestPositivePlus: -; CHECK: li r3, 1 -; CHECK-NEXT: rldic r3, r3, 33, 30 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: rldic r3, r3, 33, 30 +; CHECK-NEXT: blr entry: ret i64 8589934592 @@ -146,10 +161,10 @@ define i64 @t_34Bits() { ; CHECK-LABEL: t_34Bits: -; CHECK: lis r3, 25158 -; 
CHECK-NEXT: ori r3, r3, 35535 -; CHECK-NEXT: rldic r3, r3, 3, 30 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, 1648790223 +; CHECK-NEXT: rldic r3, r3, 3, 30 +; CHECK-NEXT: blr entry: ret i64 13190321784 @@ -157,28 +172,115 @@ define i64 @t_35Bits() { ; CHECK-LABEL: t_35Bits: -; CHECK: lis r3, -442 -; CHECK-NEXT: ori r3, r3, 35535 -; CHECK-NEXT: rldic r3, r3, 3, 29 -; CHECK-NEXT: blr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, 4266035919 +; CHECK-NEXT: rldic r3, r3, 3, 29 +; CHECK-NEXT: blr entry: ret i64 34128287352 } +; (Value >> Shift) can be expressed in 34 bits +define i64 @t_Shift() { +; CHECK-LABEL: t_Shift: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, 8522759166 +; CHECK-NEXT: rotldi r3, r3, 48 +; CHECK-NEXT: blr + +entry: + ; 0xFBFE00000001FBFE + ret i64 18157950747604548606 +} + +; Leading Zeros + Following Ones + Trailing Zeros > 30 +define i64 @t_LZFOTZ() { +; CHECK-LABEL: t_LZFOTZ: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, -349233 +; CHECK-NEXT: rldic r3, r3, 4, 12 +; CHECK-NEXT: blr + +entry: + ; 0x000FFFFFFFAABCF0 + ret i64 4503599621782768 +} + +; Leading Zeros + Trailing Ones > 30 +define i64 @t_LZTO() { +; CHECK-LABEL: t_LZTO: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, -2684406441 +; CHECK-NEXT: rldicl r3, r3, 11, 19 +; CHECK-NEXT: blr +entry: + ; 0x00001AFFF9AABFFF + ret i64 29686707699711 +} + +; Leading Zeros + Following Ones + Trailing Ones > 30 +define i64 @t_LZTOFO() { +; CHECK-LABEL: t_LZTOFO: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, -5720033968 +; CHECK-NEXT: rldicl r3, r3, 11, 12 +; CHECK-NEXT: blr +entry: + ; 0x000FF55879AA87FF + ret i64 4491884997806079 +} + +; Requires full expansion +define i64 @t_Full64Bits1() { +; CHECK-LABEL: t_Full64Bits1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r4, 2146500607 +; CHECK-NEXT: pli r3, 4043305214 +; CHECK-NEXT: rldimi r3, r4, 32, 0 +; CHECK-NEXT: blr +entry: + ; 0x7FF0FFFFF0FFF0FE + ret i64 9219149911952453886 +} + +; Requires 
full expansion +define i64 @t_Ful64Bits2() { +; CHECK-LABEL: t_Ful64Bits2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r4, 4042326015 +; CHECK-NEXT: pli r3, 4043305214 +; CHECK-NEXT: rldimi r3, r4, 32, 0 +; CHECK-NEXT: blr +entry: + ; 0xF0F0FFFFF0FFF0FE + ret i64 17361658038238310654 +} + +; A splat of 32 bits: 32 Bits Low == 32 Bits High +define i64 @t_Splat32Bits() { +; CHECK-LABEL: t_Splat32Bits: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pli r3, 262916796 +; CHECK-NEXT: rldimi r3, r3, 32, 0 +; CHECK-NEXT: blr +entry: + ; 0x0FABCABC0FABCABC + ret i64 1129219040652020412 +} + ; The load immediates resulting from phi-nodes are needed to test whether ; li/lis is preferred to pli by the instruction selector. define dso_local void @t_phiNode() { -; CHECK-LABEL: t_phiNode: -; CHECK: lis r6, 18 -; CHECK-NEXT: li r5, 291 -; CHECK-NEXT: li r4, 0 +; CHECK: lis r6, 18 +; CHECK-NEXT: li r5, 291 +; CHECK-NEXT: li r4, 0 ; CHECK-NEXT: cmpwi r3, 1 -; CHECK-NEXT: li r3, -1 -; CHECK: pli r6, 2147483647 -; CHECK-NEXT: pli r5, 1193046 -; CHECK-NEXT: pli r4, 32768 -; CHECK-NEXT: pli r3, -231451016 +; CHECK-NEXT: li r3, -1 +; CHECK: pli r6, 2147483647 +; CHECK-NEXT: pli r5, 1193046 +; CHECK-NEXT: pli r4, 32768 +; CHECK-NEXT: pli r3, -231451016 entry: br label %while.body