diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -911,15 +911,50 @@
   unsigned Remainder = 0;
   // Assume no shift required.
   unsigned Shift = 0;
+  // Number of trailing zero bits in the immediate.
+  unsigned TrailingZeros = 0;
+  // Number of trailing one bits in the immediate.
+  unsigned TrailingOnes = 0;
+  // Number of leading bits RLDICL has to clear; 0 means "do not use RLDICL".
+  unsigned ClearMask = 0;
 
   // If it can't be represented as a 32 bit value.
   if (!isInt<32>(Imm)) {
-    Shift = countTrailingZeros<uint64_t>(Imm);
-    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
-
-    // If the shifted value fits 32 bits.
-    if (isInt<32>(ImmSh)) {
-      // Go with the shifted value.
+    TrailingZeros = countTrailingZeros<uint64_t>(Imm);
+    TrailingOnes = countTrailingOnes<uint64_t>(Imm);
+    int64_t ImmSh = TrailingZeros ? static_cast<uint64_t>(Imm) >> TrailingZeros
+                                  : static_cast<uint64_t>(Imm) >> TrailingOnes;
+
+    // If the immediate has many trailing zeros, we can take advantage of
+    // li/lis's sign-extension to generate leading zeros, and then shift it.
+    // This requires the sign bit to be 0 during li/lis.
+
+    // If the immediate has many trailing ones, we can take advantage of
+    // li/lis's sign-extension to generate leading ones, and then mask extra
+    // bits off after rotation. This requires the sign bit to be 1 during
+    // li/lis.
+
+    // The trailing-ones form is only sound under two extra conditions:
+    //  * Imm has at least one leading zero. A non-zero ClearMask is what
+    //    selects the RLDICL sequence later on; with ClearMask == 0 the
+    //    shifted value would be emitted as if it were the full immediate.
+    //  * The rotate moves the top TrailingOnes bits of the li/lis result
+    //    into the low bits, so each of them must be a sign-extension copy:
+    //    bits 15..63 after li (uint16 seed), bits 31..63 after lis.
+    unsigned LeadingZeros = countLeadingZeros<uint64_t>(Imm);
+    unsigned MaxTrailingOnes = isUInt<16>(ImmSh) ? 49 : 33;
+    bool UseTrailingOnes = TrailingOnes && isUInt<32>(ImmSh) &&
+                           LeadingZeros && TrailingOnes <= MaxTrailingOnes;
+
+    if (TrailingZeros && isInt<32>(ImmSh)) {
+      // If the immediate has many trailing zeros and the shifted value fits
+      // int32, go with the shifted value.
+      Shift = TrailingZeros;
+      Imm = ImmSh;
+    } else if (UseTrailingOnes) {
+      // If the immediate has many trailing ones and li/lis + rldicl can
+      // rebuild it from the shifted value, go with the shifted value.
+      ClearMask = LeadingZeros;
       Imm = ImmSh;
     } else {
       // Still stuck with a 64 bit value.
@@ -940,6 +975,29 @@
     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
   };
 
+  // If the immediate has many trailing ones.
+ if (ClearMask) { + if (isUInt<16>(Imm)) { + // Set the sign bit to 1. + Imm |= 0x8000; + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Imm)); + } else if (Lo) { + // Handle the Hi bits and set the sign bit to 1. + Hi |= 0x8000; + Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); + // And the Lo bits. + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Lo)); + } else { + // Just the Hi bits. + Hi |= 0x8000; + Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); + } + return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0), + getI32Imm(TrailingOnes), + getI32Imm(ClearMask)); + } + // Simple value. if (isInt<16>(Imm)) { uint64_t SextImm = SignExtend64(Lo, 16); diff --git a/llvm/test/CodeGen/PowerPC/combine_ext_trunc.ll b/llvm/test/CodeGen/PowerPC/combine_ext_trunc.ll --- a/llvm/test/CodeGen/PowerPC/combine_ext_trunc.ll +++ b/llvm/test/CodeGen/PowerPC/combine_ext_trunc.ll @@ -38,10 +38,9 @@ define i32 @pattern3(i1 %cond, i32 %x) { ; CHECK-LABEL: pattern3: ; CHECK: # %bb.0: -; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: li 5, -32768 ; CHECK-NEXT: andi. 3, 3, 1 -; CHECK-NEXT: oris 3, 5, 65535 -; CHECK-NEXT: ori 3, 3, 65535 +; CHECK-NEXT: rldicl 3, 5, 32, 32 ; CHECK-NEXT: iselgt 3, 0, 3 ; CHECK-NEXT: and 3, 3, 4 ; CHECK-NEXT: blr @@ -54,10 +53,10 @@ define i32 @pattern4(i1 %cond, i32 %x) { ; CHECK-LABEL: pattern4: ; CHECK: # %bb.0: -; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: li 5, -32768 ; CHECK-NEXT: andi. 
3, 3, 1 -; CHECK-NEXT: oris 3, 5, 65535 -; CHECK-NEXT: ori 3, 3, 65535 +; CHECK-NEXT: rldicl 3, 5, 32, 32 +; CHECK-NEXT: li 5, 0 ; CHECK-NEXT: iselgt 3, 3, 5 ; CHECK-NEXT: or 3, 4, 3 ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/constants-i64.ll b/llvm/test/CodeGen/PowerPC/constants-i64.ll --- a/llvm/test/CodeGen/PowerPC/constants-i64.ll +++ b/llvm/test/CodeGen/PowerPC/constants-i64.ll @@ -107,9 +107,8 @@ ret i64 4294967295 ; CHECK-LABEL: @uint32_2 -; CHECK: li [[REG1:[0-9]+]], 0 -; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65535 -; CHECK: ori 3, [[REG2]], 65535 +; CHECK: li [[REG1:[0-9]+]], -32768 +; CHECK: rldicl 3, [[REG1]], 32, 32 ; CHECK: blr } @@ -118,9 +117,8 @@ ret i32 -1 ; CHECK-LABEL: @uint32_2_i32 -; CHECK: li [[REG1:[0-9]+]], 0 -; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65535 -; CHECK: ori 3, [[REG2]], 65535 +; CHECK: li [[REG1:[0-9]+]], -32768 +; CHECK: rldicl 3, [[REG1]], 32, 32 ; CHECK: blr } @@ -151,10 +149,8 @@ ret i64 10460594175 ; CHECK-LABEL: @cn_ones_1 -; CHECK: li [[REG1:[0-9]+]], 2 -; CHECK: sldi [[REG2:[0-9]+]], [[REG1]], 32 -; CHECK: oris [[REG3:[0-9]+]], [[REG2]], 28543 -; CHECK: ori 3, [[REG3]], 65535 +; CHECK: li [[REG1:[0-9]+]], -31522 +; CHECK: rldicl 3, [[REG1]], 23, 30 ; CHECK: blr } @@ -163,10 +159,19 @@ ret i64 10459119615 ; CHECK-LABEL: @cn_ones_2 -; CHECK: li [[REG1:[0-9]+]], 2 -; CHECK: sldi [[REG2:[0-9]+]], [[REG1]], 32 -; CHECK: oris [[REG3:[0-9]+]], [[REG2]], 28521 -; CHECK: ori 3, [[REG3]], 32767 +; CHECK: lis [[REG1:[0-9]+]], -32764 +; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 57042 +; CHECK: rldicl 3, [[REG2]], 15, 30 +; CHECK: blr +} + +define i64 @cn_ones_3() #0 { +entry: + ret i64 47244673023 + +; CHECK-LABEL: @cn_ones_3 +; CHECK: lis [[REG1:[0-9]+]], -32746 +; CHECK: rldicl 3, [[REG1]], 15, 28 ; CHECK: blr } diff --git a/llvm/test/CodeGen/PowerPC/negctr.ll b/llvm/test/CodeGen/PowerPC/negctr.ll --- a/llvm/test/CodeGen/PowerPC/negctr.ll +++ b/llvm/test/CodeGen/PowerPC/negctr.ll @@ -15,10 +15,9 @@ br i1 %exitcond, 
label %for.end, label %for.body ; CHECK: @main -; CHECK: li [[REG:[0-9]+]], 0 -; CHECK: oris [[REG2:[0-9]+]], [[REG]], 65535 -; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 -; CHECK: mtctr [[REG3]] +; CHECK: li [[REG:[0-9]+]], -32768 +; CHECK: rldicl [[REG2:[0-9]+]], [[REG]], 32, 32 +; CHECK: mtctr [[REG2]] ; CHECK: bdnz for.end: ; preds = %for.body, %entry diff --git a/llvm/test/CodeGen/PowerPC/ori_imm64.ll b/llvm/test/CodeGen/PowerPC/ori_imm64.ll --- a/llvm/test/CodeGen/PowerPC/ori_imm64.ll +++ b/llvm/test/CodeGen/PowerPC/ori_imm64.ll @@ -37,10 +37,9 @@ define i64 @ori_test_4(i64 %a) { ; CHECK-LABEL: ori_test_4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: li 4, 4112 -; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: oris 4, 4, 4112 -; CHECK-NEXT: ori 4, 4, 65535 +; CHECK-NEXT: lis 4, -28656 +; CHECK-NEXT: ori 4, 4, 4112 +; CHECK-NEXT: rldicl 4, 4, 16, 19 ; CHECK-NEXT: or 3, 3, 4 ; CHECK-NEXT: blr entry: