Index: lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
===================================================================
--- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1190,7 +1190,7 @@
   case PPC::RLWINMbm:
   case PPC::RLWINMobm: {
     unsigned MB, ME;
-    int64_t BM = Inst.getOperand(3).getImm();
+    unsigned BM = Inst.getOperand(3).getImm();
     if (!isRunOfOnes(BM, MB, ME))
       break;

@@ -1207,7 +1207,7 @@
   case PPC::RLWIMIbm:
   case PPC::RLWIMIobm: {
     unsigned MB, ME;
-    int64_t BM = Inst.getOperand(3).getImm();
+    unsigned BM = Inst.getOperand(3).getImm();
     if (!isRunOfOnes(BM, MB, ME))
       break;

@@ -1225,7 +1225,7 @@
   case PPC::RLWNMbm:
   case PPC::RLWNMobm: {
     unsigned MB, ME;
-    int64_t BM = Inst.getOperand(3).getImm();
+    unsigned BM = Inst.getOperand(3).getImm();
     if (!isRunOfOnes(BM, MB, ME))
       break;

Index: lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
===================================================================
--- lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -63,11 +63,23 @@
 /// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
 /// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
 /// since all 1s are not contiguous.
-static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+/// So far, isRunOfOnes supports only 32-bit and 64-bit unsigned integer types.
+template <typename T>
+static inline bool isRunOfOnes(T Val, unsigned &MB, unsigned &ME) {
+  static_assert(std::numeric_limits<T>::is_integer &&
+                !std::numeric_limits<T>::is_signed &&
+                (std::numeric_limits<T>::digits == 32 ||
+                 std::numeric_limits<T>::digits == 64),
+                "isRunOfOnes supports only 32-bit and 64-bit unsigned integers");
+
   if (!Val)
     return false;

-  if (isShiftedMask_32(Val)) {
+  const bool Is64Bit = (std::numeric_limits<T>::digits == 64);
+
+  bool IsShiftedMask = Is64Bit ? isShiftedMask_64(Val) :
+                                 isShiftedMask_32(Val);
+  if (IsShiftedMask) {
     // look for the first non-zero bit
     MB = countLeadingZeros(Val);
     // look for the first zero bit after the run of ones
@@ -75,7 +87,9 @@
     return true;
   } else {
     Val = ~Val; // invert mask
-    if (isShiftedMask_32(Val)) {
+    IsShiftedMask = Is64Bit ? isShiftedMask_64(Val) :
+                              isShiftedMask_32(Val);
+    if (IsShiftedMask) {
       // effectively look for the first zero bit
       ME = countLeadingZeros(Val) - 1;
       // effectively look for the first one bit after the run of zeros
Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -204,6 +204,10 @@
     bool tryBitPermutation(SDNode *N);
     bool tryIntCompareInGPR(SDNode *N);
+    /// tryRotateThenMaskInsert - Returns true if N is replaced by an
+    /// RLDIMI/RLWIMI instruction.
+    template <typename T>
+    bool tryRotateThenMaskInsert(SDNode *N);

     // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
     // an X-Form load instruction with the offset being a relocation coming from
     // the PPCISD::ADD_TLS.
@@ -595,6 +599,38 @@
   return false;
 }

+/// Find a subtree generated for a bitfield insert and replace it with a
+/// rotate left then mask insert instruction.
+bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
+  // Expected nodes:
+  //   %and1 = and i64 %val1, MASK
+  //   %and2 = and i64 %val2, ~MASK
+  //   %N = or i64 %and1, %and2
+  if (N->getOpcode() != ISD::OR)
+    return false;
+
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Op0->getOpcode() != ISD::AND || Op1->getOpcode() != ISD::AND)
+    return false;
+
+  if (N->getValueType(0) == MVT::i32) {
+    unsigned Mask1 = 0, Mask2 = 0;
+    if (isInt32Immediate(Op0->getOperand(1), Mask1) &&
+        isInt32Immediate(Op1->getOperand(1), Mask2) &&
+        Mask1 == ~Mask2)
+      return tryRotateThenMaskInsert<uint32_t>(N);
+  }
+  if (N->getValueType(0) == MVT::i64) {
+    uint64_t Mask1 = 0, Mask2 = 0;
+    if (isInt64Immediate(Op0->getOperand(1), Mask1) &&
+        isInt64Immediate(Op1->getOperand(1), Mask2) &&
+        Mask1 == ~Mask2)
+      return tryRotateThenMaskInsert<uint64_t>(N);
+  }
+  return false;
+}
+
 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
   SDValue Base = ST->getBasePtr();
   if (Base.getOpcode() != PPCISD::ADD_TLS)
@@ -681,7 +717,14 @@

 /// Turn an or of two masked values into the rotate left word immediate then
 /// mask insert (rlwimi) instruction.
-bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
+template <typename T>
+bool PPCDAGToDAGISel::tryRotateThenMaskInsert(SDNode *N) {
+  static_assert(std::numeric_limits<T>::is_integer &&
+                !std::numeric_limits<T>::is_signed &&
+                (std::numeric_limits<T>::digits == 32 ||
+                 std::numeric_limits<T>::digits == 64),
+                "tryRotateThenMaskInsert does not support this type");
+  unsigned BitSize = std::numeric_limits<T>::digits;
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
   SDLoc dl(N);
@@ -690,13 +733,15 @@
   CurDAG->computeKnownBits(Op0, LKnown);
   CurDAG->computeKnownBits(Op1, RKnown);

-  unsigned TargetMask = LKnown.Zero.getZExtValue();
-  unsigned InsertMask = RKnown.Zero.getZExtValue();
+  T TargetMask = LKnown.Zero.getZExtValue();
+  T InsertMask = RKnown.Zero.getZExtValue();

-  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
+  // If every bit of the result comes from one of the two source registers,
+  // we can use a rldimi/rlwimi instruction.
+  if ((TargetMask | InsertMask) == (T)-1) {
     unsigned Op0Opc = Op0.getOpcode();
     unsigned Op1Opc = Op1.getOpcode();
-    unsigned Value, SH = 0;
+    unsigned Value = 0, SH = 0;
     TargetMask = ~TargetMask;
     InsertMask = ~InsertMask;

@@ -726,7 +771,8 @@
     if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
         isInt32Immediate(Op1.getOperand(1), Value)) {
       Op1 = Op1.getOperand(0);
-      SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
+      SH = (Op1Opc == ISD::SHL) ? Value : BitSize - Value;
+      Op1Opc = Op1.getOpcode();
     }
     if (Op1Opc == ISD::AND) {
       // The AND mask might not be a constant, and we need to make sure that
@@ -742,15 +788,44 @@
         // Note that Value must be in range here (less than 32) because
         // otherwise there would not be any bits set in InsertMask.
         Op1 = Op1.getOperand(0).getOperand(0);
-        SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
+        SH = (SHOpc == ISD::SHL) ? Value : BitSize - Value;
+        Op1Opc = Op1.getOpcode();
       }
     }

-    SH &= 31;
-    SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
-                      getI32Imm(ME, dl) };
-    ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
-    return true;
+    SH &= (BitSize - 1);
+
+    if (BitSize == 32) {
+      // We can eliminate an AND if it is already subsumed by the rlwimi mask.
+      if (Op1Opc == ISD::AND && isInt32Immediate(Op1.getOperand(1), Value) &&
+          (Value << SH) == InsertMask)
+        Op1 = Op1.getOperand(0);
+      if (Op0Opc == ISD::AND && isInt32Immediate(Op0.getOperand(1), Value) &&
+          Value == ~InsertMask)
+        Op0 = Op0.getOperand(0);
+
+      SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
+                        getI32Imm(ME, dl) };
+      ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
+      return true;
+    }
+    else {
+      // rldimi has no ME operand; the mask always ends at bit 63 - SH.
+      if (ME == 63 - SH) {
+        // We can eliminate an AND if it is already subsumed by the rldimi mask.
+        uint64_t Value64 = 0;
+        if (Op1Opc == ISD::AND && isInt64Immediate(Op1.getOperand(1), Value64) &&
+            (Value64 << SH) == InsertMask)
+          Op1 = Op1.getOperand(0);
+        if (Op0Opc == ISD::AND && isInt64Immediate(Op0.getOperand(1), Value64) &&
+            Value64 == ~InsertMask)
+          Op0 = Op0.getOperand(0);
+
+        SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl) };
+        ReplaceNode(N, CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops));
+        return true;
+      }
+    }
     }
   }
   return false;
@@ -3978,7 +4053,14 @@
       N->getOperand(1).getOpcode() == ISD::TargetConstant)
     llvm_unreachable("Invalid ADD with TargetConstant operand");

-  // Try matching complex bit permutations before doing anything else.
+  // Look for a chance to use a rotate left immediate then mask insert for a
+  // simple bitfield insert, i.e. (or (and %a, MASK), (and %b, ~MASK)), before
+  // tryBitPermutation, which may generate suboptimal code for such patterns;
+  // leave more complicated cases to tryBitPermutation.
+  if (tryBitfieldInsert(N))
+    return;
+
+  // Try matching complex bit permutations next.
   if (tryBitPermutation(N))
     return;

@@ -4268,9 +4350,9 @@
     break;
   }
   case ISD::OR: {
-    if (N->getValueType(0) == MVT::i32)
-      if (tryBitfieldInsert(N))
-        return;
+    if ((N->getValueType(0) == MVT::i32 && tryRotateThenMaskInsert<uint32_t>(N)) ||
+        (N->getValueType(0) == MVT::i64 && tryRotateThenMaskInsert<uint64_t>(N)))
+      return;

     int16_t Imm;
     if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
Index: test/CodeGen/PowerPC/addi-offset-fold.ll
===================================================================
--- test/CodeGen/PowerPC/addi-offset-fold.ll
+++ test/CodeGen/PowerPC/addi-offset-fold.ll
@@ -30,9 +30,8 @@
 ; CHECK: ori 2, 2, 0
 ; CHECK-DAG: lbz [[REG1:[0-9]+]], -16(1)
 ; CHECK-DAG: lwz [[REG2:[0-9]+]], -20(1)
-; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG1]], 32
-; CHECK-DAG: or [[REG4:[0-9]+]], [[REG2]], [[REG3]]
-; CHECK: rldicl 3, [[REG4]], 33, 57
+; CHECK-DAG: rldimi [[REG3:[0-9]+]], [[REG1]], 32, 24
+; CHECK: rldicl 3, [[REG3]], 33, 57
 ; CHECK: blr
 }

Index: test/CodeGen/PowerPC/bitfieldinsert.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/bitfieldinsert.ll
@@ -0,0 +1,63 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+; bitfieldinsert64: Test for rldimi
+; equivalent C code
+; struct s64 {
+;   int a:5;
+;   int b:16;
+;   long c:42;
+; };
+; void bitfieldinsert64(struct s64 *p, unsigned short v) {
+;   p->b = v;
+; }
+
+%struct.s64 = type { i64 }
+
+define void @bitfieldinsert64(%struct.s64* nocapture %p, i16 zeroext %v) {
+; CHECK-LABEL: @bitfieldinsert64
+; CHECK: ld [[REG1:[0-9]+]], 0(3)
+; CHECK: rldimi [[REG1]], 4, 5, 43
+; CHECK: std [[REG1]], 0(3)
+; CHECK: blr
+entry:
+  %0 = getelementptr inbounds %struct.s64, %struct.s64* %p, i64 0, i32 0
+  %1 = zext i16 %v to i64
+  %bf.load = load i64, i64* %0, align 8
+  %bf.shl = shl nuw nsw i64 %1, 5
+  %bf.clear = and i64 %bf.load, -2097121
+  %bf.set = or i64 %bf.clear, %bf.shl
+  store i64 %bf.set, i64* %0, align 8
+  ret void
+}
+
+; bitfieldinsert32: Test for rlwimi
+; equivalent C code
+; struct s32 {
+;   int a:8;
+;   int b:16;
+;   int c:8;
+; };
+; void bitfieldinsert32(struct s32 *p, unsigned int v) {
+;   p->b = v;
+; }
+
+%struct.s32 = type { i32 }
+
+define void @bitfieldinsert32(%struct.s32* nocapture %p, i32 zeroext %v) {
+; CHECK-LABEL: @bitfieldinsert32
+; CHECK: lwz [[REG1:[0-9]+]], 0(3)
+; CHECK: rlwimi [[REG1]], 4, 8, 8, 23
+; CHECK: stw [[REG1]], 0(3)
+; CHECK: blr
+entry:
+  %0 = getelementptr inbounds %struct.s32, %struct.s32* %p, i64 0, i32 0
+  %bf.load = load i32, i32* %0, align 4
+  %bf.value = shl i32 %v, 8
+  %bf.shl = and i32 %bf.value, 16776960
+  %bf.clear = and i32 %bf.load, -16776961
+  %bf.set = or i32 %bf.clear, %bf.shl
+  store i32 %bf.set, i32* %0, align 4
+  ret void
+}
+
Index: test/CodeGen/PowerPC/ppc64le-aggregates.ll
===================================================================
--- test/CodeGen/PowerPC/ppc64le-aggregates.ll
+++ test/CodeGen/PowerPC/ppc64le-aggregates.ll
@@ -236,14 +236,12 @@
 ; CHECK-DAG: stfs 6, [[OFF1:[0-9]+]](1)
 ; CHECK-DAG: stfs 7, [[OFF2:[0-9]+]](1)
 ; CHECK-DAG: stfs 8, [[OFF3:[0-9]+]](1)
-; CHECK-DAG: lwz [[REG0:[0-9]+]], [[OFF0]](1)
+; CHECK-DAG: lwz 9, [[OFF0]](1)
 ; CHECK-DAG: lwz [[REG1:[0-9]+]], [[OFF1]](1)
-; CHECK-DAG: lwz [[REG2:[0-9]+]], [[OFF2]](1)
+; CHECK-DAG: lwz 10, [[OFF2]](1)
 ; CHECK-DAG: lwz [[REG3:[0-9]+]], [[OFF3]](1)
-; CHECK-DAG: sldi [[REG1]], [[REG1]], 32
-; CHECK-DAG: sldi [[REG3]], [[REG3]], 32
-; CHECK-DAG: or 9, [[REG0]], [[REG1]]
-; CHECK-DAG: or 10, [[REG2]], [[REG3]]
+; CHECK-DAG: rldimi 9, [[REG1]], 32, 0
+; CHECK-DAG: rldimi 10, [[REG3]], 32, 0
 ; CHECK: bl test1

 declare void @test1([8 x float], [8 x float])
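
For reference, a minimal sketch (illustrative only, not part of the patch) of the scalar semantics behind the DAG pattern that tryBitfieldInsert matches; the helper name bitfield_insert is made up for this example:

  #include <stdint.h>

  // (or (and %val, MASK), (and %base, ~MASK)): take the MASK bits from 'val'
  // and every other bit from 'base'. When MASK is a contiguous run of ones
  // (see isRunOfOnes) and the inserted value only needs a rotate to line up
  // with the mask, PowerPC can perform the whole expression with a single
  // rlwimi (32-bit) or rldimi (64-bit).
  static inline uint64_t bitfield_insert(uint64_t base, uint64_t val,
                                         uint64_t mask) {
    return (val & mask) | (base & ~mask);
  }

Note that rldimi has no mask-end operand: the inserted run always ends at bit 63 - SH, which is why the 64-bit path in tryRotateThenMaskInsert only fires when ME == 63 - SH.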