Index: lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
===================================================================
--- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1178,7 +1178,7 @@
   case PPC::RLWINMbm:
   case PPC::RLWINMobm: {
     unsigned MB, ME;
-    int64_t BM = Inst.getOperand(3).getImm();
+    unsigned BM = Inst.getOperand(3).getImm();
     if (!isRunOfOnes(BM, MB, ME))
       break;
 
@@ -1195,7 +1195,7 @@
   case PPC::RLWIMIbm:
   case PPC::RLWIMIobm: {
     unsigned MB, ME;
-    int64_t BM = Inst.getOperand(3).getImm();
+    unsigned BM = Inst.getOperand(3).getImm();
     if (!isRunOfOnes(BM, MB, ME))
       break;
 
@@ -1213,7 +1213,7 @@
   case PPC::RLWNMbm:
   case PPC::RLWNMobm: {
     unsigned MB, ME;
-    int64_t BM = Inst.getOperand(3).getImm();
+    unsigned BM = Inst.getOperand(3).getImm();
     if (!isRunOfOnes(BM, MB, ME))
       break;
 
Index: lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
===================================================================
--- lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -58,11 +58,23 @@
 /// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
 /// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
 /// since all 1s are not contiguous.
-static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+/// So far, isRunOfOnes supports only 32-bit and 64-bit unsigned integer types.
+template <typename T>
+static inline bool isRunOfOnes(T Val, unsigned &MB, unsigned &ME) {
+  static_assert(std::numeric_limits<T>::is_integer &&
+                    !std::numeric_limits<T>::is_signed &&
+                    (std::numeric_limits<T>::digits == 32 ||
+                     std::numeric_limits<T>::digits == 64),
+                "isRunOfOnes supports only 32-bit and 64-bit unsigned integer");
+
   if (!Val)
     return false;
 
-  if (isShiftedMask_32(Val)) {
+  const bool Is64Bit = (std::numeric_limits<T>::digits == 64);
+
+  bool IsShiftedMask = Is64Bit ? isShiftedMask_64(Val) :
+                                 isShiftedMask_32(Val);
+  if (IsShiftedMask) {
     // look for the first non-zero bit
     MB = countLeadingZeros(Val);
     // look for the first zero bit after the run of ones
@@ -70,7 +82,9 @@
     return true;
   } else {
     Val = ~Val; // invert mask
-    if (isShiftedMask_32(Val)) {
+    IsShiftedMask = Is64Bit ? isShiftedMask_64(Val) :
+                              isShiftedMask_32(Val);
+    if (IsShiftedMask) {
       // effectively look for the first zero bit
       ME = countLeadingZeros(Val) - 1;
       // effectively look for the first one bit after the run of zeros
Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -169,6 +169,11 @@
     bool tryBitfieldInsert(SDNode *N);
     bool tryBitPermutation(SDNode *N);
 
+    /// tryRotateThenMaskInsert - Returns true if N is replaced by
+    /// RLDIMI/RLWIMI instruction.
+    bool tryRotateThenMaskInsert32(SDNode *N);
+    bool tryRotateThenMaskInsert64(SDNode *N);
+
     /// SelectCC - Select a comparison of the specified values with the
     /// specified condition code, returning the CR# of the expression.
     SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
@@ -464,6 +469,12 @@
   return isInt32Immediate(N.getNode(), Imm);
 }
 
+/// isInt64Immediate - This method tests to see if the value is a 64-bit
+/// constant operand. If so Imm will receive the 64-bit value.
+static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
+  return isInt64Immediate(N.getNode(), Imm);
+}
+
 static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
                               const SDValue &DestMBB) {
   assert(isa<BasicBlockSDNode>(DestMBB));
@@ -577,9 +588,41 @@
   return false;
 }
 
+/// Find a subtree generated for bitfield insert and convert it with
+/// a rotate left then mask insert instruction.
+bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
+  // Expected nodes
+  //   %and1 = and i64 %val1, MASK
+  //   %and2 = and i64 %val2, ~MASK
+  //   %N = or i64 %and1, %and2
+  if (N->getOpcode() != ISD::OR)
+    return false;
+
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Op0->getOpcode() != ISD::AND || Op1->getOpcode() != ISD::AND)
+    return false;
+
+  if (N->getValueType(0) == MVT::i32) {
+    unsigned Mask1, Mask2;
+    if (isInt32Immediate(Op0->getOperand(1), Mask1) &&
+        isInt32Immediate(Op1->getOperand(1), Mask2) &&
+        Mask1 == ~Mask2)
+      return tryRotateThenMaskInsert32(N);
+  }
+  if (N->getValueType(0) == MVT::i64) {
+    uint64_t Mask1, Mask2;
+    if (isInt64Immediate(Op0->getOperand(1), Mask1) &&
+        isInt64Immediate(Op1->getOperand(1), Mask2) &&
+        Mask1 == ~Mask2)
+      return tryRotateThenMaskInsert64(N);
+  }
+  return false;
+}
+
 /// Turn an or of two masked values into the rotate left word immediate then
 /// mask insert (rlwimi) instruction.
-bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
+bool PPCDAGToDAGISel::tryRotateThenMaskInsert32(SDNode *N) {
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
   SDLoc dl(N);
@@ -594,7 +637,7 @@
   if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
     unsigned Op0Opc = Op0.getOpcode();
     unsigned Op1Opc = Op1.getOpcode();
-    unsigned Value, SH = 0;
+    unsigned Value = 0, SH = 0;
     TargetMask = ~TargetMask;
     InsertMask = ~InsertMask;
 
@@ -621,12 +664,11 @@
 
     unsigned MB, ME;
     if (isRunOfOnes(InsertMask, MB, ME)) {
-      SDValue Tmp1, Tmp2;
-
       if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
           isInt32Immediate(Op1.getOperand(1), Value)) {
         Op1 = Op1.getOperand(0);
         SH  = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
+        Op1Opc = Op1.getOpcode();
       }
       if (Op1Opc == ISD::AND) {
        // The AND mask might not be a constant, and we need to make sure that
@@ -643,10 +685,20 @@
           // otherwise there would not be any bits set in InsertMask.
           Op1 = Op1.getOperand(0).getOperand(0);
           SH  = (SHOpc == ISD::SHL) ? Value : 32 - Value;
+          Op1Opc = Op1.getOpcode();
         }
       }
 
       SH &= 31;
+
+      // We eliminate AND instructions if they are already folded into rlwimi.
+      if (Op1Opc == ISD::AND && isInt32Immediate(Op1.getOperand(1), Value) &&
+          (Value << SH) == InsertMask)
+        Op1 = Op1.getOperand(0);
+      if (Op0Opc == ISD::AND && isInt32Immediate(Op0.getOperand(1), Value) &&
+          Value == ~InsertMask)
+        Op0 = Op0.getOperand(0);
+
       SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
                         getI32Imm(ME, dl) };
       ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
@@ -656,6 +708,101 @@
   return false;
 }
 
+/// Turn an or of two masked values into the rotate left double word immediate
+/// then mask insert (rldimi) instruction.
+bool PPCDAGToDAGISel::tryRotateThenMaskInsert64(SDNode *N) {
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  SDLoc dl(N);
+
+  KnownBits LKnown, RKnown;
+  CurDAG->computeKnownBits(Op0, LKnown);
+  CurDAG->computeKnownBits(Op1, RKnown);
+
+  uint64_t TargetMask = LKnown.Zero.getZExtValue();
+  uint64_t InsertMask = RKnown.Zero.getZExtValue();
+
+  // If all bits come from two source registers,
+  // we can use rldimi instruction.
+  if ((TargetMask | InsertMask) == ~0ULL) {
+    uint64_t Op0Opc = Op0.getOpcode();
+    uint64_t Op1Opc = Op1.getOpcode();
+    uint64_t Value64 = 0;
+    unsigned Value = 0, SH = 0;
+    TargetMask = ~TargetMask;
+    InsertMask = ~InsertMask;
+
+    // If the LHS has a foldable shift and the RHS does not, then swap it to the
+    // RHS so that we can fold the shift into the insert.
+    if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
+      if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
+          Op0.getOperand(0).getOpcode() == ISD::SRL) {
+        if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
+            Op1.getOperand(0).getOpcode() != ISD::SRL) {
+          std::swap(Op0, Op1);
+          std::swap(Op0Opc, Op1Opc);
+          std::swap(TargetMask, InsertMask);
+        }
+      }
+    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
+      if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
+          Op1.getOperand(0).getOpcode() != ISD::SRL) {
+        std::swap(Op0, Op1);
+        std::swap(Op0Opc, Op1Opc);
+        std::swap(TargetMask, InsertMask);
+      }
+    }
+
+    unsigned MB, ME;
+    if (isRunOfOnes(InsertMask, MB, ME)) {
+      // to opt: We will have additional opportunity to use rldimi
+      // by supporting patterns with ZERO_EXTEND in Op0Opc or Op1Opc
+      if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
+          isInt32Immediate(Op1.getOperand(1), Value)) {
+        Op1 = Op1.getOperand(0);
+        SH = (Op1Opc == ISD::SHL) ? Value : 64 - Value;
+        Op1Opc = Op1.getOpcode();
+      }
+      if (Op1Opc == ISD::AND) {
+        // The AND mask might not be a constant, and we need to make sure that
+        // if we're going to fold the masking with the insert, all bits not
+        // know to be zero in the mask are known to be one.
+        KnownBits MKnown;
+        CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
+        bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
+
+        unsigned SHOpc = Op1.getOperand(0).getOpcode();
+        if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
+            isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
+          // Note that Value must be in range here (less than 32) because
+          // otherwise there would not be any bits set in InsertMask.
+          Op1 = Op1.getOperand(0).getOperand(0);
+          SH = (SHOpc == ISD::SHL) ? Value : 64 - Value;
+          Op1Opc = Op1.getOpcode();
+        }
+      }
+
+      SH &= 63;
+
+      // We cannot specify ME for rldimi; ~SH is used instead.
+      if (ME == 63 - SH) {
+        // We eliminate AND instructions if they are already folded into rldimi.
+        if (Op1Opc == ISD::AND && isInt64Immediate(Op1.getOperand(1), Value64) &&
+            (Value64 << SH) == InsertMask)
+          Op1 = Op1.getOperand(0);
+        if (Op0Opc == ISD::AND && isInt64Immediate(Op0.getOperand(1), Value64) &&
+            Value64 == ~InsertMask)
+          Op0 = Op0.getOperand(0);
+
+        SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl) };
+        ReplaceNode(N, CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops));
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 // Predict the number of instructions that would be generated by calling
 // getInt64(N).
 static unsigned getInt64CountDirect(int64_t Imm) {
@@ -3035,7 +3182,14 @@
       N->getOperand(1).getOpcode() == ISD::TargetConstant)
     llvm_unreachable("Invalid ADD with TargetConstant operand");
 
-  // Try matching complex bit permutations before doing anything else.
+  // Find opportunity to use rotate left immediate then mask insert instruction
+  // for a simple bitfield insert, i.e. (or (and %a, MASK) (and %b, ~MASK))
+  // before tryBitPermutation, which may generate suboptimal machine IR;
+  // Leave more complicated cases for tryBitPermutation.
+  if (tryBitfieldInsert(N))
+    return;
+
+  // Try matching complex bit permutations next.
   if (tryBitPermutation(N))
     return;
 
@@ -3316,9 +3470,9 @@
     break;
   }
   case ISD::OR: {
-    if (N->getValueType(0) == MVT::i32)
-      if (tryBitfieldInsert(N))
-        return;
+    if ((N->getValueType(0) == MVT::i32 && tryRotateThenMaskInsert32(N)) ||
+        (N->getValueType(0) == MVT::i64 && tryRotateThenMaskInsert64(N)))
+      return;
 
     if (tryLogicOpOfCompares(N))
       return;
Index: test/CodeGen/PowerPC/addi-offset-fold.ll
===================================================================
--- test/CodeGen/PowerPC/addi-offset-fold.ll
+++ test/CodeGen/PowerPC/addi-offset-fold.ll
@@ -30,9 +30,8 @@
 ; CHECK: ori 2, 2, 0
 ; CHECK-DAG: lbz [[REG1:[0-9]+]], -16(1)
 ; CHECK-DAG: lwz [[REG2:[0-9]+]], -20(1)
-; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG1]], 32
-; CHECK-DAG: or [[REG4:[0-9]+]], [[REG2]], [[REG3]]
-; CHECK: rldicl 3, [[REG4]], 33, 57
+; CHECK-DAG: rldimi [[REG3:[0-9]+]], [[REG1]], 32, 24
+; CHECK: rldicl 3, [[REG3]], 33, 57
 ; CHECK: blr
 }
 
Index: test/CodeGen/PowerPC/bitfieldinsert.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/bitfieldinsert.ll
@@ -0,0 +1,63 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+; bitfieldinsert64: Test for rldimi
+; equivalent C code
+;   struct s64 {
+;     int a:5;
+;     int b:16;
+;     long c:42;
+;   };
+;   void bitfieldinsert64(struct s64 *p, unsigned short v) {
+;     p->b = v;
+;   }
+
+%struct.s64 = type { i64 }
+
+define void @bitfieldinsert64(%struct.s64* nocapture %p, i16 zeroext %v) {
+; CHECK-LABEL: @bitfieldinsert64
+; CHECK: ld [[REG1:[0-9]+]], 0(3)
+; CHECK: rldimi [[REG1]], 4, 5, 43
+; CHECK: std [[REG1]], 0(3)
+; CHECK: blr
+entry:
+  %0 = getelementptr inbounds %struct.s64, %struct.s64* %p, i64 0, i32 0
+  %1 = zext i16 %v to i64
+  %bf.load = load i64, i64* %0, align 8
+  %bf.shl = shl nuw nsw i64 %1, 5
+  %bf.clear = and i64 %bf.load, -2097121
+  %bf.set = or i64 %bf.clear, %bf.shl
+  store i64 %bf.set, i64* %0, align 8
+  ret void
+}
+
+; bitfieldinsert32: Test for rlwimi
+; equivalent C code
+;   struct s32 {
+;     int a:8;
+;     int b:16;
+;     int c:8;
+;   };
+;   void bitfieldinsert32(struct s32 *p, unsigned int v) {
+;     p->b = v;
+;   }
+
+%struct.s32 = type { i32 }
+
+define void @bitfieldinsert32(%struct.s32* nocapture %p, i32 zeroext %v) {
+; CHECK-LABEL: @bitfieldinsert32
+; CHECK: lwz [[REG1:[0-9]+]], 0(3)
+; CHECK: rlwimi [[REG1]], 4, 8, 8, 23
+; CHECK: stw [[REG1]], 0(3)
+; CHECK: blr
+entry:
+  %0 = getelementptr inbounds %struct.s32, %struct.s32* %p, i64 0, i32 0
+  %bf.load = load i32, i32* %0, align 4
+  %bf.value = shl i32 %v, 8
+  %bf.shl = and i32 %bf.value, 16776960
+  %bf.clear = and i32 %bf.load, -16776961
+  %bf.set = or i32 %bf.clear, %bf.shl
+  store i32 %bf.set, i32* %0, align 4
+  ret void
+}
+
Index: test/CodeGen/PowerPC/ppc64le-aggregates.ll
===================================================================
--- test/CodeGen/PowerPC/ppc64le-aggregates.ll
+++ test/CodeGen/PowerPC/ppc64le-aggregates.ll
@@ -236,14 +236,12 @@
 ; CHECK-DAG: stfs 6, [[OFF1:[0-9]+]](1)
 ; CHECK-DAG: stfs 7, [[OFF2:[0-9]+]](1)
 ; CHECK-DAG: stfs 8, [[OFF3:[0-9]+]](1)
-; CHECK-DAG: lwz [[REG0:[0-9]+]], [[OFF0]](1)
+; CHECK-DAG: lwz 9, [[OFF0]](1)
 ; CHECK-DAG: lwz [[REG1:[0-9]+]], [[OFF1]](1)
-; CHECK-DAG: lwz [[REG2:[0-9]+]], [[OFF2]](1)
+; CHECK-DAG: lwz 10, [[OFF2]](1)
 ; CHECK-DAG: lwz [[REG3:[0-9]+]], [[OFF3]](1)
-; CHECK-DAG: sldi [[REG1]], [[REG1]], 32
-; CHECK-DAG: sldi [[REG3]], [[REG3]], 32
-; CHECK-DAG: or 9, [[REG0]], [[REG1]]
-; CHECK-DAG: or 10, [[REG2]], [[REG3]]
+; CHECK-DAG: rldimi 9, [[REG1]], 32, 0
+; CHECK-DAG: rldimi 10, [[REG3]], 32, 0
 ; CHECK: bl test1
 
 declare void @test1([8 x float], [8 x float])