Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1103,6 +1103,65 @@ return std::make_pair(Interesting = true, &Bits); } + case ISD::ZERO_EXTEND: { + // Here we try to increase the opportunity by reordering ZEXT and ANDI. + // We want to avoid ZEXT between two nodes that can be folded. + // + // For example, we modify these nodes + // t9: i32 = add t7, Constant:i32<1> + // t11: i32 = and t9, Constant:i32<255> + // t12: i64 = zero_extend t11 + // t14: i64 = shl t12, Constant:i64<2> + // into + // t9: i32 = add t7, Constant:i32<1> + // t25: i64 = any_extend t9 + // t27: i64 = and t25, Constant:i64<255> + // t14: i64 = shl t27, Constant:i64<2> + // to fold t27 and t14 into a rotate-and-mask instruction. + // Such a case often happens in array accesses with logical AND operation + // in an index, e.g. array[i & 0xFF]; + // + // We modify nodes only if the first operand of the AND node (t9 in the + // example) is not a supported logical opcode in tryBitPermutation to + // avoid disturbing another tryBitPermutation. + + auto IsSupportedInBitPermutation = [&](unsigned Opcode) { + return (Opcode == ISD::AND || Opcode == ISD::OR || + Opcode == ISD::SHL || Opcode == ISD::SRL || + Opcode == ISD::ROTL); + }; + + // We support only the case with zero extension from i32 to i64. + // Also we do not optimize if ZEXT or ANDI node has multiple uses. 
+ if (V.getValueType() != MVT::i64 || !V.hasOneUse()) + break; + + SDValue AndVal = V.getOperand(0); + if (AndVal.getOpcode() != ISD::AND || + AndVal.getValueType() != MVT::i32 || + !AndVal.hasOneUse() || + !isa<ConstantSDNode>(AndVal.getOperand(1)) || + IsSupportedInBitPermutation(AndVal.getOperand(0).getOpcode())) + break; + + uint64_t Mask = AndVal.getConstantOperandVal(1); + if ((Mask & 0xFFFFFFFF00000000uLL) != 0) + break; + + SDLoc DL(V.getNode()); + SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, DL, MVT::i32); + SDValue Mask64 = CurDAG->getConstant(Mask, DL, MVT::i64); + SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, DL, + MVT::i64), 0); + SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, DL, + MVT::i64, ImDef, + AndVal.getOperand(0), + SubRegIdx), 0); + SDValue And64Val = CurDAG->getNode(ISD::AND, DL, MVT::i64, ExtVal, + Mask64); + + return getValueBits(And64Val, NumBits); + } } for (unsigned i = 0; i < NumBits; ++i) Index: test/CodeGen/PowerPC/zext-bitperm.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/zext-bitperm.ll @@ -0,0 +1,23 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s + +; Test case for the ZEXT/ANDI reordering in getValueBits (PPCISelDAGToDAG.cpp). +; We expect the mask and the shift to be folded into a rlwinm instruction. 
+ +define zeroext i32 @func(i32* %p, i32 zeroext %i) { +; CHECK-LABEL: @func +; CHECK: addi [[REG1:[0-9]+]], 4, 1 +; CHECK: rlwinm [[REG2:[0-9]+]], [[REG1]], 2, 22, 29 +; CHECK-NOT: sldi +; CHECK: lwzx 3, 3, [[REG2]] +; CHECK: blr +entry: + %add = add i32 %i, 1 + %and = and i32 %add, 255 + %idxprom = zext i32 %and to i64 + %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + ret i32 %0 +} +