Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -260,6 +260,7 @@
                                        SDNode *Node) const override;
 
     SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
+    SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
     bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -10397,6 +10397,104 @@
   return SDValue();
 }
 
+static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
+                             APInt &KnownOne) {
+  if (Op.getOpcode() == ARMISD::BFI) {
+    // Conservatively, we can recurse down the first operand
+    // and just mask out all affected bits.
+    computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
+
+    // The operand to BFI is already a mask suitable for removing the bits it
+    // sets.
+    ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
+    APInt Mask = CI->getAPIntValue();
+    KnownZero &= Mask;
+    KnownOne &= Mask;
+    return;
+  }
+  return DAG.computeKnownBits(Op, KnownZero, KnownOne);
+}
+
+SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
+  // If we have a CMOV, OR and AND combination such as:
+  //   if (x & CN)
+  //     y |= CM;
+  //
+  // And:
+  //   * CN is a single bit;
+  //   * All bits covered by CM are known zero in y
+  //
+  // Then we can convert this into a sequence of BFI instructions. This will
+  // always be a win if CM is a single bit, will always be no worse than the
+  // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
+  // three bits (due to the extra IT instruction).
+
+  SDValue Op0 = CMOV->getOperand(0);
+  SDValue Op1 = CMOV->getOperand(1);
+  SDValue CmpZ = CMOV->getOperand(4);
+
+  assert(CmpZ->getOpcode() == ARMISD::CMPZ);
+  SDValue And = CmpZ->getOperand(0);
+  if (And->getOpcode() != ISD::AND)
+    return SDValue();
+  ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
+  if (!AndC || !AndC->getAPIntValue().isPowerOf2())
+    return SDValue();
+  SDValue X = And->getOperand(0);
+
+  // Canonicalize so that the OR is on the left.
+  if (Op1->getOpcode() == ISD::OR)
+    std::swap(Op0, Op1);
+  if (Op0->getOpcode() != ISD::OR)
+    return SDValue();
+
+  ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op0->getOperand(1));
+  if (!OrC)
+    return SDValue();
+  SDValue Y = Op0->getOperand(0);
+
+  if (Op1 != Y)
+    return SDValue();
+
+  // Now, is it profitable to continue?
+  APInt OrCI = OrC->getAPIntValue();
+  unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
+  if (OrCI.countPopulation() > Heuristic)
+    return SDValue();
+
+  // Lastly, can we determine that the bits defined by OrCI
+  // are zero in Y?
+  APInt KnownZero, KnownOne;
+  computeKnownBits(DAG, Y, KnownZero, KnownOne);
+  if ((OrCI & KnownZero) != OrCI)
+    return SDValue();
+
+  // OK, we can do the combine.
+  SDValue V = Y;
+  SDLoc dl(X);
+  EVT VT = X.getValueType();
+  unsigned BitInX = AndC->getAPIntValue().logBase2();
+
+  if (BitInX != 0) {
+    // We must shift X first.
+    X = DAG.getNode(ISD::SRL, dl, VT, X,
+                    DAG.getConstant(BitInX, dl, VT));
+  }
+
+  for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
+       BitInY < NumActiveBits; ++BitInY) {
+    if (OrCI[BitInY] == 0)
+      continue;
+    APInt Mask(VT.getSizeInBits(), 0);
+    Mask.setBit(BitInY);
+    V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
+                    // Confusingly, the operand is an *inverted* mask...
+                    DAG.getConstant(~Mask, dl, VT));
+  }
+
+  return V;
+}
+
 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
 SDValue
 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
@@ -10415,6 +10513,13 @@
   ARMCC::CondCodes CC =
     (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
 
+  // BFI is only available on V6T2+
+  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
+    SDValue R = PerformCMOVToBFICombine(N, DAG);
+    if (R)
+      return R;
+  }
+
   // Simplify
   //   mov     r1, r0
   //   cmp     r1, x
Index: test/CodeGen/ARM/bfi.ll
===================================================================
--- test/CodeGen/ARM/bfi.ll
+++ test/CodeGen/ARM/bfi.ll
@@ -74,3 +74,14 @@
   %or = or i32 %shl, %and
   ret i32 %or
 }
+
+define i32 @f7(i32 %x, i32 %y) {
+; CHECK-LABEL: f7:
+; CHECK: bfi r1, r0, #4, #1
+  %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
+  %and = and i32 %x, 4
+  %or = or i32 %y2, 16
+  %cmp = icmp ne i32 %and, 0
+  %sel = select i1 %cmp, i32 %or, i32 %y2
+  ret i32 %sel
+}
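
For reference only (not part of the patch): a C-level sketch of the source pattern the new combine targets, equivalent to the @f7 test added above. The function name is illustrative, and the expected lowering is an assumption based on the combine's own profitability comment.

    // CN = 4 is a single-bit test on x; CM = 16 sets a single bit of y that is
    // already known zero (the low byte of y was masked off), so on v6T2+ the
    // select should lower to a BFI (plus a shift of x) instead of TST/ORR/MOV.
    unsigned f7_reference(unsigned x, unsigned y) {
      y &= 0xFFFFFF00; // bits covered by CM are known zero in y
      if (x & 4)       // CN: a power-of-two AND mask feeding the compare-with-zero
        y |= 16;       // CM: a single bit, so the combine is always a win
      return y;
    }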