Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -10357,95 +10357,17 @@ return SDValue(OR, 0); } -/// PerformORCombine - Target-specific dag combine xforms for ISD::OR -static SDValue PerformORCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const ARMSubtarget *Subtarget) { - // Attempt to use immediate-form VORR - BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); - SDLoc dl(N); - EVT VT = N->getValueType(0); - SelectionDAG &DAG = DCI.DAG; - - if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) - return SDValue(); - - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (BVN && Subtarget->hasNEON() && - BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - if (SplatBitSize <= 64) { - EVT VorrVT; - SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), - SplatUndef.getZExtValue(), SplatBitSize, - DAG, dl, VorrVT, VT.is128BitVector(), - OtherModImm); - if (Val.getNode()) { - SDValue Input = - DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); - SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); - return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); - } - } - } - - if (!Subtarget->isThumb1Only()) { - // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) - if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) - return Result; - if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget)) - return Result; - } - - // The code below optimizes (or (and X, Y), Z). - // The AND operand needs to have a single user to make these optimizations - // profitable. - SDValue N0 = N->getOperand(0); - if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) - return SDValue(); - SDValue N1 = N->getOperand(1); - - // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
- if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && - DAG.getTargetLoweringInfo().isTypeLegal(VT)) { - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - - APInt SplatBits0, SplatBits1; - BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); - BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); - // Ensure that the second operand of both ands are constants - if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs) { - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs) { - // Ensure that the bit width of the constants are the same and that - // the splat arguments are logical inverses as per the pattern we - // are trying to simplify. - if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && - SplatBits0 == ~SplatBits1) { - // Canonicalize the vector type to make instruction selection - // simpler. - EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; - SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, - N0->getOperand(1), - N0->getOperand(0), - N1->getOperand(0)); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); - } - } - } - } - - // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when - // reasonable. - +static SDValue PerformORCombineToBFI(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // BFI is only available on V6T2+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val @@ -10487,9 +10409,10 @@ DAG.getConstant(Val, DL, MVT::i32), DAG.getConstant(Mask, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false); - return SDValue(); + // Return value from the original node to inform the combiner that N is + // now dead. + return SDValue(N, 0); } } else if (N1.getOpcode() == ISD::AND) { // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask @@ -10513,9 +10436,10 @@ DAG.getConstant(amt, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, DAG.getConstant(Mask, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); - return SDValue(); + // Return value from the original node to inform the combiner that N is + // now dead. + return SDValue(N, 0); } else if (ARM::isBitFieldInvertedMask(~Mask) && (~Mask == Mask2)) { // The pack halfword instruction works better for masks that fit it, @@ -10529,9 +10453,10 @@ DAG.getConstant(lsb, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, DAG.getConstant(Mask2, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); - return SDValue(); + // Return value from the original node to inform the combiner that N is + // now dead. + return SDValue(N, 0); } } @@ -10549,10 +10474,102 @@ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), DAG.getConstant(~Mask, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); + // Return value from the original node to inform the combiner that N is + // now dead.
+ return SDValue(N, 0); + } + + return SDValue(); +} + +/// PerformORCombine - Target-specific dag combine xforms for ISD::OR +static SDValue PerformORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Attempt to use immediate-form VORR + BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); + SDLoc dl(N); + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN && Subtarget->hasNEON() && + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + if (SplatBitSize <= 64) { + EVT VorrVT; + SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, + DAG, dl, VorrVT, VT.is128BitVector(), + OtherModImm); + if (Val.getNode()) { + SDValue Input = + DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); + SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); + } + } + } + + if (!Subtarget->isThumb1Only()) { + // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) + if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) + return Result; + if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget)) + return Result; + } + + // The code below optimizes (or (and X, Y), Z). + // The AND operand needs to have a single user to make these optimizations + // profitable. + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) + return SDValue(); + SDValue N1 = N->getOperand(1); + + // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+ if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && + DAG.getTargetLoweringInfo().isTypeLegal(VT)) { + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + APInt SplatBits0, SplatBits1; + BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); + BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); + // Ensure that the second operand of both ands are constants + if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + // Ensure that the bit width of the constants are the same and that + // the splat arguments are logical inverses as per the pattern we + // are trying to simplify. + if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && + SplatBits0 == ~SplatBits1) { + // Canonicalize the vector type to make instruction selection + // simpler. + EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; + SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, + N0->getOperand(1), + N0->getOperand(0), + N1->getOperand(0)); + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + } + } } + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when + // reasonable. + if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget)) + return Res; + return SDValue(); }