# Changeset View

Changeset View

# Standalone View

Standalone View

# llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

- This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4172 Lines • ▼ Show 20 Line(s) | 4171 | bool X86TargetLowering::isCheapToSpeculateCttz() const { | |||
---|---|---|---|---|---|

4173 | return Subtarget.hasBMI(); | 4173 | return Subtarget.hasBMI(); | ||

4174 | } | 4174 | } | ||

4175 | 4175 | | |||

4176 | bool X86TargetLowering::isCheapToSpeculateCtlz() const { | 4176 | bool X86TargetLowering::isCheapToSpeculateCtlz() const { | ||

4177 | // Speculate ctlz only if we can directly use LZCNT. | 4177 | // Speculate ctlz only if we can directly use LZCNT. | ||

4178 | return Subtarget.hasLZCNT(); | 4178 | return Subtarget.hasLZCNT(); | ||

4179 | } | 4179 | } | ||

4180 | 4180 | | |||

4181 | bool X86TargetLowering::isCtlzFast() const { | ||||

4182 | return Subtarget.hasFastLZCNT(); | ||||

4183 | } | ||||

4184 | | ||||

4181 | bool X86TargetLowering::hasAndNotCompare(SDValue Y) const { | 4185 | bool X86TargetLowering::hasAndNotCompare(SDValue Y) const { | ||

4182 | if (!Subtarget.hasBMI()) | 4186 | if (!Subtarget.hasBMI()) | ||

4183 | return false; | 4187 | return false; | ||

4184 | 4188 | | |||

4185 | // There are only 32-bit and 64-bit forms for 'andn'. | 4189 | // There are only 32-bit and 64-bit forms for 'andn'. | ||

4186 | EVT VT = Y.getValueType(); | 4190 | EVT VT = Y.getValueType(); | ||

4187 | if (VT != MVT::i32 && VT != MVT::i64) | 4191 | if (VT != MVT::i32 && VT != MVT::i64) | ||

4188 | return false; | 4192 | return false; | ||

▲ Show 20 Lines • Show All 24896 Lines • ▼ Show 20 Line(s) | 28978 | static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, | |||

29085 | 29089 | | |||

29086 | X = DAG.getBitcast(BlendVT, X); | 29090 | X = DAG.getBitcast(BlendVT, X); | ||

29087 | Y = DAG.getBitcast(BlendVT, Y); | 29091 | Y = DAG.getBitcast(BlendVT, Y); | ||

29088 | Mask = DAG.getBitcast(BlendVT, Mask); | 29092 | Mask = DAG.getBitcast(BlendVT, Mask); | ||

29089 | Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X); | 29093 | Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X); | ||

29090 | return DAG.getBitcast(VT, Mask); | 29094 | return DAG.getBitcast(VT, Mask); | ||

29091 | } | 29095 | } | ||

29092 | 29096 | | |||

29097 | // Helper function for combineOrCmpEqZeroToCtlzSrl | ||||

29098 | // Transforms: | ||||

29099 | // seteq(cmp x, 0) | ||||

29100 | // into: | ||||

29101 | // srl(ctlz x), log2(bitsize(x)) | ||||

29102 | // Input pattern is checked by caller. | ||||

29103 | SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, SelectionDAG &DAG) { | ||||

29104 | SDValue Cmp = Op.getOperand(1); | ||||

29105 | EVT VT = Cmp.getOperand(0).getValueType(); | ||||

29106 | unsigned Log2b = Log2_32(VT.getSizeInBits()); | ||||

29107 | SDLoc dl(Op); | ||||

29108 | SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0)); | ||||

29109 | // The result of the shift is true or false, and on X86, the 32-bit | ||||

29110 | // encoding of shr and lzcnt is more desirable. | ||||

29111 | SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32); | ||||

29112 | SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc, | ||||

29113 | DAG.getConstant(Log2b, dl, VT)); | ||||

29114 | return DAG.getZExtOrTrunc(Scc, dl, ExtTy); | ||||

29115 | } | ||||

29116 | | ||||

29117 | // Try to transform: | ||||

29118 | // zext(or(setcc(eq, (cmp x, 0)), setcc(eq, (cmp y, 0)))) | ||||

29119 | // into: | ||||

29120 | // srl(or(ctlz(x), ctlz(y)), log2(bitsize(x)) | ||||

29121 | // Will also attempt to match more generic cases, eg: | ||||

29122 | // zext(or(or(setcc(eq, cmp 0), setcc(eq, cmp 0)), setcc(eq, cmp 0))) | ||||

29123 | // Only applies if the target supports the FastLZCNT feature. | ||||

29124 | static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, | ||||

29125 | TargetLowering::DAGCombinerInfo &DCI, | ||||

29126 | const X86Subtarget &Subtarget) { | ||||

29127 | if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast()) | ||||

29128 | return SDValue(); | ||||

29129 | | ||||

29130 | auto isORCandidate = [](SDValue N) { | ||||

29131 | return (N->getOpcode() == ISD::OR && N->hasOneUse()); | ||||

29132 | }; | ||||

29133 | | ||||

29134 | // Check the zero extend is extending to 32-bit or more. The code generated by | ||||

29135 | // srl(ctlz) for 16-bit or less variants of the pattern would require extra | ||||

29136 | // instructions to clear the upper bits. | ||||

29137 | if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) || | ||||

29138 | !isORCandidate(N->getOperand(0))) | ||||

29139 | return SDValue(); | ||||

29140 | | ||||

29141 | // Check the node matches: setcc(eq, cmp 0) | ||||

29142 | auto isSetCCCandidate = [](SDValue N) { | ||||

29143 | return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() && | ||||

29144 | X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E && | ||||

29145 | N->getOperand(1).getOpcode() == X86ISD::CMP && | ||||

29146 | N->getOperand(1).getConstantOperandVal(1) == 0 && | ||||

29147 | N->getOperand(1).getValueType().bitsGE(MVT::i32); | ||||

29148 | }; | ||||

29149 | | ||||

29150 | SDNode *OR = N->getOperand(0).getNode(); | ||||

29151 | SDValue LHS = OR->getOperand(0); | ||||

29152 | SDValue RHS = OR->getOperand(1); | ||||

29153 | | ||||

29154 | // Save nodes matching or(or, setcc(eq, cmp 0)). | ||||

29155 | SmallVector<SDNode *, 2> ORNodes; | ||||

29156 | while (((isORCandidate(LHS) && isSetCCCandidate(RHS)) || | ||||

29157 | (isORCandidate(RHS) && isSetCCCandidate(LHS)))) { | ||||

29158 | ORNodes.push_back(OR); | ||||

29159 | OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode(); | ||||

29160 | LHS = OR->getOperand(0); | ||||

29161 | RHS = OR->getOperand(1); | ||||

29162 | } | ||||

29163 | | ||||

29164 | // The last OR node should match or(setcc(eq, cmp 0), setcc(eq, cmp 0)). | ||||

29165 | if (!(isSetCCCandidate(LHS) && isSetCCCandidate(RHS)) || | ||||

29166 | !isORCandidate(SDValue(OR, 0))) | ||||

29167 | return SDValue(); | ||||

29168 | | ||||

29169 | // We have a or(setcc(eq, cmp 0), setcc(eq, cmp 0)) pattern, try to lower it | ||||

29170 | // to | ||||

29171 | // or(srl(ctlz),srl(ctlz)). | ||||

29172 | // The dag combiner can then fold it into: | ||||

29173 | // srl(or(ctlz, ctlz)). | ||||

29174 | EVT VT = OR->getValueType(0); | ||||

29175 | SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG); | ||||

29176 | SDValue Ret, NewRHS; | ||||

29177 | if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG))) | ||||

29178 | Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS); | ||||

29179 | | ||||

29180 | if (!Ret) | ||||

29181 | return SDValue(); | ||||

29182 | | ||||

29183 | // Try to lower nodes matching the or(or, setcc(eq, cmp 0)) pattern. | ||||

29184 | while (ORNodes.size() > 0) { | ||||

29185 | OR = ORNodes.pop_back_val(); | ||||

29186 | LHS = OR->getOperand(0); | ||||

29187 | RHS = OR->getOperand(1); | ||||

29188 | // Swap rhs with lhs to match or(setcc(eq, cmp, 0), or). | ||||

29189 | if (RHS->getOpcode() == ISD::OR) | ||||

29190 | std::swap(LHS, RHS); | ||||

29191 | EVT VT = OR->getValueType(0); | ||||

29192 | SDValue NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG); | ||||

29193 | if (!NewRHS) | ||||

29194 | return SDValue(); | ||||

29195 | Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS); | ||||

29196 | } | ||||

29197 | | ||||

29198 | if (Ret) | ||||

29199 | Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret); | ||||

29200 | | ||||

29201 | return Ret; | ||||

29202 | } | ||||

29203 | | ||||

29093 | static SDValue combineOr(SDNode *N, SelectionDAG &DAG, | 29204 | static SDValue combineOr(SDNode *N, SelectionDAG &DAG, | ||

29094 | TargetLowering::DAGCombinerInfo &DCI, | 29205 | TargetLowering::DAGCombinerInfo &DCI, | ||

29095 | const X86Subtarget &Subtarget) { | 29206 | const X86Subtarget &Subtarget) { | ||

29096 | if (DCI.isBeforeLegalizeOps()) | 29207 | if (DCI.isBeforeLegalizeOps()) | ||

29097 | return SDValue(); | 29208 | return SDValue(); | ||

29098 | 29209 | | |||

29099 | if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget)) | 29210 | if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget)) | ||

29100 | return R; | 29211 | return R; | ||

▲ Show 20 Lines • Show All 2015 Lines • ▼ Show 20 Line(s) | 31226 | if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget)) | |||

31116 | return R; | 31227 | return R; | ||

31117 | 31228 | | |||

31118 | if (SDValue DivRem8 = getDivRem8(N, DAG)) | 31229 | if (SDValue DivRem8 = getDivRem8(N, DAG)) | ||

31119 | return DivRem8; | 31230 | return DivRem8; | ||

31120 | 31231 | | |||

31121 | if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget)) | 31232 | if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget)) | ||

31122 | return NewAdd; | 31233 | return NewAdd; | ||

31123 | 31234 | | |||

31235 | if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget)) | ||||

31236 | return R; | ||||

31237 | | ||||

31124 | return SDValue(); | 31238 | return SDValue(); | ||

31125 | } | 31239 | } | ||

31126 | 31240 | | |||

31127 | /// Optimize x == -y --> x+y == 0 | 31241 | /// Optimize x == -y --> x+y == 0 | ||

31128 | /// x != -y --> x+y != 0 | 31242 | /// x != -y --> x+y != 0 | ||

31129 | static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, | 31243 | static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, | ||

31130 | const X86Subtarget &Subtarget) { | 31244 | const X86Subtarget &Subtarget) { | ||

31131 | ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); | 31245 | ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); | ||

▲ Show 20 Lines • Show All 1580 Lines • Show Last 20 Lines |