Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -2889,16 +2889,6 @@ bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, TargetLoweringOpt &TLO) const; - /// Helper for SimplifyDemandedBits that can simplify an operation with - /// multiple uses. This function simplifies operand \p OpIdx of \p User and - /// then updates \p User with the simplified version. No other uses of - /// \p OpIdx are updated. If \p User is the only user of \p OpIdx, this - /// function behaves exactly like function SimplifyDemandedBits declared - /// below except that it also updates the DAG by calling - /// DCI.CommitTargetLoweringOpt. - bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded, - DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const; - /// Look at Op. At this point, we know that only the DemandedBits bits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -431,56 +431,6 @@ return false; } -bool -TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, - const APInt &DemandedBits, - DAGCombinerInfo &DCI, - TargetLoweringOpt &TLO) const { - SDValue Op = User->getOperand(OpIdx); - KnownBits Known; - - if (!SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, true)) - return false; - - - // Old will not always be the same as Op. For example: - // - // Demanded = 0xffffff - // Op = i64 truncate (i32 and x, 0xffffff) - // In this case simplify demand bits will want to replace the 'and' node - // with the value 'x', which will give us: - // Old = i32 and x, 0xffffff - // New = x - if (TLO.Old.hasOneUse()) { - // For the one use case, we just commit the change. - DCI.CommitTargetLoweringOpt(TLO); - return true; - } - - // If Old has more than one use then it must be Op, because the - // AssumeSingleUse flag is not propogated to recursive calls of - // SimplifyDemanded bits, so the only node with multiple use that - // it will attempt to combine will be Op. - assert(TLO.Old == Op); - - SmallVector NewOps; - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { - if (i == OpIdx) { - NewOps.push_back(TLO.New); - continue; - } - NewOps.push_back(User->getOperand(i)); - } - User = TLO.DAG.UpdateNodeOperands(User, NewOps); - // Op has less users now, so we may be able to perform additional combines - // with it. - DCI.AddToWorklist(Op.getNode()); - // User's operands have been updated, so we may be able to do new combines - // with it. - DCI.AddToWorklist(User); - return true; -} - bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2717,21 +2717,34 @@ AMDGPUTargetLowering::numBitsSigned(Op, DAG) < 24; } -static bool simplifyI24(SDNode *Node24, unsigned OpIdx, - TargetLowering::DAGCombinerInfo &DCI) { - +static SDValue simplifyI24(SDNode *Node24, + TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; - SDValue Op = Node24->getOperand(OpIdx); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = Op.getValueType(); + EVT VT = Node24->getOperand(0).getValueType(); APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24); - APInt KnownZero, KnownOne; - TargetLowering::TargetLoweringOpt TLO(DAG, true, true); - if (TLI.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI, TLO)) - return true; - return false; + // First try to simplify using GetDemandedBits which allows the operands to + // have other uses, but will only perform simplifications that involve + // bypassing some nodes for this user. + SDValue LHS = Node24->getOperand(0); + SDValue RHS = Node24->getOperand(1); + SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded); + SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded); + if (DemandedLHS || DemandedRHS) + return DAG.getNode(Node24->getOpcode(), SDLoc(Node24), Node24->getVTList(), + DemandedLHS ? DemandedLHS : LHS, + DemandedRHS ? DemandedRHS : RHS); + + // Now try SimplifyDemandedBits which can simplify the nodes used by our + // operands if this node is the only user. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI)) + return SDValue(Node24, 0); + if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI)) + return SDValue(Node24, 0); + + return SDValue(); } template @@ -3279,8 +3292,8 @@ SelectionDAG &DAG = DCI.DAG; // Simplify demanded bits before splitting into multiple users. - if (simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI)) - return SDValue(); + if (SDValue V = simplifyI24(N, DCI)) + return V; SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3866,9 +3879,8 @@ case AMDGPUISD::MUL_U24: case AMDGPUISD::MULHI_I24: case AMDGPUISD::MULHI_U24: { - // If the first call to simplify is successfull, then N may end up being - // deleted, so we shouldn't call simplifyI24 again. - simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI); + if (SDValue V = simplifyI24(N, DCI)) + return V; return SDValue(); } case AMDGPUISD::MUL_LOHI_I24: @@ -4296,25 +4308,36 @@ RHSKnown.countMinTrailingZeros(); Known.Zero.setLowBits(std::min(TrailZ, 32u)); - unsigned LHSValBits = 32 - std::max(LHSKnown.countMinSignBits(), 8u); - unsigned RHSValBits = 32 - std::max(RHSKnown.countMinSignBits(), 8u); - unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u); - if (MaxValBits >= 32) - break; + // Truncate to 24 bits. + LHSKnown = LHSKnown.trunc(24); + RHSKnown = RHSKnown.trunc(24); + bool Negative = false; if (Opc == AMDGPUISD::MUL_I24) { - bool LHSNegative = !!(LHSKnown.One & (1 << 23)); - bool LHSPositive = !!(LHSKnown.Zero & (1 << 23)); - bool RHSNegative = !!(RHSKnown.One & (1 << 23)); - bool RHSPositive = !!(RHSKnown.Zero & (1 << 23)); + unsigned LHSValBits = 24 - LHSKnown.countMinSignBits(); + unsigned RHSValBits = 24 - RHSKnown.countMinSignBits(); + unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u); + if (MaxValBits >= 32) + break; + bool LHSNegative = LHSKnown.isNegative(); + bool LHSPositive = LHSKnown.isNonNegative(); + bool RHSNegative = RHSKnown.isNegative(); + bool RHSPositive = RHSKnown.isNonNegative(); if ((!LHSNegative && !LHSPositive) || (!RHSNegative && !RHSPositive)) break; Negative = (LHSNegative && RHSPositive) || (LHSPositive && RHSNegative); - } - if (Negative) - Known.One.setHighBits(32 - MaxValBits); - else + if (Negative) + Known.One.setHighBits(32 - MaxValBits); + else + Known.Zero.setHighBits(32 - MaxValBits); + } else { + unsigned LHSValBits = 24 - LHSKnown.countMinLeadingZeros(); + unsigned RHSValBits = 24 - RHSKnown.countMinLeadingZeros(); + unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u); + if (MaxValBits >= 32) + break; Known.Zero.setHighBits(32 - MaxValBits); + } break; } case AMDGPUISD::PERM: { Index: test/CodeGen/AMDGPU/lshl64-to-32.ll =================================================================== --- test/CodeGen/AMDGPU/lshl64-to-32.ll +++ test/CodeGen/AMDGPU/lshl64-to-32.ll @@ -123,7 +123,7 @@ ; GCN-NEXT: s_mov_b64 s[2:3], s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_or_b32_e32 v1, 0x800000, v1 -; GCN-NEXT: v_mul_i32_i24_e32 v1, -7, v1 +; GCN-NEXT: v_mul_i32_i24_e32 v1, 0xfffff9, v1 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v1 ; GCN-NEXT: v_lshlrev_b32_e32 v3, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v4, v2