Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -31876,31 +31876,29 @@
     if (VT.is512BitVector())
       return SDValue();
 
-    assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
-    APInt DemandedMask(APInt::getSignMask(BitWidth));
-    KnownBits Known;
-    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
-                                          !DCI.isBeforeLegalizeOps());
-    if (TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO)) {
-      // If we changed the computation somewhere in the DAG, this change will
-      // affect all users of Cond. Make sure it is fine and update all the nodes
-      // so that we do not use the generic VSELECT anymore. Otherwise, we may
-      // perform wrong optimizations as we messed with the actual expectation
-      // for the vector boolean values.
-      if (Cond != TLO.Old) {
-        // Check all uses of the condition operand to check whether it will be
-        // consumed by non-BLEND instructions. Those may require that all bits
-        // are set properly.
-        for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
-             UI != UE; ++UI) {
-          // TODO: Add other opcodes eventually lowered into BLEND.
-          if (UI->getOpcode() != ISD::VSELECT || UI.getOperandNo() != 0)
-            return SDValue();
-        }
+    bool CanShrinkCond = true;
+    for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
+         UI != UE; ++UI) {
+      // TODO: Add other opcodes eventually lowered into BLEND.
+      if (UI->getOpcode() != ISD::VSELECT || UI.getOperandNo() != 0) {
+        CanShrinkCond = false;
+        break;
+      }
+    }
 
-        // Update all users of the condition before committing the change, so
-        // that the VSELECT optimizations that expect the correct vector boolean
-        // value will not be triggered.
+    if (CanShrinkCond) {
+      assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
+      APInt DemandedMask(APInt::getSignMask(BitWidth));
+      KnownBits Known;
+      TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+                                            !DCI.isBeforeLegalizeOps());
+      if (TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO, 0,
+                                   /*AssumeSingleUse*/true)) {
+        // If we changed the computation somewhere in the DAG, this change will
+        // affect all users of Cond. Update all the nodes so that we do not use
+        // the generic VSELECT anymore. Otherwise, we may perform wrong
+        // optimizations as we messed with the actual expectation for the vector
+        // boolean values.
         for (SDNode *U : Cond->uses()) {
           SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(U),
                                    U->getValueType(0), Cond, U->getOperand(1),
@@ -31908,14 +31906,8 @@
           DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
         }
         DCI.CommitTargetLoweringOpt(TLO);
-        return SDValue();
+        return SDValue(N, 0);
       }
-      // Only Cond (rather than other nodes in the computation chain) was
-      // changed. Change the condition just for N to keep the opportunity to
-      // optimize all other users their own way.
-      SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, DL, VT, TLO.New, LHS, RHS);
-      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), SB);
-      return SDValue();
     }
   }
 
Index: test/CodeGen/X86/vselect.ll
===================================================================
--- test/CodeGen/X86/vselect.ll
+++ test/CodeGen/X86/vselect.ll
@@ -503,8 +503,6 @@
 ; SSE41-LABEL: shrunkblend_2uses:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    psllq $63, %xmm0
-; SSE41-NEXT:    psrad $31, %xmm0
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
 ; SSE41-NEXT:    blendvpd %xmm0, %xmm3, %xmm4
 ; SSE41-NEXT:    paddq %xmm2, %xmm4
@@ -514,8 +512,6 @@
 ; AVX-LABEL: shrunkblend_2uses:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpsllq $63, %xmm0, %xmm0
-; AVX-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; AVX-NEXT:    vpcmpgtq %xmm0, %xmm5, %xmm0
 ; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
 ; AVX-NEXT:    vblendvpd %xmm0, %xmm3, %xmm4, %xmm0
 ; AVX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
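Note on the test change: the hunks above only show the updated CHECK lines; the IR body of shrunkblend_2uses is not part of this diff. A minimal sketch of what such a test presumably looks like, reconstructed from the CHECK lines (the value names and exact types are assumptions, not taken from the source):

define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b,
                                    <2 x i64> %c, <2 x i64> %d) {
  ; Two selects share %cond. Before this patch, SimplifyDemandedBits would not
  ; shrink a multi-use condition, so lowering sign-extended it to a full mask
  ; (the psrad+pshufd / vpxor+vpcmpgtq sequences deleted above). With
  ; AssumeSingleUse, the combine demands only the sign bit, and psllq $63
  ; alone is enough for the blends.
  %x = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
  %y = select <2 x i1> %cond, <2 x i64> %c, <2 x i64> %d
  %z = add <2 x i64> %x, %y
  ret <2 x i64> %z
}

Passing AssumeSingleUse is only safe here because the loop over Cond's uses has already verified that every user is a VSELECT condition operand, and all of those users are rewritten to X86ISD::SHRUNKBLEND (which reads only the sign bit of the condition) before the change is committed.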