Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -33173,8 +33173,26 @@
   if (Mst->isCompressingStore())
     return SDValue();
 
-  if (!Mst->isTruncatingStore())
-    return reduceMaskedStoreToScalarStore(Mst, DAG);
+  if (!Mst->isTruncatingStore()) {
+    if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
+      return ScalarStore;
+
+    // If the mask is checking (0 > X), we're creating a vector with all-zeros
+    // or all-ones elements based on the sign bits of X. AVX masked store only
+    // cares about the sign bit of each mask element, so eliminate the compare:
+    // mstore val, ptr, (pcmpgt 0, X) --> mstore val, ptr, X
+    SDValue Mask = Mst->getMask();
+    if (Mask.getOpcode() == X86ISD::PCMPGT &&
+        ISD::isBuildVectorAllZeros(Mask.getOperand(0).getNode())) {
+      assert(Mask.getValueType() == Mask.getOperand(1).getValueType() &&
+             "Unexpected type for PCMPGT");
+      return DAG.getMaskedStore(
+          Mst->getChain(), SDLoc(N), Mst->getValue(), Mst->getBasePtr(),
+          Mask.getOperand(1), Mst->getMemoryVT(), Mst->getMemOperand());
+    }
+
+    return SDValue();
+  }
 
   // Resolve truncating stores.
   EVT VT = Mst->getValue().getValueType();
Index: test/CodeGen/X86/masked_memop.ll
===================================================================
--- test/CodeGen/X86/masked_memop.ll
+++ test/CodeGen/X86/masked_memop.ll
@@ -1140,21 +1140,18 @@
   ret <8 x double> %res
 }
 
-; FIXME: The mask bit for each data element is the most significant bit of the mask operand, so a compare isn't needed.
+; The mask bit for each data element is the most significant bit of the mask operand, so a compare isn't needed.
+; TODO: Is the SKX code optimal?
 
 define void @trunc_mask(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <4 x i32> %mask) {
 ; AVX-LABEL: trunc_mask:
 ; AVX:       ## BB#0:
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vmaskmovps %xmm0, %xmm1, (%rdi)
+; AVX-NEXT:    vmaskmovps %xmm0, %xmm2, (%rdi)
 ; AVX-NEXT:    retq
 ;
 ; AVX512F-LABEL: trunc_mask:
 ; AVX512F:       ## BB#0:
-; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm1
-; AVX512F-NEXT:    vmaskmovps %xmm0, %xmm1, (%rdi)
+; AVX512F-NEXT:    vmaskmovps %xmm0, %xmm2, (%rdi)
 ; AVX512F-NEXT:    retq
 ;
 ; SKX-LABEL: trunc_mask:
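
For reference, below is a minimal standalone C++ sketch (not part of the patch, and not LLVM code) modeling the equivalence the combine relies on: vmaskmovps consults only the most significant bit of each 32-bit mask element, and each lane of (pcmpgt 0, X) is all-ones exactly when the corresponding lane of X has its sign bit set, so X can serve as the mask directly. The helper names laneStored and pcmpgtZero are invented for illustration.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Model of vmaskmovps lane selection: lane i is stored iff mask element i
// has its most significant (sign) bit set.
static bool laneStored(int32_t maskElt) { return maskElt < 0; }

// Model of one lane of (pcmpgt 0, X): all-ones when 0 > X, else all-zeros.
static int32_t pcmpgtZero(int32_t x) { return (0 > x) ? -1 : 0; }

int main() {
  // Sample values straddling the sign-bit boundary: the compare result and
  // the raw value select exactly the same lanes, so the compare is redundant.
  const int32_t samples[] = {INT32_MIN, -2, -1, 0, 1, 2, INT32_MAX};
  for (int32_t x : samples)
    assert(laneStored(pcmpgtZero(x)) == laneStored(x));
  std::puts("mask (pcmpgt 0, X) selects the same lanes as X");
}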