Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7401,7 +7401,7 @@ if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != TargetLowering::TypeSplitVector) return SDValue(); - SDValue MaskLo, MaskHi, Lo, Hi; + SDValue MaskLo, MaskHi; std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); EVT LoVT, HiVT; @@ -7429,17 +7429,15 @@ Alignment, MSC->getAAInfo(), MSC->getRanges()); SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale }; - Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), - DL, OpsLo, MMO); - - SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale }; - Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); + SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), + DataLo.getValueType(), DL, OpsLo, MMO); - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + // The order of the Scatter operation after split is well defined. The "Hi" + // part comes after the "Lo". So these two operations should be chained one + // after another. + SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale }; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); } SDValue DAGCombiner::visitMSTORE(SDNode *N) { Index: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll +++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll @@ -2857,11 +2857,11 @@ ; KNL_64-LABEL: test_scatter_setcc_split: ; KNL_64: # %bb.0: ; KNL_64-NEXT: vextractf64x4 $1, %zmm0, %ymm4 -; KNL_64-NEXT: vptestnmd %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; KNL_64-NEXT: vextracti64x4 $1, %zmm1, %ymm5 +; KNL_64-NEXT: vptestnmd %zmm5, %zmm5, %k1 ; KNL_64-NEXT: vptestnmd %zmm1, %zmm1, %k2 -; KNL_64-NEXT: vscatterdpd %zmm3, (%rdi,%ymm4,8) {%k2} -; KNL_64-NEXT: vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k1} +; KNL_64-NEXT: vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k2} +; KNL_64-NEXT: vscatterdpd %zmm3, (%rdi,%ymm4,8) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq ; @@ -2877,11 +2877,11 @@ ; KNL_32-NEXT: vmovapd 72(%ebp), %zmm3 ; KNL_32-NEXT: movl 8(%ebp), %eax ; KNL_32-NEXT: vextractf64x4 $1, %zmm0, %ymm4 -; KNL_32-NEXT: vptestnmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; KNL_32-NEXT: vextracti64x4 $1, %zmm1, %ymm5 +; KNL_32-NEXT: vptestnmd %zmm5, %zmm5, %k1 ; KNL_32-NEXT: vptestnmd %zmm1, %zmm1, %k2 -; KNL_32-NEXT: vscatterdpd %zmm3, (%eax,%ymm4,8) {%k2} -; KNL_32-NEXT: vscatterdpd %zmm2, (%eax,%ymm0,8) {%k1} +; KNL_32-NEXT: vscatterdpd %zmm2, (%eax,%ymm0,8) {%k2} +; KNL_32-NEXT: vscatterdpd %zmm3, (%eax,%ymm4,8) {%k1} ; KNL_32-NEXT: movl %ebp, %esp ; KNL_32-NEXT: popl %ebp ; KNL_32-NEXT: .cfi_def_cfa %esp, 4 @@ -2891,11 +2891,11 @@ ; SKX-LABEL: test_scatter_setcc_split: ; SKX: # %bb.0: ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm4 -; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 -; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm5 +; SKX-NEXT: vptestnmd %ymm5, %ymm5, %k1 ; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k2 -; SKX-NEXT: vscatterdpd %zmm3, (%rdi,%ymm4,8) {%k2} -; SKX-NEXT: vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k1} +; SKX-NEXT: vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k2} +; SKX-NEXT: vscatterdpd %zmm3, (%rdi,%ymm4,8) {%k1} ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq ; @@ -2911,11 +2911,11 @@ ; SKX_32-NEXT: vmovapd 72(%ebp), %zmm3 ; SKX_32-NEXT: movl 8(%ebp), %eax ; SKX_32-NEXT: vextractf64x4 $1, %zmm0, %ymm4 -; SKX_32-NEXT: vptestnmd %ymm1, %ymm1, %k1 -; SKX_32-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; SKX_32-NEXT: vextracti64x4 $1, %zmm1, %ymm5 +; SKX_32-NEXT: vptestnmd %ymm5, %ymm5, %k1 ; SKX_32-NEXT: vptestnmd %ymm1, %ymm1, %k2 -; SKX_32-NEXT: vscatterdpd %zmm3, (%eax,%ymm4,8) {%k2} -; SKX_32-NEXT: vscatterdpd %zmm2, (%eax,%ymm0,8) {%k1} +; SKX_32-NEXT: vscatterdpd %zmm2, (%eax,%ymm0,8) {%k2} +; SKX_32-NEXT: vscatterdpd %zmm3, (%eax,%ymm4,8) {%k1} ; SKX_32-NEXT: movl %ebp, %esp ; SKX_32-NEXT: popl %ebp ; SKX_32-NEXT: .cfi_def_cfa %esp, 4