Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6835,12 +6835,12 @@
     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
                                      MST->isCompressingStore());
+    unsigned HiOffset = LoMemVT.getStoreSize();

-    MMO = DAG.getMachineFunction().
-      getMachineMemOperand(MST->getPointerInfo(),
-                           MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
-                           SecondHalfAlignment, MST->getAAInfo(),
-                           MST->getRanges());
+    MMO = DAG.getMachineFunction().getMachineMemOperand(
+        MST->getPointerInfo().getWithOffset(HiOffset),
+        MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
+        MST->getAAInfo(), MST->getRanges());

     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
                             MST->isTruncatingStore(),
                             MST->isCompressingStore());
@@ -6985,11 +6985,12 @@
     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
                                      MLD->isExpandingLoad());
+    unsigned HiOffset = LoMemVT.getStoreSize();

-    MMO = DAG.getMachineFunction().
-      getMachineMemOperand(MLD->getPointerInfo(),
-                           MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
-                           SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+    MMO = DAG.getMachineFunction().getMachineMemOperand(
+        MLD->getPointerInfo().getWithOffset(HiOffset),
+        MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
+        MLD->getAAInfo(), MLD->getRanges());

     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
                            ISD::NON_EXTLOAD, MLD->isExpandingLoad());

Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1210,16 +1210,16 @@
   Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
                                    MLD->isExpandingLoad());
+  unsigned HiOffset = LoMemVT.getStoreSize();

-  MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MLD->getPointerInfo(),
-                         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
-                         SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+  MMO = DAG.getMachineFunction().getMachineMemOperand(
+      MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad,
+      HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(),
+      MLD->getRanges());

   Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
                          ExtType, MLD->isExpandingLoad());

-
   // Build a factor node to remember that this load is independent of the
   // other one.
   Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                    Hi.getValue(1));
@@ -1928,10 +1928,12 @@
   Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
                                    N->isCompressingStore());

-  MMO = DAG.getMachineFunction().
-    getMachineMemOperand(N->getPointerInfo(),
-                         MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
-                         SecondHalfAlignment, N->getAAInfo(), N->getRanges());
+  unsigned HiOffset = LoMemVT.getStoreSize();
+
+  MMO = DAG.getMachineFunction().getMachineMemOperand(
+      N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
+      HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(),
+      N->getRanges());

   Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
                           N->isTruncatingStore(), N->isCompressingStore());
Index: test/CodeGen/X86/compress_expand.ll
===================================================================
--- test/CodeGen/X86/compress_expand.ll
+++ test/CodeGen/X86/compress_expand.ll
@@ -388,11 +388,11 @@
 ; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
 ; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
 ; KNL-NEXT:    kshiftrw $8, %k1, %k2
+; KNL-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
 ; KNL-NEXT:    kmovw %k1, %eax
 ; KNL-NEXT:    movzbl %al, %eax
 ; KNL-NEXT:    popcntl %eax, %eax
 ; KNL-NEXT:    vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
-; KNL-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
 ; KNL-NEXT:    retq
   call void @llvm.masked.compressstore.v16f64(<16 x double> %V, double* %base, <16 x i1> %mask)
   ret void
Index: test/CodeGen/X86/vmaskmov-offset.ll
===================================================================
--- test/CodeGen/X86/vmaskmov-offset.ll
+++ test/CodeGen/X86/vmaskmov-offset.ll
@@ -9,8 +9,8 @@
   ; CHECK: bb.0.bb:
   ; CHECK:   [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
   ; CHECK:   [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
-  ; CHECK:   [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
-  ; CHECK:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPSYrm1]] :: (store 32 into %ir.stack_output_vec, align 4)
+  ; CHECK:   [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec + 32, align 4)
+  ; CHECK:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPSYrm1]] :: (store 32 into %ir.stack_output_vec + 32, align 4)
   ; CHECK:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPSYrm]] :: (store 32 into %ir.stack_output_vec, align 4)
   ; CHECK:   RET 0
 bb:
@@ -29,8 +29,8 @@
   ; CHECK: bb.0.bb:
   ; CHECK:   [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
   ; CHECK:   [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
-  ; CHECK:   [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
-  ; CHECK:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPDYrm1]] :: (store 32 into %ir.stack_output_vec, align 4)
+  ; CHECK:   [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec + 32, align 4)
+  ; CHECK:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPDYrm1]] :: (store 32 into %ir.stack_output_vec + 32, align 4)
   ; CHECK:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPDYrm]] :: (store 32 into %ir.stack_output_vec, align 4)
   ; CHECK:   RET 0
 bb:
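For context: the patch gives the MachineMemOperand of the second (hi) half of a split
masked load/store a MachinePointerInfo offset by LoMemVT.getStoreSize(), instead of
reusing the offset-0 pointer info of the low half. With distinct offsets, alias analysis
can prove the two halves disjoint, which is what the updated "+ 32" MIR checks and the
rescheduled vcompresspd in compress_expand.ll exercise. Below is a minimal, self-contained
C++ sketch of why the offset matters; PtrInfo, MemOp, and mayAlias are simplified stand-ins
invented for illustration, not LLVM's actual classes.

    // Toy model: two accesses based on the same value can only be proven
    // disjoint if their byte offsets are tracked alongside their sizes.
    #include <cstdint>
    #include <iostream>

    struct PtrInfo {
      const char *Base; // stand-in for the IR value the access is based on
      int64_t Offset;   // byte offset from Base
      PtrInfo getWithOffset(int64_t O) const { return {Base, Offset + O}; }
    };

    struct MemOp {
      PtrInfo PI;
      uint64_t Size; // bytes accessed
    };

    // Same-base accesses may alias iff their byte ranges overlap.
    static bool mayAlias(const MemOp &A, const MemOp &B) {
      if (A.PI.Base != B.PI.Base)
        return true; // unknown relationship: conservatively may alias
      int64_t AEnd = A.PI.Offset + (int64_t)A.Size;
      int64_t BEnd = B.PI.Offset + (int64_t)B.Size;
      return A.PI.Offset < BEnd && B.PI.Offset < AEnd;
    }

    int main() {
      PtrInfo P{"%ir.stack_output_vec", 0};
      uint64_t LoSize = 32; // LoMemVT.getStoreSize() for a 256-bit half

      // Before the patch: both halves carry offset 0, so they appear to overlap.
      MemOp LoOld{P, LoSize}, HiOld{P, LoSize};
      std::cout << mayAlias(LoOld, HiOld) << '\n'; // prints 1

      // After the patch: the hi half uses getWithOffset(LoSize) -> provably disjoint.
      MemOp Lo{P, LoSize}, Hi{P.getWithOffset((int64_t)LoSize), LoSize};
      std::cout << mayAlias(Lo, Hi) << '\n'; // prints 0
    }

Compiled and run, the sketch prints 1 then 0: without the offset the two halves look like
the same location; with it they are provably disjoint, which is what licenses the store
reordering seen in the compress_expand.ll diff above.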