diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44446,7 +44446,8 @@
 /// scalar element, and the alignment for the scalar memory access.
 static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
                                          SelectionDAG &DAG, SDValue &Addr,
-                                         SDValue &Index, unsigned &Alignment) {
+                                         SDValue &Index, Align &Alignment,
+                                         unsigned &Offset) {
   int TrueMaskElt = getOneTrueElt(MaskedOp->getMask());
   if (TrueMaskElt < 0)
     return false;
@@ -44454,15 +44455,17 @@
   // Get the address of the one scalar element that is specified by the mask
   // using the appropriate offset from the base pointer.
   EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType();
+  Offset = 0;
   Addr = MaskedOp->getBasePtr();
   if (TrueMaskElt != 0) {
-    unsigned Offset = TrueMaskElt * EltVT.getStoreSize();
+    Offset = TrueMaskElt * EltVT.getStoreSize();
     Addr = DAG.getMemBasePlusOffset(Addr, TypeSize::Fixed(Offset),
                                     SDLoc(MaskedOp));
   }
 
   Index = DAG.getIntPtrConstant(TrueMaskElt, SDLoc(MaskedOp));
-  Alignment = MinAlign(MaskedOp->getAlignment(), EltVT.getStoreSize());
+  Alignment = commonAlignment(MaskedOp->getOriginalAlign(),
+                              EltVT.getStoreSize());
   return true;
 }
 
@@ -44479,8 +44482,9 @@
   // is profitable. Endianness would also have to be considered.
 
   SDValue Addr, VecIndex;
-  unsigned Alignment;
-  if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment))
+  Align Alignment;
+  unsigned Offset;
+  if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment, Offset))
     return SDValue();
 
   // Load the one scalar element that is specified by the mask using the
@@ -44489,7 +44493,8 @@
   EVT VT = ML->getValueType(0);
   EVT EltVT = VT.getVectorElementType();
   SDValue Load =
-      DAG.getLoad(EltVT, DL, ML->getChain(), Addr, ML->getPointerInfo(),
+      DAG.getLoad(EltVT, DL, ML->getChain(), Addr,
+                  ML->getPointerInfo().getWithOffset(Offset),
                   Alignment, ML->getMemOperand()->getFlags());
 
   // Insert the loaded element into the appropriate place in the vector.
@@ -44600,8 +44605,9 @@
   // is profitable. Endianness would also have to be considered.
 
   SDValue Addr, VecIndex;
-  unsigned Alignment;
-  if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment))
+  Align Alignment;
+  unsigned Offset;
+  if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment, Offset))
     return SDValue();
 
   // Extract the one scalar element that is actually being stored.
@@ -44612,7 +44618,8 @@
                                 MS->getValue(), VecIndex);
 
   // Store that element at the appropriate offset from the base pointer.
-  return DAG.getStore(MS->getChain(), DL, Extract, Addr, MS->getPointerInfo(),
+  return DAG.getStore(MS->getChain(), DL, Extract, Addr,
+                      MS->getPointerInfo().getWithOffset(Offset),
                       Alignment, MS->getMemOperand()->getFlags());
 }
 
diff --git a/llvm/test/CodeGen/X86/vmaskmov-offset.ll b/llvm/test/CodeGen/X86/vmaskmov-offset.ll
--- a/llvm/test/CodeGen/X86/vmaskmov-offset.ll
+++ b/llvm/test/CodeGen/X86/vmaskmov-offset.ll
@@ -59,7 +59,7 @@
   ; CHECK: liveins: $rdi, $xmm0
   ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0
   ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
-  ; CHECK: [[VMOVHPDrm:%[0-9]+]]:vr128 = VMOVHPDrm [[COPY]], [[COPY1]], 1, $noreg, 8, $noreg :: (load 8 from %ir.addr, align 4)
+  ; CHECK: [[VMOVHPDrm:%[0-9]+]]:vr128 = VMOVHPDrm [[COPY]], [[COPY1]], 1, $noreg, 8, $noreg :: (load 8 from %ir.addr + 8, align 4)
   ; CHECK: $xmm0 = COPY [[VMOVHPDrm]]
   ; CHECK: RET 0, $xmm0
   %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> <i1 false, i1 true>, <2 x double> %dst)
@@ -72,7 +72,7 @@
   ; CHECK: liveins: $rdi, $xmm0
   ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0
   ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
-  ; CHECK: VEXTRACTPSmr [[COPY1]], 1, $noreg, 8, $noreg, [[COPY]], 2 :: (store 4 into %ir.addr)
+  ; CHECK: VEXTRACTPSmr [[COPY1]], 1, $noreg, 8, $noreg, [[COPY]], 2 :: (store 4 into %ir.addr + 8)
   ; CHECK: RET 0
   call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %addr, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
   ret void
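
For reference only (not part of the patch): the new scalarized load/store claims the alignment computed by commonAlignment(MaskedOp->getOriginalAlign(), EltVT.getStoreSize()), i.e. the largest power of two dividing both the original alignment and the element store size, and records the element's byte offset via getWithOffset(Offset). The standalone C++ sketch below just replays that arithmetic for the v2f64 test above (vector aligned to 4, single true lane at element 1); the local MinAlign helper mirrors llvm::MinAlign from MathExtras.h and is included only so the sketch compiles without LLVM headers.

#include <cassert>
#include <cstdint>

// Mirrors llvm::MinAlign (llvm/Support/MathExtras.h): the largest power of
// two that divides both A and B, assuming each is a power of two.
static uint64_t MinAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}

int main() {
  // Values taken from the v2f64 masked-load test: the vector access is
  // aligned to 4, elements are 8 bytes wide, and the only true mask lane
  // is element 1.
  const uint64_t OrigAlign = 4;
  const uint64_t EltSize = 8;
  const uint64_t TrueMaskElt = 1;
  const uint64_t Offset = TrueMaskElt * EltSize;

  // Alignment the scalarized load may claim -> "align 4" in the expected MMO.
  assert(MinAlign(OrigAlign, EltSize) == 4);
  // Pointer-info offset recorded in the MMO -> "%ir.addr + 8" in the CHECK.
  assert(Offset == 8);
  return 0;
}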