diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -938,7 +938,8 @@ int64_t Offset, LLT Ty); MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size) { - return getMachineMemOperand(MMO, Offset, LLT::scalar(8 * Size)); + return getMachineMemOperand( + MMO, Offset, Size == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * Size)); } /// getMachineMemOperand - Allocate a new MachineMemOperand by copying diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1820,10 +1820,10 @@ else std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); - unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoSize, Alignment, - MLD->getAAInfo(), MLD->getRanges()); + MLD->getPointerInfo(), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, Alignment, MLD->getAAInfo(), + MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, MMO, MLD->getAddressingMode(), ExtType, @@ -1837,7 +1837,6 @@ // Generate hi masked load. 
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); - unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize()); MachinePointerInfo MPI; if (LoMemVT.isScalableVector()) @@ -1847,8 +1846,8 @@ LoMemVT.getStoreSize().getFixedSize()); MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOLoad, HiSize, Alignment, MLD->getAAInfo(), - MLD->getRanges()); + MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, + MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, MMO, MLD->getAddressingMode(), ExtType, @@ -2662,10 +2661,9 @@ DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty); SDValue Lo, Hi, Res; - unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment, - N->getAAInfo(), N->getRanges()); + N->getPointerInfo(), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges()); Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), @@ -2689,10 +2687,9 @@ MPI = N->getPointerInfo().getWithOffset( LoMemVT.getStoreSize().getFixedSize()); - unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize()); MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(), - N->getRanges()); + MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, + N->getAAInfo(), N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- 
a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4336,9 +4336,7 @@ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, - // TODO: Make MachineMemOperands aware of scalable - // vectors. - VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata()); + MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata()); SDValue StoreNode = DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, ISD::UNINDEXED, false /* Truncating */, IsCompressing); @@ -4496,22 +4494,14 @@ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. - MemoryLocation ML; - if (VT.isScalableVector()) - ML = MemoryLocation::getAfter(PtrOperand); - else - ML = MemoryLocation(PtrOperand, LocationSize::precise( - DAG.getDataLayout().getTypeStoreSize(I.getType())), - AAInfo); + MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, - // TODO: Make MachineMemOperands aware of scalable - // vectors. 
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges); + MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -9,6 +9,7 @@ #include "HexagonISelLowering.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/IntrinsicsHexagon.h" #include "llvm/Support/CommandLine.h" @@ -1846,16 +1847,18 @@ SDValue Chain = MemN->getChain(); SDValue Base0 = MemN->getBasePtr(); SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl); + unsigned MemOpc = MemN->getOpcode(); MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr; if (MachineMemOperand *MMO = MemN->getMemOperand()) { MachineFunction &MF = DAG.getMachineFunction(); - MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen); - MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen); + uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE) + ? 
(uint64_t)MemoryLocation::UnknownSize + : HwLen; + MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize); + MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize); } - unsigned MemOpc = MemN->getOpcode(); - if (MemOpc == ISD::LOAD) { assert(cast<LoadSDNode>(Op)->isUnindexed()); SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0); diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-store-mmo.ll b/llvm/test/CodeGen/Thumb2/mve-masked-store-mmo.ll --- a/llvm/test/CodeGen/Thumb2/mve-masked-store-mmo.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-store-mmo.ll @@ -11,15 +11,15 @@ ; CHECK-NEXT: adr r0, .LCPI0_0 ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: add.w r0, sp, #2 -; CHECK-NEXT: ldrb.w r1, [sp, #3] ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrbt.8 q0, [r0] -; CHECK-NEXT: ldrb.w r2, [sp, #4] ; CHECK-NEXT: ldrb.w r0, [sp, #2] -; CHECK-NEXT: ldrb.w r3, [sp, #10] +; CHECK-NEXT: ldrb.w r1, [sp, #3] +; CHECK-NEXT: ldrb.w r2, [sp, #4] ; CHECK-NEXT: add r0, r1 -; CHECK-NEXT: ldrb.w r1, [sp, #11] +; CHECK-NEXT: ldrb.w r3, [sp, #10] ; CHECK-NEXT: add r0, r2 +; CHECK-NEXT: ldrb.w r1, [sp, #11] ; CHECK-NEXT: add r0, r3 ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: add sp, #12 diff --git a/llvm/test/CodeGen/X86/masked_compressstore.ll b/llvm/test/CodeGen/X86/masked_compressstore.ll --- a/llvm/test/CodeGen/X86/masked_compressstore.ll +++ b/llvm/test/CodeGen/X86/masked_compressstore.ll @@ -516,8 +516,6 @@ ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 ; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1 -; AVX512F-NEXT: kshiftrw $8, %k1, %k2 -; AVX512F-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ; AVX512F-NEXT: kmovw %k1, %eax ; AVX512F-NEXT: movzbl %al, %eax ; AVX512F-NEXT: movl %eax, %ecx @@ -535,7 +533,9 @@ ; AVX512F-NEXT: andl $252645135, %ecx ## imm = 0xF0F0F0F ; AVX512F-NEXT: imull $16843009, %ecx, %eax ## imm = 0x1010101 ; AVX512F-NEXT: shrl $24, %eax +; AVX512F-NEXT: kshiftrw $8, %k1, %k2 ; AVX512F-NEXT: vcompresspd %zmm1, (%rdi,%rax,8) {%k2} +; AVX512F-NEXT: vcompresspd %zmm0,
(%rdi) {%k1} ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -570,8 +570,6 @@ ; AVX512VLBW: ## %bb.0: ; AVX512VLBW-NEXT: vpsllw $7, %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpmovb2m %xmm2, %k1 -; AVX512VLBW-NEXT: kshiftrw $8, %k1, %k2 -; AVX512VLBW-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ; AVX512VLBW-NEXT: kmovd %k1, %eax ; AVX512VLBW-NEXT: movzbl %al, %eax ; AVX512VLBW-NEXT: movl %eax, %ecx @@ -589,7 +587,9 @@ ; AVX512VLBW-NEXT: andl $252645135, %ecx ## imm = 0xF0F0F0F ; AVX512VLBW-NEXT: imull $16843009, %ecx, %eax ## imm = 0x1010101 ; AVX512VLBW-NEXT: shrl $24, %eax +; AVX512VLBW-NEXT: kshiftrw $8, %k1, %k2 ; AVX512VLBW-NEXT: vcompresspd %zmm1, (%rdi,%rax,8) {%k2} +; AVX512VLBW-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ; AVX512VLBW-NEXT: vzeroupper ; AVX512VLBW-NEXT: retq call void @llvm.masked.compressstore.v16f64(<16 x double> %V, double* %base, <16 x i1> %mask) diff --git a/llvm/test/CodeGen/X86/vmaskmov-offset.ll b/llvm/test/CodeGen/X86/vmaskmov-offset.ll --- a/llvm/test/CodeGen/X86/vmaskmov-offset.ll +++ b/llvm/test/CodeGen/X86/vmaskmov-offset.ll @@ -14,10 +14,10 @@ ; CHECK-NEXT: [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0 ; CHECK-NEXT: [[VPCMPEQDYrr:%[0-9]+]]:vr256 = VPCMPEQDYrr [[COPY]], [[AVX_SET0_]] ; CHECK-NEXT: [[VPCMPEQDYrr1:%[0-9]+]]:vr256 = VPCMPEQDYrr [[COPY1]], [[AVX_SET0_]] - ; CHECK-NEXT: [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load (s256) from %ir.stack_input_vec, align 4) - ; CHECK-NEXT: [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load (s256) from %ir.stack_input_vec + 32, align 4) - ; CHECK-NEXT: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQDYrr]], killed [[VMASKMOVPSYrm1]] :: (store (s256) into %ir.stack_output_vec + 32, align 4) - ; CHECK-NEXT: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQDYrr1]], killed [[VMASKMOVPSYrm]] :: (store (s256) into 
%ir.stack_output_vec, align 4) + ; CHECK-NEXT: [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load unknown-size from %ir.stack_input_vec, align 4) + ; CHECK-NEXT: [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load unknown-size from %ir.stack_input_vec + 32, align 4) + ; CHECK-NEXT: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQDYrr]], killed [[VMASKMOVPSYrm1]] :: (store unknown-size into %ir.stack_output_vec + 32, align 4) + ; CHECK-NEXT: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQDYrr1]], killed [[VMASKMOVPSYrm]] :: (store unknown-size into %ir.stack_output_vec, align 4) ; CHECK-NEXT: RET 0 bb: %stack_input_vec = alloca <16 x float>, align 64 @@ -41,10 +41,10 @@ ; CHECK-NEXT: [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0 ; CHECK-NEXT: [[VPCMPEQQYrr:%[0-9]+]]:vr256 = VPCMPEQQYrr [[COPY]], [[AVX_SET0_]] ; CHECK-NEXT: [[VPCMPEQQYrr1:%[0-9]+]]:vr256 = VPCMPEQQYrr [[COPY1]], [[AVX_SET0_]] - ; CHECK-NEXT: [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load (s256) from %ir.stack_input_vec, align 4) - ; CHECK-NEXT: [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load (s256) from %ir.stack_input_vec + 32, align 4) - ; CHECK-NEXT: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQQYrr]], killed [[VMASKMOVPDYrm1]] :: (store (s256) into %ir.stack_output_vec + 32, align 4) - ; CHECK-NEXT: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQQYrr1]], killed [[VMASKMOVPDYrm]] :: (store (s256) into %ir.stack_output_vec, align 4) + ; CHECK-NEXT: [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load unknown-size from %ir.stack_input_vec, align 4) + 
; CHECK-NEXT: [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load unknown-size from %ir.stack_input_vec + 32, align 4) + ; CHECK-NEXT: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQQYrr]], killed [[VMASKMOVPDYrm1]] :: (store unknown-size into %ir.stack_output_vec + 32, align 4) + ; CHECK-NEXT: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQQYrr1]], killed [[VMASKMOVPDYrm]] :: (store unknown-size into %ir.stack_output_vec, align 4) ; CHECK-NEXT: RET 0 bb: %stack_input_vec = alloca <8 x double>, align 64