Index: include/llvm/CodeGen/MachineMemOperand.h =================================================================== --- include/llvm/CodeGen/MachineMemOperand.h +++ include/llvm/CodeGen/MachineMemOperand.h @@ -47,14 +47,16 @@ uint8_t StackID; + /// Offset is unknown. + bool HasUnknownOffset; + explicit MachinePointerInfo(const Value *v = nullptr, int64_t offset = 0, uint8_t ID = 0) - : V(v), Offset(offset), StackID(ID) {} + : V(v), Offset(offset), StackID(ID), HasUnknownOffset(false) {} - explicit MachinePointerInfo(const PseudoSourceValue *v, - int64_t offset = 0, + explicit MachinePointerInfo(const PseudoSourceValue *v, int64_t offset = 0, uint8_t ID = 0) - : V(v), Offset(offset), StackID(ID) {} + : V(v), Offset(offset), StackID(ID), HasUnknownOffset(false) {} MachinePointerInfo getWithOffset(int64_t O) const { if (V.isNull()) return MachinePointerInfo(); @@ -72,6 +74,8 @@ /// Return the LLVM IR address space number that this pointer points into. unsigned getAddrSpace() const; + bool hasUnknownOffset() const { return HasUnknownOffset; } + /// Return a MachinePointerInfo record that refers to the constant pool. static MachinePointerInfo getConstantPool(MachineFunction &MF); @@ -89,6 +93,9 @@ /// Stack pointer relative access. static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID = 0); + + /// Stack memory with unknown offset. + static MachinePointerInfo getUnknownStack(MachineFunction &MF); }; Index: lib/CodeGen/MachineInstr.cpp =================================================================== --- lib/CodeGen/MachineInstr.cpp +++ lib/CodeGen/MachineInstr.cpp @@ -629,6 +629,12 @@ return MachinePointerInfo(MF.getPSVManager().getStack(), Offset,ID); } +MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) { + MachinePointerInfo Info(MF.getPSVManager().getStack()); + Info.HasUnknownOffset = true; + return Info; +} + MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, uint64_t s, unsigned int a, const AAMDNodes &AAInfo, @@ -704,9 +710,12 @@ OS << "["; if (const Value *V = getValue()) V->printAsOperand(OS, /*PrintType=*/false, MST); - else if (const PseudoSourceValue *PSV = getPseudoValue()) + else if (const PseudoSourceValue *PSV = getPseudoValue()) { + if (getPointerInfo().hasUnknownOffset()) { + OS << "Unknown"; + } PSV->printCustom(OS); - else + } else OS << ""; unsigned AS = getAddrSpace(); @@ -1728,10 +1737,14 @@ int64_t WidthB = MMOb->getSize(); const Value *ValA = MMOa->getValue(); const Value *ValB = MMOb->getValue(); + const PseudoSourceValue *PSVa = MMOa->getPseudoValue(); + const PseudoSourceValue *PSVb = MMOb->getPseudoValue(); + if (PSVa && MMOa->getPointerInfo().hasUnknownOffset()) + return PSVa->mayAlias(&MFI); + if (PSVb && MMOb->getPointerInfo().hasUnknownOffset()) + return PSVb->mayAlias(&MFI); bool SameVal = (ValA && ValB && (ValA == ValB)); if (!SameVal) { - const PseudoSourceValue *PSVa = MMOa->getPseudoValue(); - const PseudoSourceValue *PSVb = MMOb->getPseudoValue(); if (PSVa && ValB && !PSVa->mayAlias(&MFI)) return false; if (PSVb && ValA && !PSVb->mayAlias(&MFI)) Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1073,20 +1073,21 @@ EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Store the new element. This may be larger than the vector element type, // so use a truncating store. SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); - Store = - DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, + MachinePointerInfo::getUnknownStack(MF), EltVT); // Load the Lo part from the stack slot. - Lo = - DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, PtrInfo); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueSizeInBits() / 8; @@ -1095,7 +1096,8 @@ StackPtr.getValueType())); // Load the Hi part from the stack slot. - Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, + PtrInfo.getWithOffset(IncrementSize), MinAlign(Alignment, IncrementSize)); } @@ -1771,13 +1773,16 @@ // Store the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Load back the required element. StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - MachinePointerInfo(), EltVT); + return DAG.getExtLoad( + ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); } SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) { Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3802,7 +3802,7 @@ SDValue Index) const { SDLoc dl(Index); // Make sure the index type is big enough to compute in. - Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout())); + Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType()); EVT EltVT = VecVT.getVectorElementType(); Index: test/CodeGen/AMDGPU/extload-align.ll =================================================================== --- test/CodeGen/AMDGPU/extload-align.ll +++ test/CodeGen/AMDGPU/extload-align.ll @@ -1,4 +1,5 @@ -; RUN: llc -debug-only=machine-scheduler -march=amdgcn -verify-machineinstrs %s -o - 2>&1| FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC -check-prefix=DEBUG %s +; RUN: llc -debug-only=machine-scheduler -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs %s -o - 2>&1| FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC -check-prefix=DEBUG %s +target datalayout = "A5" ; REQUIRES: asserts ; Verify that the extload generated from %eval has the default @@ -6,18 +7,18 @@ ; size and not 4 corresponding to the sign-extended size (i32). ; DEBUG: {{^}}# Machine code for function extload_align: -; DEBUG: mem:LD2[]{{[^(]}} +; DEBUG: mem:LD2[UnknownStack(addrspace=5)]{{[^(]}} ; DEBUG: {{^}}# End machine code for function extload_align. -define amdgpu_kernel void @extload_align(i32* %out, i32 %index) #0 { - %v0 = alloca [4 x i16] - %a1 = getelementptr inbounds [4 x i16], [4 x i16]* %v0, i32 0, i32 0 - %a2 = getelementptr inbounds [4 x i16], [4 x i16]* %v0, i32 0, i32 1 - store i16 0, i16* %a1 - store i16 1, i16* %a2 - %a = getelementptr inbounds [4 x i16], [4 x i16]* %v0, i32 0, i32 %index - %val = load i16, i16* %a +define amdgpu_kernel void @extload_align(i32 addrspace(5)* %out, i32 %index) #0 { + %v0 = alloca [4 x i16], addrspace(5) + %a1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(5)* %v0, i32 0, i32 0 + %a2 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(5)* %v0, i32 0, i32 1 + store i16 0, i16 addrspace(5)* %a1 + store i16 1, i16 addrspace(5)* %a2 + %a = getelementptr inbounds [4 x i16], [4 x i16] addrspace(5)* %v0, i32 0, i32 %index + %val = load i16, i16 addrspace(5)* %a %eval = sext i16 %val to i32 - store i32 %eval, i32* %out + store i32 %eval, i32 addrspace(5)* %out ret void } Index: test/CodeGen/AMDGPU/extract_vector_elt-f16.ll =================================================================== --- test/CodeGen/AMDGPU/extract_vector_elt-f16.ll +++ test/CodeGen/AMDGPU/extract_vector_elt-f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}extract_vector_elt_v2f16: ; GCN: s_load_dword [[VEC:s[0-9]+]] Index: test/CodeGen/AMDGPU/extract_vector_elt-f64.ll =================================================================== --- test/CodeGen/AMDGPU/extract_vector_elt-f64.ll +++ test/CodeGen/AMDGPU/extract_vector_elt-f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}extract_vector_elt_v3f64_2: ; GCN: buffer_load_dwordx4 Index: test/CodeGen/AMDGPU/extract_vector_elt-i16.ll =================================================================== --- test/CodeGen/AMDGPU/extract_vector_elt-i16.ll +++ test/CodeGen/AMDGPU/extract_vector_elt-i16.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICIVI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SICIVI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SICIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}extract_vector_elt_v2i16: ; GCN: s_load_dword [[VEC:s[0-9]+]] Index: test/CodeGen/AMDGPU/extract_vector_elt-i64.ll =================================================================== --- test/CodeGen/AMDGPU/extract_vector_elt-i64.ll +++ test/CodeGen/AMDGPU/extract_vector_elt-i64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; How the replacement of i64 stores with v2i32 stores resulted in ; breaking other users of the bitcast if they already existed Index: test/CodeGen/AMDGPU/extract_vector_elt-i8.ll =================================================================== --- test/CodeGen/AMDGPU/extract_vector_elt-i8.ll +++ test/CodeGen/AMDGPU/extract_vector_elt-i8.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}extract_vector_elt_v1i8: ; GCN: buffer_load_ubyte Index: test/CodeGen/AMDGPU/insert_vector_elt.ll =================================================================== --- test/CodeGen/AMDGPU/insert_vector_elt.ll +++ test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-NO-TONGA %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-TONGA %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-NO-TONGA %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-TONGA %s ; FIXME: Broken on evergreen ; FIXME: For some reason the 8 and 16 vectors are being stored as Index: test/CodeGen/AMDGPU/scratch-simple.ll =================================================================== --- test/CodeGen/AMDGPU/scratch-simple.ll +++ test/CodeGen/AMDGPU/scratch-simple.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=gfx804 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx804 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s ; This used to fail due to a v_add_i32 instruction with an illegal immediate ; operand that was created during Local Stack Slot Allocation. Test case derived Index: test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll =================================================================== --- test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll +++ test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s -; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s -; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s +; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa-amdgiz -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s ; This ends up using all 256 registers and requires register ; scavenging which will fail to find an unsued register. Index: test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll =================================================================== --- test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll +++ test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; This ends up using all 255 registers and requires register ; scavenging which will fail to find an unsued register.