Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -172,12 +172,6 @@ "GCN or newer GPU" >; -def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding", - "GCN1Encoding", - "true", - "Encoding format for SI and CI" ->; - def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding", "GCN3Encoding", "true", @@ -442,14 +436,14 @@ def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [FeatureFP64, FeatureLocalMemorySize32768, - FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding, + FeatureWavefrontSize64, FeatureGCN, FeatureLDSBankCount32, FeatureMovrel] >; def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", [FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace, - FeatureGCN1Encoding, FeatureCIInsts, FeatureMovrel] + FeatureCIInsts, FeatureMovrel] >; def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", @@ -667,7 +661,7 @@ def isSICI : Predicate< "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" ->, AssemblerPredicate<"FeatureGCN1Encoding">; +>, AssemblerPredicate<"!FeatureGCN3Encoding">; def isVI : Predicate < "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -139,7 +139,6 @@ // Subtarget statically properties set by tablegen bool FP64; bool IsGCN; - bool GCN1Encoding; bool GCN3Encoding; bool CIInsts; bool GFX9Insts; Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -50,7 +50,7 @@ SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+dx10-clamp,+load-store-opt,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. - FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; + FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,"; FullFS += FS; @@ -75,6 +75,18 @@ if (MaxPrivateElementSize == 0) MaxPrivateElementSize = 4; + if (LDSBankCount == 0) + LDSBankCount = 32; + + if (TT.getArch() == Triple::amdgcn) { + if (LocalMemorySize == 0) + LocalMemorySize = 32768; + + // Do something sensible for unspecified target. + if (!HasMovrel && !HasVGPRIndexMode) + HasMovrel = true; + } + return *this; } @@ -117,7 +129,6 @@ FP64(false), IsGCN(false), - GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), GFX9Insts(false), Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -256,9 +256,8 @@ if (!GPU.empty()) return GPU; - // HSA only supports CI+, so change the default GPU to a CI for HSA. if (TT.getArch() == Triple::amdgcn) - return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti"; + return "generic"; return "r600"; } Index: lib/Target/AMDGPU/FLATInstructions.td =================================================================== --- lib/Target/AMDGPU/FLATInstructions.td +++ lib/Target/AMDGPU/FLATInstructions.td @@ -25,8 +25,6 @@ let isPseudo = 1; let isCodeGenOnly = 1; - let SubtargetPredicate = isCIVI; - let FLAT = 1; let UseNamedOperandTable = 1; @@ -55,6 +53,9 @@ bits<1> has_glc = 1; bits<1> glcValue = 0; + let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts, + !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace)); + // TODO: M0 if it could possibly access LDS (before gfx9? only)? let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]); @@ -667,7 +668,7 @@ (inst $vaddr, $data, $offset, $slc) >; -let Predicates = [isCIVI] in { +let Predicates = [HasFlatAddressSpace] in { def : FlatLoadPat ; def : FlatLoadPat ; @@ -719,13 +720,9 @@ def : FlatAtomicPat ; def : FlatAtomicPat ; -} // End Predicates = [isCIVI] - -let Predicates = [isVI] in { - def : FlatStorePat ; - def : FlatStorePat ; -} - +def : FlatStorePat ; +def : FlatStorePat ; +} // End Predicates = [HasFlatAddressSpace] let Predicates = [HasFlatGlobalInsts], AddedComplexity = 10 in { Index: lib/Target/AMDGPU/Processors.td =================================================================== --- lib/Target/AMDGPU/Processors.td +++ lib/Target/AMDGPU/Processors.td @@ -10,6 +10,12 @@ class Proc Features> : Processor; +// The code produced for "generic" is only useful for tests and cannot +// reasonably be expected to execute on any particular target. +def : ProcessorModel<"generic", NoSchedModel, [ + FeatureGCN +]>; + //===----------------------------------------------------------------------===// // R600 //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -531,6 +531,10 @@ return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; } +bool isGCN3Encoding(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding]; +} + bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); const unsigned FirstSubReg = TRI->getSubReg(Reg, 1); @@ -773,16 +777,15 @@ } int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { - if (isSI(ST) || isCI(ST)) - return ByteOffset >> 2; - - return ByteOffset; + if (isGCN3Encoding(ST)) + return ByteOffset; + return ByteOffset >> 2; } bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset); - return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) : - isUInt<20>(EncodedOffset); + return isGCN3Encoding(ST) ? + isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset); } } // end namespace AMDGPU Index: test/CodeGen/AMDGPU/addrspacecast.ll =================================================================== --- test/CodeGen/AMDGPU/addrspacecast.ll +++ test/CodeGen/AMDGPU/addrspacecast.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s ; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast: Index: test/CodeGen/AMDGPU/array-ptr-calc-i32.ll =================================================================== --- test/CodeGen/AMDGPU/array-ptr-calc-i32.ll +++ test/CodeGen/AMDGPU/array-ptr-calc-i32.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1 Index: test/CodeGen/AMDGPU/bitcast-vector-extract.ll =================================================================== --- test/CodeGen/AMDGPU/bitcast-vector-extract.ll +++ test/CodeGen/AMDGPU/bitcast-vector-extract.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; The bitcast should be pushed through the bitcasts so the vectors can Index: test/CodeGen/AMDGPU/br_cc.f16.ll =================================================================== --- test/CodeGen/AMDGPU/br_cc.f16.ll +++ test/CodeGen/AMDGPU/br_cc.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}br_cc_f16: Index: test/CodeGen/AMDGPU/branch-relaxation.ll =================================================================== --- test/CodeGen/AMDGPU/branch-relaxation.ll +++ test/CodeGen/AMDGPU/branch-relaxation.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -check-prefix=GCN %s ; FIXME: We should use llvm-mc for this, but we can't even parse our own output. Index: test/CodeGen/AMDGPU/callee-special-input-sgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}use_dispatch_ptr: Index: test/CodeGen/AMDGPU/callee-special-input-vgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}use_workitem_id_x: ; GCN: s_waitcnt Index: test/CodeGen/AMDGPU/captured-frame-index.ll =================================================================== --- test/CodeGen/AMDGPU/captured-frame-index.ll +++ test/CodeGen/AMDGPU/captured-frame-index.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-- -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}store_fi_lifetime: ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}} Index: test/CodeGen/AMDGPU/collapse-endcf.ll =================================================================== --- test/CodeGen/AMDGPU/collapse-endcf.ll +++ test/CodeGen/AMDGPU/collapse-endcf.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}simple_nested_if: ; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]] Index: test/CodeGen/AMDGPU/ctpop.ll =================================================================== --- test/CodeGen/AMDGPU/ctpop.ll +++ test/CodeGen/AMDGPU/ctpop.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s Index: test/CodeGen/AMDGPU/ctpop64.ll =================================================================== --- test/CodeGen/AMDGPU/ctpop64.ll +++ test/CodeGen/AMDGPU/ctpop64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone Index: test/CodeGen/AMDGPU/cvt_f32_ubyte.ll =================================================================== --- test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone @@ -12,7 +12,7 @@ ; GCN: buffer_store_dword [[CONV]], define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid + %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid %load = load i8, i8 addrspace(1)* %gep, align 1 %cvt = uitofp i8 %load to float store float %cvt, float addrspace(1)* %out, align 4 @@ -26,7 +26,7 @@ ; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid %load = load <2 x i8>, <2 x i8> addrspace(1)* %gep, align 2 %cvt = uitofp <2 x i8> %load to <2 x float> store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16 @@ -42,7 +42,7 @@ ; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid %load = load <3 x i8>, <3 x i8> addrspace(1)* %gep, align 4 %cvt = uitofp <3 x i8> %load to <3 x float> store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16 @@ -60,7 +60,7 @@ ; GCN: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4 %cvt = uitofp <4 x i8> %load to <4 x float> store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 @@ -86,7 +86,7 @@ ; GCN: buffer_store_dwordx4 define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1 %cvt = uitofp <4 x i8> %load to <4 x float> store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 @@ -135,7 +135,7 @@ ; GCN: s_endpgm define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid %load = load <7 x i8>, <7 x i8> addrspace(1)* %gep, align 1 %cvt = uitofp <7 x i8> %load to <7 x float> store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16 @@ -160,7 +160,7 @@ ; GCN: buffer_store_dwordx4 define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid %load = load <8 x i8>, <8 x i8> addrspace(1)* %gep, align 8 %cvt = uitofp <8 x i8> %load to <8 x float> store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16 @@ -174,7 +174,7 @@ ; GCN: buffer_store_dword [[CONV]], define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %load = load i32, i32 addrspace(1)* %gep, align 4 %add = add i32 %load, 2 %inreg = and i32 %add, 255 @@ -186,7 +186,7 @@ ; GCN-LABEL: {{^}}i8_zext_inreg_hi1_to_f32: define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %load = load i32, i32 addrspace(1)* %gep, align 4 %inreg = and i32 %load, 65280 %shr = lshr i32 %inreg, 8 @@ -200,7 +200,7 @@ ; GCN-LABEL: {{^}}i8_zext_i32_to_f32: define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid + %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid %load = load i8, i8 addrspace(1)* %gep, align 1 %ext = zext i8 %load to i32 %cvt = uitofp i32 %ext to float @@ -211,7 +211,7 @@ ; GCN-LABEL: {{^}}v4i8_zext_v4i32_to_v4f32: define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1 %ext = zext <4 x i8> %load to <4 x i32> %cvt = uitofp <4 x i32> %ext to <4 x float> @@ -226,7 +226,7 @@ ; GCN: buffer_store_dword [[CONV]] define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %gep %and = and i32 %val, 255 %cvt = uitofp i32 %and to float @@ -241,7 +241,7 @@ ; GCN: buffer_store_dword [[CONV]] define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %gep %srl = lshr i32 %val, 8 %and = and i32 %srl, 255 @@ -257,7 +257,7 @@ ; GCN: buffer_store_dword [[CONV]] define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %gep %srl = lshr i32 %val, 16 %and = and i32 %srl, 255 @@ -273,7 +273,7 @@ ; GCN: buffer_store_dword [[CONV]] define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %gep %srl = lshr i32 %val, 24 %and = and i32 %srl, 255 Index: test/CodeGen/AMDGPU/fabs.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fabs.f16.ll +++ test/CodeGen/AMDGPU/fabs.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s Index: test/CodeGen/AMDGPU/fadd.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fadd.f16.ll +++ test/CodeGen/AMDGPU/fadd.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}fadd_f16 Index: test/CodeGen/AMDGPU/fcopysign.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fcopysign.f16.ll +++ test/CodeGen/AMDGPU/fcopysign.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX8 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX8 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s declare half @llvm.copysign.f16(half, half) declare float @llvm.copysign.f32(float, float) Index: test/CodeGen/AMDGPU/fence-amdgiz.ll =================================================================== --- test/CodeGen/AMDGPU/fence-amdgiz.ll +++ test/CodeGen/AMDGPU/fence-amdgiz.ll @@ -1,7 +1,6 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -mcpu=kaveri < %s | FileCheck %s target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" -target triple = "amdgcn-amd-amdhsa-amdgizcl" ; CHECK-LABEL: atomic_fence ; CHECK: BB#0: Index: test/CodeGen/AMDGPU/fmax3.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fmax3.f64.ll +++ test/CodeGen/AMDGPU/fmax3.f64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare double @llvm.maxnum.f64(double, double) nounwind readnone Index: test/CodeGen/AMDGPU/fmul.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fmul.f16.ll +++ test/CodeGen/AMDGPU/fmul.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}fmul_f16 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/fpext.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fpext.f16.ll +++ test/CodeGen/AMDGPU/fpext.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s ; GCN-LABEL: {{^}}fpext_f16_to_f32 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/fptosi.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fptosi.f16.ll +++ test/CodeGen/AMDGPU/fptosi.f16.ll @@ -57,8 +57,8 @@ ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] ; SI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]] -; SI: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] -; SI: v_and_b32_e32 v[[R_I16_LO:[0-9]+]], 0xffff, v[[R_I16_0]] +; SI-DAG: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] +; SI-DAG: v_and_b32_e32 v[[R_I16_LO:[0-9]+]], 0xffff, v[[R_I16_0]] ; SI: v_lshlrev_b32_e32 v[[R_I16_HI:[0-9]+]], 16, v[[R_I16_1]] ; SI: v_or_b32_e32 v[[R_V2_I16:[0-9]+]], v[[R_I16_LO]], v[[R_I16_HI]] Index: test/CodeGen/AMDGPU/fptoui.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fptoui.f16.ll +++ test/CodeGen/AMDGPU/fptoui.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}fptoui_f16_to_i16 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/frame-index-amdgiz.ll =================================================================== --- test/CodeGen/AMDGPU/frame-index-amdgiz.ll +++ test/CodeGen/AMDGPU/frame-index-amdgiz.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn---amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s ; ; The original OpenCL kernel: ; kernel void f(global int *a, int i, int j) { @@ -6,10 +6,9 @@ ; x[i] = 7; ; a[0] = x[j]; ; } -; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o - +; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o - target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" -target triple = "amdgcn---amdgiz" define amdgpu_kernel void @f(i32 addrspace(1)* nocapture %a, i32 %i, i32 %j) local_unnamed_addr #0 { entry: Index: test/CodeGen/AMDGPU/frame-index-elimination.ll =================================================================== --- test/CodeGen/AMDGPU/frame-index-elimination.ll +++ test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; Test that non-entry function frame indices are expanded properly to ; give an index relative to the scratch wave offset register Index: test/CodeGen/AMDGPU/fsub.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fsub.f16.ll +++ test/CodeGen/AMDGPU/fsub.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}fsub_f16: ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/half.ll =================================================================== --- test/CodeGen/AMDGPU/half.ll +++ test/CodeGen/AMDGPU/half.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; half args should be promoted to float for SI and lower. Index: test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll +++ test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s ; FIXME: Merge into indirect-addressing-si.ll Index: test/CodeGen/AMDGPU/indirect-addressing-si.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -1,7 +1,7 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s ; Tests for indirect addressing on SI, which is implemented using dynamic ; indexing of vectors. Index: test/CodeGen/AMDGPU/insert_vector_elt.ll =================================================================== --- test/CodeGen/AMDGPU/insert_vector_elt.ll +++ test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-NO-TONGA %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-NO-TONGA %s ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-TONGA %s ; FIXME: Broken on evergreen Index: test/CodeGen/AMDGPU/llvm.ceil.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.ceil.f16.ll +++ test/CodeGen/AMDGPU/llvm.ceil.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.ceil.f16(half %a) declare <2 x half> @llvm.ceil.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.cos.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.cos.f16.ll +++ test/CodeGen/AMDGPU/llvm.cos.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.cos.f16(half %a) declare <2 x half> @llvm.cos.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.exp2.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.exp2.f16.ll +++ test/CodeGen/AMDGPU/llvm.exp2.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.exp2.f16(half %a) declare <2 x half> @llvm.exp2.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.floor.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.floor.f16.ll +++ test/CodeGen/AMDGPU/llvm.floor.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.floor.f16(half %a) declare <2 x half> @llvm.floor.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.fma.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.fma.f16.ll +++ test/CodeGen/AMDGPU/llvm.fma.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.fma.f16(half %a, half %b, half %c) declare <2 x half> @llvm.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) Index: test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll +++ test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll @@ -1,7 +1,7 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s declare half @llvm.fmuladd.f16(half %a, half %b, half %c) declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) Index: test/CodeGen/AMDGPU/llvm.log2.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.log2.f16.ll +++ test/CodeGen/AMDGPU/llvm.log2.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.log2.f16(half %a) declare <2 x half> @llvm.log2.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.maxnum.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.maxnum.f16(half %a, half %b) declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b) Index: test/CodeGen/AMDGPU/llvm.memcpy.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.memcpy.ll +++ test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind Index: test/CodeGen/AMDGPU/llvm.minnum.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.minnum.f16(half %a, half %b) declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b) Index: test/CodeGen/AMDGPU/llvm.rint.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.rint.f16.ll +++ test/CodeGen/AMDGPU/llvm.rint.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=VI -check-prefix=GFX89 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=VI -check-prefix=GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s declare half @llvm.rint.f16(half %a) declare <2 x half> @llvm.rint.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.round.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.round.ll +++ test/CodeGen/AMDGPU/llvm.round.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s Index: test/CodeGen/AMDGPU/llvm.sin.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.sin.f16.ll +++ test/CodeGen/AMDGPU/llvm.sin.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.sin.f16(half %a) declare <2 x half> @llvm.sin.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.sqrt.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.sqrt.f16.ll +++ test/CodeGen/AMDGPU/llvm.sqrt.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.sqrt.f16(half %a) declare <2 x half> @llvm.sqrt.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.trunc.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.trunc.f16.ll +++ test/CodeGen/AMDGPU/llvm.trunc.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.trunc.f16(half %a) declare <2 x half> @llvm.trunc.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir =================================================================== --- test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir +++ test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir @@ -1,4 +1,4 @@ -# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass machine-scheduler -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass machine-scheduler -o - %s | FileCheck -check-prefix=GCN %s # GCN-LABEL: name: cluster_add_addc # GCN: S_NOP 0, implicit-def %vcc Index: test/CodeGen/AMDGPU/merge-stores.ll =================================================================== --- test/CodeGen/AMDGPU/merge-stores.ll +++ test/CodeGen/AMDGPU/merge-stores.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s ; This test is mostly to test DAG store merging, so disable the vectorizer. ; Run with devices with different unaligned load restrictions. Index: test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll +++ test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-promote-alloca < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0 declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) #0 Index: test/CodeGen/AMDGPU/promote-alloca-no-opts.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-no-opts.ll +++ test/CodeGen/AMDGPU/promote-alloca-no-opts.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=+promote-alloca < %s | FileCheck -check-prefix=NOOPTS -check-prefix=ALL %s -; RUN: llc -O1 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mattr=+promote-alloca < %s | FileCheck -check-prefix=OPTS -check-prefix=ALL %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=+promote-alloca < %s | FileCheck -check-prefix=NOOPTS -check-prefix=ALL %s +; RUN: llc -O1 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=+promote-alloca < %s | FileCheck -check-prefix=OPTS -check-prefix=ALL %s ; ALL-LABEL: {{^}}promote_alloca_i32_array_array: ; NOOPTS: workgroup_group_segment_byte_size = 0{{$}} Index: test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll +++ test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=GCN %s ; This shows that the amount of LDS estimate is sensitive to the order ; of the LDS globals. Index: test/CodeGen/AMDGPU/s_addk_i32.ll =================================================================== --- test/CodeGen/AMDGPU/s_addk_i32.ll +++ test/CodeGen/AMDGPU/s_addk_i32.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}s_addk_i32_k0: Index: test/CodeGen/AMDGPU/s_mulk_i32.ll =================================================================== --- test/CodeGen/AMDGPU/s_mulk_i32.ll +++ test/CodeGen/AMDGPU/s_mulk_i32.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}s_mulk_i32_k0: Index: test/CodeGen/AMDGPU/sad.ll =================================================================== --- test/CodeGen/AMDGPU/sad.ll +++ test/CodeGen/AMDGPU/sad.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}v_sad_u32_pat1: ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll +++ test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=VI -check-prefix=GCN %s ; FUNC-LABEL: {{^}}cluster_arg_loads: Index: test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll +++ test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=fiji -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s ; RUN: llc -march=amdgcn -mcpu=fiji -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s Index: test/CodeGen/AMDGPU/select-vectors.ll =================================================================== --- test/CodeGen/AMDGPU/select-vectors.ll +++ test/CodeGen/AMDGPU/select-vectors.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; Test expansion of scalar selects on vectors. ; Evergreen not enabled since it seems to be having problems with doubles. Index: test/CodeGen/AMDGPU/select.f16.ll =================================================================== --- test/CodeGen/AMDGPU/select.f16.ll +++ test/CodeGen/AMDGPU/select.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}select_f16: ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/setcc-fneg-constant.ll =================================================================== --- test/CodeGen/AMDGPU/setcc-fneg-constant.ll +++ test/CodeGen/AMDGPU/setcc-fneg-constant.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s ; Test fcmp pred (fneg x), c -> fcmp (swapped pred) x, -c combine. Index: test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll =================================================================== --- test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll +++ test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Extract the high bit of the 1st quarter ; GCN-LABEL: {{^}}v_uextract_bit_31_i128: Index: test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll =================================================================== --- test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll +++ test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll @@ -1,4 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; FIXME: Fails with -enable-var-scope ; Make sure 64-bit BFE pattern does a 32-bit BFE on the relevant half. Index: test/CodeGen/AMDGPU/shl_add_constant.ll =================================================================== --- test/CodeGen/AMDGPU/shl_add_constant.ll +++ test/CodeGen/AMDGPU/shl_add_constant.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() #1 @@ -56,7 +56,7 @@ ; FUNC-LABEL: {{^}}test_add_shl_add_constant: ; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3 +; SI-DAG: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3 ; SI: s_add_i32 [[RESULT:s[0-9]+]], [[SHL3]], [[Y]] ; SI: s_addk_i32 [[RESULT]], 0x3d8 ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]] Index: test/CodeGen/AMDGPU/sitofp.f16.ll =================================================================== --- test/CodeGen/AMDGPU/sitofp.f16.ll +++ test/CodeGen/AMDGPU/sitofp.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}sitofp_i16_to_f16 ; GCN: buffer_load_{{sshort|ushort}} v[[A_I16:[0-9]+]] Index: test/CodeGen/AMDGPU/trunc.ll =================================================================== --- test/CodeGen/AMDGPU/trunc.ll +++ test/CodeGen/AMDGPU/trunc.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs< %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs< %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone Index: test/CodeGen/AMDGPU/udivrem.ll =================================================================== --- test/CodeGen/AMDGPU/udivrem.ll +++ test/CodeGen/AMDGPU/udivrem.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s Index: test/CodeGen/AMDGPU/uitofp.f16.ll =================================================================== --- test/CodeGen/AMDGPU/uitofp.f16.ll +++ test/CodeGen/AMDGPU/uitofp.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}uitofp_i16_to_f16 ; GCN: buffer_load_ushort v[[A_I16:[0-9]+]] Index: test/CodeGen/AMDGPU/v_mac_f16.ll =================================================================== --- test/CodeGen/AMDGPU/v_mac_f16.ll +++ test/CodeGen/AMDGPU/v_mac_f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}mac_f16: Index: test/CodeGen/AMDGPU/vector-extract-insert.ll =================================================================== --- test/CodeGen/AMDGPU/vector-extract-insert.ll +++ test/CodeGen/AMDGPU/vector-extract-insert.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Test that when extracting the same unknown vector index from an ; insertelement the dynamic indexing is folded away. @@ -81,4 +81,4 @@ } attributes #0 = { nounwind readnone } -attributes #1 = { nounwind } \ No newline at end of file +attributes #1 = { nounwind }