Index: test/CodeGen/AMDGPU/32-bit-local-address-space.ll =================================================================== --- test/CodeGen/AMDGPU/32-bit-local-address-space.ll +++ test/CodeGen/AMDGPU/32-bit-local-address-space.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and ; the global address space(1) uses 64-bit pointers. These tests check to make sure Index: test/CodeGen/AMDGPU/InlineAsmCrash.ll =================================================================== --- test/CodeGen/AMDGPU/InlineAsmCrash.ll +++ test/CodeGen/AMDGPU/InlineAsmCrash.ll @@ -1,12 +1,12 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; CHECK: ;;#ASMSTART ; CHECK-NEXT: s_nop 0 ; CHECK-NEXT: ;;#ASMEND -define void @foo(i32* %ptr) { +define void @foo(i32 addrspace(5)* %ptr) { %tmp = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm "s_nop 0", 
"=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65"(i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2) %tmp2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %tmp, 0 - store i32 %tmp2, i32* %ptr, align 4 + store i32 %tmp2, i32 addrspace(5)* %ptr, align 4 ret void } Index: test/CodeGen/AMDGPU/add-debug.ll =================================================================== --- test/CodeGen/AMDGPU/add-debug.ll +++ test/CodeGen/AMDGPU/add-debug.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -debug -; RUN: llc < %s -march=amdgcn -mcpu=tonga -debug +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -debug +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -debug ; REQUIRES: asserts ; Check that SelectionDAGDumper does not crash on int_SI_if. 
Index: test/CodeGen/AMDGPU/add.i16.ll =================================================================== --- test/CodeGen/AMDGPU/add.i16.ll +++ test/CodeGen/AMDGPU/add.i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_add_i16: Index: test/CodeGen/AMDGPU/add.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/add.v2i16.ll +++ test/CodeGen/AMDGPU/add.v2i16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; FIXME: Need to handle non-uniform case for function below (load without gep). 
; GCN-LABEL: {{^}}v_test_add_v2i16: Index: test/CodeGen/AMDGPU/add_i128.ll =================================================================== --- test/CodeGen/AMDGPU/add_i128.ll +++ test/CodeGen/AMDGPU/add_i128.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_i128_vreg: ; GCN: v_add_i32_e32 v[[LO:[0-9]+]], vcc, Index: test/CodeGen/AMDGPU/add_i64.ll =================================================================== --- test/CodeGen/AMDGPU/add_i64.ll +++ test/CodeGen/AMDGPU/add_i64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() readnone Index: test/CodeGen/AMDGPU/addrspacecast-captured.ll =================================================================== --- test/CodeGen/AMDGPU/addrspacecast-captured.ll +++ test/CodeGen/AMDGPU/addrspacecast-captured.ll @@ -1,45 +1,46 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa-amdgiz -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s +target datalayout = "A5" ; Nothing should be done if the addrspacecast is captured. 
declare void @consume_ptr2int(i32) #0 ; CHECK-LABEL: @addrspacecast_captured( -; CHECK: %data = alloca i32, align 4 -; CHECK: %cast = addrspacecast i32* %data to i32 addrspace(4)* -; CHECK: %ptr2int = ptrtoint i32 addrspace(4)* %cast to i32 +; CHECK: %data = alloca i32, align 4, addrspace(5) +; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32* +; CHECK: %ptr2int = ptrtoint i32* %cast to i32 ; CHECK: store i32 %ptr2int, i32 addrspace(1)* %out define amdgpu_kernel void @addrspacecast_captured(i32 addrspace(1)* %out) #0 { entry: - %data = alloca i32, align 4 - %cast = addrspacecast i32* %data to i32 addrspace(4)* - %ptr2int = ptrtoint i32 addrspace(4)* %cast to i32 + %data = alloca i32, align 4, addrspace(5) + %cast = addrspacecast i32 addrspace(5)* %data to i32* + %ptr2int = ptrtoint i32* %cast to i32 store i32 %ptr2int, i32 addrspace(1)* %out ret void } ; CHECK-LABEL: @addrspacecast_captured_store( -; CHECK: %data = alloca i32, align 4 -; CHECK: %cast = addrspacecast i32* %data to i32 addrspace(4)* -; CHECK: store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %out -define amdgpu_kernel void @addrspacecast_captured_store(i32 addrspace(4)* addrspace(1)* %out) #0 { +; CHECK: %data = alloca i32, align 4, addrspace(5) +; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32* +; CHECK: store i32* %cast, i32* addrspace(1)* %out +define amdgpu_kernel void @addrspacecast_captured_store(i32* addrspace(1)* %out) #0 { entry: - %data = alloca i32, align 4 - %cast = addrspacecast i32* %data to i32 addrspace(4)* - store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %out + %data = alloca i32, align 4, addrspace(5) + %cast = addrspacecast i32 addrspace(5)* %data to i32* + store i32* %cast, i32* addrspace(1)* %out ret void } ; CHECK-LABEL: @addrspacecast_captured_call( -; CHECK: %data = alloca i32, align 4 -; CHECK: %cast = addrspacecast i32* %data to i32 addrspace(4)* -; CHECK: %ptr2int = ptrtoint i32 addrspace(4)* %cast to i32 +; CHECK: %data 
= alloca i32, align 4, addrspace(5) +; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32* +; CHECK: %ptr2int = ptrtoint i32* %cast to i32 ; CHECK: call void @consume_ptr2int(i32 %ptr2int) define amdgpu_kernel void @addrspacecast_captured_call() #0 { entry: - %data = alloca i32, align 4 - %cast = addrspacecast i32* %data to i32 addrspace(4)* - %ptr2int = ptrtoint i32 addrspace(4)* %cast to i32 + %data = alloca i32, align 4, addrspace(5) + %cast = addrspacecast i32 addrspace(5)* %data to i32* + %ptr2int = ptrtoint i32* %cast to i32 call void @consume_ptr2int(i32 %ptr2int) ret void } Index: test/CodeGen/AMDGPU/addrspacecast.ll =================================================================== --- test/CodeGen/AMDGPU/addrspacecast.ll +++ test/CodeGen/AMDGPU/addrspacecast.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=kaveri -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s +target datalayout = "A5" ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast: ; HSA: enable_sgpr_private_segment_buffer = 1 @@ -35,8 +36,8 @@ ; CI: NumSgprs: {{[0-9][0-9]+}} ; GFX9: NumSgprs: {{[0-9]+}} define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - store volatile i32 7, i32 addrspace(4)* %stof + %stof = addrspacecast i32 
addrspace(3)* %ptr to i32* + store volatile i32 7, i32* %stof ret void } @@ -73,9 +74,9 @@ ; CI: NumSgprs: {{[0-9][0-9]+}} ; GFX9: NumSgprs: {{[0-9]+}} -define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #0 { - %stof = addrspacecast i32* %ptr to i32 addrspace(4)* - store volatile i32 7, i32 addrspace(4)* %stof +define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #0 { + %stof = addrspacecast i32 addrspace(5)* %ptr to i32* + store volatile i32 7, i32* %stof ret void } @@ -89,8 +90,8 @@ ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 ; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]] define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)* - store volatile i32 7, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(1)* %ptr to i32* + store volatile i32 7, i32* %stof ret void } @@ -101,8 +102,8 @@ ; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] ; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}} define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 { - %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)* - %ld = load volatile i32, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(2)* %ptr to i32* + %ld = load volatile i32, i32* %stof ret void } @@ -117,8 +118,8 @@ ; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]] ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}} ; HSA: ds_write_b32 [[CASTPTR]], v[[K]] -define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 { - %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)* +define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 { + %ftos = addrspacecast i32* %ptr to i32 addrspace(3)* store volatile i32 0, i32 addrspace(3)* %ftos ret void } @@ -134,9 +135,9 @@ ; HSA-DAG: v_cndmask_b32_e32 
[[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]] ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}} ; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} -define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 { - %ftos = addrspacecast i32 addrspace(4)* %ptr to i32* - store volatile i32 0, i32* %ftos +define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 { + %ftos = addrspacecast i32* %ptr to i32 addrspace(5)* + store volatile i32 0, i32 addrspace(5)* %ftos ret void } @@ -148,8 +149,8 @@ ; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0 ; HSA: {{flat|global}}_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]] -define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 { - %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)* +define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 { + %ftos = addrspacecast i32* %ptr to i32 addrspace(1)* store volatile i32 0, i32 addrspace(1)* %ftos ret void } @@ -159,8 +160,8 @@ ; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0 ; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0 -define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 { - %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)* +define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 { + %ftos = addrspacecast i32* %ptr to i32 addrspace(2)* load volatile i32, i32 addrspace(2)* %ftos ret void } @@ -178,8 +179,8 @@ ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} ; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 { - %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)* - store volatile i32 7, i32 addrspace(4)* %cast + %cast = addrspacecast i32 addrspace(3)* null to i32* + store 
volatile i32 7, i32* %cast ret void } @@ -188,7 +189,7 @@ ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} ; HSA: ds_write_b32 [[PTR]], [[K]] define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 { - %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(3)* + %cast = addrspacecast i32* null to i32 addrspace(3)* store volatile i32 7, i32 addrspace(3)* %cast ret void } @@ -199,8 +200,8 @@ ; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 { - %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)* - store volatile i32 7, i32 addrspace(4)* %cast + %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32* + store volatile i32 7, i32* %cast ret void } @@ -209,7 +210,7 @@ ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} ; HSA: ds_write_b32 [[PTR]], [[K]] define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 { - %cast = addrspacecast i32 addrspace(4)* inttoptr (i64 -1 to i32 addrspace(4)*) to i32 addrspace(3)* + %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(3)* store volatile i32 7, i32 addrspace(3)* %cast ret void } @@ -224,8 +225,8 @@ ; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 { - %cast = addrspacecast i32* null to i32 addrspace(4)* - store volatile i32 7, i32 addrspace(4)* %cast + %cast = addrspacecast i32 addrspace(5)* null to i32* + store volatile i32 7, i32* %cast ret void } @@ -233,8 +234,8 @@ ; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} ; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}} define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 { - %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)* - store volatile i32 7, 
i32* %cast + %cast = addrspacecast i32* null to i32 addrspace(5)* + store volatile i32 7, i32 addrspace(5)* %cast ret void } @@ -250,17 +251,17 @@ br i1 %cmp, label %local, label %global local: - %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)* + %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32* br label %end global: - %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* + %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32* br label %end end: - %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ] - store volatile i32 %x, i32 addrspace(4)* %fptr, align 4 -; %val = load i32, i32 addrspace(4)* %fptr, align 4 + %fptr = phi i32* [ %flat_local, %local ], [ %flat_global, %global ] + store volatile i32 %x, i32* %fptr, align 4 +; %val = load i32, i32* %fptr, align 4 ; store i32 %val, i32 addrspace(1)* %out, align 4 ret void } @@ -278,14 +279,14 @@ ; HSA: s_barrier ; HSA: {{flat|global}}_load_dword define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 { - %alloca = alloca i32, i32 9, align 4 + %alloca = alloca i32, i32 9, align 4, addrspace(5) %x = call i32 @llvm.amdgcn.workitem.id.x() #2 - %pptr = getelementptr i32, i32* %alloca, i32 %x - %fptr = addrspacecast i32* %pptr to i32 addrspace(4)* - store volatile i32 %x, i32 addrspace(4)* %fptr + %pptr = getelementptr i32, i32 addrspace(5)* %alloca, i32 %x + %fptr = addrspacecast i32 addrspace(5)* %pptr to i32* + store volatile i32 %x, i32* %fptr ; Dummy call call void @llvm.amdgcn.s.barrier() #1 - %reload = load volatile i32, i32 addrspace(4)* %fptr, align 4 + %reload = load volatile i32, i32* %fptr, align 4 store volatile i32 %reload, i32 addrspace(1)* %out, align 4 ret void } Index: test/CodeGen/AMDGPU/alignbit-pat.ll =================================================================== --- test/CodeGen/AMDGPU/alignbit-pat.ll +++ test/CodeGen/AMDGPU/alignbit-pat.ll @@ -1,4 +1,4 @@ -; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}alignbit_shr_pat: ; GCN-DAG: s_load_dword s[[SHR:[0-9]+]] Index: test/CodeGen/AMDGPU/amdgcn.bitcast.ll =================================================================== --- test/CodeGen/AMDGPU/amdgcn.bitcast.ll +++ test/CodeGen/AMDGPU/amdgcn.bitcast.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; This test just checks that the compiler doesn't crash. 
Index: test/CodeGen/AMDGPU/amdgcn.private-memory.ll =================================================================== --- test/CodeGen/AMDGPU/amdgcn.private-memory.ll +++ test/CodeGen/AMDGPU/amdgcn.private-memory.ll @@ -1,9 +1,10 @@ -; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE %s -; RUN: llc -mattr=+promote-alloca,-flat-for-global -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE -check-prefix=HSA %s -; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA %s -; RUN: llc -mattr=-promote-alloca,-flat-for-global -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA -check-prefix=HSA %s -; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE %s -; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA %s +; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE %s +; RUN: llc -mattr=+promote-alloca,-flat-for-global -verify-machineinstrs -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE -check-prefix=HSA %s +; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA %s +; RUN: llc -mattr=-promote-alloca,-flat-for-global -verify-machineinstrs -mtriple=amdgcn-amdhsa-amdgiz -mcpu=kaveri < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA -check-prefix=HSA %s +; RUN: llc -mattr=+promote-alloca 
-verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE %s +; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA %s +target datalayout = "A5" declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone @@ -17,13 +18,13 @@ ; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) { entry: - %0 = alloca [2 x i32] - %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0 - %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1 - store i32 0, i32* %1 - store i32 1, i32* %2 - %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in - %4 = load i32, i32* %3 + %0 = alloca [2 x i32], addrspace(5) + %1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0 + %2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1 + store i32 0, i32 addrspace(5)* %1 + store i32 1, i32 addrspace(5)* %2 + %3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in + %4 = load i32, i32 addrspace(5)* %3 %5 = call i32 @llvm.amdgcn.workitem.id.x() %6 = add i32 %4, %5 store i32 %6, i32 addrspace(1)* %out Index: test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll =================================================================== --- test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll +++ test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll @@ -1,9 +1,9 @@ -; RUN: opt -mtriple=amdgcn-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s -; RUN: opt -mtriple=r600-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -mtriple=amdgcn---amdgiz -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -mtriple=r600---amdgiz -O3 -aa-eval -print-all-alias-modref-info -disable-output 
< %s 2>&1 | FileCheck %s -; CHECK: NoAlias: i8 addrspace(1)* %p1, i8* %p +; CHECK: NoAlias: i8 addrspace(1)* %p1, i8 addrspace(5)* %p -define void @test(i8* %p, i8 addrspace(1)* %p1) { +define void @test(i8 addrspace(5)* %p, i8 addrspace(1)* %p1) { ret void } Index: test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll =================================================================== --- test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll +++ test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-codegenprepare %s | FileCheck %s ; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s ; Make sure this doesn't crash with no triple Index: test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll =================================================================== --- test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll +++ test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: @add_i3( ; SI: %r = add i3 %a, %b Index: test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll =================================================================== --- test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll +++ test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < 
%s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}shader_cc: Index: test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll =================================================================== --- test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll +++ test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; Legacy intrinsics that just read implicit parameters Index: test/CodeGen/AMDGPU/amdpal-cs.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-cs.ll +++ test/CodeGen/AMDGPU/amdpal-cs.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s -; RUN: 
llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s ; amdpal compute shader: check for 0x2e12 (COMPUTE_PGM_RSRC1) in pal metadata ; GCN-LABEL: {{^}}cs_amdpal: Index: test/CodeGen/AMDGPU/amdpal-es.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-es.ll +++ test/CodeGen/AMDGPU/amdpal-es.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; amdpal evaluation shader: check for 0x2cca (SPI_SHADER_PGM_RSRC1_ES) in pal metadata ; GCN-LABEL: {{^}}es_amdpal: Index: test/CodeGen/AMDGPU/amdpal-gs.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-gs.ll +++ test/CodeGen/AMDGPU/amdpal-gs.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s ; amdpal geometry shader: check for 0x2c8a (SPI_SHADER_PGM_RSRC1_GS) in pal metadata ; GCN-LABEL: {{^}}gs_amdpal: Index: test/CodeGen/AMDGPU/amdpal-hs.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-hs.ll +++ test/CodeGen/AMDGPU/amdpal-hs.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s ; amdpal hull shader: check for 0x2d0a (SPI_SHADER_PGM_RSRC1_HS) in pal metadata ; GCN-LABEL: {{^}}hs_amdpal: Index: test/CodeGen/AMDGPU/amdpal-ls.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-ls.ll +++ 
test/CodeGen/AMDGPU/amdpal-ls.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; amdpal load shader: check for 0x2d4a (SPI_SHADER_PGM_RSRC1_LS) in pal metadata ; GCN-LABEL: {{^}}ls_amdpal: Index: test/CodeGen/AMDGPU/amdpal-psenable.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-psenable.ll +++ test/CodeGen/AMDGPU/amdpal-psenable.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s ; This pixel shader does not use the result of its interpolation, so it would ; end up with an interpolation mode set in PSAddr but not PSEnable. 
This test tests Index: test/CodeGen/AMDGPU/amdpal-vs.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-vs.ll +++ test/CodeGen/AMDGPU/amdpal-vs.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s ; amdpal vertex shader: check for 45352 (SPI_SHADER_PGM_RSRC1_VS) in pal metadata ; GCN-LABEL: {{^}}vs_amdpal: Index: test/CodeGen/AMDGPU/amdpal.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal.ll +++ test/CodeGen/AMDGPU/amdpal.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tahiti | FileCheck --check-prefix=PAL --enable-var-scope %s +; RUN: llc < %s -mtriple=amdgcn--amdpal-amdgiz -mcpu=tahiti | FileCheck --check-prefix=PAL --enable-var-scope %s +target datalayout = "A5" ; PAL: .AMDGPU.config @@ -17,14 +18,14 @@ ; PAL: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]: ; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]: -define amdgpu_kernel void @scratch(<2 x i32> %in, i32 %idx, i32* %out) { +define amdgpu_kernel void @scratch(<2 x i32> %in, i32 %idx, i32 addrspace(5)* %out) { entry: - %v = alloca [2 x i32] - %vv = bitcast [2 x i32]* %v to <2 x i32>* - store <2 x i32> %in, <2 x i32>* 
%vv - %e = getelementptr [2 x i32], [2 x i32]* %v, i32 0, i32 %idx - %x = load i32, i32* %e - store i32 %x, i32* %out + %v = alloca [2 x i32], addrspace(5) + %vv = bitcast [2 x i32] addrspace(5)* %v to <2 x i32> addrspace(5)* + store <2 x i32> %in, <2 x i32> addrspace(5)* %vv + %e = getelementptr [2 x i32], [2 x i32] addrspace(5)* %v, i32 0, i32 %idx + %x = load i32, i32 addrspace(5)* %e + store i32 %x, i32 addrspace(5)* %out ret void } @@ -41,14 +42,14 @@ ; PAL: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]: ; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]: -define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, i32* %out) #0 { +define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, i32 addrspace(5)* %out) #0 { entry: - %v = alloca [2 x i32] - %vv = bitcast [2 x i32]* %v to <2 x i32>* - store <2 x i32> %in, <2 x i32>* %vv - %e = getelementptr [2 x i32], [2 x i32]* %v, i32 0, i32 %idx - %x = load i32, i32* %e - store i32 %x, i32* %out + %v = alloca [2 x i32], addrspace(5) + %vv = bitcast [2 x i32] addrspace(5)* %v to <2 x i32> addrspace(5)* + store <2 x i32> %in, <2 x i32> addrspace(5)* %vv + %e = getelementptr [2 x i32], [2 x i32] addrspace(5)* %v, i32 0, i32 %idx + %x = load i32, i32 addrspace(5)* %e + store i32 %x, i32 addrspace(5)* %out ret void } Index: test/CodeGen/AMDGPU/and-gcn.ll =================================================================== --- test/CodeGen/AMDGPU/and-gcn.ll +++ test/CodeGen/AMDGPU/and-gcn.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI 
-check-prefix=FUNC %s ; FUNC-LABEL: {{^}}v_and_i64_br: ; SI: s_and_b64 Index: test/CodeGen/AMDGPU/and.ll =================================================================== --- test/CodeGen/AMDGPU/and.ll +++ test/CodeGen/AMDGPU/and.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() #0 Index: test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll =================================================================== --- test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=HSA %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa-amdgiz -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=HSA %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -186,22 +186,22 @@ ; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #8 { define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(3)* 
%ptr to i32* + store volatile i32 0, i32* %stof ret void } ; HSA: define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #12 { define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(3)* %ptr to i32* + store volatile i32 0, i32* %stof ret void } ; HSA: define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #13 { define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(3)* %ptr to i32* + store volatile i32 0, i32* %stof call void @func_indirect_use_queue_ptr() ret void } Index: test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll =================================================================== --- test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa-amdgiz -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -176,57 +176,57 @@ ; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 { define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(3)* %ptr to i32* + store volatile i32 0, i32* %stof ret void } -; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #11 { -define 
amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #1 { - %stof = addrspacecast i32* %ptr to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* %stof +; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 { +define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 { + %stof = addrspacecast i32 addrspace(5)* %ptr to i32* + store volatile i32 0, i32* %stof ret void } -; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 { -define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 { - %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)* +; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 { +define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 { + %ftos = addrspacecast i32* %ptr to i32 addrspace(3)* store volatile i32 0, i32 addrspace(3)* %ftos ret void } -; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 { -define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 { - %ftos = addrspacecast i32 addrspace(4)* %ptr to i32* - store volatile i32 0, i32* %ftos +; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 { +define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 { + %ftos = addrspacecast i32* %ptr to i32 addrspace(5)* + store volatile i32 0, i32 addrspace(5)* %ftos ret void } ; No-op addrspacecast should not use queue ptr ; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 { define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 { - %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(1)* %ptr to i32* + store volatile i32 0, i32* %stof ret void } ; HSA: 
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 { define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 { - %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)* - %ld = load volatile i32, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(2)* %ptr to i32* + %ld = load volatile i32, i32* %stof ret void } -; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 { -define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 { - %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)* +; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 { +define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 { + %ftos = addrspacecast i32* %ptr to i32 addrspace(1)* store volatile i32 0, i32 addrspace(1)* %ftos ret void } -; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 { -define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 { - %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)* +; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 { +define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 { + %ftos = addrspacecast i32* %ptr to i32 addrspace(2)* %ld = load volatile i32, i32 addrspace(2)* %ftos ret void } Index: test/CodeGen/AMDGPU/annotate-kernel-features.ll =================================================================== --- test/CodeGen/AMDGPU/annotate-kernel-features.ll +++ test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s +; RUN: opt -S -mtriple=amdgcn-unknown-unknown-amdgiz -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s 
declare i32 @llvm.r600.read.tgid.x() #0 declare i32 @llvm.r600.read.tgid.y() #0 Index: test/CodeGen/AMDGPU/anonymous-gv.ll =================================================================== --- test/CodeGen/AMDGPU/anonymous-gv.ll +++ test/CodeGen/AMDGPU/anonymous-gv.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji | FileCheck %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji | FileCheck %s ; Make sure we don't crash on a global variable with no name. @0 = external addrspace(1) global i32 Index: test/CodeGen/AMDGPU/any_extend_vector_inreg.ll =================================================================== --- test/CodeGen/AMDGPU/any_extend_vector_inreg.ll +++ test/CodeGen/AMDGPU/any_extend_vector_inreg.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}any_extend_vector_inreg_v16i8_to_v4i32: ; GCN: s_load_dwordx4 Index: test/CodeGen/AMDGPU/anyext.ll =================================================================== --- test/CodeGen/AMDGPU/anyext.ll +++ test/CodeGen/AMDGPU/anyext.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,GFX89 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,GFX89 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX89 %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone Index: test/CodeGen/AMDGPU/array-ptr-calc-i32.ll =================================================================== --- test/CodeGen/AMDGPU/array-ptr-calc-i32.ll +++ test/CodeGen/AMDGPU/array-ptr-calc-i32.ll @@ -1,5 +1,6 @@ -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s +target datalayout = "A5" declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1 @@ -20,12 +21,12 @@ ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this ; alloca to a vector. 
It currently fails because it does not know how ; to interpret: -; getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b +; getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 1, i32 %b ; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 64 ; SI-PROMOTE: ds_write_b32 [[PTRREG]] define amdgpu_kernel void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) #0 { - %alloca = alloca [16 x i32], align 16 + %alloca = alloca [16 x i32], align 16, addrspace(5) %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0); %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo) %a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid @@ -33,11 +34,11 @@ %a = load i32, i32 addrspace(1)* %a_ptr %b = load i32, i32 addrspace(1)* %b_ptr %result = add i32 %a, %b - %alloca_ptr = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b - store i32 %result, i32* %alloca_ptr, align 4 + %alloca_ptr = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 1, i32 %b + store i32 %result, i32 addrspace(5)* %alloca_ptr, align 4 ; Dummy call call void @llvm.amdgcn.s.barrier() - %reload = load i32, i32* %alloca_ptr, align 4 + %reload = load i32, i32 addrspace(5)* %alloca_ptr, align 4 %out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid store i32 %reload, i32 addrspace(1)* %out_ptr, align 4 ret void Index: test/CodeGen/AMDGPU/array-ptr-calc-i64.ll =================================================================== --- test/CodeGen/AMDGPU/array-ptr-calc-i64.ll +++ test/CodeGen/AMDGPU/array-ptr-calc-i64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0 Index: 
test/CodeGen/AMDGPU/ashr.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/ashr.v2i16.ll +++ test/CodeGen/AMDGPU/ashr.v2i16.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s ; GCN-LABEL: {{^}}s_ashr_v2i16: ; GFX9: s_load_dword [[LHS:s[0-9]+]] Index: test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll =================================================================== --- test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll +++ test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SICI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc 
-march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SICI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset: ; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb Index: test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll =================================================================== --- test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll +++ test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=HSAMD %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=HSAMD %s ; CHECK-LABEL: {{^}}min_64_max_64: ; CHECK: SGPRBlocks: 0 Index: test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll =================================================================== --- test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll +++ test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck 
-check-prefix=TOSMEM -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=ALL %s ; If spilling to smem, additional registers are used for the resource ; descriptor. @@ -65,7 +65,7 @@ ; %x.3 = call i64 @llvm.amdgcn.dispatch.id() ; %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() ; %x.5 = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr() -; store volatile i32 0, i32* undef +; store volatile i32 0, i32 addrspace(5)* undef ; br label %stores ; ;stores: @@ -100,7 +100,7 @@ ; i32 addrspace(1)* %out3, ; i32 addrspace(1)* %out4, ; i32 %one, i32 %two, i32 %three, i32 %four) #2 { -; store volatile i32 0, i32* undef +; store volatile i32 0, i32 addrspace(5)* undef ; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x() ; store volatile i32 %x.0, i32 addrspace(1)* undef ; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y() Index: test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll =================================================================== --- test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll +++ test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s @var = addrspace(1) global float 0.0 Index: test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll =================================================================== --- test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll +++ test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s ; Exactly 1 
wave per execution unit. ; CHECK-LABEL: {{^}}empty_exactly_1: Index: test/CodeGen/AMDGPU/attr-unparseable.ll =================================================================== --- test/CodeGen/AMDGPU/attr-unparseable.ll +++ test/CodeGen/AMDGPU/attr-unparseable.ll @@ -1,4 +1,4 @@ -; RUN: not llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s 2>&1 | FileCheck %s ; CHECK: can't parse integer attribute amdgpu-num-sgpr define amdgpu_kernel void @unparseable_single_0() #0 { Index: test/CodeGen/AMDGPU/barrier-elimination.ll =================================================================== --- test/CodeGen/AMDGPU/barrier-elimination.ll +++ test/CodeGen/AMDGPU/barrier-elimination.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck %s ; CHECK-LABEL: {{^}}unknown_wgs: ; CHECK: s_barrier Index: test/CodeGen/AMDGPU/basic-branch.ll =================================================================== --- test/CodeGen/AMDGPU/basic-branch.ll +++ test/CodeGen/AMDGPU/basic-branch.ll @@ -1,7 +1,7 @@ -; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s -; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=0 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_branch: ; GCNNOOPT: v_writelane_b32 Index: test/CodeGen/AMDGPU/basic-call-return.ll =================================================================== --- test/CodeGen/AMDGPU/basic-call-return.ll +++ test/CodeGen/AMDGPU/basic-call-return.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s define void @void_func_void() #2 { ret void Index: test/CodeGen/AMDGPU/basic-loop.ll =================================================================== --- test/CodeGen/AMDGPU/basic-loop.ll +++ test/CodeGen/AMDGPU/basic-loop.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=tahiti < %s | FileCheck %s -; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s +; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s | FileCheck %s +; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s | FileCheck %s ; 
CHECK-LABEL: {{^}}test_loop: define amdgpu_kernel void @test_loop(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind { Index: test/CodeGen/AMDGPU/bfe-combine.ll =================================================================== --- test/CodeGen/AMDGPU/bfe-combine.ll +++ test/CodeGen/AMDGPU/bfe-combine.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 < %s | FileCheck --check-prefix=GCN --check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck --check-prefix=GCN --check-prefix=VI-SDWA %s -; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=GCN --check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -amdgpu-sdwa-peephole=0 < %s | FileCheck --check-prefix=GCN --check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji < %s | FileCheck --check-prefix=GCN --check-prefix=VI-SDWA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire < %s | FileCheck --check-prefix=GCN --check-prefix=CI %s ; GCN-LABEL: {{^}}bfe_combine8: ; VI: v_bfe_u32 v[[BFE:[0-9]+]], v{{[0-9]+}}, 8, 8 Index: test/CodeGen/AMDGPU/bfe-patterns.ll =================================================================== --- test/CodeGen/AMDGPU/bfe-patterns.ll +++ test/CodeGen/AMDGPU/bfe-patterns.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}v_ubfe_sub_i32: ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]] Index: test/CodeGen/AMDGPU/bfe_uint.ll 
=================================================================== --- test/CodeGen/AMDGPU/bfe_uint.ll +++ test/CodeGen/AMDGPU/bfe_uint.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; CHECK: {{^}}bfe_def: ; CHECK: BFE_UINT Index: test/CodeGen/AMDGPU/bfm.ll =================================================================== --- test/CodeGen/AMDGPU/bfm.ll +++ test/CodeGen/AMDGPU/bfm.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}bfm_pattern: ; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} Index: test/CodeGen/AMDGPU/big_alu.ll =================================================================== --- test/CodeGen/AMDGPU/big_alu.ll +++ test/CodeGen/AMDGPU/big_alu.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cedar < %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cedar < %s ; This test ensures that R600 backend can handle ifcvt properly Index: test/CodeGen/AMDGPU/bitcast-vector-extract.ll =================================================================== --- test/CodeGen/AMDGPU/bitcast-vector-extract.ll +++ test/CodeGen/AMDGPU/bitcast-vector-extract.ll @@ -1,5 +1,5 @@ -; 
RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; The bitcast should be pushed through the bitcasts so the vectors can ; be broken down and the shared components can be CSEd Index: test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll =================================================================== --- test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll +++ test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Test that materialization constants that are the bit reversed of ; inline immediates are replaced with bfrev of the inline immediate to Index: test/CodeGen/AMDGPU/bitreverse.ll =================================================================== --- test/CodeGen/AMDGPU/bitreverse.ll +++ test/CodeGen/AMDGPU/bitreverse.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga 
-mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() #1 Index: test/CodeGen/AMDGPU/br_cc.f16.ll =================================================================== --- test/CodeGen/AMDGPU/br_cc.f16.ll +++ test/CodeGen/AMDGPU/br_cc.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}br_cc_f16: ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/branch-condition-and.ll =================================================================== --- test/CodeGen/AMDGPU/branch-condition-and.ll +++ test/CodeGen/AMDGPU/branch-condition-and.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; This used to crash because during intermediate control flow lowering, there ; was a sequence Index: test/CodeGen/AMDGPU/branch-relax-bundle.ll 
=================================================================== --- test/CodeGen/AMDGPU/branch-relax-bundle.ll +++ test/CodeGen/AMDGPU/branch-relax-bundle.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=5 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=5 < %s | FileCheck -check-prefix=GCN %s ; Restrict maximum branch to between +15 and -16 dwords Index: test/CodeGen/AMDGPU/branch-relax-spill.ll =================================================================== --- test/CodeGen/AMDGPU/branch-relax-spill.ll +++ test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -1,4 +1,4 @@ -; RUN: not llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s 2>&1 | FileCheck -check-prefix=FAIL %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s 2>&1 | FileCheck -check-prefix=FAIL %s ; FIXME: This should be able to compile, but requires inserting an ; extra block to restore the scavenged register. Index: test/CodeGen/AMDGPU/branch-relaxation.ll =================================================================== --- test/CodeGen/AMDGPU/branch-relaxation.ll +++ test/CodeGen/AMDGPU/branch-relaxation.ll @@ -1,9 +1,9 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -check-prefix=GCN %s ; FIXME: We should use llvm-mc for this, but we can't even parse our own output. ; See PR33579. 
-; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 -o %t.o -filetype=obj %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -amdgpu-s-branch-bits=4 -o %t.o -filetype=obj %s ; RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=OBJ %s ; OBJ: Relocations [ Index: test/CodeGen/AMDGPU/branch-uniformity.ll =================================================================== --- test/CodeGen/AMDGPU/branch-uniformity.ll +++ test/CodeGen/AMDGPU/branch-uniformity.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; The branch instruction in LOOP49 has a uniform condition, but PHI instructions ; introduced by the structurizecfg pass previously caused a false divergence Index: test/CodeGen/AMDGPU/bswap.ll =================================================================== --- test/CodeGen/AMDGPU/bswap.ll +++ test/CodeGen/AMDGPU/bswap.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.bswap.i32(i32) nounwind readnone declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone Index: test/CodeGen/AMDGPU/bug-vopc-commute.ll =================================================================== --- test/CodeGen/AMDGPU/bug-vopc-commute.ll +++ test/CodeGen/AMDGPU/bug-vopc-commute.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s -; RUN: llc 
-march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}main: ; Index: test/CodeGen/AMDGPU/build_vector.ll =================================================================== --- test/CodeGen/AMDGPU/build_vector.ll +++ test/CodeGen/AMDGPU/build_vector.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 -; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s --check-prefix=R600 +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI ; R600: {{^}}build_vector2: ; R600: MOV Index: test/CodeGen/AMDGPU/byval-frame-setup.ll =================================================================== --- test/CodeGen/AMDGPU/byval-frame-setup.ll +++ test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -1,5 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +target datalayout = "A5" 
%struct.ByValStruct = type { [4 x i32] } @@ -14,16 +15,16 @@ ; GCN-NOT: s32 ; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:20{{$}} ; GCN-NOT: s32 -define void @void_func_byval_struct(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 { +define void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 { entry: - %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0 - %tmp = load volatile i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 + %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4 %add = add nsw i32 %tmp, 1 - store volatile i32 %add, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0 - %tmp1 = load volatile i32, i32* %arrayidx2, align 4 + store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 + %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4 %add3 = add nsw i32 %tmp1, 2 - store volatile i32 %add3, i32* %arrayidx2, align 4 + store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4 store volatile i32 9, i32 addrspace(1)* null, align 4 ret void } @@ -54,17 +55,17 @@ ; GCN: buffer_load_dword v33, ; GCN: s_sub_u32 s32, s32, 0xb00{{$}} ; GCN: s_setpc_b64 -define void @void_func_byval_struct_non_leaf(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 { +define void @void_func_byval_struct_non_leaf(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval 
noalias nocapture align 4 %arg1) #1 { entry: - %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0 - %tmp = load volatile i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 + %tmp = load volatile i32, i32 addrspace(5)* %arrayidx, align 4 %add = add nsw i32 %tmp, 1 - store volatile i32 %add, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0 - %tmp1 = load volatile i32, i32* %arrayidx2, align 4 + store volatile i32 %add, i32 addrspace(5)* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 + %tmp1 = load volatile i32, i32 addrspace(5)* %arrayidx2, align 4 %add3 = add nsw i32 %tmp1, 2 call void @external_void_func_void() - store volatile i32 %add3, i32* %arrayidx2, align 4 + store volatile i32 %add3, i32 addrspace(5)* %arrayidx2, align 4 store volatile i32 9, i32 addrspace(1)* null, align 4 ret void } @@ -114,19 +115,19 @@ ; GCN-NEXT: s_setpc_b64 define void @call_void_func_byval_struct_func() #0 { entry: - %arg0 = alloca %struct.ByValStruct, align 4 - %arg1 = alloca %struct.ByValStruct, align 4 - %tmp = bitcast %struct.ByValStruct* %arg0 to i8* - call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp) - %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8* - call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp1) - %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0 - store volatile i32 9, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0 - store volatile i32 13, i32* %arrayidx2, align 4 - call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1) - call void 
@llvm.lifetime.end.p0i8(i64 32, i8* %tmp1) - call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp) + %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5) + %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5) + %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)* + call void @llvm.lifetime.start.p0i8(i64 32, i8 addrspace(5)* %tmp) + %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)* + call void @llvm.lifetime.start.p0i8(i64 32, i8 addrspace(5)* %tmp1) + %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 + store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 + store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4 + call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1) + call void @llvm.lifetime.end.p0i8(i64 32, i8 addrspace(5)* %tmp1) + call void @llvm.lifetime.end.p0i8(i64 32, i8 addrspace(5)* %tmp) ret void } @@ -167,45 +168,45 @@ ; GCN: s_endpgm define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 { entry: - %arg0 = alloca %struct.ByValStruct, align 4 - %arg1 = alloca %struct.ByValStruct, align 4 - %tmp = bitcast %struct.ByValStruct* %arg0 to i8* - call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp) - %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8* - call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp1) - %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0 - store volatile i32 9, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0 - store volatile i32 13, i32* %arrayidx2, align 4 - call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 
%arg0, %struct.ByValStruct* byval nonnull align 4 %arg1) - call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp1) - call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp) + %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5) + %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5) + %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)* + call void @llvm.lifetime.start.p0i8(i64 32, i8 addrspace(5)* %tmp) + %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)* + call void @llvm.lifetime.start.p0i8(i64 32, i8 addrspace(5)* %tmp1) + %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 + store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 + store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4 + call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1) + call void @llvm.lifetime.end.p0i8(i64 32, i8 addrspace(5)* %tmp1) + call void @llvm.lifetime.end.p0i8(i64 32, i8 addrspace(5)* %tmp) ret void } ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel_no_frame_pointer_elim: define amdgpu_kernel void @call_void_func_byval_struct_kernel_no_frame_pointer_elim() #2 { entry: - %arg0 = alloca %struct.ByValStruct, align 4 - %arg1 = alloca %struct.ByValStruct, align 4 - %tmp = bitcast %struct.ByValStruct* %arg0 to i8* - call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp) - %tmp1 = bitcast %struct.ByValStruct* %arg1 to i8* - call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp1) - %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg0, i32 0, i32 0, i32 0 - store volatile i32 9, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct* %arg1, i32 0, i32 0, i32 0 - 
store volatile i32 13, i32* %arrayidx2, align 4 - call void @void_func_byval_struct(%struct.ByValStruct* byval nonnull align 4 %arg0, %struct.ByValStruct* byval nonnull align 4 %arg1) - call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp1) - call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp) + %arg0 = alloca %struct.ByValStruct, align 4, addrspace(5) + %arg1 = alloca %struct.ByValStruct, align 4, addrspace(5) + %tmp = bitcast %struct.ByValStruct addrspace(5)* %arg0 to i8 addrspace(5)* + call void @llvm.lifetime.start.p0i8(i64 32, i8 addrspace(5)* %tmp) + %tmp1 = bitcast %struct.ByValStruct addrspace(5)* %arg1 to i8 addrspace(5)* + call void @llvm.lifetime.start.p0i8(i64 32, i8 addrspace(5)* %tmp1) + %arrayidx = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg0, i32 0, i32 0, i32 0 + store volatile i32 9, i32 addrspace(5)* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds %struct.ByValStruct, %struct.ByValStruct addrspace(5)* %arg1, i32 0, i32 0, i32 0 + store volatile i32 13, i32 addrspace(5)* %arrayidx2, align 4 + call void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg0, %struct.ByValStruct addrspace(5)* byval nonnull align 4 %arg1) + call void @llvm.lifetime.end.p0i8(i64 32, i8 addrspace(5)* %tmp1) + call void @llvm.lifetime.end.p0i8(i64 32, i8 addrspace(5)* %tmp) ret void } declare void @external_void_func_void() #0 -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #3 -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #3 +declare void @llvm.lifetime.start.p0i8(i64, i8 addrspace(5)* nocapture) #3 +declare void @llvm.lifetime.end.p0i8(i64, i8 addrspace(5)* nocapture) #3 attributes #0 = { nounwind } attributes #1 = { noinline norecurse nounwind } Index: test/CodeGen/AMDGPU/call-encoding.ll =================================================================== --- test/CodeGen/AMDGPU/call-encoding.ll +++ test/CodeGen/AMDGPU/call-encoding.ll @@ -1,6 +1,6 @@ -; RUN: llc 
-mtriple=amdgcn-amd-amdhsa -mcpu=fiji -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=fiji -d - | FileCheck -check-prefixes=GCN,VI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=gfx900 -d - | FileCheck -check-prefixes=GCN,GFX9 %s -; XUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=hawaii -d - | FileCheck -check-prefixes=GCN,CI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=fiji -d - | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=gfx900 -d - | FileCheck -check-prefixes=GCN,GFX9 %s +; XUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=hawaii -d - | FileCheck -check-prefixes=GCN,CI %s ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 Index: test/CodeGen/AMDGPU/call-graph-register-usage.ll =================================================================== --- test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -1,6 +1,7 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | 
FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=iceland -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s +target datalayout = "A5" ; Make sure to run a GPU with the SGPR allocation bug. @@ -132,24 +133,24 @@ ; GCN-LABEL: {{^}}use_stack0: ; GCN: ScratchSize: 2052 define void @use_stack0() #1 { - %alloca = alloca [512 x i32], align 4 - call void asm sideeffect "; use $0", "v"([512 x i32]* %alloca) #0 + %alloca = alloca [512 x i32], align 4, addrspace(5) + call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0 ret void } ; GCN-LABEL: {{^}}use_stack1: ; GCN: ScratchSize: 404 define void @use_stack1() #1 { - %alloca = alloca [100 x i32], align 4 - call void asm sideeffect "; use $0", "v"([100 x i32]* %alloca) #0 + %alloca = alloca [100 x i32], align 4, addrspace(5) + call void asm sideeffect "; use $0", "v"([100 x i32] addrspace(5)* %alloca) #0 ret void } ; GCN-LABEL: {{^}}indirect_use_stack: ; GCN: ScratchSize: 2124 define void @indirect_use_stack() #1 { - %alloca = alloca [16 x i32], align 4 - call void asm sideeffect "; use $0", "v"([16 x i32]* %alloca) #0 + %alloca = alloca [16 x i32], align 4, addrspace(5) + call void asm sideeffect "; use $0", "v"([16 x i32] addrspace(5)* %alloca) #0 call void @use_stack0() ret void } @@ -201,8 +202,8 @@ ; GCN-LABEL: {{^}}direct_recursion_use_stack: ; GCN: ScratchSize: 2056 define void @direct_recursion_use_stack(i32 %val) #2 { - %alloca = alloca [512 x i32], align 4 - call void asm sideeffect "; use $0", "v"([512 x i32]* %alloca) #0 + %alloca = alloca [512 x i32], align 4, addrspace(5) + call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0 %cmp = icmp eq i32 %val, 0 br i1 %cmp, label %ret, label %call Index: test/CodeGen/AMDGPU/call-preserved-registers.ll =================================================================== --- test/CodeGen/AMDGPU/call-preserved-registers.ll +++ 
test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare void @external_void_func_void() #0 Index: test/CodeGen/AMDGPU/call_fs.ll =================================================================== --- test/CodeGen/AMDGPU/call_fs.ll +++ test/CodeGen/AMDGPU/call_fs.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG %s -; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600 %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600 %s ; EG: .long 257 ; EG: {{^}}call_fs: Index: test/CodeGen/AMDGPU/callee-frame-setup.ll =================================================================== --- test/CodeGen/AMDGPU/callee-frame-setup.ll +++ test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s +target datalayout = "A5" ; GCN-LABEL: {{^}}callee_no_stack: ; GCN: ; BB#0: @@ -28,8 +29,8 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @callee_with_stack() #0 { - %alloca = alloca i32 - store volatile i32 0, i32* %alloca + %alloca = alloca i32, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %alloca ret void } @@ -57,8 +58,8 @@ ; GCN: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @callee_with_stack_and_call() #0 { - %alloca = alloca i32 - store volatile i32 0, i32* %alloca + %alloca = alloca i32, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %alloca call void @external_void_func_void() ret void } Index: test/CodeGen/AMDGPU/callee-special-input-sgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -1,5 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +target datalayout = "A5" ; GCN-LABEL: {{^}}use_dispatch_ptr: ; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0 @@ -43,8 +44,8 @@ ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]] ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}} define void 
@use_queue_ptr_addrspacecast() #1 { - %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* %asc + %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32* + store volatile i32 0, i32* %asc ret void } @@ -113,8 +114,8 @@ ; GCN: ; use s6 ; GCN: s_setpc_b64 define void @use_stack_workgroup_id_x() #1 { - %alloca = alloca i32 - store volatile i32 0, i32* %alloca + %alloca = alloca i32, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %alloca %val = call i32 @llvm.amdgcn.workgroup.id.x() call void asm sideeffect "; use $0", "s"(i32 %val) ret void @@ -432,8 +433,8 @@ ; GCN: ; use s15 ; GCN: ; use s16 define void @use_every_sgpr_input() #1 { - %alloca = alloca i32, align 4 - store volatile i32 0, i32* %alloca + %alloca = alloca i32, align 4, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %alloca %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 %dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)* @@ -512,8 +513,8 @@ ; GCN-DAG: s_mov_b32 s8, s16 ; GCN: s_swappc_b64 define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 { - %alloca = alloca i32, align 4 - store volatile i32 0, i32* %alloca + %alloca = alloca i32, align 4, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %alloca %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 %dispatch_ptr.bc = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)* @@ -568,10 +569,10 @@ ; GCN: ; use [[SAVE_Y]] ; GCN: ; use [[SAVE_Z]] define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 { - %alloca = alloca i32, align 4 + %alloca = alloca i32, align 4, addrspace(5) call void @use_workgroup_id_xyz() - store volatile i32 0, i32* %alloca + store volatile i32 0, i32 addrspace(5)* %alloca %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 %dispatch_ptr.bc = bitcast 
i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)* Index: test/CodeGen/AMDGPU/callee-special-input-vgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +target datalayout = "A5" ; GCN-LABEL: {{^}}use_workitem_id_x: ; GCN: s_waitcnt @@ -368,7 +369,7 @@ i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23, - i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32* byval %arg32) #1 { + i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32 addrspace(5)* byval %arg32) #1 { %val = call i32 @llvm.amdgcn.workitem.id.x() store volatile i32 %val, i32 addrspace(1)* undef @@ -407,7 +408,7 @@ store volatile i32 %arg29, i32 addrspace(1)* undef store volatile i32 %arg30, i32 addrspace(1)* undef store volatile i32 %arg31, i32 addrspace(1)* undef - %private = load volatile i32, i32* %arg32 + %private = load volatile i32, i32 addrspace(5)* %arg32 ret void } @@ -435,8 +436,8 @@ ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]], ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 { - %alloca = alloca i32, align 4 - store volatile i32 999, i32* %alloca + %alloca = alloca i32, align 4, addrspace(5) + store volatile i32 999, i32 addrspace(5)* %alloca call void @too_many_args_use_workitem_id_x_byval( i32 10, i32 20, i32 30, i32 40, i32 
50, i32 60, i32 70, i32 80, @@ -446,7 +447,7 @@ i32 210, i32 220, i32 230, i32 240, i32 250, i32 260, i32 270, i32 280, i32 290, i32 300, i32 310, i32 320, - i32* %alloca) + i32 addrspace(5)* %alloca) ret void } @@ -460,8 +461,8 @@ ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]], ; GCN: s_swappc_b64 define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { - %alloca = alloca i32, align 4 - store volatile i32 999, i32* %alloca + %alloca = alloca i32, align 4, addrspace(5) + store volatile i32 999, i32 addrspace(5)* %alloca call void @too_many_args_use_workitem_id_x_byval( i32 10, i32 20, i32 30, i32 40, i32 50, i32 60, i32 70, i32 80, @@ -471,7 +472,7 @@ i32 210, i32 220, i32 230, i32 240, i32 250, i32 260, i32 270, i32 280, i32 290, i32 300, i32 310, i32 320, - i32* %alloca) + i32 addrspace(5)* %alloca) ret void } Index: test/CodeGen/AMDGPU/calling-conventions.ll =================================================================== --- test/CodeGen/AMDGPU/calling-conventions.ll +++ test/CodeGen/AMDGPU/calling-conventions.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; Make sure we don't crash or assert on spir_kernel calling convention. 
Index: test/CodeGen/AMDGPU/captured-frame-index.ll =================================================================== --- test/CodeGen/AMDGPU/captured-frame-index.ll +++ test/CodeGen/AMDGPU/captured-frame-index.ll @@ -1,14 +1,15 @@ -; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +target datalayout = "A5" ; GCN-LABEL: {{^}}store_fi_lifetime: ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}} ; GCN: buffer_store_dword [[FI]] define amdgpu_kernel void @store_fi_lifetime(i32 addrspace(1)* %out, i32 %in) #0 { entry: - %b = alloca i8 - call void @llvm.lifetime.start.p0i8(i64 1, i8* %b) - store volatile i8* %b, i8* addrspace(1)* undef - call void @llvm.lifetime.end.p0i8(i64 1, i8* %b) + %b = alloca i8, addrspace(5) + call void @llvm.lifetime.start.p0i8(i64 1, i8 addrspace(5)* %b) + store volatile i8 addrspace(5)* %b, i8 addrspace(5)* addrspace(1)* undef + call void @llvm.lifetime.end.p0i8(i64 1, i8 addrspace(5)* %b) ret void } @@ -18,10 +19,10 @@ ; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 4{{$}} ; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]] ; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]] -define amdgpu_kernel void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 { - %tmp = alloca float - store float 4.0, float *%tmp - store float* %tmp, float* addrspace(3)* %ptr +define amdgpu_kernel void @stored_fi_to_lds(float addrspace(5)* addrspace(3)* %ptr) #0 { + %tmp = alloca float, addrspace(5) + store float 4.0, float addrspace(5)*%tmp + store float addrspace(5)* %tmp, float addrspace(5)* addrspace(3)* %ptr ret void } @@ -38,13 +39,13 @@ ; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 8{{$}} ; GCN: ds_write_b32 [[VLDSPTR]], [[FI1]] -define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 { - %tmp0 = alloca float - %tmp1 = 
alloca float - store float 4.0, float* %tmp0 - store float 4.0, float* %tmp1 - store volatile float* %tmp0, float* addrspace(3)* %ptr - store volatile float* %tmp1, float* addrspace(3)* %ptr +define amdgpu_kernel void @stored_fi_to_lds_2_small_objects(float addrspace(5)* addrspace(3)* %ptr) #0 { + %tmp0 = alloca float, addrspace(5) + %tmp1 = alloca float, addrspace(5) + store float 4.0, float addrspace(5)* %tmp0 + store float 4.0, float addrspace(5)* %tmp1 + store volatile float addrspace(5)* %tmp0, float addrspace(5)* addrspace(3)* %ptr + store volatile float addrspace(5)* %tmp1, float addrspace(5)* addrspace(3)* %ptr ret void } @@ -55,12 +56,12 @@ ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 4{{$}} ; GCN: buffer_store_dword [[ZERO]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}} define amdgpu_kernel void @stored_fi_to_self() #0 { - %tmp = alloca i32* + %tmp = alloca i32 addrspace(5)*, addrspace(5) ; Avoid optimizing everything out - store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp - %bitcast = bitcast i32** %tmp to i32* - store volatile i32* %bitcast, i32** %tmp + store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp + %bitcast = bitcast i32 addrspace(5)* addrspace(5)* %tmp to i32 addrspace(5)* + store volatile i32 addrspace(5)* %bitcast, i32 addrspace(5)* addrspace(5)* %tmp ret void } @@ -74,17 +75,17 @@ ; GCN: v_mov_b32_e32 [[OFFSETK:v[0-9]+]], 0x804{{$}} ; GCN: buffer_store_dword [[OFFSETK]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2052{{$}} define amdgpu_kernel void @stored_fi_to_self_offset() #0 { - %tmp0 = alloca [512 x i32] - %tmp1 = alloca i32* + %tmp0 = alloca [512 x i32], addrspace(5) + %tmp1 = alloca i32 addrspace(5)*, addrspace(5) ; Avoid optimizing everything out - %tmp0.cast = bitcast [512 x i32]* %tmp0 to i32* - store volatile i32 32, i32* %tmp0.cast + %tmp0.cast = bitcast [512 x i32] addrspace(5)* %tmp0 to i32 addrspace(5)* + store volatile i32 32, i32 
addrspace(5)* %tmp0.cast - store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp1 + store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp1 - %bitcast = bitcast i32** %tmp1 to i32* - store volatile i32* %bitcast, i32** %tmp1 + %bitcast = bitcast i32 addrspace(5)* addrspace(5)* %tmp1 to i32 addrspace(5)* + store volatile i32 addrspace(5)* %bitcast, i32 addrspace(5)* addrspace(5)* %tmp1 ret void } @@ -99,18 +100,18 @@ ; GCN: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}} ; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:8{{$}} define amdgpu_kernel void @stored_fi_to_fi() #0 { - %tmp0 = alloca i32* - %tmp1 = alloca i32* - %tmp2 = alloca i32* - store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp0 - store volatile i32* inttoptr (i32 5678 to i32*), i32** %tmp1 - store volatile i32* inttoptr (i32 9999 to i32*), i32** %tmp2 - - %bitcast1 = bitcast i32** %tmp1 to i32* - %bitcast2 = bitcast i32** %tmp2 to i32* ; at offset 8 - - store volatile i32* %bitcast1, i32** %tmp2 ; store offset 4 at offset 8 - store volatile i32* %bitcast2, i32** %tmp1 ; store offset 8 at offset 4 + %tmp0 = alloca i32 addrspace(5)*, addrspace(5) + %tmp1 = alloca i32 addrspace(5)*, addrspace(5) + %tmp2 = alloca i32 addrspace(5)*, addrspace(5) + store volatile i32 addrspace(5)* inttoptr (i32 1234 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp0 + store volatile i32 addrspace(5)* inttoptr (i32 5678 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp1 + store volatile i32 addrspace(5)* inttoptr (i32 9999 to i32 addrspace(5)*), i32 addrspace(5)* addrspace(5)* %tmp2 + + %bitcast1 = bitcast i32 addrspace(5)* addrspace(5)* %tmp1 to i32 addrspace(5)* + %bitcast2 = bitcast i32 addrspace(5)* addrspace(5)* %tmp2 to i32 addrspace(5)* ; at offset 8 + + store volatile i32 addrspace(5)* %bitcast1, i32 addrspace(5)* addrspace(5)* %tmp2 ; store offset 4 at offset 8 + store volatile i32 addrspace(5)* 
%bitcast2, i32 addrspace(5)* addrspace(5)* %tmp1 ; store offset 8 at offset 4 ret void } @@ -118,10 +119,10 @@ ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4{{$}} ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}} ; GCN: buffer_store_dword [[FI]] -define amdgpu_kernel void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 { - %tmp = alloca float - store float 0.0, float *%tmp - store float* %tmp, float* addrspace(1)* %ptr +define amdgpu_kernel void @stored_fi_to_global(float addrspace(5)* addrspace(1)* %ptr) #0 { + %tmp = alloca float, addrspace(5) + store float 0.0, float addrspace(5)*%tmp + store float addrspace(5)* %tmp, float addrspace(5)* addrspace(1)* %ptr ret void } @@ -136,15 +137,15 @@ ; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 12{{$}} ; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -define amdgpu_kernel void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 { - %tmp0 = alloca float - %tmp1 = alloca float - %tmp2 = alloca float - store volatile float 0.0, float *%tmp0 - store volatile float 0.0, float *%tmp1 - store volatile float 0.0, float *%tmp2 - store volatile float* %tmp1, float* addrspace(1)* %ptr - store volatile float* %tmp2, float* addrspace(1)* %ptr +define amdgpu_kernel void @stored_fi_to_global_2_small_objects(float addrspace(5)* addrspace(1)* %ptr) #0 { + %tmp0 = alloca float, addrspace(5) + %tmp1 = alloca float, addrspace(5) + %tmp2 = alloca float, addrspace(5) + store volatile float 0.0, float addrspace(5)*%tmp0 + store volatile float 0.0, float addrspace(5)*%tmp1 + store volatile float 0.0, float addrspace(5)*%tmp2 + store volatile float addrspace(5)* %tmp1, float addrspace(5)* addrspace(1)* %ptr + store volatile float addrspace(5)* %tmp2, float addrspace(5)* addrspace(1)* %ptr ret void } @@ -163,19 +164,19 @@ ; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} ; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, 
s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 { - %tmp0 = alloca [4096 x i32] - %tmp1 = alloca [4096 x i32] - %gep0.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 0 - store volatile i32 0, i32* %gep0.tmp0 - %gep1.tmp0 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 4095 - store volatile i32 999, i32* %gep1.tmp0 - %gep0.tmp1 = getelementptr [4096 x i32], [4096 x i32]* %tmp0, i32 0, i32 14 - store i32* %gep0.tmp1, i32* addrspace(1)* %ptr +define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset(i32 addrspace(5)* addrspace(1)* %ptr) #0 { + %tmp0 = alloca [4096 x i32], addrspace(5) + %tmp1 = alloca [4096 x i32], addrspace(5) + %gep0.tmp0 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 0 + store volatile i32 0, i32 addrspace(5)* %gep0.tmp0 + %gep1.tmp0 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 4095 + store volatile i32 999, i32 addrspace(5)* %gep1.tmp0 + %gep0.tmp1 = getelementptr [4096 x i32], [4096 x i32] addrspace(5)* %tmp0, i32 0, i32 14 + store i32 addrspace(5)* %gep0.tmp1, i32 addrspace(5)* addrspace(1)* %ptr ret void } -@g1 = external addrspace(1) global i32* +@g1 = external addrspace(1) global i32 addrspace(5)* ; This was leaving a dead node around resulting in failing to select ; on the leftover AssertZext's ValueType operand. 
@@ -188,16 +189,16 @@ ; GCN: buffer_store_dword [[FI]] define amdgpu_kernel void @cannot_select_assertzext_valuetype(i32 addrspace(1)* %out, i32 %idx) #0 { entry: - %b = alloca i32, align 4 - %tmp1 = load volatile i32*, i32* addrspace(1)* @g1, align 4 - %arrayidx = getelementptr inbounds i32, i32* %tmp1, i32 %idx - %tmp2 = load i32, i32* %arrayidx, align 4 - store volatile i32* %b, i32* addrspace(1)* undef + %b = alloca i32, align 4, addrspace(5) + %tmp1 = load volatile i32 addrspace(5)*, i32 addrspace(5)* addrspace(1)* @g1, align 4 + %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %tmp1, i32 %idx + %tmp2 = load i32, i32 addrspace(5)* %arrayidx, align 4 + store volatile i32 addrspace(5)* %b, i32 addrspace(5)* addrspace(1)* undef ret void } -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 +declare void @llvm.lifetime.start.p0i8(i64, i8 addrspace(5)* nocapture) #1 +declare void @llvm.lifetime.end.p0i8(i64, i8 addrspace(5)* nocapture) #1 attributes #0 = { nounwind } attributes #1 = { argmemonly nounwind } Index: test/CodeGen/AMDGPU/cayman-loop-bug.ll =================================================================== --- test/CodeGen/AMDGPU/cayman-loop-bug.ll +++ test/CodeGen/AMDGPU/cayman-loop-bug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman | FileCheck %s ; CHECK-LABEL: {{^}}main: ; CHECK: LOOP_START_DX10 Index: test/CodeGen/AMDGPU/cf-loop-on-constant.ll =================================================================== --- test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -verify-machineinstrs -O0 < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: 
llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -O0 < %s ; GCN-LABEL: {{^}}test_loop: ; GCN: [[LABEL:BB[0-9+]_[0-9]+]]: ; %for.body{{$}} Index: test/CodeGen/AMDGPU/cf-stack-bug.ll =================================================================== --- test/CodeGen/AMDGPU/cf-stack-bug.ll +++ test/CodeGen/AMDGPU/cf-stack-bug.ll @@ -1,29 +1,30 @@ -; RUN: llc -march=r600 -mcpu=redwood -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=BUG64 %s < %t -; RUN: llc -march=r600 -mcpu=sumo -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=sumo -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=BUG64 %s < %t -; RUN: llc -march=r600 -mcpu=barts -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=barts -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=BUG64 %s < %t -; RUN: llc -march=r600 -mcpu=turks -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=turks -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=BUG64 %s < %t -; RUN: llc -march=r600 -mcpu=caicos -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=caicos -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=BUG64 %s < %t -; RUN: llc -march=r600 -mcpu=cedar -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cedar -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck 
--check-prefix=BUG32 %s < %t -; RUN: llc -march=r600 -mcpu=juniper -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=juniper -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=NOBUG %s < %t -; RUN: llc -march=r600 -mcpu=cypress -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=NOBUG %s < %t -; RUN: llc -march=r600 -mcpu=cayman -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=NOBUG %s < %t +target datalayout = "A5" ; REQUIRES: asserts Index: test/CodeGen/AMDGPU/cf_end.ll =================================================================== --- test/CodeGen/AMDGPU/cf_end.ll +++ test/CodeGen/AMDGPU/cf_end.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood --show-mc-encoding | FileCheck --check-prefix=EG %s -; RUN: llc < %s -march=r600 -mcpu=caicos --show-mc-encoding | FileCheck --check-prefix=EG %s -; RUN: llc < %s -march=r600 -mcpu=cayman --show-mc-encoding | FileCheck --check-prefix=CM %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood --show-mc-encoding | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=caicos --show-mc-encoding | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman --show-mc-encoding | FileCheck --check-prefix=CM %s ; EG: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x80] ; CM: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x88] Index: test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll =================================================================== --- 
test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll +++ test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll @@ -1,37 +1,38 @@ -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-CIVI %s -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-CIVI %s -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s -; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s -; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s -; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown-amdgiz -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-CIVI %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown-amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-CIVI %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown-amdgiz -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -amdgpu-scalarize-global-loads=false -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -amdgpu-scalarize-global-loads=false -mcpu=tonga 
-mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +target datalayout = "A5" ; OPT-LABEL: @test_no_sink_flat_small_offset_i32( -; OPT-CIVI: getelementptr i32, i32 addrspace(4)* %in +; OPT-CIVI: getelementptr i32, i32* %in ; OPT-CIVI: br i1 ; OPT-CIVI-NOT: ptrtoint ; OPT-GFX9: br -; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %0, i64 28 -; OPT-GFX9: %1 = bitcast i8 addrspace(4)* %sunkaddr to i32 addrspace(4)* -; OPT-GFX9: load i32, i32 addrspace(4)* %1 +; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %0, i64 28 +; OPT-GFX9: %1 = bitcast i8* %sunkaddr to i32* +; OPT-GFX9: load i32, i32* %1 ; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32: ; GCN: flat_load_dword ; GCN: {{^}}BB0_2: -define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32* %out, i32* %in, i32 %cond) { entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 + %out.gep = getelementptr i32, i32* %out, i64 999999 + %in.gep = getelementptr i32, i32* %in, i64 7 %tmp0 = icmp eq i32 %cond, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i32, i32 addrspace(4)* %in.gep + %tmp1 = load i32, i32* %in.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep + store i32 %x, i32* %out.gep br label %done done: @@ -39,7 +40,7 @@ } ; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32( -; OPT: getelementptr i32, i32 addrspace(4)* %out, +; OPT: getelementptr i32, i32* %out, ; rOPT-CI-NOT: getelementptr ; OPT: br i1 @@ -50,11 +51,11 @@ ; GCN-LABEL: 
{{^}}test_sink_noop_addrspacecast_flat_to_global_i32: ; CI: buffer_load_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28 -define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_global_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_global_i32(i32* %out, i32* %in, i32 %cond) { entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 - %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(1)* + %out.gep = getelementptr i32, i32* %out, i64 999999 + %in.gep = getelementptr i32, i32* %in, i64 7 + %cast = addrspacecast i32* %in.gep to i32 addrspace(1)* %tmp0 = icmp eq i32 %cond, 0 br i1 %tmp0, label %endif, label %if @@ -64,7 +65,7 @@ endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep + store i32 %x, i32* %out.gep br label %done done: @@ -72,7 +73,7 @@ } ; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32( -; OPT: getelementptr i32, i32 addrspace(4)* %out, +; OPT: getelementptr i32, i32* %out, ; OPT-CI-NOT: getelementptr ; OPT: br i1 @@ -83,11 +84,11 @@ ; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_constant_i32: ; CI: s_load_dword {{s[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd -define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32* %out, i32* %in, i32 %cond) { entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 - %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(2)* + %out.gep = getelementptr i32, i32* %out, i64 999999 + %in.gep = getelementptr i32, i32* %in, i64 7 + %cast = addrspacecast i32* %in.gep to i32 addrspace(2)* %tmp0 = icmp eq i32 %cond, 0 br i1 %tmp0, label 
%endif, label %if @@ -97,7 +98,7 @@ endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep + store i32 %x, i32* %out.gep br label %done done: @@ -105,34 +106,34 @@ } ; OPT-LABEL: @test_sink_flat_small_max_flat_offset( -; OPT-CIVI: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095 +; OPT-CIVI: %in.gep = getelementptr i8, i8* %in, i64 4095 ; OPT-CIVI: br ; OPT-CIVI-NOT: getelementptr -; OPT-CIVI: load i8, i8 addrspace(4)* %in.gep +; OPT-CIVI: load i8, i8* %in.gep ; OPT-GFX9: br -; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %in, i64 4095 -; OPT-GFX9: load i8, i8 addrspace(4)* %sunkaddr +; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %in, i64 4095 +; OPT-GFX9: load i8, i8* %sunkaddr ; GCN-LABEL: {{^}}test_sink_flat_small_max_flat_offset: ; GFX9: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}} ; CIVI: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 { +define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32* %out, i8* %in) #1 { entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024 - %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095 + %out.gep = getelementptr i32, i32* %out, i32 1024 + %in.gep = getelementptr i8, i8* %in, i64 4095 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i8, i8 addrspace(4)* %in.gep + %tmp1 = load i8, i8* %in.gep %tmp2 = sext i8 %tmp1 to i32 br label %endif endif: %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep + store i32 %x, i32* %out.gep br label %done done: @@ -140,29 +141,29 @@ } ; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset( -; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096 +; OPT: %in.gep = getelementptr i8, i8* %in, i64 4096 ; OPT: br ; OPT-NOT: 
getelementptr -; OPT: load i8, i8 addrspace(4)* %in.gep +; OPT: load i8, i8* %in.gep ; GCN-LABEL: {{^}}test_sink_flat_small_max_plus_1_flat_offset: ; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 { +define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32* %out, i8* %in) #1 { entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 99999 - %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096 + %out.gep = getelementptr i32, i32* %out, i64 99999 + %in.gep = getelementptr i8, i8* %in, i64 4096 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i8, i8 addrspace(4)* %in.gep + %tmp1 = load i8, i8* %in.gep %tmp2 = sext i8 %tmp1 to i32 br label %endif endif: %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep + store i32 %x, i32* %out.gep br label %done done: @@ -170,30 +171,30 @@ } ; OPT-LABEL: @test_no_sink_flat_reg_offset( -; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg +; OPT: %in.gep = getelementptr i8, i8* %in, i64 %reg ; OPT: br ; OPT-NOT: getelementptr -; OPT: load i8, i8 addrspace(4)* %in.gep +; OPT: load i8, i8* %in.gep ; GCN-LABEL: {{^}}test_no_sink_flat_reg_offset: ; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in, i64 %reg) #1 { +define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32* %out, i8* %in, i64 %reg) #1 { entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024 - %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg + %out.gep = getelementptr i32, i32* %out, i32 1024 + %in.gep = getelementptr i8, i8* %in, i64 %reg %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, 
label %endif, label %if if: - %tmp1 = load i8, i8 addrspace(4)* %in.gep + %tmp1 = load i8, i8* %in.gep %tmp2 = sext i8 %tmp1 to i32 br label %endif endif: %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep + store i32 %x, i32* %out.gep br label %done done: Index: test/CodeGen/AMDGPU/cgp-bitfield-extract.ll =================================================================== --- test/CodeGen/AMDGPU/cgp-bitfield-extract.ll +++ test/CodeGen/AMDGPU/cgp-bitfield-extract.ll @@ -1,7 +1,7 @@ -; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -codegenprepare < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; This particular case will actually be worse in terms of code size ; from sinking into both. 
Index: test/CodeGen/AMDGPU/clamp-modifier.ll =================================================================== --- test/CodeGen/AMDGPU/clamp-modifier.ll +++ test/CodeGen/AMDGPU/clamp-modifier.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s ; GCN-LABEL: {{^}}v_clamp_add_src_f32: ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] Index: test/CodeGen/AMDGPU/clamp.ll =================================================================== --- test/CodeGen/AMDGPU/clamp.ll +++ test/CodeGen/AMDGPU/clamp.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz 
-mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s ; GCN-LABEL: {{^}}v_clamp_f32: ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] Index: test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll =================================================================== --- test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll +++ test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i1 @llvm.amdgcn.class.f32(float, i32) Index: test/CodeGen/AMDGPU/coalescer-subrange-crash.ll =================================================================== --- test/CodeGen/AMDGPU/coalescer-subrange-crash.ll +++ test/CodeGen/AMDGPU/coalescer-subrange-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; ; This testcase used to cause the following crash: ; Index: test/CodeGen/AMDGPU/coalescer_remat.ll =================================================================== --- test/CodeGen/AMDGPU/coalescer_remat.ll +++ test/CodeGen/AMDGPU/coalescer_remat.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -mtriple=amdgcn-- -o - %s | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs -mtriple=amdgcn---amdgiz -o - %s | FileCheck %s declare float @llvm.fma.f32(float, float, float) Index: test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll =================================================================== --- test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll +++ test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -codegenprepare -S < %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck 
-check-prefix=SI-LLC %s +; RUN: opt -mtriple=amdgcn---amdgiz -codegenprepare -S < %s | FileCheck -check-prefix=OPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI-LLC %s ; OPT-LABEL: @test( ; OPT: mul nsw i32 Index: test/CodeGen/AMDGPU/collapse-endcf.ll =================================================================== --- test/CodeGen/AMDGPU/collapse-endcf.ll +++ test/CodeGen/AMDGPU/collapse-endcf.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}simple_nested_if: ; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]] @@ -237,7 +237,7 @@ br i1 %tmp3, label %bb4, label %bb10 bb4: ; preds = %bb2 - %tmp6 = load float, float* undef + %tmp6 = load float, float addrspace(5)* undef %tmp7 = fcmp olt float %tmp6, 0.0 br i1 %tmp7, label %bb8, label %Flow @@ -257,7 +257,7 @@ br label %bb1 bb12: ; preds = %bb10 - store volatile <4 x float> %tmp11, <4 x float>* undef, align 16 + store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16 ret void } Index: test/CodeGen/AMDGPU/combine-and-sext-bool.ll =================================================================== --- test/CodeGen/AMDGPU/combine-and-sext-bool.ll +++ test/CodeGen/AMDGPU/combine-and-sext-bool.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}and_i1_sext_bool: ; GCN: v_cmp_{{gt|le}}_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/combine-cond-add-sub.ll =================================================================== --- test/CodeGen/AMDGPU/combine-cond-add-sub.ll +++ 
test/CodeGen/AMDGPU/combine-cond-add-sub.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}add1: ; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/combine-ftrunc.ll =================================================================== --- test/CodeGen/AMDGPU/combine-ftrunc.ll +++ test/CodeGen/AMDGPU/combine-ftrunc.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}combine_ftrunc_frint_f64: ; GCN: v_rndne_f64_e32 [[RND:v\[[0-9:]+\]]], Index: test/CodeGen/AMDGPU/commute-compares.ll =================================================================== --- test/CodeGen/AMDGPU/commute-compares.ll +++ test/CodeGen/AMDGPU/commute-compares.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +target datalayout = "A5" declare i32 @llvm.amdgcn.workitem.id.x() #0 @@ -703,9 +704,9 @@ ; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]] define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 { entry: - %stack0 = alloca i32 - %ptr0 = load volatile i32*, i32* addrspace(1)* undef - %eq = icmp eq i32* %ptr0, %stack0 + %stack0 = alloca i32, addrspace(5) + %ptr0 = load volatile i32 addrspace(5)*, i32 addrspace(5)* addrspace(1)* undef + %eq = icmp eq i32 addrspace(5)* %ptr0, %stack0 %ext = zext i1 %eq to i32 store volatile i32 %ext, i32 addrspace(1)* %out ret void Index: 
test/CodeGen/AMDGPU/commute-shifts.ll =================================================================== --- test/CodeGen/AMDGPU/commute-shifts.ll +++ test/CodeGen/AMDGPU/commute-shifts.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}main: ; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/commute_modifiers.ll =================================================================== --- test/CodeGen/AMDGPU/commute_modifiers.ll +++ test/CodeGen/AMDGPU/commute_modifiers.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare float @llvm.fabs.f32(float) #1 Index: test/CodeGen/AMDGPU/complex-folding.ll =================================================================== --- test/CodeGen/AMDGPU/complex-folding.ll +++ test/CodeGen/AMDGPU/complex-folding.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; CHECK: {{^}}main: ; CHECK-NOT: MOV Index: test/CodeGen/AMDGPU/concat_vectors.ll =================================================================== --- test/CodeGen/AMDGPU/concat_vectors.ll +++ test/CodeGen/AMDGPU/concat_vectors.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck 
-check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_concat_v1i32: ; 0x80f000 is the high 32 bits of the resource descriptor used by MUBUF Index: test/CodeGen/AMDGPU/constant-fold-mi-operands.ll =================================================================== --- test/CodeGen/AMDGPU/constant-fold-mi-operands.ll +++ test/CodeGen/AMDGPU/constant-fold-mi-operands.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}fold_mi_v_and_0: ; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}} Index: test/CodeGen/AMDGPU/control-flow-optnone.ll =================================================================== --- test/CodeGen/AMDGPU/control-flow-optnone.ll +++ test/CodeGen/AMDGPU/control-flow-optnone.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; optnone disables AMDGPUAnnotateUniformValues, so no branch is known ; to be uniform during instruction selection. 
The custom selection for Index: test/CodeGen/AMDGPU/convergent-inlineasm.ll =================================================================== --- test/CodeGen/AMDGPU/convergent-inlineasm.ll +++ test/CodeGen/AMDGPU/convergent-inlineasm.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.workitem.id.x() #0 ; GCN-LABEL: {{^}}convergent_inlineasm: Index: test/CodeGen/AMDGPU/copy-illegal-type.ll =================================================================== --- test/CodeGen/AMDGPU/copy-illegal-type.ll +++ test/CodeGen/AMDGPU/copy-illegal-type.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-sdwa-peephole=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -amdgpu-sdwa-peephole=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone Index: test/CodeGen/AMDGPU/copy-to-reg.ll =================================================================== --- test/CodeGen/AMDGPU/copy-to-reg.ll +++ test/CodeGen/AMDGPU/copy-to-reg.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mattr=-promote-alloca -verify-machineinstrs < %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=-promote-alloca -verify-machineinstrs < %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca -verify-machineinstrs < %s +target datalayout = "A5" ; Test that CopyToReg instructions don't have non-register operands prior ; to being emitted. @@ -8,20 +9,20 @@ ; CHECK-LABEL: {{^}}copy_to_reg_frameindex: define amdgpu_kernel void @copy_to_reg_frameindex(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { entry: - %alloca = alloca [16 x i32] + %alloca = alloca [16 x i32], addrspace(5) br label %loop loop: %inc = phi i32 [0, %entry], [%inc.i, %loop] - %ptr = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %inc - store i32 %inc, i32* %ptr + %ptr = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %inc + store i32 %inc, i32 addrspace(5)* %ptr %inc.i = add i32 %inc, 1 %cnd = icmp uge i32 %inc.i, 16 br i1 %cnd, label %done, label %loop done: - %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 0 - %tmp1 = load i32, i32* %tmp0 + %tmp0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 + %tmp1 = load i32, i32 addrspace(5)* %tmp0 store i32 %tmp1, i32 addrspace(1)* %out ret void } Index: test/CodeGen/AMDGPU/ctlz.ll =================================================================== --- test/CodeGen/AMDGPU/ctlz.ll +++ test/CodeGen/AMDGPU/ctlz.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz 
-mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s declare i7 @llvm.ctlz.i7(i7, i1) nounwind readnone declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone Index: test/CodeGen/AMDGPU/ctlz_zero_undef.ll =================================================================== --- test/CodeGen/AMDGPU/ctlz_zero_undef.ll +++ test/CodeGen/AMDGPU/ctlz_zero_undef.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC -check-prefix=GCN %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC -check-prefix=GCN %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone Index: test/CodeGen/AMDGPU/ctpop.ll =================================================================== --- test/CodeGen/AMDGPU/ctpop.ll +++ test/CodeGen/AMDGPU/ctpop.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC 
-check-prefix=VI %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.ctpop.i32(i32) nounwind readnone declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone Index: test/CodeGen/AMDGPU/ctpop64.ll =================================================================== --- test/CodeGen/AMDGPU/ctpop64.ll +++ test/CodeGen/AMDGPU/ctpop64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone Index: test/CodeGen/AMDGPU/cttz_zero_undef.ll =================================================================== --- test/CodeGen/AMDGPU/cttz_zero_undef.ll +++ test/CodeGen/AMDGPU/cttz_zero_undef.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-NOSDWA -check-prefix=FUNC %s -; RUN: llc 
-march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-SDWA -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-NOSDWA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-SDWA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s declare i7 @llvm.cttz.i7(i7, i1) nounwind readnone declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone Index: test/CodeGen/AMDGPU/cube.ll =================================================================== --- test/CodeGen/AMDGPU/cube.ll +++ test/CodeGen/AMDGPU/cube.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare float @llvm.amdgcn.cubeid(float, float, float) #0 declare float @llvm.amdgcn.cubesc(float, float, float) #0 Index: test/CodeGen/AMDGPU/cvt_f32_ubyte.ll =================================================================== --- test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone Index: test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll =================================================================== --- test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll +++ test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.fabs.f32(float) #1 declare float @llvm.floor.f32(float) #1 Index: test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll =================================================================== --- test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll +++ test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll @@ 
-1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.fabs.f32(float) #1 declare float @llvm.floor.f32(float) #1 Index: test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll =================================================================== --- test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll +++ test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; We are only checking that instruction selection can succeed in this case. This ; cut down test results in no instructions, but that's fine. 
Index: test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll =================================================================== --- test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll +++ test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; Test for a bug where DAGCombiner::ReassociateOps() was creating adds ; with offset in the first operand and base pointers in the second. Index: test/CodeGen/AMDGPU/debug.ll =================================================================== --- test/CodeGen/AMDGPU/debug.ll +++ test/CodeGen/AMDGPU/debug.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs -mattr=dumpcode -filetype=obj | FileCheck --check-prefix=SI --check-prefix=FUNC %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=dumpcode -filetype=obj | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs -mattr=dumpcode -filetype=obj | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs -mattr=dumpcode -filetype=obj | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; Test for a crash in the custom assembly dump code. 
Index: test/CodeGen/AMDGPU/default-fp-mode.ll =================================================================== --- test/CodeGen/AMDGPU/default-fp-mode.ll +++ test/CodeGen/AMDGPU/default-fp-mode.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_default_si: ; GCN: FloatMode: 192 Index: test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll =================================================================== --- test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll +++ test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s -; PRED_SET* instructions must be tied to any instruction that uses their -; result. This tests that there are no instructions between the PRED_SET* +; PRED_SET* instructions must be tied to any instruction that uses their +; result. This tests that there are no instructions between the PRED_SET* ; and the PREDICATE_BREAK in this loop. 
; CHECK: {{^}}loop_ge: Index: test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll =================================================================== --- test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll +++ test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; The memory operand was dropped from the buffer_load_dword_offset ; when replaced with the addr64 during operand legalization, resulting Index: test/CodeGen/AMDGPU/ds-combine-large-stride.ll =================================================================== --- test/CodeGen/AMDGPU/ds-combine-large-stride.ll +++ test/CodeGen/AMDGPU/ds-combine-large-stride.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; GCN-LABEL: ds_read32_combine_stride_400: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 @@ -14,7 +14,7 @@ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100 -define amdgpu_kernel void @ds_read32_combine_stride_400(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +define amdgpu_kernel void @ds_read32_combine_stride_400(float addrspace(3)* nocapture readonly %arg, float addrspace(5)*nocapture %arg1) { bb: %tmp = load float, float 
addrspace(3)* %arg, align 4 %tmp2 = fadd float %tmp, 0.000000e+00 @@ -39,7 +39,7 @@ %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 %tmp22 = load float, float addrspace(3)* %tmp21, align 4 %tmp23 = fadd float %tmp20, %tmp22 - store float %tmp23, float *%arg1, align 4 + store float %tmp23, float addrspace(5)*%arg1, align 4 ret void } @@ -56,7 +56,7 @@ ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100 -define amdgpu_kernel void @ds_read32_combine_stride_400_back(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +define amdgpu_kernel void @ds_read32_combine_stride_400_back(float addrspace(3)* nocapture readonly %arg, float addrspace(5)*nocapture %arg1) { bb: %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 %tmp2 = load float, float addrspace(3)* %tmp, align 4 @@ -81,7 +81,7 @@ %tmp21 = fadd float %tmp18, %tmp20 %tmp22 = load float, float addrspace(3)* %arg, align 4 %tmp23 = fadd float %tmp21, %tmp22 - store float %tmp23, float *%arg1, align 4 + store float %tmp23, float addrspace(5)*%arg1, align 4 ret void } @@ -92,7 +92,7 @@ ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:64 offset1:96 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:128 offset1:160 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:192 offset1:224 -define amdgpu_kernel void @ds_read32_combine_stride_8192(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +define amdgpu_kernel void @ds_read32_combine_stride_8192(float addrspace(3)* nocapture readonly %arg, float addrspace(5)*nocapture %arg1) { bb: %tmp = load float, float addrspace(3)* %arg, align 4 %tmp2 = fadd float %tmp, 0.000000e+00 @@ -117,7 +117,7 @@ %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336 %tmp22 = load float, float addrspace(3)* 
%tmp21, align 4 %tmp23 = fadd float %tmp20, %tmp22 - store float %tmp23, float *%arg1, align 4 + store float %tmp23, float addrspace(5)*%arg1, align 4 ret void } @@ -133,7 +133,7 @@ ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:32 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:32 -define amdgpu_kernel void @ds_read32_combine_stride_8192_shifted(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +define amdgpu_kernel void @ds_read32_combine_stride_8192_shifted(float addrspace(3)* nocapture readonly %arg, float addrspace(5)*nocapture %arg1) { bb: %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2 %tmp2 = load float, float addrspace(3)* %tmp, align 4 @@ -153,7 +153,7 @@ %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10242 %tmp17 = load float, float addrspace(3)* %tmp16, align 4 %tmp18 = fadd float %tmp15, %tmp17 - store float %tmp18, float *%arg1, align 4 + store float %tmp18, float addrspace(5)*%arg1, align 4 ret void } @@ -166,7 +166,7 @@ ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:50 -define amdgpu_kernel void @ds_read64_combine_stride_400(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) { +define amdgpu_kernel void @ds_read64_combine_stride_400(double addrspace(3)* nocapture readonly %arg, double addrspace(5)*nocapture %arg1) { bb: %tmp = load double, double addrspace(3)* %arg, align 8 %tmp2 = fadd double %tmp, 0.000000e+00 @@ -191,7 +191,7 @@ %tmp21 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350 %tmp22 = load double, double addrspace(3)* %tmp21, align 8 %tmp23 = fadd double %tmp20, %tmp22 - store double %tmp23, double *%arg1, align 8 + store double %tmp23, double addrspace(5)*%arg1, 
align 8 ret void } @@ -207,7 +207,7 @@ ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:16 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:16 -define amdgpu_kernel void @ds_read64_combine_stride_8192_shifted(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) { +define amdgpu_kernel void @ds_read64_combine_stride_8192_shifted(double addrspace(3)* nocapture readonly %arg, double addrspace(5)*nocapture %arg1) { bb: %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1 %tmp2 = load double, double addrspace(3)* %tmp, align 8 @@ -227,7 +227,7 @@ %tmp16 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121 %tmp17 = load double, double addrspace(3)* %tmp16, align 8 %tmp18 = fadd double %tmp15, %tmp17 - store double %tmp18, double *%arg1, align 8 + store double %tmp18, double addrspace(5)*%arg1, align 8 ret void } Index: test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll =================================================================== --- test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll +++ test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s -; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI 
--check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare void @llvm.amdgcn.s.barrier() #1 Index: test/CodeGen/AMDGPU/ds-sub-offset.ll =================================================================== --- test/CodeGen/AMDGPU/ds-sub-offset.ll +++ test/CodeGen/AMDGPU/ds-sub-offset.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s declare i32 @llvm.amdgcn.workitem.id.x() #0 Index: test/CodeGen/AMDGPU/ds_read2.ll =================================================================== --- test/CodeGen/AMDGPU/ds_read2.ll +++ test/CodeGen/AMDGPU/ds_read2.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s ; FIXME: We don't get cases where the address was an SGPR because we ; get a copy to the address register for each one. 
Index: test/CodeGen/AMDGPU/ds_read2_offset_order.ll =================================================================== --- test/CodeGen/AMDGPU/ds_read2_offset_order.ll +++ test/CodeGen/AMDGPU/ds_read2_offset_order.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s @lds = addrspace(3) global [512 x float] undef, align 4 Index: test/CodeGen/AMDGPU/ds_read2_superreg.ll =================================================================== --- test/CodeGen/AMDGPU/ds_read2_superreg.ll +++ test/CodeGen/AMDGPU/ds_read2_superreg.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.v2 = addrspace(3) global [512 x <2 x float>] undef, align 4 Index: test/CodeGen/AMDGPU/ds_read2st64.ll =================================================================== --- test/CodeGen/AMDGPU/ds_read2st64.ll +++ test/CodeGen/AMDGPU/ds_read2st64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] 
undef, align 8 @@ -174,7 +174,7 @@ ret void } -; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff +; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff ; SI-LABEL: @simple_read2st64_f64_max_offset ; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127 ; SI: s_waitcnt lgkmcnt(0) @@ -232,7 +232,7 @@ ret void } -; The stride of 8 elements is 8 * 8 bytes. We need to make sure the +; The stride of 8 elements is 8 * 8 bytes. We need to make sure the ; stride in elements, not bytes, is a multiple of 64. ; SI-LABEL: @byte_size_only_divisible_64_read2_f64 Index: test/CodeGen/AMDGPU/ds_write2.ll =================================================================== --- test/CodeGen/AMDGPU/ds_write2.ll +++ test/CodeGen/AMDGPU/ds_write2.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] undef, align 8 Index: test/CodeGen/AMDGPU/ds_write2st64.ll =================================================================== --- test/CodeGen/AMDGPU/ds_write2st64.ll +++ test/CodeGen/AMDGPU/ds_write2st64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s @lds = addrspace(3) global [512 x float] undef, align 4 Index: test/CodeGen/AMDGPU/early-if-convert-cost.ll =================================================================== --- test/CodeGen/AMDGPU/early-if-convert-cost.ll 
+++ test/CodeGen/AMDGPU/early-if-convert-cost.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -stress-early-ifcvt -amdgpu-early-ifcvt=1 -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -stress-early-ifcvt -amdgpu-early-ifcvt=1 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; FIXME: Most of these cases that don't trigger because of broken cost ; heuristics. Should not need -stress-early-ifcvt Index: test/CodeGen/AMDGPU/early-if-convert.ll =================================================================== --- test/CodeGen/AMDGPU/early-if-convert.ll +++ test/CodeGen/AMDGPU/early-if-convert.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; XUN: llc -march=amdgcn -mcpu=tonga -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; XUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; FIXME: This leaves behind a now unnecessary and with exec Index: test/CodeGen/AMDGPU/early-inline-alias.ll =================================================================== --- test/CodeGen/AMDGPU/early-inline-alias.ll +++ test/CodeGen/AMDGPU/early-inline-alias.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 %s | FileCheck %s +; RUN: opt -mtriple=amdgcn---amdgiz -O1 -S -inline-threshold=1 %s | FileCheck %s ; CHECK: @add1alias = alias i32 (i32), i32 (i32)* @add1 ; CHECK: @add1alias2 = alias i32 (i32), i32 (i32)* @add1 Index: test/CodeGen/AMDGPU/early-inline.ll 
=================================================================== --- test/CodeGen/AMDGPU/early-inline.ll +++ test/CodeGen/AMDGPU/early-inline.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 -amdgpu-early-inline-all %s | FileCheck %s +; RUN: opt -mtriple=amdgcn---amdgiz -O1 -S -inline-threshold=1 -amdgpu-early-inline-all %s | FileCheck %s @c_alias = alias i32 (i32), i32 (i32)* @callee Index: test/CodeGen/AMDGPU/elf-header.ll =================================================================== --- test/CodeGen/AMDGPU/elf-header.ll +++ test/CodeGen/AMDGPU/elf-header.ll @@ -1,26 +1,26 @@ -; RUN: llc -march=r600 -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s -; RUN: llc -mtriple=r600-- -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s -; RUN: llc -mtriple=r600-amd- -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s -; RUN: llc -mtriple=r600-amd-unknown -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s -; RUN: llc -mtriple=r600-unknown-unknown -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s - -; RUN: llc -march=amdgcn -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s -; RUN: llc -mtriple=amdgcn-- -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s -; RUN: llc -mtriple=amdgcn-amd- -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s -; RUN: llc -mtriple=amdgcn-amd-unknown -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s -; RUN: llc -mtriple=amdgcn-unknown-unknown 
-filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s - -; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s -; RUN: llc -mtriple=amdgcn-unknown-amdhsa -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s - -; RUN: llc -mtriple=amdgcn--amdpal -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s -; RUN: llc -mtriple=amdgcn-unknown-amdpal -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s - -; RUN: llc -mtriple=amdgcn--mesa3d -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s -; RUN: llc -mtriple=amdgcn-amd-mesa3d -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s +; RUN: llc -mtriple=r600---amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s +; RUN: llc -mtriple=r600-amd--amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s +; RUN: llc 
-mtriple=r600-amd-unknown-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s +; RUN: llc -mtriple=r600-unknown-unknown-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s + +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s +; RUN: llc -mtriple=amdgcn---amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s +; RUN: llc -mtriple=amdgcn-amd--amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s +; RUN: llc -mtriple=amdgcn-amd-unknown-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s +; RUN: llc -mtriple=amdgcn-unknown-unknown-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s + +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s +; RUN: llc -mtriple=amdgcn-unknown-amdhsa-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s + +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s +; RUN: llc -mtriple=amdgcn-unknown-amdpal-amdgiz -filetype=obj < %s | llvm-readobj 
-file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s + +; RUN: llc -mtriple=amdgcn--mesa3d-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s +; RUN: llc -mtriple=amdgcn-amd-mesa3d-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s ; R600: Format: ELF32-amdgpu ; R600: Arch: r600 Index: test/CodeGen/AMDGPU/elf.ll =================================================================== --- test/CodeGen/AMDGPU/elf.ll +++ test/CodeGen/AMDGPU/elf.ll @@ -1,12 +1,12 @@ -; RUN: llc < %s -march=amdgcn -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -march=amdgcn -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s -; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -o - | FileCheck 
--check-prefix=CONFIG --check-prefix=TYPICAL %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s ; Test that we don't try to produce a COFF file on windows -; RUN: llc < %s -mtriple=amdgcn-pc-mingw -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -mtriple=amdgcn-pc-mingw-amdgiz -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s ; ELF: Format: ELF64-amdgpu ; ELF: OS/ABI: SystemV (0x0) Index: test/CodeGen/AMDGPU/elf.r600.ll =================================================================== --- test/CodeGen/AMDGPU/elf.r600.ll +++ test/CodeGen/AMDGPU/elf.r600.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -march=r600 -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG %s ; ELF: Format: ELF32-amdgpu ; ELF: Name: .AMDGPU.config Index: test/CodeGen/AMDGPU/else.ll 
=================================================================== --- test/CodeGen/AMDGPU/else.ll +++ test/CodeGen/AMDGPU/else.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}else_no_execfix: ; CHECK: ; %Flow Index: test/CodeGen/AMDGPU/empty-function.ll =================================================================== --- test/CodeGen/AMDGPU/empty-function.ll +++ test/CodeGen/AMDGPU/empty-function.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; Make sure we don't assert on empty functions Index: test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll =================================================================== --- test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll +++ test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE %s -; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s -; RUN: llc -march=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -enable-no-signed-zeros-fp-math=0 < %s | FileCheck 
-check-prefix=GCN -check-prefix=GCN-SAFE %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/AMDGPU/endcf-loop-header.ll =================================================================== --- test/CodeGen/AMDGPU/endcf-loop-header.ll +++ test/CodeGen/AMDGPU/endcf-loop-header.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck %s ; This tests that the llvm.SI.end.cf intrinsic is not inserted into the ; loop block. This intrinsic will be lowered to s_or_b64 by the code Index: test/CodeGen/AMDGPU/exceed-max-sgprs.ll =================================================================== --- test/CodeGen/AMDGPU/exceed-max-sgprs.ll +++ test/CodeGen/AMDGPU/exceed-max-sgprs.ll @@ -1,4 +1,4 @@ -; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_tahiti define amdgpu_kernel void @use_too_many_sgprs_tahiti() #0 { Index: test/CodeGen/AMDGPU/extend-bit-ops-i16.ll =================================================================== --- test/CodeGen/AMDGPU/extend-bit-ops-i16.ll +++ test/CodeGen/AMDGPU/extend-bit-ops-i16.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=GCN +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=GCN ; GCN-LABEL: and_zext: ; GCN: v_and_b32_e32 
[[VAL16:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/extload-private.ll =================================================================== --- test/CodeGen/AMDGPU/extload-private.ll +++ test/CodeGen/AMDGPU/extload-private.ll @@ -1,12 +1,13 @@ -; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +target datalayout = "A5" ; FUNC-LABEL: {{^}}load_i8_sext_private: ; SI: buffer_load_sbyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}} define amdgpu_kernel void @load_i8_sext_private(i32 addrspace(1)* %out) { entry: - %tmp0 = alloca i8 - %tmp1 = load i8, i8* %tmp0 + %tmp0 = alloca i8, addrspace(5) + %tmp1 = load i8, i8 addrspace(5)* %tmp0 %tmp2 = sext i8 %tmp1 to i32 store i32 %tmp2, i32 addrspace(1)* %out ret void @@ -16,8 +17,8 @@ ; SI: buffer_load_ubyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}} define amdgpu_kernel void @load_i8_zext_private(i32 addrspace(1)* %out) { entry: - %tmp0 = alloca i8 - %tmp1 = load i8, i8* %tmp0 + %tmp0 = alloca i8, addrspace(5) + %tmp1 = load i8, i8 addrspace(5)* %tmp0 %tmp2 = zext i8 %tmp1 to i32 store i32 %tmp2, i32 addrspace(1)* %out ret void @@ -27,8 +28,8 @@ ; SI: buffer_load_sshort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}} define amdgpu_kernel void @load_i16_sext_private(i32 addrspace(1)* %out) { entry: - %tmp0 = alloca i16 - %tmp1 = load i16, i16* %tmp0 + %tmp0 = alloca 
i16, addrspace(5) + %tmp1 = load i16, i16 addrspace(5)* %tmp0 %tmp2 = sext i16 %tmp1 to i32 store i32 %tmp2, i32 addrspace(1)* %out ret void @@ -38,8 +39,8 @@ ; SI: buffer_load_ushort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}} define amdgpu_kernel void @load_i16_zext_private(i32 addrspace(1)* %out) { entry: - %tmp0 = alloca i16 - %tmp1 = load volatile i16, i16* %tmp0 + %tmp0 = alloca i16, addrspace(5) + %tmp1 = load volatile i16, i16 addrspace(5)* %tmp0 %tmp2 = zext i16 %tmp1 to i32 store i32 %tmp2, i32 addrspace(1)* %out ret void Index: test/CodeGen/AMDGPU/extract-vector-elt-build-vector-combine.ll =================================================================== --- test/CodeGen/AMDGPU/extract-vector-elt-build-vector-combine.ll +++ test/CodeGen/AMDGPU/extract-vector-elt-build-vector-combine.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}store_build_vector_multiple_uses_v4i32: ; GCN: buffer_load_dword Index: test/CodeGen/AMDGPU/extractelt-to-trunc.ll =================================================================== --- test/CodeGen/AMDGPU/extractelt-to-trunc.ll +++ test/CodeGen/AMDGPU/extractelt-to-trunc.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/AMDGPU/fabs.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fabs.f16.ll +++ test/CodeGen/AMDGPU/fabs.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; DAGCombiner will transform: ; (fabs (f16 bitcast (i16 a))) => (f16 bitcast (and (i16 a), 0x7FFFFFFF)) Index: test/CodeGen/AMDGPU/fabs.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fabs.f64.ll +++ test/CodeGen/AMDGPU/fabs.f64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/AMDGPU/fabs.ll =================================================================== --- test/CodeGen/AMDGPU/fabs.ll +++ test/CodeGen/AMDGPU/fabs.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; DAGCombiner will transform: Index: test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll =================================================================== --- test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll +++ test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=amdgcn -mattr=+fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s -; RUN: llc -march=amdgcn -mattr=-fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=-fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s -; RUN: llc -march=amdgcn -mattr=+fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s -; RUN: llc -march=amdgcn -mattr=-fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=-fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s ; FIXME: This should also fold when fma is actually fast if an FMA ; exists 
in the original program. Index: test/CodeGen/AMDGPU/fadd.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fadd.f16.ll +++ test/CodeGen/AMDGPU/fadd.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}fadd_f16 ; GCN: {{buffer|flat}}_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/fadd.ll =================================================================== --- test/CodeGen/AMDGPU/fadd.ll +++ test/CodeGen/AMDGPU/fadd.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC ; FUNC-LABEL: {{^}}fadd_f32: ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W Index: test/CodeGen/AMDGPU/fadd64.ll =================================================================== --- 
test/CodeGen/AMDGPU/fadd64.ll +++ test/CodeGen/AMDGPU/fadd64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}v_fadd_f64: ; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}} Index: test/CodeGen/AMDGPU/fcanonicalize-elimination.ll =================================================================== --- test/CodeGen/AMDGPU/fcanonicalize-elimination.ll +++ test/CodeGen/AMDGPU/fcanonicalize-elimination.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GCN-FLUSH %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-EXCEPT -check-prefix=VI -check-prefix=GCN-FLUSH %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GCN-FLUSH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GCN-FLUSH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-EXCEPT -check-prefix=VI -check-prefix=GCN-FLUSH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 
-verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GCN-FLUSH %s ; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32: ; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/fcanonicalize.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s declare half @llvm.fabs.f16(half) #0 declare half @llvm.canonicalize.f16(half) #0 Index: test/CodeGen/AMDGPU/fcanonicalize.ll =================================================================== --- test/CodeGen/AMDGPU/fcanonicalize.ll +++ test/CodeGen/AMDGPU/fcanonicalize.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare float @llvm.fabs.f32(float) #0 declare float @llvm.canonicalize.f32(float) #0 Index: 
test/CodeGen/AMDGPU/fceil.ll =================================================================== --- test/CodeGen/AMDGPU/fceil.ll +++ test/CodeGen/AMDGPU/fceil.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.ceil.f32(float) nounwind readnone declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone Index: test/CodeGen/AMDGPU/fceil64.ll =================================================================== --- test/CodeGen/AMDGPU/fceil64.ll +++ test/CodeGen/AMDGPU/fceil64.ll @@ -1,6 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s 
+target datalayout = "A5" declare double @llvm.ceil.f64(double) nounwind readnone declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone Index: test/CodeGen/AMDGPU/fcmp-cnd.ll =================================================================== --- test/CodeGen/AMDGPU/fcmp-cnd.ll +++ test/CodeGen/AMDGPU/fcmp-cnd.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;Not checking arguments 2 and 3 to CNDE, because they may change between ;registers and literal.x depending on what the optimizer does. Index: test/CodeGen/AMDGPU/fcmp-cnde-int-args.ll =================================================================== --- test/CodeGen/AMDGPU/fcmp-cnde-int-args.ll +++ test/CodeGen/AMDGPU/fcmp-cnde-int-args.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; This test checks a bug in R600TargetLowering::LowerSELECT_CC where the -; chance to optimize the fcmp + select instructions to SET* was missed +; chance to optimize the fcmp + select instructions to SET* was missed ; due to the fact that the operands to fcmp and select had different types ; CHECK: SET{{[A-Z]+}}_DX10 Index: test/CodeGen/AMDGPU/fcmp.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fcmp.f16.ll +++ test/CodeGen/AMDGPU/fcmp.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck 
-enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}fcmp_f16_lt ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/fcmp64.ll =================================================================== --- test/CodeGen/AMDGPU/fcmp64.ll +++ test/CodeGen/AMDGPU/fcmp64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s ; CHECK-LABEL: {{^}}flt_f64: ; CHECK: v_cmp_nge_f64_e32 vcc, {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} Index: test/CodeGen/AMDGPU/fconst64.ll =================================================================== --- test/CodeGen/AMDGPU/fconst64.ll +++ test/CodeGen/AMDGPU/fconst64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK: {{^}}fconst_f64: ; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0x40140000 Index: test/CodeGen/AMDGPU/fcopysign.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fcopysign.f16.ll +++ test/CodeGen/AMDGPU/fcopysign.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti 
-verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX8 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX8 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s declare half @llvm.copysign.f16(half, half) declare float @llvm.copysign.f32(float, float) Index: test/CodeGen/AMDGPU/fcopysign.f32.ll =================================================================== --- test/CodeGen/AMDGPU/fcopysign.f32.ll +++ test/CodeGen/AMDGPU/fcopysign.f32.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck 
-check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.copysign.f32(float, float) nounwind readnone declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind readnone Index: test/CodeGen/AMDGPU/fcopysign.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fcopysign.f64.ll +++ test/CodeGen/AMDGPU/fcopysign.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s declare double @llvm.copysign.f64(double, double) nounwind readnone declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone Index: test/CodeGen/AMDGPU/fdiv.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fdiv.f16.ll +++ test/CodeGen/AMDGPU/fdiv.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn 
-mcpu=fiji -mattr=-flat-for-global -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; Make sure fdiv is promoted to f32. Index: test/CodeGen/AMDGPU/fdiv.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fdiv.f64.ll +++ test/CodeGen/AMDGPU/fdiv.f64.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s ; GCN-LABEL: {{^}}fdiv_f64: Index: test/CodeGen/AMDGPU/fence-barrier.ll =================================================================== --- test/CodeGen/AMDGPU/fence-barrier.ll +++ test/CodeGen/AMDGPU/fence-barrier.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 
-enable-si-insert-waitcnts=1 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s +target datalayout = "A5" declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() @@ -16,8 +17,8 @@ ; GCN-NEXT: s_barrier ; GCN: flat_store_dword define amdgpu_kernel void @test_local(i32 addrspace(1)*) { - %2 = alloca i32 addrspace(1)*, align 4 - store i32 addrspace(1)* %0, i32 addrspace(1)** %2, align 4 + %2 = alloca i32 addrspace(1)*, align 4, addrspace(5) + store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4 %3 = call i32 @llvm.amdgcn.workitem.id.x() %4 = zext i32 %3 to i64 %5 = icmp eq i64 %4, 0 @@ -32,7 +33,7 @@ call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire %8 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_local.temp, i64 0, i64 0), align 4 - %9 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4 + %9 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4 %10 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() %11 = call i32 @llvm.amdgcn.workitem.id.x() %12 = call i32 @llvm.amdgcn.workgroup.id.x() @@ -58,14 +59,14 @@ ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GCN-NEXT: s_barrier define amdgpu_kernel void @test_global(i32 addrspace(1)*) { - %2 = alloca i32 addrspace(1)*, align 4 - %3 = alloca i32, align 4 - store i32 addrspace(1)* %0, i32 addrspace(1)** %2, align 4 - store i32 0, i32* %3, align 4 + %2 = alloca i32 addrspace(1)*, align 4, addrspace(5) + %3 = alloca i32, align 4, addrspace(5) + store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4 + store i32 0, i32 addrspace(5)* %3, align 4 br label %4 ;