Index: test/CodeGen/AMDGPU/32-bit-local-address-space.ll =================================================================== --- test/CodeGen/AMDGPU/32-bit-local-address-space.ll +++ test/CodeGen/AMDGPU/32-bit-local-address-space.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and ; the global address space(1) uses 64-bit pointers. These tests check to make sure Index: test/CodeGen/AMDGPU/InlineAsmCrash.ll =================================================================== --- test/CodeGen/AMDGPU/InlineAsmCrash.ll +++ test/CodeGen/AMDGPU/InlineAsmCrash.ll @@ -1,12 +1,12 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; CHECK: ;;#ASMSTART ; CHECK-NEXT: s_nop 0 ; CHECK-NEXT: ;;#ASMEND -define void @foo(i32* %ptr) { +define void @foo(i32 addrspace(5)* %ptr) { %tmp = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm "s_nop 0", 
"=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65"(i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2) %tmp2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %tmp, 0 - store i32 %tmp2, i32* %ptr, align 4 + store i32 %tmp2, i32 addrspace(5)* %ptr, align 4 ret void } Index: test/CodeGen/AMDGPU/add-debug.ll =================================================================== --- test/CodeGen/AMDGPU/add-debug.ll +++ test/CodeGen/AMDGPU/add-debug.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -debug -; RUN: llc < %s -march=amdgcn -mcpu=tonga -debug +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -debug +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -debug ; REQUIRES: asserts ; Check that SelectionDAGDumper does not crash on int_SI_if. 
Index: test/CodeGen/AMDGPU/add.i16.ll =================================================================== --- test/CodeGen/AMDGPU/add.i16.ll +++ test/CodeGen/AMDGPU/add.i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_add_i16: Index: test/CodeGen/AMDGPU/add.ll =================================================================== --- test/CodeGen/AMDGPU/add.ll +++ test/CodeGen/AMDGPU/add.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ;FUNC-LABEL: {{^}}test1: ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} Index: test/CodeGen/AMDGPU/add.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/add.v2i16.ll +++ test/CodeGen/AMDGPU/add.v2i16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck 
-check-prefix=GFX9 -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_add_v2i16: Index: test/CodeGen/AMDGPU/add_i128.ll =================================================================== --- test/CodeGen/AMDGPU/add_i128.ll +++ test/CodeGen/AMDGPU/add_i128.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_i128_vreg: ; GCN: v_add_i32_e32 v[[LO:[0-9]+]], vcc, Index: test/CodeGen/AMDGPU/add_i64.ll =================================================================== --- test/CodeGen/AMDGPU/add_i64.ll +++ test/CodeGen/AMDGPU/add_i64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() readnone Index: test/CodeGen/AMDGPU/addrspacecast-captured.ll =================================================================== --- test/CodeGen/AMDGPU/addrspacecast-captured.ll +++ test/CodeGen/AMDGPU/addrspacecast-captured.ll @@ -1,45 +1,46 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa-amdgiz -mcpu=kaveri -amdgpu-promote-alloca < %s 
| FileCheck %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; Nothing should be done if the addrspacecast is captured. declare void @consume_ptr2int(i32) #0 ; CHECK-LABEL: @addrspacecast_captured( -; CHECK: %data = alloca i32, align 4 -; CHECK: %cast = addrspacecast i32* %data to i32 addrspace(4)* -; CHECK: %ptr2int = ptrtoint i32 addrspace(4)* %cast to i32 +; CHECK: %data = alloca i32, align 4, addrspace(5) +; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32* +; CHECK: %ptr2int = ptrtoint i32* %cast to i32 ; CHECK: store i32 %ptr2int, i32 addrspace(1)* %out define amdgpu_kernel void @addrspacecast_captured(i32 addrspace(1)* %out) #0 { entry: - %data = alloca i32, align 4 - %cast = addrspacecast i32* %data to i32 addrspace(4)* - %ptr2int = ptrtoint i32 addrspace(4)* %cast to i32 + %data = alloca i32, align 4, addrspace(5) + %cast = addrspacecast i32 addrspace(5)* %data to i32* + %ptr2int = ptrtoint i32* %cast to i32 store i32 %ptr2int, i32 addrspace(1)* %out ret void } ; CHECK-LABEL: @addrspacecast_captured_store( -; CHECK: %data = alloca i32, align 4 -; CHECK: %cast = addrspacecast i32* %data to i32 addrspace(4)* -; CHECK: store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %out -define amdgpu_kernel void @addrspacecast_captured_store(i32 addrspace(4)* addrspace(1)* %out) #0 { +; CHECK: %data = alloca i32, align 4, addrspace(5) +; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32* +; CHECK: store i32* %cast, i32* addrspace(1)* %out +define amdgpu_kernel void @addrspacecast_captured_store(i32* addrspace(1)* %out) #0 { entry: - %data = alloca i32, align 4 - %cast = addrspacecast i32* %data to i32 addrspace(4)* - store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %out + %data = alloca i32, align 4, addrspace(5) + %cast = addrspacecast i32 addrspace(5)* %data to i32* + store i32* %cast, i32* 
addrspace(1)* %out ret void } ; CHECK-LABEL: @addrspacecast_captured_call( -; CHECK: %data = alloca i32, align 4 -; CHECK: %cast = addrspacecast i32* %data to i32 addrspace(4)* -; CHECK: %ptr2int = ptrtoint i32 addrspace(4)* %cast to i32 +; CHECK: %data = alloca i32, align 4, addrspace(5) +; CHECK: %cast = addrspacecast i32 addrspace(5)* %data to i32* +; CHECK: %ptr2int = ptrtoint i32* %cast to i32 ; CHECK: call void @consume_ptr2int(i32 %ptr2int) define amdgpu_kernel void @addrspacecast_captured_call() #0 { entry: - %data = alloca i32, align 4 - %cast = addrspacecast i32* %data to i32 addrspace(4)* - %ptr2int = ptrtoint i32 addrspace(4)* %cast to i32 + %data = alloca i32, align 4, addrspace(5) + %cast = addrspacecast i32 addrspace(5)* %data to i32* + %ptr2int = ptrtoint i32* %cast to i32 call void @consume_ptr2int(i32 %ptr2int) ret void } Index: test/CodeGen/AMDGPU/alignbit-pat.ll =================================================================== --- test/CodeGen/AMDGPU/alignbit-pat.ll +++ test/CodeGen/AMDGPU/alignbit-pat.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}alignbit_shr_pat: ; GCN-DAG: s_load_dword s[[SHR:[0-9]+]] Index: test/CodeGen/AMDGPU/amdgcn.bitcast.ll =================================================================== --- test/CodeGen/AMDGPU/amdgcn.bitcast.ll +++ test/CodeGen/AMDGPU/amdgcn.bitcast.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; This test just checks that the compiler doesn't crash. Index: test/CodeGen/AMDGPU/amdgcn.private-memory.ll =================================================================== --- test/CodeGen/AMDGPU/amdgcn.private-memory.ll +++ test/CodeGen/AMDGPU/amdgcn.private-memory.ll @@ -1,9 +1,10 @@ -; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE %s -; RUN: llc -mattr=+promote-alloca,-flat-for-global -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE -check-prefix=HSA %s -; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA %s -; RUN: llc -mattr=-promote-alloca,-flat-for-global -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA -check-prefix=HSA %s -; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE %s -; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA %s +; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE %s +; RUN: llc -mattr=+promote-alloca,-flat-for-global -verify-machineinstrs -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE -check-prefix=HSA %s +; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA %s +; RUN: llc -mattr=-promote-alloca,-flat-for-global -verify-machineinstrs 
-mtriple=amdgcn-amdhsa-amdgiz -mcpu=kaveri < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA -check-prefix=HSA %s +; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE %s +; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone @@ -17,13 +18,13 @@ ; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) { entry: - %0 = alloca [2 x i32] - %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0 - %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1 - store i32 0, i32* %1 - store i32 1, i32* %2 - %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in - %4 = load i32, i32* %3 + %0 = alloca [2 x i32], addrspace(5) + %1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0 + %2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1 + store i32 0, i32 addrspace(5)* %1 + store i32 1, i32 addrspace(5)* %2 + %3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in + %4 = load i32, i32 addrspace(5)* %3 %5 = call i32 @llvm.amdgcn.workitem.id.x() %6 = add i32 %4, %5 store i32 %6, i32 addrspace(1)* %out Index: test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll =================================================================== --- test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll +++ test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll @@ -1,9 +1,9 @@ -; RUN: opt -mtriple=amdgcn-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s -; RUN: opt 
-mtriple=r600-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -mtriple=amdgcn---amdgiz -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -mtriple=r600---amdgiz -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s -; CHECK: NoAlias: i8 addrspace(1)* %p1, i8* %p +; CHECK: NoAlias: i8 addrspace(1)* %p1, i8 addrspace(5)* %p -define void @test(i8* %p, i8 addrspace(1)* %p1) { +define void @test(i8 addrspace(5)* %p, i8 addrspace(1)* %p1) { ret void } Index: test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll =================================================================== --- test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll +++ test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-codegenprepare %s | FileCheck %s ; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s ; Make sure this doesn't crash with no triple Index: test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll =================================================================== --- test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll +++ test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -mcpu=tonga -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: @add_i3( ; SI: %r = add i3 %a, %b Index: test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll 
=================================================================== --- test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll +++ test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}shader_cc: Index: test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll =================================================================== --- test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll +++ test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; Legacy intrinsics that 
just read implicit parameters Index: test/CodeGen/AMDGPU/amdpal-cs.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-cs.ll +++ test/CodeGen/AMDGPU/amdpal-cs.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s ; amdpal compute shader: check for 0x2e12 (COMPUTE_PGM_RSRC1) in pal metadata ; GCN-LABEL: {{^}}cs_amdpal: Index: test/CodeGen/AMDGPU/amdpal-es.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-es.ll +++ test/CodeGen/AMDGPU/amdpal-es.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; amdpal evaluation shader: check for 0x2cca (SPI_SHADER_PGM_RSRC1_ES) in pal metadata ; GCN-LABEL: {{^}}es_amdpal: 
Index: test/CodeGen/AMDGPU/amdpal-gs.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-gs.ll +++ test/CodeGen/AMDGPU/amdpal-gs.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s ; amdpal geometry shader: check for 0x2c8a (SPI_SHADER_PGM_RSRC1_GS) in pal metadata ; GCN-LABEL: {{^}}gs_amdpal: Index: test/CodeGen/AMDGPU/amdpal-hs.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-hs.ll +++ test/CodeGen/AMDGPU/amdpal-hs.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=gfx900 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s ; amdpal hull shader: check for 0x2d0a (SPI_SHADER_PGM_RSRC1_HS) in pal metadata ; GCN-LABEL: {{^}}hs_amdpal: Index: test/CodeGen/AMDGPU/amdpal-ls.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-ls.ll +++ test/CodeGen/AMDGPU/amdpal-ls.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; amdpal load shader: check for 0x2d4a (SPI_SHADER_PGM_RSRC1_LS) in pal metadata ; GCN-LABEL: {{^}}ls_amdpal: Index: test/CodeGen/AMDGPU/amdpal-ps.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-ps.ll +++ test/CodeGen/AMDGPU/amdpal-ps.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s ; 
amdpal pixel shader: check for 0x2c0a (SPI_SHADER_PGM_RSRC1_PS) in pal ; metadata. Check for 0x2c0b (SPI_SHADER_PGM_RSRC2_PS) in pal metadata, and Index: test/CodeGen/AMDGPU/amdpal-psenable.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-psenable.ll +++ test/CodeGen/AMDGPU/amdpal-psenable.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s ; This pixel shader does not use the result of its interpolation, so it would ; end up with an interpolation mode set in PSAddr but not PSEnable. 
This test tests Index: test/CodeGen/AMDGPU/amdpal-vs.ll =================================================================== --- test/CodeGen/AMDGPU/amdpal-vs.ll +++ test/CodeGen/AMDGPU/amdpal-vs.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s ; amdpal vertex shader: check for 45352 (SPI_SHADER_PGM_RSRC1_VS) in pal metadata ; GCN-LABEL: {{^}}vs_amdpal: Index: test/CodeGen/AMDGPU/and-gcn.ll =================================================================== --- test/CodeGen/AMDGPU/and-gcn.ll +++ test/CodeGen/AMDGPU/and-gcn.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}v_and_i64_br: ; SI: s_and_b64 Index: test/CodeGen/AMDGPU/and.ll =================================================================== --- 
test/CodeGen/AMDGPU/and.ll +++ test/CodeGen/AMDGPU/and.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() #0 Index: test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll =================================================================== --- test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=HSA %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa-amdgiz -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=HSA %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -186,22 +186,22 @@ ; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #8 { define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(3)* %ptr to i32* + store volatile i32 0, i32* %stof ret void } ; HSA: define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #12 { define void 
@use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(3)* %ptr to i32* + store volatile i32 0, i32* %stof ret void } ; HSA: define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #13 { define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(3)* %ptr to i32* + store volatile i32 0, i32* %stof call void @func_indirect_use_queue_ptr() ret void } Index: test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll =================================================================== --- test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa-amdgiz -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -176,57 +176,57 @@ ; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 { define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 { - %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(3)* %ptr to i32* + store volatile i32 0, i32* %stof ret void } -; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #11 { -define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #1 { - %stof = addrspacecast i32* %ptr to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* 
%stof +; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 { +define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 { + %stof = addrspacecast i32 addrspace(5)* %ptr to i32* + store volatile i32 0, i32* %stof ret void } -; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 { -define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 { - %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)* +; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 { +define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 { + %ftos = addrspacecast i32* %ptr to i32 addrspace(3)* store volatile i32 0, i32 addrspace(3)* %ftos ret void } -; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 { -define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 { - %ftos = addrspacecast i32 addrspace(4)* %ptr to i32* - store volatile i32 0, i32* %ftos +; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 { +define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 { + %ftos = addrspacecast i32* %ptr to i32 addrspace(5)* + store volatile i32 0, i32 addrspace(5)* %ftos ret void } ; No-op addrspacecast should not use queue ptr ; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 { define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 { - %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)* - store volatile i32 0, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(1)* %ptr to i32* + store volatile i32 0, i32* %stof ret void } ; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 { define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 
addrspace(2)* %ptr) #1 { - %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)* - %ld = load volatile i32, i32 addrspace(4)* %stof + %stof = addrspacecast i32 addrspace(2)* %ptr to i32* + %ld = load volatile i32, i32* %stof ret void } -; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 { -define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 { - %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)* +; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 { +define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 { + %ftos = addrspacecast i32* %ptr to i32 addrspace(1)* store volatile i32 0, i32 addrspace(1)* %ftos ret void } -; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 { -define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 { - %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)* +; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 { +define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 { + %ftos = addrspacecast i32* %ptr to i32 addrspace(2)* %ld = load volatile i32, i32 addrspace(2)* %ftos ret void } Index: test/CodeGen/AMDGPU/annotate-kernel-features.ll =================================================================== --- test/CodeGen/AMDGPU/annotate-kernel-features.ll +++ test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s +; RUN: opt -S -mtriple=amdgcn-unknown-unknown-amdgiz -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s declare i32 @llvm.r600.read.tgid.x() #0 declare i32 @llvm.r600.read.tgid.y() #0 Index: test/CodeGen/AMDGPU/anonymous-gv.ll 
=================================================================== --- test/CodeGen/AMDGPU/anonymous-gv.ll +++ test/CodeGen/AMDGPU/anonymous-gv.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji | FileCheck %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji | FileCheck %s ; Make sure we don't crash on a global variable with no name. @0 = external addrspace(1) global i32 Index: test/CodeGen/AMDGPU/any_extend_vector_inreg.ll =================================================================== --- test/CodeGen/AMDGPU/any_extend_vector_inreg.ll +++ test/CodeGen/AMDGPU/any_extend_vector_inreg.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}any_extend_vector_inreg_v16i8_to_v4i32: ; GCN: s_load_dwordx4 Index: test/CodeGen/AMDGPU/anyext.ll =================================================================== --- test/CodeGen/AMDGPU/anyext.ll +++ test/CodeGen/AMDGPU/anyext.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,GFX89 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,GFX89 %s +; RUN: llc 
-march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX89 %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone Index: test/CodeGen/AMDGPU/array-ptr-calc-i32.ll =================================================================== --- test/CodeGen/AMDGPU/array-ptr-calc-i32.ll +++ test/CodeGen/AMDGPU/array-ptr-calc-i32.ll @@ -1,5 +1,6 @@ -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1 @@ -20,12 +21,12 @@ ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this ; alloca to a vector. 
It currently fails because it does not know how ; to interpret: -; getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b +; getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 1, i32 %b ; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 64 ; SI-PROMOTE: ds_write_b32 [[PTRREG]] define amdgpu_kernel void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) #0 { - %alloca = alloca [16 x i32], align 16 + %alloca = alloca [16 x i32], align 16, addrspace(5) %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0); %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo) %a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid @@ -33,11 +34,11 @@ %a = load i32, i32 addrspace(1)* %a_ptr %b = load i32, i32 addrspace(1)* %b_ptr %result = add i32 %a, %b - %alloca_ptr = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 1, i32 %b - store i32 %result, i32* %alloca_ptr, align 4 + %alloca_ptr = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 1, i32 %b + store i32 %result, i32 addrspace(5)* %alloca_ptr, align 4 ; Dummy call call void @llvm.amdgcn.s.barrier() - %reload = load i32, i32* %alloca_ptr, align 4 + %reload = load i32, i32 addrspace(5)* %alloca_ptr, align 4 %out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid store i32 %reload, i32 addrspace(1)* %out_ptr, align 4 ret void Index: test/CodeGen/AMDGPU/array-ptr-calc-i64.ll =================================================================== --- test/CodeGen/AMDGPU/array-ptr-calc-i64.ll +++ test/CodeGen/AMDGPU/array-ptr-calc-i64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0 Index: 
test/CodeGen/AMDGPU/ashr.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/ashr.v2i16.ll +++ test/CodeGen/AMDGPU/ashr.v2i16.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s ; GCN-LABEL: {{^}}s_ashr_v2i16: ; GFX9: s_load_dword [[LHS:s[0-9]+]] Index: test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll =================================================================== --- test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll +++ test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SICI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc 
-march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SICI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset: ; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb Index: test/CodeGen/AMDGPU/atomic_load_add.ll =================================================================== --- test/CodeGen/AMDGPU/atomic_load_add.ll +++ test/CodeGen/AMDGPU/atomic_load_add.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}atomic_add_local: ; R600: LDS_ADD * Index: test/CodeGen/AMDGPU/atomic_load_sub.ll =================================================================== --- test/CodeGen/AMDGPU/atomic_load_sub.ll +++ test/CodeGen/AMDGPU/atomic_load_sub.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC 
%s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}atomic_sub_local: ; R600: LDS_SUB * Index: test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll =================================================================== --- test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll +++ test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=HSAMD %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=HSAMD %s ; CHECK-LABEL: {{^}}min_64_max_64: ; CHECK: SGPRBlocks: 0 Index: test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll =================================================================== --- test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll +++ test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji 
-amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=ALL %s ; If spilling to smem, additional registers are used for the resource ; descriptor. @@ -65,7 +65,7 @@ ; %x.3 = call i64 @llvm.amdgcn.dispatch.id() ; %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() ; %x.5 = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr() -; store volatile i32 0, i32* undef +; store volatile i32 0, i32 addrspace(5)* undef ; br label %stores ; ;stores: @@ -100,7 +100,7 @@ ; i32 addrspace(1)* %out3, ; i32 addrspace(1)* %out4, ; i32 %one, i32 %two, i32 %three, i32 %four) #2 { -; store volatile i32 0, i32* undef +; store volatile i32 0, i32 addrspace(5)* undef ; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x() ; store volatile i32 %x.0, i32 addrspace(1)* undef ; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y() Index: test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll =================================================================== --- test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll +++ test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s @var = addrspace(1) global float 0.0 Index: test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll =================================================================== --- test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll +++ test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz 
-mcpu=fiji -verify-machineinstrs < %s | FileCheck %s ; Exactly 1 wave per execution unit. ; CHECK-LABEL: {{^}}empty_exactly_1: Index: test/CodeGen/AMDGPU/attr-unparseable.ll =================================================================== --- test/CodeGen/AMDGPU/attr-unparseable.ll +++ test/CodeGen/AMDGPU/attr-unparseable.ll @@ -1,4 +1,4 @@ -; RUN: not llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s 2>&1 | FileCheck %s ; CHECK: can't parse integer attribute amdgpu-num-sgpr define amdgpu_kernel void @unparseable_single_0() #0 { Index: test/CodeGen/AMDGPU/barrier-elimination.ll =================================================================== --- test/CodeGen/AMDGPU/barrier-elimination.ll +++ test/CodeGen/AMDGPU/barrier-elimination.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck %s ; CHECK-LABEL: {{^}}unknown_wgs: ; CHECK: s_barrier Index: test/CodeGen/AMDGPU/basic-branch.ll =================================================================== --- test/CodeGen/AMDGPU/basic-branch.ll +++ test/CodeGen/AMDGPU/basic-branch.ll @@ -1,7 +1,7 @@ -; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s -; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz 
-mcpu=tonga -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_branch: ; GCNNOOPT: v_writelane_b32 Index: test/CodeGen/AMDGPU/basic-loop.ll =================================================================== --- test/CodeGen/AMDGPU/basic-loop.ll +++ test/CodeGen/AMDGPU/basic-loop.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=tahiti < %s | FileCheck %s -; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s +; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s | FileCheck %s +; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s | FileCheck %s ; CHECK-LABEL: {{^}}test_loop: define amdgpu_kernel void @test_loop(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind { Index: test/CodeGen/AMDGPU/bfe-combine.ll =================================================================== --- test/CodeGen/AMDGPU/bfe-combine.ll +++ test/CodeGen/AMDGPU/bfe-combine.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 < %s | FileCheck --check-prefix=GCN --check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck --check-prefix=GCN --check-prefix=VI-SDWA %s -; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=GCN --check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -amdgpu-sdwa-peephole=0 < %s | FileCheck --check-prefix=GCN --check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji < %s | FileCheck 
--check-prefix=GCN --check-prefix=VI-SDWA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire < %s | FileCheck --check-prefix=GCN --check-prefix=CI %s ; GCN-LABEL: {{^}}bfe_combine8: ; VI: v_bfe_u32 v[[BFE:[0-9]+]], v{{[0-9]+}}, 8, 8 Index: test/CodeGen/AMDGPU/bfe-patterns.ll =================================================================== --- test/CodeGen/AMDGPU/bfe-patterns.ll +++ test/CodeGen/AMDGPU/bfe-patterns.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}v_ubfe_sub_i32: ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]] Index: test/CodeGen/AMDGPU/bfe_uint.ll =================================================================== --- test/CodeGen/AMDGPU/bfe_uint.ll +++ test/CodeGen/AMDGPU/bfe_uint.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; CHECK: {{^}}bfe_def: ; CHECK: BFE_UINT Index: test/CodeGen/AMDGPU/bfi_int.ll =================================================================== --- test/CodeGen/AMDGPU/bfi_int.ll +++ test/CodeGen/AMDGPU/bfi_int.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 %s -; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck --check-prefix=SI %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck --check-prefix=R600 %s +; RUN: llc < %s 
-march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck --check-prefix=SI %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI %s ; BFI_INT Definition pattern from ISA docs ; (y & x) | (z & ~x) Index: test/CodeGen/AMDGPU/bfm.ll =================================================================== --- test/CodeGen/AMDGPU/bfm.ll +++ test/CodeGen/AMDGPU/bfm.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}bfm_pattern: ; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} Index: test/CodeGen/AMDGPU/big_alu.ll =================================================================== --- test/CodeGen/AMDGPU/big_alu.ll +++ test/CodeGen/AMDGPU/big_alu.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cedar < %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cedar < %s ; This test ensures that R600 backend can handle ifcvt properly Index: test/CodeGen/AMDGPU/bitcast-vector-extract.ll =================================================================== --- test/CodeGen/AMDGPU/bitcast-vector-extract.ll +++ test/CodeGen/AMDGPU/bitcast-vector-extract.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn 
-mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; The bitcast should be pushed through the bitcasts so the vectors can ; be broken down and the shared components can be CSEd Index: test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll =================================================================== --- test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll +++ test/CodeGen/AMDGPU/bitreverse-inline-immediates.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Test that materialization constants that are the bit reversed of ; inline immediates are replaced with bfrev of the inline immediate to Index: test/CodeGen/AMDGPU/bitreverse.ll =================================================================== --- test/CodeGen/AMDGPU/bitreverse.ll +++ test/CodeGen/AMDGPU/bitreverse.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global 
-verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() #1 Index: test/CodeGen/AMDGPU/br_cc.f16.ll =================================================================== --- test/CodeGen/AMDGPU/br_cc.f16.ll +++ test/CodeGen/AMDGPU/br_cc.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}br_cc_f16: ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/branch-condition-and.ll =================================================================== --- test/CodeGen/AMDGPU/branch-condition-and.ll +++ test/CodeGen/AMDGPU/branch-condition-and.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; This used to crash because during intermediate control flow lowering, there ; was a sequence Index: test/CodeGen/AMDGPU/branch-relax-spill.ll 
=================================================================== --- test/CodeGen/AMDGPU/branch-relax-spill.ll +++ test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -1,4 +1,4 @@ -; RUN: not llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s 2>&1 | FileCheck -check-prefix=FAIL %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s 2>&1 | FileCheck -check-prefix=FAIL %s ; FIXME: This should be able to compile, but requires inserting an ; extra block to restore the scavenged register. Index: test/CodeGen/AMDGPU/branch-relaxation.ll =================================================================== --- test/CodeGen/AMDGPU/branch-relaxation.ll +++ test/CodeGen/AMDGPU/branch-relaxation.ll @@ -1,9 +1,9 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -check-prefix=GCN %s ; FIXME: We should use llvm-mc for this, but we can't even parse our own output. ; See PR33579. 
-; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 -o %t.o -filetype=obj %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -amdgpu-s-branch-bits=4 -o %t.o -filetype=obj %s ; RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=OBJ %s ; OBJ: Relocations [ Index: test/CodeGen/AMDGPU/branch-uniformity.ll =================================================================== --- test/CodeGen/AMDGPU/branch-uniformity.ll +++ test/CodeGen/AMDGPU/branch-uniformity.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; The branch instruction in LOOP49 has a uniform condition, but PHI instructions ; introduced by the structurizecfg pass previously caused a false divergence Index: test/CodeGen/AMDGPU/bswap.ll =================================================================== --- test/CodeGen/AMDGPU/bswap.ll +++ test/CodeGen/AMDGPU/bswap.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.bswap.i32(i32) nounwind readnone declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone Index: test/CodeGen/AMDGPU/bug-vopc-commute.ll =================================================================== --- test/CodeGen/AMDGPU/bug-vopc-commute.ll +++ test/CodeGen/AMDGPU/bug-vopc-commute.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s -; RUN: llc 
-march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}main: ; Index: test/CodeGen/AMDGPU/build_vector.ll =================================================================== --- test/CodeGen/AMDGPU/build_vector.ll +++ test/CodeGen/AMDGPU/build_vector.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 -; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s --check-prefix=R600 +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI ; R600: {{^}}build_vector2: ; R600: MOV Index: test/CodeGen/AMDGPU/call_fs.ll =================================================================== --- test/CodeGen/AMDGPU/call_fs.ll +++ test/CodeGen/AMDGPU/call_fs.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG %s -; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600 %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600 %s ; EG: .long 257 ; EG: {{^}}call_fs: Index: test/CodeGen/AMDGPU/cayman-loop-bug.ll =================================================================== --- 
test/CodeGen/AMDGPU/cayman-loop-bug.ll +++ test/CodeGen/AMDGPU/cayman-loop-bug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman | FileCheck %s ; CHECK-LABEL: {{^}}main: ; CHECK: LOOP_START_DX10 Index: test/CodeGen/AMDGPU/cf-loop-on-constant.ll =================================================================== --- test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -verify-machineinstrs -O0 < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -O0 < %s ; GCN-LABEL: {{^}}test_loop: ; GCN: [[LABEL:BB[0-9+]_[0-9]+]]: ; %for.body{{$}} Index: test/CodeGen/AMDGPU/cf-stack-bug.ll =================================================================== --- test/CodeGen/AMDGPU/cf-stack-bug.ll +++ test/CodeGen/AMDGPU/cf-stack-bug.ll @@ -1,29 +1,30 @@ -; RUN: llc -march=r600 -mcpu=redwood -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=BUG64 %s < %t -; RUN: llc -march=r600 -mcpu=sumo -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=sumo -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=BUG64 %s < %t -; RUN: llc -march=r600 -mcpu=barts -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=barts -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=BUG64 %s < %t -; RUN: llc -march=r600 
-mcpu=turks -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=turks -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=BUG64 %s < %t -; RUN: llc -march=r600 -mcpu=caicos -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=caicos -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=BUG64 %s < %t -; RUN: llc -march=r600 -mcpu=cedar -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cedar -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=BUG32 %s < %t -; RUN: llc -march=r600 -mcpu=juniper -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=juniper -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=NOBUG %s < %t -; RUN: llc -march=r600 -mcpu=cypress -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=NOBUG %s < %t -; RUN: llc -march=r600 -mcpu=cayman -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -debug-only=r600cf %s -o - 2>%t | FileCheck %s --check-prefix=FUNC ; RUN: FileCheck --check-prefix=NOBUG %s < %t +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; REQUIRES: asserts Index: test/CodeGen/AMDGPU/cf_end.ll =================================================================== --- test/CodeGen/AMDGPU/cf_end.ll +++ 
test/CodeGen/AMDGPU/cf_end.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood --show-mc-encoding | FileCheck --check-prefix=EG %s -; RUN: llc < %s -march=r600 -mcpu=caicos --show-mc-encoding | FileCheck --check-prefix=EG %s -; RUN: llc < %s -march=r600 -mcpu=cayman --show-mc-encoding | FileCheck --check-prefix=CM %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood --show-mc-encoding | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=caicos --show-mc-encoding | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman --show-mc-encoding | FileCheck --check-prefix=CM %s ; EG: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x80] ; CM: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x88] Index: test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll =================================================================== --- test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll +++ test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll @@ -1,37 +1,38 @@ -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-CIVI %s -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-CIVI %s -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s -; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s -; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s -; RUN: llc -march=amdgcn -amdgpu-scalarize-global-loads=false 
-mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown-amdgiz -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-CIVI %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown-amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-CIVI %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown-amdgiz -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -amdgpu-scalarize-global-loads=false -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -amdgpu-scalarize-global-loads=false -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; OPT-LABEL: @test_no_sink_flat_small_offset_i32( -; OPT-CIVI: getelementptr i32, i32 addrspace(4)* %in +; OPT-CIVI: getelementptr i32, i32* %in ; OPT-CIVI: br i1 ; OPT-CIVI-NOT: ptrtoint ; OPT-GFX9: br -; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %0, i64 28 -; OPT-GFX9: %1 = bitcast i8 addrspace(4)* %sunkaddr to i32 addrspace(4)* -; OPT-GFX9: load i32, i32 addrspace(4)* %1 +; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %0, i64 28 +; OPT-GFX9: %1 = bitcast i8* %sunkaddr to i32* +; OPT-GFX9: load i32, i32* %1 ; GCN-LABEL: 
{{^}}test_no_sink_flat_small_offset_i32: ; GCN: flat_load_dword ; GCN: {{^}}BB0_2: -define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32* %out, i32* %in, i32 %cond) { entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 + %out.gep = getelementptr i32, i32* %out, i64 999999 + %in.gep = getelementptr i32, i32* %in, i64 7 %tmp0 = icmp eq i32 %cond, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i32, i32 addrspace(4)* %in.gep + %tmp1 = load i32, i32* %in.gep br label %endif endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep + store i32 %x, i32* %out.gep br label %done done: @@ -39,7 +40,7 @@ } ; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32( -; OPT: getelementptr i32, i32 addrspace(4)* %out, +; OPT: getelementptr i32, i32* %out, ; rOPT-CI-NOT: getelementptr ; OPT: br i1 @@ -50,11 +51,11 @@ ; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32: ; CI: buffer_load_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28 -define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_global_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_global_i32(i32* %out, i32* %in, i32 %cond) { entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 - %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(1)* + %out.gep = getelementptr i32, i32* %out, i64 999999 + %in.gep = getelementptr i32, i32* %in, i64 7 + %cast = addrspacecast i32* %in.gep to i32 addrspace(1)* %tmp0 = icmp eq i32 %cond, 0 br i1 %tmp0, label %endif, label %if @@ -64,7 +65,7 @@ endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 
addrspace(4)* %out.gep + store i32 %x, i32* %out.gep br label %done done: @@ -72,7 +73,7 @@ } ; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32( -; OPT: getelementptr i32, i32 addrspace(4)* %out, +; OPT: getelementptr i32, i32* %out, ; OPT-CI-NOT: getelementptr ; OPT: br i1 @@ -83,11 +84,11 @@ ; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_constant_i32: ; CI: s_load_dword {{s[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd -define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +define amdgpu_kernel void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32* %out, i32* %in, i32 %cond) { entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 - %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(2)* + %out.gep = getelementptr i32, i32* %out, i64 999999 + %in.gep = getelementptr i32, i32* %in, i64 7 + %cast = addrspacecast i32* %in.gep to i32 addrspace(2)* %tmp0 = icmp eq i32 %cond, 0 br i1 %tmp0, label %endif, label %if @@ -97,7 +98,7 @@ endif: %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep + store i32 %x, i32* %out.gep br label %done done: @@ -105,34 +106,34 @@ } ; OPT-LABEL: @test_sink_flat_small_max_flat_offset( -; OPT-CIVI: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095 +; OPT-CIVI: %in.gep = getelementptr i8, i8* %in, i64 4095 ; OPT-CIVI: br ; OPT-CIVI-NOT: getelementptr -; OPT-CIVI: load i8, i8 addrspace(4)* %in.gep +; OPT-CIVI: load i8, i8* %in.gep ; OPT-GFX9: br -; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %in, i64 4095 -; OPT-GFX9: load i8, i8 addrspace(4)* %sunkaddr +; OPT-GFX9: %sunkaddr = getelementptr i8, i8* %in, i64 4095 +; OPT-GFX9: load i8, i8* %sunkaddr ; GCN-LABEL: {{^}}test_sink_flat_small_max_flat_offset: ; GFX9: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}} ; CIVI: 
flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 { +define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32* %out, i8* %in) #1 { entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024 - %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095 + %out.gep = getelementptr i32, i32* %out, i32 1024 + %in.gep = getelementptr i8, i8* %in, i64 4095 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i8, i8 addrspace(4)* %in.gep + %tmp1 = load i8, i8* %in.gep %tmp2 = sext i8 %tmp1 to i32 br label %endif endif: %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep + store i32 %x, i32* %out.gep br label %done done: @@ -140,29 +141,29 @@ } ; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset( -; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096 +; OPT: %in.gep = getelementptr i8, i8* %in, i64 4096 ; OPT: br ; OPT-NOT: getelementptr -; OPT: load i8, i8 addrspace(4)* %in.gep +; OPT: load i8, i8* %in.gep ; GCN-LABEL: {{^}}test_sink_flat_small_max_plus_1_flat_offset: ; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 { +define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32* %out, i8* %in) #1 { entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 99999 - %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096 + %out.gep = getelementptr i32, i32* %out, i64 99999 + %in.gep = getelementptr i8, i8* %in, i64 4096 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i8, i8 addrspace(4)* %in.gep + %tmp1 = load i8, i8* %in.gep %tmp2 = sext i8 %tmp1 to i32 
br label %endif endif: %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep + store i32 %x, i32* %out.gep br label %done done: @@ -170,30 +171,30 @@ } ; OPT-LABEL: @test_no_sink_flat_reg_offset( -; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg +; OPT: %in.gep = getelementptr i8, i8* %in, i64 %reg ; OPT: br ; OPT-NOT: getelementptr -; OPT: load i8, i8 addrspace(4)* %in.gep +; OPT: load i8, i8* %in.gep ; GCN-LABEL: {{^}}test_no_sink_flat_reg_offset: ; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in, i64 %reg) #1 { +define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32* %out, i8* %in, i64 %reg) #1 { entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024 - %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg + %out.gep = getelementptr i32, i32* %out, i32 1024 + %in.gep = getelementptr i8, i8* %in, i64 %reg %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp0 = icmp eq i32 %tid, 0 br i1 %tmp0, label %endif, label %if if: - %tmp1 = load i8, i8 addrspace(4)* %in.gep + %tmp1 = load i8, i8* %in.gep %tmp2 = sext i8 %tmp1 to i32 br label %endif endif: %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep + store i32 %x, i32* %out.gep br label %done done: Index: test/CodeGen/AMDGPU/cgp-bitfield-extract.ll =================================================================== --- test/CodeGen/AMDGPU/cgp-bitfield-extract.ll +++ test/CodeGen/AMDGPU/cgp-bitfield-extract.ll @@ -1,7 +1,7 @@ -; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga 
-mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -codegenprepare < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; This particular case will actually be worse in terms of code size ; from sinking into both. Index: test/CodeGen/AMDGPU/clamp-modifier.ll =================================================================== --- test/CodeGen/AMDGPU/clamp-modifier.ll +++ test/CodeGen/AMDGPU/clamp-modifier.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s ; GCN-LABEL: {{^}}v_clamp_add_src_f32: ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] Index: test/CodeGen/AMDGPU/clamp.ll =================================================================== --- test/CodeGen/AMDGPU/clamp.ll +++ test/CodeGen/AMDGPU/clamp.ll @@ -1,6 +1,6 
@@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s ; GCN-LABEL: {{^}}v_clamp_f32: ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] Index: test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll =================================================================== --- test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll +++ test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i1 @llvm.amdgcn.class.f32(float, i32) Index: test/CodeGen/AMDGPU/coalescer-subrange-crash.ll =================================================================== --- test/CodeGen/AMDGPU/coalescer-subrange-crash.ll +++ test/CodeGen/AMDGPU/coalescer-subrange-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; ; This testcase used to cause the following crash: ; Index: test/CodeGen/AMDGPU/coalescer_remat.ll =================================================================== --- test/CodeGen/AMDGPU/coalescer_remat.ll +++ 
test/CodeGen/AMDGPU/coalescer_remat.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -mtriple=amdgcn-- -o - %s | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs -mtriple=amdgcn---amdgiz -o - %s | FileCheck %s declare float @llvm.fma.f32(float, float, float) Index: test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll =================================================================== --- test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll +++ test/CodeGen/AMDGPU/codegen-prepare-addrmode-sext.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -codegenprepare -S < %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI-LLC %s +; RUN: opt -mtriple=amdgcn---amdgiz -codegenprepare -S < %s | FileCheck -check-prefix=OPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI-LLC %s ; OPT-LABEL: @test( ; OPT: mul nsw i32 Index: test/CodeGen/AMDGPU/combine-and-sext-bool.ll =================================================================== --- test/CodeGen/AMDGPU/combine-and-sext-bool.ll +++ test/CodeGen/AMDGPU/combine-and-sext-bool.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}and_i1_sext_bool: ; GCN: v_cmp_{{gt|le}}_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/combine-cond-add-sub.ll =================================================================== --- test/CodeGen/AMDGPU/combine-cond-add-sub.ll +++ test/CodeGen/AMDGPU/combine-cond-add-sub.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}add1: ; GCN: 
v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/combine-ftrunc.ll =================================================================== --- test/CodeGen/AMDGPU/combine-ftrunc.ll +++ test/CodeGen/AMDGPU/combine-ftrunc.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}combine_ftrunc_frint_f64: ; GCN: v_rndne_f64_e32 [[RND:v\[[0-9:]+\]]], Index: test/CodeGen/AMDGPU/commute-compares.ll =================================================================== --- test/CodeGen/AMDGPU/commute-compares.ll +++ test/CodeGen/AMDGPU/commute-compares.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" declare i32 @llvm.amdgcn.workitem.id.x() #0 @@ -703,9 +704,9 @@ ; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]] define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 { entry: - %stack0 = alloca i32 - %ptr0 = load volatile i32*, i32* addrspace(1)* undef - %eq = icmp eq i32* %ptr0, %stack0 + %stack0 = alloca i32, addrspace(5) + %ptr0 = load volatile i32 addrspace(5)*, i32 addrspace(5)* addrspace(1)* undef + %eq = icmp eq i32 addrspace(5)* %ptr0, %stack0 %ext = zext i1 %eq to i32 store volatile i32 %ext, i32 addrspace(1)* %out ret void Index: test/CodeGen/AMDGPU/commute-shifts.ll =================================================================== --- test/CodeGen/AMDGPU/commute-shifts.ll +++ 
test/CodeGen/AMDGPU/commute-shifts.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}main: ; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/commute_modifiers.ll =================================================================== --- test/CodeGen/AMDGPU/commute_modifiers.ll +++ test/CodeGen/AMDGPU/commute_modifiers.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare float @llvm.fabs.f32(float) #1 Index: test/CodeGen/AMDGPU/complex-folding.ll =================================================================== --- test/CodeGen/AMDGPU/complex-folding.ll +++ test/CodeGen/AMDGPU/complex-folding.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; CHECK: {{^}}main: ; CHECK-NOT: MOV Index: test/CodeGen/AMDGPU/concat_vectors.ll =================================================================== --- test/CodeGen/AMDGPU/concat_vectors.ll +++ test/CodeGen/AMDGPU/concat_vectors.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI 
-check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_concat_v1i32: ; 0x80f000 is the high 32 bits of the resource descriptor used by MUBUF Index: test/CodeGen/AMDGPU/constant-fold-mi-operands.ll =================================================================== --- test/CodeGen/AMDGPU/constant-fold-mi-operands.ll +++ test/CodeGen/AMDGPU/constant-fold-mi-operands.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}fold_mi_v_and_0: ; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}} Index: test/CodeGen/AMDGPU/control-flow-optnone.ll =================================================================== --- test/CodeGen/AMDGPU/control-flow-optnone.ll +++ test/CodeGen/AMDGPU/control-flow-optnone.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; optnone disables AMDGPUAnnotateUniformValues, so no branch is known ; to be uniform during instruction selection. 
The custom selection for Index: test/CodeGen/AMDGPU/convergent-inlineasm.ll =================================================================== --- test/CodeGen/AMDGPU/convergent-inlineasm.ll +++ test/CodeGen/AMDGPU/convergent-inlineasm.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.workitem.id.x() #0 ; GCN-LABEL: {{^}}convergent_inlineasm: Index: test/CodeGen/AMDGPU/copy-illegal-type.ll =================================================================== --- test/CodeGen/AMDGPU/copy-illegal-type.ll +++ test/CodeGen/AMDGPU/copy-illegal-type.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-sdwa-peephole=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -amdgpu-sdwa-peephole=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone Index: test/CodeGen/AMDGPU/copy-to-reg.ll =================================================================== --- test/CodeGen/AMDGPU/copy-to-reg.ll +++ test/CodeGen/AMDGPU/copy-to-reg.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mattr=-promote-alloca -verify-machineinstrs < %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=-promote-alloca -verify-machineinstrs < %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca -verify-machineinstrs < %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; Test that CopyToReg instructions don't have non-register operands prior ; to being emitted. @@ -8,20 +9,20 @@ ; CHECK-LABEL: {{^}}copy_to_reg_frameindex: define amdgpu_kernel void @copy_to_reg_frameindex(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { entry: - %alloca = alloca [16 x i32] + %alloca = alloca [16 x i32], addrspace(5) br label %loop loop: %inc = phi i32 [0, %entry], [%inc.i, %loop] - %ptr = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %inc - store i32 %inc, i32* %ptr + %ptr = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %inc + store i32 %inc, i32 addrspace(5)* %ptr %inc.i = add i32 %inc, 1 %cnd = icmp uge i32 %inc.i, 16 br i1 %cnd, label %done, label %loop done: - %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 0 - %tmp1 = load i32, i32* %tmp0 + %tmp0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 + %tmp1 = load i32, i32 addrspace(5)* %tmp0 store i32 %tmp1, i32 addrspace(1)* %out ret void } Index: test/CodeGen/AMDGPU/ctlz.ll =================================================================== --- test/CodeGen/AMDGPU/ctlz.ll +++ test/CodeGen/AMDGPU/ctlz.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz 
-verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s declare i7 @llvm.ctlz.i7(i7, i1) nounwind readnone declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone Index: test/CodeGen/AMDGPU/ctlz_zero_undef.ll =================================================================== --- test/CodeGen/AMDGPU/ctlz_zero_undef.ll +++ test/CodeGen/AMDGPU/ctlz_zero_undef.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC -check-prefix=GCN %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC -check-prefix=GCN %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone Index: test/CodeGen/AMDGPU/ctpop.ll =================================================================== --- test/CodeGen/AMDGPU/ctpop.ll +++ test/CodeGen/AMDGPU/ctpop.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=FUNC -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.ctpop.i32(i32) nounwind readnone declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone Index: test/CodeGen/AMDGPU/ctpop64.ll =================================================================== --- test/CodeGen/AMDGPU/ctpop64.ll +++ test/CodeGen/AMDGPU/ctpop64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone Index: test/CodeGen/AMDGPU/cttz_zero_undef.ll =================================================================== --- test/CodeGen/AMDGPU/cttz_zero_undef.ll +++ test/CodeGen/AMDGPU/cttz_zero_undef.ll @@ 
-1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-NOSDWA -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-SDWA -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-NOSDWA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-SDWA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s declare i7 @llvm.cttz.i7(i7, i1) nounwind readnone declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone Index: test/CodeGen/AMDGPU/cube.ll =================================================================== --- test/CodeGen/AMDGPU/cube.ll +++ test/CodeGen/AMDGPU/cube.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare float @llvm.amdgcn.cubeid(float, float, float) #0 declare float @llvm.amdgcn.cubesc(float, float, float) #0 Index: test/CodeGen/AMDGPU/cvt_f32_ubyte.ll 
=================================================================== --- test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone Index: test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll =================================================================== --- test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll +++ test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.fabs.f32(float) #1 declare float @llvm.floor.f32(float) #1 
Index: test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll =================================================================== --- test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll +++ test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.fabs.f32(float) #1 declare float @llvm.floor.f32(float) #1 Index: test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll =================================================================== --- test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll +++ test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; We are only checking that instruction selection can succeed in this case. This ; cut down test results in no instructions, but that's fine. 
Index: test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll =================================================================== --- test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll +++ test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; Test for a bug where DAGCombiner::ReassociateOps() was creating adds ; with offset in the first operand and base pointers in the second. Index: test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll =================================================================== --- test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll +++ test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; This test is for a bug in ; DAGCombiner::reduceBuildVecConvertToConvertBuildVec() where Index: test/CodeGen/AMDGPU/debug.ll =================================================================== --- test/CodeGen/AMDGPU/debug.ll +++ test/CodeGen/AMDGPU/debug.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs -mattr=dumpcode -filetype=obj | FileCheck --check-prefix=SI --check-prefix=FUNC %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=dumpcode -filetype=obj | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs -mattr=dumpcode -filetype=obj | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs -mattr=dumpcode -filetype=obj | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; Test for a crash in the custom assembly dump code. 
Index: test/CodeGen/AMDGPU/default-fp-mode.ll =================================================================== --- test/CodeGen/AMDGPU/default-fp-mode.ll +++ test/CodeGen/AMDGPU/default-fp-mode.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_default_si: ; GCN: FloatMode: 192 Index: test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll =================================================================== --- test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll +++ test/CodeGen/AMDGPU/disconnected-predset-break-bug.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; PRED_SET* instructions must be tied to any instruction that uses their ; result. This tests that there are no instructions between the PRED_SET* ; and the PREDICATE_BREAK in this loop.
; CHECK: {{^}}loop_ge: Index: test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll =================================================================== --- test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll +++ test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; The memory operand was dropped from the buffer_load_dword_offset ; when replaced with the addr64 during operand legalization, resulting Index: test/CodeGen/AMDGPU/ds-combine-large-stride.ll =================================================================== --- test/CodeGen/AMDGPU/ds-combine-large-stride.ll +++ test/CodeGen/AMDGPU/ds-combine-large-stride.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; GCN-LABEL: ds_read32_combine_stride_400: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 Index: test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll =================================================================== --- test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll +++ test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | 
FileCheck -check-prefix=CI --check-prefix=CHECK %s -; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare void @llvm.amdgcn.s.barrier() #1 Index: test/CodeGen/AMDGPU/ds-sub-offset.ll =================================================================== --- test/CodeGen/AMDGPU/ds-sub-offset.ll +++ test/CodeGen/AMDGPU/ds-sub-offset.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s declare i32 @llvm.amdgcn.workitem.id.x() #0 Index: test/CodeGen/AMDGPU/ds_read2.ll =================================================================== --- test/CodeGen/AMDGPU/ds_read2.ll +++ test/CodeGen/AMDGPU/ds_read2.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s ; FIXME: We don't get cases where the address was an SGPR because we ; get a copy to the address register for each one. 
Index: test/CodeGen/AMDGPU/ds_read2_offset_order.ll =================================================================== --- test/CodeGen/AMDGPU/ds_read2_offset_order.ll +++ test/CodeGen/AMDGPU/ds_read2_offset_order.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s @lds = addrspace(3) global [512 x float] undef, align 4 Index: test/CodeGen/AMDGPU/ds_read2_superreg.ll =================================================================== --- test/CodeGen/AMDGPU/ds_read2_superreg.ll +++ test/CodeGen/AMDGPU/ds_read2_superreg.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.v2 = addrspace(3) global [512 x <2 x float>] undef, align 4 Index: test/CodeGen/AMDGPU/ds_read2st64.ll =================================================================== --- test/CodeGen/AMDGPU/ds_read2st64.ll +++ test/CodeGen/AMDGPU/ds_read2st64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] 
undef, align 8 Index: test/CodeGen/AMDGPU/ds_write2.ll =================================================================== --- test/CodeGen/AMDGPU/ds_write2.ll +++ test/CodeGen/AMDGPU/ds_write2.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] undef, align 8 Index: test/CodeGen/AMDGPU/ds_write2st64.ll =================================================================== --- test/CodeGen/AMDGPU/ds_write2st64.ll +++ test/CodeGen/AMDGPU/ds_write2st64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s @lds = addrspace(3) global [512 x float] undef, align 4 Index: test/CodeGen/AMDGPU/early-if-convert-cost.ll =================================================================== --- test/CodeGen/AMDGPU/early-if-convert-cost.ll +++ test/CodeGen/AMDGPU/early-if-convert-cost.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -stress-early-ifcvt -amdgpu-early-ifcvt=1 -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -stress-early-ifcvt -amdgpu-early-ifcvt=1 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; FIXME: Most of these cases that don't trigger because of broken cost ; heuristics. 
Should not need -stress-early-ifcvt Index: test/CodeGen/AMDGPU/early-if-convert.ll =================================================================== --- test/CodeGen/AMDGPU/early-if-convert.ll +++ test/CodeGen/AMDGPU/early-if-convert.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; XUN: llc -march=amdgcn -mcpu=tonga -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; XUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -amdgpu-early-ifcvt=1 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; FIXME: This leaves behind a now unnecessary and with exec Index: test/CodeGen/AMDGPU/early-inline-alias.ll =================================================================== --- test/CodeGen/AMDGPU/early-inline-alias.ll +++ test/CodeGen/AMDGPU/early-inline-alias.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 %s | FileCheck %s +; RUN: opt -mtriple=amdgcn---amdgiz -O1 -S -inline-threshold=1 %s | FileCheck %s ; CHECK: @add1alias = alias i32 (i32), i32 (i32)* @add1 ; CHECK: @add1alias2 = alias i32 (i32), i32 (i32)* @add1 Index: test/CodeGen/AMDGPU/early-inline.ll =================================================================== --- test/CodeGen/AMDGPU/early-inline.ll +++ test/CodeGen/AMDGPU/early-inline.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 -amdgpu-early-inline-all %s | FileCheck %s +; RUN: opt -mtriple=amdgcn---amdgiz -O1 -S -inline-threshold=1 -amdgpu-early-inline-all %s | FileCheck %s @c_alias = alias i32 (i32), i32 (i32)* @callee Index: test/CodeGen/AMDGPU/elf-header.ll =================================================================== --- 
test/CodeGen/AMDGPU/elf-header.ll +++ test/CodeGen/AMDGPU/elf-header.ll @@ -1,26 +1,26 @@ -; RUN: llc -march=r600 -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s -; RUN: llc -mtriple=r600-- -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s -; RUN: llc -mtriple=r600-amd- -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s -; RUN: llc -mtriple=r600-amd-unknown -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s -; RUN: llc -mtriple=r600-unknown-unknown -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s - -; RUN: llc -march=amdgcn -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s -; RUN: llc -mtriple=amdgcn-- -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s -; RUN: llc -mtriple=amdgcn-amd- -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s -; RUN: llc -mtriple=amdgcn-amd-unknown -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s -; RUN: llc -mtriple=amdgcn-unknown-unknown -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s - -; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s -; RUN: llc -mtriple=amdgcn-unknown-amdhsa -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck 
--check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s - -; RUN: llc -mtriple=amdgcn--amdpal -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s -; RUN: llc -mtriple=amdgcn-unknown-amdpal -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s - -; RUN: llc -mtriple=amdgcn--mesa3d -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s -; RUN: llc -mtriple=amdgcn-amd-mesa3d -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s +; RUN: llc -mtriple=r600---amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s +; RUN: llc -mtriple=r600-amd--amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s +; RUN: llc -mtriple=r600-amd-unknown-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s +; RUN: llc -mtriple=r600-unknown-unknown-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s + +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s +; RUN: llc -mtriple=amdgcn---amdgiz -filetype=obj < %s | 
llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s +; RUN: llc -mtriple=amdgcn-amd--amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s +; RUN: llc -mtriple=amdgcn-amd-unknown-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s +; RUN: llc -mtriple=amdgcn-unknown-unknown-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s + +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s +; RUN: llc -mtriple=amdgcn-unknown-amdhsa-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s + +; RUN: llc -mtriple=amdgcn--amdpal-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s +; RUN: llc -mtriple=amdgcn-unknown-amdpal-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s + +; RUN: llc -mtriple=amdgcn--mesa3d-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s +; RUN: llc -mtriple=amdgcn-amd-mesa3d-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d-amdgiz -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN 
--check-prefix=GCN-OSABI-MESA3D %s ; R600: Format: ELF32-amdgpu ; R600: Arch: r600 Index: test/CodeGen/AMDGPU/elf.ll =================================================================== --- test/CodeGen/AMDGPU/elf.ll +++ test/CodeGen/AMDGPU/elf.ll @@ -1,12 +1,12 @@ -; RUN: llc < %s -march=amdgcn -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -march=amdgcn -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s -; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=carrizo -mattr=-flat-for-global 
-verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s ; Test that we don't try to produce a COFF file on windows -; RUN: llc < %s -mtriple=amdgcn-pc-mingw -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -mtriple=amdgcn-pc-mingw-amdgiz -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s ; ELF: Format: ELF64-amdgpu ; ELF: OS/ABI: SystemV (0x0) Index: test/CodeGen/AMDGPU/elf.r600.ll =================================================================== --- test/CodeGen/AMDGPU/elf.r600.ll +++ test/CodeGen/AMDGPU/elf.r600.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -march=r600 -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG %s ; ELF: Format: ELF32-amdgpu ; ELF: Name: .AMDGPU.config Index: test/CodeGen/AMDGPU/else.ll =================================================================== --- test/CodeGen/AMDGPU/else.ll +++ test/CodeGen/AMDGPU/else.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: 
{{^}}else_no_execfix: ; CHECK: ; %Flow Index: test/CodeGen/AMDGPU/empty-function.ll =================================================================== --- test/CodeGen/AMDGPU/empty-function.ll +++ test/CodeGen/AMDGPU/empty-function.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; Make sure we don't assert on empty functions Index: test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll =================================================================== --- test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll +++ test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE %s -; RUN: llc -march=amdgcn -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s -; RUN: llc -march=amdgcn -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -enable-no-signed-zeros-fp-math=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -enable-no-signed-zeros-fp-math=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-UNSAFE %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/AMDGPU/endcf-loop-header.ll =================================================================== --- 
test/CodeGen/AMDGPU/endcf-loop-header.ll +++ test/CodeGen/AMDGPU/endcf-loop-header.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck %s ; This tests that the llvm.SI.end.cf intrinsic is not inserted into the ; loop block. This intrinsic will be lowered to s_or_b64 by the code Index: test/CodeGen/AMDGPU/exceed-max-sgprs.ll =================================================================== --- test/CodeGen/AMDGPU/exceed-max-sgprs.ll +++ test/CodeGen/AMDGPU/exceed-max-sgprs.ll @@ -1,4 +1,4 @@ -; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_tahiti define amdgpu_kernel void @use_too_many_sgprs_tahiti() #0 { Index: test/CodeGen/AMDGPU/extend-bit-ops-i16.ll =================================================================== --- test/CodeGen/AMDGPU/extend-bit-ops-i16.ll +++ test/CodeGen/AMDGPU/extend-bit-ops-i16.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=GCN +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=GCN ; GCN-LABEL: and_zext: ; GCN: v_and_b32_e32 [[VAL16:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/extload-private.ll =================================================================== --- test/CodeGen/AMDGPU/extload-private.ll +++ test/CodeGen/AMDGPU/extload-private.ll @@ -1,12 +1,13 @@ -; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | 
FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; FUNC-LABEL: {{^}}load_i8_sext_private: ; SI: buffer_load_sbyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}} define amdgpu_kernel void @load_i8_sext_private(i32 addrspace(1)* %out) { entry: - %tmp0 = alloca i8 - %tmp1 = load i8, i8* %tmp0 + %tmp0 = alloca i8, addrspace(5) + %tmp1 = load i8, i8 addrspace(5)* %tmp0 %tmp2 = sext i8 %tmp1 to i32 store i32 %tmp2, i32 addrspace(1)* %out ret void @@ -16,8 +17,8 @@ ; SI: buffer_load_ubyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}} define amdgpu_kernel void @load_i8_zext_private(i32 addrspace(1)* %out) { entry: - %tmp0 = alloca i8 - %tmp1 = load i8, i8* %tmp0 + %tmp0 = alloca i8, addrspace(5) + %tmp1 = load i8, i8 addrspace(5)* %tmp0 %tmp2 = zext i8 %tmp1 to i32 store i32 %tmp2, i32 addrspace(1)* %out ret void @@ -27,8 +28,8 @@ ; SI: buffer_load_sshort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}} define amdgpu_kernel void @load_i16_sext_private(i32 addrspace(1)* %out) { entry: - %tmp0 = alloca i16 - %tmp1 = load i16, i16* %tmp0 + %tmp0 = alloca i16, addrspace(5) + %tmp1 = load i16, i16 addrspace(5)* %tmp0 %tmp2 = sext i16 %tmp1 to i32 store i32 %tmp2, i32 addrspace(1)* %out ret void @@ -38,8 +39,8 @@ ; SI: buffer_load_ushort v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4{{$}} define amdgpu_kernel void @load_i16_zext_private(i32 addrspace(1)* %out) { entry: - %tmp0 = alloca i16 - 
%tmp1 = load volatile i16, i16* %tmp0 + %tmp0 = alloca i16, addrspace(5) + %tmp1 = load volatile i16, i16 addrspace(5)* %tmp0 %tmp2 = zext i16 %tmp1 to i32 store i32 %tmp2, i32 addrspace(1)* %out ret void Index: test/CodeGen/AMDGPU/extload.ll =================================================================== --- test/CodeGen/AMDGPU/extload.ll +++ test/CodeGen/AMDGPU/extload.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn-amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FIXME: This seems to not ever actually become an extload ; FUNC-LABEL: {{^}}global_anyext_load_i8: Index: test/CodeGen/AMDGPU/extract-vector-elt-build-vector-combine.ll =================================================================== --- test/CodeGen/AMDGPU/extract-vector-elt-build-vector-combine.ll +++ test/CodeGen/AMDGPU/extract-vector-elt-build-vector-combine.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}store_build_vector_multiple_uses_v4i32: ; GCN: buffer_load_dword Index: test/CodeGen/AMDGPU/extractelt-to-trunc.ll =================================================================== --- test/CodeGen/AMDGPU/extractelt-to-trunc.ll +++ test/CodeGen/AMDGPU/extractelt-to-trunc.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/AMDGPU/fabs.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fabs.f16.ll +++ test/CodeGen/AMDGPU/fabs.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; DAGCombiner will transform: ; (fabs (f16 bitcast (i16 a))) => (f16 bitcast (and (i16 a), 0x7FFFFFFF)) Index: test/CodeGen/AMDGPU/fabs.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fabs.f64.ll +++ test/CodeGen/AMDGPU/fabs.f64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI 
-check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/AMDGPU/fabs.ll =================================================================== --- test/CodeGen/AMDGPU/fabs.ll +++ test/CodeGen/AMDGPU/fabs.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; DAGCombiner will transform: Index: test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll =================================================================== --- test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll +++ test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=amdgcn -mattr=+fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s -; RUN: llc -march=amdgcn -mattr=-fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=GCN-FLUSH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=-fast-fmaf,-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FLUSH %s -; RUN: llc -march=amdgcn -mattr=+fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s -; RUN: llc -march=amdgcn -mattr=-fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-FASTFMA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=-fast-fmaf,+fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SLOWFMA %s ; FIXME: This should also fold when fma is actually fast if an FMA ; exists in the original program. 
Index: test/CodeGen/AMDGPU/fadd.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fadd.f16.ll +++ test/CodeGen/AMDGPU/fadd.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}fadd_f16 ; GCN: {{buffer|flat}}_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/fadd.ll =================================================================== --- test/CodeGen/AMDGPU/fadd.ll +++ test/CodeGen/AMDGPU/fadd.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC ; FUNC-LABEL: {{^}}fadd_f32: ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W Index: test/CodeGen/AMDGPU/fadd64.ll =================================================================== --- test/CodeGen/AMDGPU/fadd64.ll 
+++ test/CodeGen/AMDGPU/fadd64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}v_fadd_f64: ; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}} Index: test/CodeGen/AMDGPU/fcanonicalize-elimination.ll =================================================================== --- test/CodeGen/AMDGPU/fcanonicalize-elimination.ll +++ test/CodeGen/AMDGPU/fcanonicalize-elimination.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GCN-FLUSH %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-EXCEPT -check-prefix=VI -check-prefix=GCN-FLUSH %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GCN-FLUSH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GCN-FLUSH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-EXCEPT -check-prefix=VI -check-prefix=GCN-FLUSH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -verify-machineinstrs 
-mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GCN-FLUSH %s ; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32: ; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/fcanonicalize.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s declare half @llvm.fabs.f16(half) #0 declare half @llvm.canonicalize.f16(half) #0 Index: test/CodeGen/AMDGPU/fcanonicalize.ll =================================================================== --- test/CodeGen/AMDGPU/fcanonicalize.ll +++ test/CodeGen/AMDGPU/fcanonicalize.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare float @llvm.fabs.f32(float) #0 declare float @llvm.canonicalize.f32(float) #0 Index: 
test/CodeGen/AMDGPU/fceil.ll =================================================================== --- test/CodeGen/AMDGPU/fceil.ll +++ test/CodeGen/AMDGPU/fceil.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.ceil.f32(float) nounwind readnone declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone Index: test/CodeGen/AMDGPU/fceil64.ll =================================================================== --- test/CodeGen/AMDGPU/fceil64.ll +++ test/CodeGen/AMDGPU/fceil64.ll @@ -1,6 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s 
+target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" declare double @llvm.ceil.f64(double) nounwind readnone declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone Index: test/CodeGen/AMDGPU/fcmp-cnd.ll =================================================================== --- test/CodeGen/AMDGPU/fcmp-cnd.ll +++ test/CodeGen/AMDGPU/fcmp-cnd.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;Not checking arguments 2 and 3 to CNDE, because they may change between ;registers and literal.x depending on what the optimizer does. Index: test/CodeGen/AMDGPU/fcmp-cnde-int-args.ll =================================================================== --- test/CodeGen/AMDGPU/fcmp-cnde-int-args.ll +++ test/CodeGen/AMDGPU/fcmp-cnde-int-args.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; This test checks a bug in R600TargetLowering::LowerSELECT_CC where the -; chance to optimize the fcmp + select instructions to SET* was missed +; chance to optimize the fcmp + select instructions to SET* was missed ; due to the fact that the operands to fcmp and select had different types ; CHECK: SET{{[A-Z]+}}_DX10 Index: test/CodeGen/AMDGPU/fcmp.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fcmp.f16.ll +++ test/CodeGen/AMDGPU/fcmp.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN
-check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}fcmp_f16_lt ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/fcmp.ll =================================================================== --- test/CodeGen/AMDGPU/fcmp.ll +++ test/CodeGen/AMDGPU/fcmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; CHECK: {{^}}fcmp_sext: ; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -15,8 +15,8 @@ } ; This test checks that a setcc node with f32 operands is lowered to a -; SET*_DX10 instruction. Previously we were lowering this to: -; SET* + FP_TO_SINT +; SET*_DX10 instruction.
Previously we were lowering this to: +; SET* + FP_TO_SINT ; CHECK: {{^}}fcmp_br: ; CHECK: SET{{[N]*}}E_DX10 * T{{[0-9]+\.[XYZW],}} Index: test/CodeGen/AMDGPU/fcmp64.ll =================================================================== --- test/CodeGen/AMDGPU/fcmp64.ll +++ test/CodeGen/AMDGPU/fcmp64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s ; CHECK-LABEL: {{^}}flt_f64: ; CHECK: v_cmp_nge_f64_e32 vcc, {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}} Index: test/CodeGen/AMDGPU/fconst64.ll =================================================================== --- test/CodeGen/AMDGPU/fconst64.ll +++ test/CodeGen/AMDGPU/fconst64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK: {{^}}fconst_f64: ; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0x40140000 Index: test/CodeGen/AMDGPU/fcopysign.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fcopysign.f16.ll +++ test/CodeGen/AMDGPU/fcopysign.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s
| FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX8 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX8 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s declare half @llvm.copysign.f16(half, half) declare float @llvm.copysign.f32(float, float) Index: test/CodeGen/AMDGPU/fcopysign.f32.ll =================================================================== --- test/CodeGen/AMDGPU/fcopysign.f32.ll +++ test/CodeGen/AMDGPU/fcopysign.f32.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN 
-check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.copysign.f32(float, float) nounwind readnone declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind readnone Index: test/CodeGen/AMDGPU/fcopysign.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fcopysign.f64.ll +++ test/CodeGen/AMDGPU/fcopysign.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s declare double @llvm.copysign.f64(double, double) nounwind readnone declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone Index: test/CodeGen/AMDGPU/fdiv.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fdiv.f16.ll +++ test/CodeGen/AMDGPU/fdiv.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck 
-check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; Make sure fdiv is promoted to f32. Index: test/CodeGen/AMDGPU/fdiv.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fdiv.f64.ll +++ test/CodeGen/AMDGPU/fdiv.f64.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s ; GCN-LABEL: {{^}}fdiv_f64: Index: test/CodeGen/AMDGPU/fdiv.ll =================================================================== --- test/CodeGen/AMDGPU/fdiv.ll +++ test/CodeGen/AMDGPU/fdiv.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; These tests check that fdiv is expanded correctly and also test that the ; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate Index: test/CodeGen/AMDGPU/fence-amdgiz.ll =================================================================== --- test/CodeGen/AMDGPU/fence-amdgiz.ll +++ test/CodeGen/AMDGPU/fence-amdgiz.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -mcpu=kaveri < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -mcpu=kaveri < %s | FileCheck %s target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" Index: test/CodeGen/AMDGPU/fetch-limits.r600.ll =================================================================== --- test/CodeGen/AMDGPU/fetch-limits.r600.ll +++ test/CodeGen/AMDGPU/fetch-limits.r600.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=r600 -mcpu=r600 | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=rs880 | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=rv670 | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=r600 | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=rs880 | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=rv670 | FileCheck %s ;
R600 supports 8 fetches in a clause ; CHECK: {{^}}fetch_limits_r600: Index: test/CodeGen/AMDGPU/fetch-limits.r700+.ll =================================================================== --- test/CodeGen/AMDGPU/fetch-limits.r700+.ll +++ test/CodeGen/AMDGPU/fetch-limits.r700+.ll @@ -1,15 +1,15 @@ -; RUN: llc < %s -march=r600 -mcpu=rv710 | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=rv730 | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=rv770 | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=cedar | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=sumo | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=juniper | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=barts | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=turks | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=caicos | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=rv710 | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=rv730 | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=rv770 | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cedar | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=sumo | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=juniper | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cypress | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=barts | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=turks | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=caicos | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman | FileCheck %s ; r700+ supports 16 fetches in a clause ; CHECK: 
{{^}}fetch_limits_r700: Index: test/CodeGen/AMDGPU/ffloor.f64.ll =================================================================== --- test/CodeGen/AMDGPU/ffloor.f64.ll +++ test/CodeGen/AMDGPU/ffloor.f64.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare double @llvm.fabs.f64(double %Val) declare double @llvm.floor.f64(double) nounwind readnone Index: test/CodeGen/AMDGPU/ffloor.ll =================================================================== --- test/CodeGen/AMDGPU/ffloor.ll +++ test/CodeGen/AMDGPU/ffloor.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga 
-mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}floor_f32: ; SI: v_floor_f32_e32 Index: test/CodeGen/AMDGPU/flat_atomics.ll =================================================================== --- test/CodeGen/AMDGPU/flat_atomics.ll +++ test/CodeGen/AMDGPU/flat_atomics.ll @@ -1,33 +1,33 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}atomic_add_i32_offset: ; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_add_i32_offset(i32* %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_add_i32_max_offset: ; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}} -define amdgpu_kernel 
void @atomic_add_i32_max_offset(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_add_i32_max_offset(i32* %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 1023 - %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst + %gep = getelementptr i32, i32* %out, i32 1023 + %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1: ; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} -define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32* %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024 - %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst + %gep = getelementptr i32, i32* %out, i32 1024 + %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst ret void } @@ -35,22 +35,22 @@ ; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_add_i32_ret_offset(i32* %out, i32* %out2, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset: ; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(4)* %out, i32 
%in, i64 %index) { +define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst ret void } @@ -58,60 +58,60 @@ ; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile add i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_add_i32: ; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_add_i32(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_add_i32(i32* %out, i32 %in) { entry: - %val = atomicrmw volatile add i32 addrspace(4)* %out, i32 %in seq_cst + %val = atomicrmw volatile add i32* %out, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_add_i32_ret: ; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void 
@atomic_add_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_add_i32_ret(i32* %out, i32* %out2, i32 %in) { entry: - %val = atomicrmw volatile add i32 addrspace(4)* %out, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %val = atomicrmw volatile add i32* %out, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_add_i32_addr64: ; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_add_i32_addr64(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile add i32 addrspace(4)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64: ; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile add i32 addrspace(4)* %ptr, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile add i32* %ptr, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_and_i32_offset: ; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_and_i32_offset(i32* %out, i32 %in) { 
entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile and i32 addrspace(4)* %gep, i32 %in seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst ret void } @@ -119,22 +119,22 @@ ; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_and_i32_ret_offset(i32* %out, i32* %out2, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile and i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset: ; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile and i32 addrspace(4)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst ret void } @@ -142,60 +142,60 @@ ; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword 
v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile and i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile and i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_and_i32: ; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_and_i32(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_and_i32(i32* %out, i32 %in) { entry: - %val = atomicrmw volatile and i32 addrspace(4)* %out, i32 %in seq_cst + %val = atomicrmw volatile and i32* %out, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_and_i32_ret: ; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_and_i32_ret(i32* %out, i32* %out2, i32 %in) { entry: - %val = atomicrmw volatile and i32 addrspace(4)* %out, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %val = atomicrmw volatile and i32* %out, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_and_i32_addr64: ; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_and_i32_addr64(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 
addrspace(4)* %out, i64 %index - %val = atomicrmw volatile and i32 addrspace(4)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64: ; GCN: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile and i32 addrspace(4)* %ptr, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile and i32* %ptr, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_sub_i32_offset: ; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_sub_i32_offset(i32* %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile sub i32 addrspace(4)* %gep, i32 %in seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst ret void } @@ -203,22 +203,22 @@ ; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32* %out, i32* %out2, i32 
%in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile sub i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset: ; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile sub i32 addrspace(4)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst ret void } @@ -226,60 +226,60 @@ ; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile sub i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile sub i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void 
} ; GCN-LABEL: {{^}}atomic_sub_i32: ; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_sub_i32(i32* %out, i32 %in) { entry: - %val = atomicrmw volatile sub i32 addrspace(4)* %out, i32 %in seq_cst + %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_sub_i32_ret: ; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_sub_i32_ret(i32* %out, i32* %out2, i32 %in) { entry: - %val = atomicrmw volatile sub i32 addrspace(4)* %out, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %val = atomicrmw volatile sub i32* %out, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_sub_i32_addr64: ; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_sub_i32_addr64(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile sub i32 addrspace(4)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64: ; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val 
= atomicrmw volatile sub i32 addrspace(4)* %ptr, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile sub i32* %ptr, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_max_i32_offset: ; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_max_i32_offset(i32* %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile max i32 addrspace(4)* %gep, i32 %in seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst ret void } @@ -287,22 +287,22 @@ ; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_max_i32_ret_offset(i32* %out, i32* %out2, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile max i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset: ; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32* %out, i32 %in, i64 %index) { 
entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile max i32 addrspace(4)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst ret void } @@ -310,60 +310,60 @@ ; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile max i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile max i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_max_i32: ; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_max_i32(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_max_i32(i32* %out, i32 %in) { entry: - %val = atomicrmw volatile max i32 addrspace(4)* %out, i32 %in seq_cst + %val = atomicrmw volatile max i32* %out, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_max_i32_ret: ; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void 
@atomic_max_i32_ret(i32* %out, i32* %out2, i32 %in) { entry: - %val = atomicrmw volatile max i32 addrspace(4)* %out, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %val = atomicrmw volatile max i32* %out, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_max_i32_addr64: ; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_max_i32_addr64(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile max i32 addrspace(4)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64: ; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile max i32 addrspace(4)* %ptr, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile max i32* %ptr, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umax_i32_offset: ; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_umax_i32_offset(i32* %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile umax i32 
addrspace(4)* %gep, i32 %in seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst ret void } @@ -371,22 +371,22 @@ ; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32* %out, i32* %out2, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile umax i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset: ; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile umax i32 addrspace(4)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst ret void } @@ -394,60 +394,60 @@ ; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void 
@atomic_umax_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile umax i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile umax i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umax_i32: ; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_umax_i32(i32* %out, i32 %in) { entry: - %val = atomicrmw volatile umax i32 addrspace(4)* %out, i32 %in seq_cst + %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_umax_i32_ret: ; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_umax_i32_ret(i32* %out, i32* %out2, i32 %in) { entry: - %val = atomicrmw volatile umax i32 addrspace(4)* %out, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %val = atomicrmw volatile umax i32* %out, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umax_i32_addr64: ; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_umax_i32_addr64(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = 
atomicrmw volatile umax i32 addrspace(4)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64: ; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile umax i32 addrspace(4)* %ptr, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile umax i32* %ptr, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_min_i32_offset: ; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_min_i32_offset(i32* %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile min i32 addrspace(4)* %gep, i32 %in seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst ret void } @@ -455,22 +455,22 @@ ; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_min_i32_ret_offset(i32* %out, i32* %out2, i32 %in) { entry: - %gep = 
getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile min i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset: ; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile min i32 addrspace(4)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst ret void } @@ -478,60 +478,60 @@ ; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile min i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile min i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: 
{{^}}atomic_min_i32: ; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_min_i32(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_min_i32(i32* %out, i32 %in) { entry: - %val = atomicrmw volatile min i32 addrspace(4)* %out, i32 %in seq_cst + %val = atomicrmw volatile min i32* %out, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_min_i32_ret: ; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_min_i32_ret(i32* %out, i32* %out2, i32 %in) { entry: - %val = atomicrmw volatile min i32 addrspace(4)* %out, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %val = atomicrmw volatile min i32* %out, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_min_i32_addr64: ; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_min_i32_addr64(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile min i32 addrspace(4)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64: ; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = 
atomicrmw volatile min i32 addrspace(4)* %ptr, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile min i32* %ptr, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umin_i32_offset: ; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_umin_i32_offset(i32* %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile umin i32 addrspace(4)* %gep, i32 %in seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst ret void } @@ -539,22 +539,22 @@ ; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32* %out, i32* %out2, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile umin i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset: ; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32* %out, i32 %in, i64 
%index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile umin i32 addrspace(4)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst ret void } @@ -562,60 +562,60 @@ ; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile umin i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile umin i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umin_i32: ; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_umin_i32(i32* %out, i32 %in) { entry: - %val = atomicrmw volatile umin i32 addrspace(4)* %out, i32 %in seq_cst + %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_umin_i32_ret: ; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define 
amdgpu_kernel void @atomic_umin_i32_ret(i32* %out, i32* %out2, i32 %in) { entry: - %val = atomicrmw volatile umin i32 addrspace(4)* %out, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %val = atomicrmw volatile umin i32* %out, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umin_i32_addr64: ; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_umin_i32_addr64(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile umin i32 addrspace(4)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64: ; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]{{$}} - define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { + define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile umin i32 addrspace(4)* %ptr, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile umin i32* %ptr, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_or_i32_offset: ; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_or_i32_offset(i32* %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = 
atomicrmw volatile or i32 addrspace(4)* %gep, i32 %in seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst ret void } @@ -623,22 +623,22 @@ ; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_or_i32_ret_offset(i32* %out, i32* %out2, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile or i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset: ; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile or i32 addrspace(4)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst ret void } @@ -646,60 +646,60 @@ ; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void 
@atomic_or_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile or i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile or i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_or_i32: ; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_or_i32(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_or_i32(i32* %out, i32 %in) { entry: - %val = atomicrmw volatile or i32 addrspace(4)* %out, i32 %in seq_cst + %val = atomicrmw volatile or i32* %out, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_or_i32_ret: ; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_or_i32_ret(i32* %out, i32* %out2, i32 %in) { entry: - %val = atomicrmw volatile or i32 addrspace(4)* %out, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %val = atomicrmw volatile or i32* %out, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_or_i32_addr64: ; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_or_i32_addr64(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile or i32 addrspace(4)* 
%ptr, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64: ; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile or i32 addrspace(4)* %ptr, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile or i32* %ptr, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xchg_i32_offset: ; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_xchg_i32_offset(i32* %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile xchg i32 addrspace(4)* %gep, i32 %in seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst ret void } @@ -707,22 +707,22 @@ ; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - 
%val = atomicrmw volatile xchg i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset: ; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile xchg i32 addrspace(4)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst ret void } @@ -730,50 +730,50 @@ ; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile xchg i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xchg_i32: ; GCN: 
flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} -define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_xchg_i32(i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in seq_cst + %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_xchg_i32_ret: ; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_xchg_i32_ret(i32* %out, i32* %out2, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64: ; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_xchg_i32_addr64(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile xchg i32 addrspace(4)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64: ; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = 
atomicrmw volatile xchg i32 addrspace(4)* %ptr, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile xchg i32* %ptr, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } @@ -782,10 +782,10 @@ ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset: ; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} -define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(4)* %out, i32 %in, i32 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst ret void } @@ -793,23 +793,23 @@ ; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} ; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]] -define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i32 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst %flag = extractvalue { i32, i1 } %val, 0 - store i32 %flag, i32 addrspace(4)* %out2 + store i32 %flag, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset: ; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], 
v[{{[0-9]+}}:{{[0-9]+}}]{{$}} ; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} -define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index, i32 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32* %out, i32 %in, i64 %index, i32 %old) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst ret void } @@ -817,63 +817,63 @@ ; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} ; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]] -define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index, i32 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index, i32 %old) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst %flag = extractvalue { i32, i1 } %val, 0 - store i32 %flag, i32 addrspace(4)* %out2 + store i32 %flag, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i32: ; GCN: flat_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(4)* %out, i32 %in, i32 %old) { 
+define amdgpu_kernel void @atomic_cmpxchg_i32(i32* %out, i32 %in, i32 %old) { entry: - %val = cmpxchg volatile i32 addrspace(4)* %out, i32 %old, i32 %in seq_cst seq_cst + %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret: ; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]] -define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i32 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32* %out, i32* %out2, i32 %in, i32 %old) { entry: - %val = cmpxchg volatile i32 addrspace(4)* %out, i32 %old, i32 %in seq_cst seq_cst + %val = cmpxchg volatile i32* %out, i32 %old, i32 %in seq_cst seq_cst %flag = extractvalue { i32, i1 } %val, 0 - store i32 %flag, i32 addrspace(4)* %out2 + store i32 %flag, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64: ; GCN: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} -define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index, i32 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32* %out, i32 %in, i64 %index, i32 %old) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = cmpxchg volatile i32 addrspace(4)* %ptr, i32 %old, i32 %in seq_cst seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64: ; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]] -define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index, i32 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, 
i64 %index, i32 %old) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = cmpxchg volatile i32 addrspace(4)* %ptr, i32 %old, i32 %in seq_cst seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %val = cmpxchg volatile i32* %ptr, i32 %old, i32 %in seq_cst seq_cst %flag = extractvalue { i32, i1 } %val, 0 - store i32 %flag, i32 addrspace(4)* %out2 + store i32 %flag, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xor_i32_offset: ; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} ; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_xor_i32_offset(i32* %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile xor i32 addrspace(4)* %gep, i32 %in seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst ret void } @@ -881,22 +881,22 @@ ; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32* %out, i32* %out2, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = atomicrmw volatile xor i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %gep = getelementptr i32, i32* %out, i32 4 + %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset: ; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} ; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} -define 
amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile xor i32 addrspace(4)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst ret void } @@ -904,50 +904,50 @@ ; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = atomicrmw volatile xor i32 addrspace(4)* %gep, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = atomicrmw volatile xor i32* %gep, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xor_i32: ; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} -define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(4)* %out, i32 %in) { +define amdgpu_kernel void @atomic_xor_i32(i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xor i32 addrspace(4)* %out, i32 %in seq_cst + %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_xor_i32_ret: ; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} 
glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { +define amdgpu_kernel void @atomic_xor_i32_ret(i32* %out, i32* %out2, i32 %in) { entry: - %val = atomicrmw volatile xor i32 addrspace(4)* %out, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %val = atomicrmw volatile xor i32* %out, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xor_i32_addr64: ; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} -define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(4)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_xor_i32_addr64(i32* %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile xor i32 addrspace(4)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64: ; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32* %out, i32* %out2, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - %val = atomicrmw volatile xor i32 addrspace(4)* %ptr, i32 %in seq_cst - store i32 %val, i32 addrspace(4)* %out2 + %ptr = getelementptr i32, i32* %out, i64 %index + %val = atomicrmw volatile xor i32* %ptr, i32 %in seq_cst + store i32 %val, i32* %out2 ret void } @@ -955,21 +955,21 @@ ; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} ; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] 
-define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +define amdgpu_kernel void @atomic_load_i32_offset(i32* %in, i32* %out) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %in, i32 4 - %val = load atomic i32, i32 addrspace(4)* %gep seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out + %gep = getelementptr i32, i32* %in, i32 4 + %val = load atomic i32, i32* %gep seq_cst, align 4 + store i32 %val, i32* %out ret void } ; GCN-LABEL: {{^}}atomic_load_i32: ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_load_i32(i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +define amdgpu_kernel void @atomic_load_i32(i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in seq_cst, align 4 + store i32 %val, i32* %out ret void } @@ -977,60 +977,60 @@ ; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} ; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out, i64 %index) { +define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32* %in, i32* %out, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %in, i64 %index - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %val = load atomic i32, i32 addrspace(4)* %gep seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out + %ptr = getelementptr i32, i32* %in, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + %val = load atomic i32, i32* %gep seq_cst, align 4 + store i32 %val, i32* %out ret void } ; GCN-LABEL: {{^}}atomic_load_i32_addr64: ; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} ; GCN: 
flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(4)* %in, i32 addrspace(4)* %out, i64 %index) { +define amdgpu_kernel void @atomic_load_i32_addr64(i32* %in, i32* %out, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %in, i64 %index - %val = load atomic i32, i32 addrspace(4)* %ptr seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out + %ptr = getelementptr i32, i32* %in, i64 %index + %val = load atomic i32, i32* %ptr seq_cst, align 4 + store i32 %val, i32* %out ret void } ; GCN-LABEL: {{^}}atomic_store_i32_offset: ; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(4)* %out) { +define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32* %out) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - store atomic i32 %in, i32 addrspace(4)* %gep seq_cst, align 4 + %gep = getelementptr i32, i32* %out, i32 4 + store atomic i32 %in, i32* %gep seq_cst, align 4 ret void } ; GCN-LABEL: {{^}}atomic_store_i32: ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(4)* %out) { +define amdgpu_kernel void @atomic_store_i32(i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out seq_cst, align 4 + store atomic i32 %in, i32* %out seq_cst, align 4 ret void } ; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset: ; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} ; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}} -define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(4)* %out, i64 %index) { +define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32* %out, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - 
%gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - store atomic i32 %in, i32 addrspace(4)* %gep seq_cst, align 4 + %ptr = getelementptr i32, i32* %out, i64 %index + %gep = getelementptr i32, i32* %ptr, i32 4 + store atomic i32 %in, i32* %gep seq_cst, align 4 ret void } ; GCN-LABEL: {{^}}atomic_store_i32_addr64: ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(4)* %out, i64 %index) { +define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32* %out, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index - store atomic i32 %in, i32 addrspace(4)* %ptr seq_cst, align 4 + %ptr = getelementptr i32, i32* %out, i64 %index + store atomic i32 %in, i32* %ptr seq_cst, align 4 ret void } Index: test/CodeGen/AMDGPU/flat_atomics_i64.ll =================================================================== --- test/CodeGen/AMDGPU/flat_atomics_i64.ll +++ test/CodeGen/AMDGPU/flat_atomics_i64.ll @@ -1,975 +1,975 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}atomic_add_i64_offset: ; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}} -define amdgpu_kernel void @atomic_add_i64_offset(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_add_i64_offset(i64* %out, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile add i64 addrspace(4)* %gep, i64 %in seq_cst + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile add i64* %gep, i64 %in 
seq_cst ret void } ; GCN-LABEL: {{^}}atomic_add_i64_ret_offset: ; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_add_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_add_i64_ret_offset(i64* %out, i64* %out2, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile add i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset: ; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}} -define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile add i64 addrspace(4)* %gep, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset: ; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr 
i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile add i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_add_i64: ; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_add_i64(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_add_i64(i64* %out, i64 %in) { entry: - %tmp0 = atomicrmw volatile add i64 addrspace(4)* %out, i64 %in seq_cst + %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_add_i64_ret: ; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_add_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_add_i64_ret(i64* %out, i64* %out2, i64 %in) { entry: - %tmp0 = atomicrmw volatile add i64 addrspace(4)* %out, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_add_i64_addr64: ; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_add_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_add_i64_addr64(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile add i64 addrspace(4)* %ptr, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64: ; GCN: flat_atomic_add_x2 
[[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile add i64 addrspace(4)* %ptr, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_and_i64_offset: ; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_and_i64_offset(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_and_i64_offset(i64* %out, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile and i64 addrspace(4)* %gep, i64 %in seq_cst + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_and_i64_ret_offset: ; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_and_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_and_i64_ret_offset(i64* %out, i64* %out2, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile and i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: 
{{^}}atomic_and_i64_addr64_offset: ; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile and i64 addrspace(4)* %gep, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset: ; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile and i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_and_i64: ; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_and_i64(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_and_i64(i64* %out, i64 %in) { entry: - %tmp0 = atomicrmw volatile and i64 addrspace(4)* %out, i64 %in seq_cst + %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst ret void } ; GCN-LABEL: 
{{^}}atomic_and_i64_ret: ; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_and_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_and_i64_ret(i64* %out, i64* %out2, i64 %in) { entry: - %tmp0 = atomicrmw volatile and i64 addrspace(4)* %out, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_and_i64_addr64: ; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_and_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_and_i64_addr64(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile and i64 addrspace(4)* %ptr, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64: ; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile and i64 addrspace(4)* %ptr, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_sub_i64_offset: ; GCN: 
flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_sub_i64_offset(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_sub_i64_offset(i64* %out, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %gep, i64 %in seq_cst + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset: ; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64* %out, i64* %out2, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset: ; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %gep, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset: ; GCN: flat_atomic_sub_x2 
[[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_sub_i64: ; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_sub_i64(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_sub_i64(i64* %out, i64 %in) { entry: - %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %out, i64 %in seq_cst + %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_sub_i64_ret: ; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_sub_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_sub_i64_ret(i64* %out, i64* %out2, i64 %in) { entry: - %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %out, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_sub_i64_addr64: ; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void 
@atomic_sub_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_sub_i64_addr64(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %ptr, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64: ; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile sub i64 addrspace(4)* %ptr, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_max_i64_offset: ; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_max_i64_offset(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_max_i64_offset(i64* %out, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile max i64 addrspace(4)* %gep, i64 %in seq_cst + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_max_i64_ret_offset: ; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_max_i64_ret_offset(i64 
addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_max_i64_ret_offset(i64* %out, i64* %out2, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile max i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset: ; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile max i64 addrspace(4)* %gep, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset: ; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile max i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile max i64* %gep, i64 
%in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_max_i64: ; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_max_i64(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_max_i64(i64* %out, i64 %in) { entry: - %tmp0 = atomicrmw volatile max i64 addrspace(4)* %out, i64 %in seq_cst + %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_max_i64_ret: ; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_max_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_max_i64_ret(i64* %out, i64* %out2, i64 %in) { entry: - %tmp0 = atomicrmw volatile max i64 addrspace(4)* %out, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_max_i64_addr64: ; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_max_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_max_i64_addr64(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile max i64 addrspace(4)* %ptr, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64: ; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void 
@atomic_max_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile max i64 addrspace(4)* %ptr, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umax_i64_offset: ; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_umax_i64_offset(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_umax_i64_offset(i64* %out, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %gep, i64 %in seq_cst + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset: ; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64* %out, i64* %out2, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset: ; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64* %out, i64 %in, i64 
%index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %gep, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset: ; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umax_i64: ; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_umax_i64(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_umax_i64(i64* %out, i64 %in) { entry: - %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %out, i64 %in seq_cst + %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_umax_i64_ret: ; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umax_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 
%in) { +define amdgpu_kernel void @atomic_umax_i64_ret(i64* %out, i64* %out2, i64 %in) { entry: - %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %out, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umax_i64_addr64: ; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_umax_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_umax_i64_addr64(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %ptr, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64: ; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile umax i64 addrspace(4)* %ptr, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_min_i64_offset: ; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_min_i64_offset(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_min_i64_offset(i64* %out, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - 
%tmp0 = atomicrmw volatile min i64 addrspace(4)* %gep, i64 %in seq_cst + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_min_i64_ret_offset: ; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_min_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_min_i64_ret_offset(i64* %out, i64* %out2, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile min i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset: ; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile min i64 addrspace(4)* %gep, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset: ; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void 
@atomic_min_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile min i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_min_i64: ; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_min_i64(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_min_i64(i64* %out, i64 %in) { entry: - %tmp0 = atomicrmw volatile min i64 addrspace(4)* %out, i64 %in seq_cst + %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_min_i64_ret: ; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_min_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_min_i64_ret(i64* %out, i64* %out2, i64 %in) { entry: - %tmp0 = atomicrmw volatile min i64 addrspace(4)* %out, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_min_i64_addr64: ; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_min_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_min_i64_addr64(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile min i64 addrspace(4)* %ptr, i64 %in seq_cst + %ptr = getelementptr 
i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64: ; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile min i64 addrspace(4)* %ptr, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umin_i64_offset: ; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_umin_i64_offset(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_umin_i64_offset(i64* %out, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %gep, i64 %in seq_cst + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset: ; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64* %out, i64* %out2, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* 
%out2 + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset: ; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %gep, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset: ; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umin_i64: ; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_umin_i64(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_umin_i64(i64* %out, i64 %in) { 
entry: - %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %out, i64 %in seq_cst + %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_umin_i64_ret: ; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umin_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_umin_i64_ret(i64* %out, i64* %out2, i64 %in) { entry: - %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %out, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_umin_i64_addr64: ; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_umin_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_umin_i64_addr64(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %ptr, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64: ; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile umin i64 addrspace(4)* %ptr, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* 
%out, i64 %index + %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_or_i64_offset: ; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_or_i64_offset(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_or_i64_offset(i64* %out, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile or i64 addrspace(4)* %gep, i64 %in seq_cst + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_or_i64_ret_offset: ; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_or_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_or_i64_ret_offset(i64* %out, i64* %out2, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile or i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset: ; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile or i64 addrspace(4)* %gep, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw 
volatile or i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset: ; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile or i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_or_i64: ; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_or_i64(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_or_i64(i64* %out, i64 %in) { entry: - %tmp0 = atomicrmw volatile or i64 addrspace(4)* %out, i64 %in seq_cst + %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_or_i64_ret: ; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_or_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_or_i64_ret(i64* %out, i64* %out2, i64 %in) { entry: - %tmp0 = atomicrmw volatile or i64 addrspace(4)* %out, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_or_i64_addr64: ; GCN: 
flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_or_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_or_i64_addr64(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile or i64 addrspace(4)* %ptr, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64: ; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile or i64 addrspace(4)* %ptr, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xchg_i64_offset: ; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_xchg_i64_offset(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_xchg_i64_offset(i64* %out, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %gep, i64 %in seq_cst + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset: ; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 
v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset: ; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %gep, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset: ; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, 
i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xchg_i64: ; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_xchg_i64(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_xchg_i64(i64* %out, i64 %in) { entry: - %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %out, i64 %in seq_cst + %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_xchg_i64_ret: ; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xchg_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_xchg_i64_ret(i64* %out, i64* %out2, i64 %in) { entry: - %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %out, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xchg_i64_addr64: ; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_xchg_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_xchg_i64_addr64(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %ptr, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64: ; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void 
@atomic_xchg_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile xchg i64 addrspace(4)* %ptr, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xor_i64_offset: ; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_xor_i64_offset(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_xor_i64_offset(i64* %out, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %gep, i64 %in seq_cst + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset: ; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64* %out, i64* %out2, i64 %in) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %gep = getelementptr i64, i64* %out, i64 4 + %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset: ; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64 addrspace(4)* 
%out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %gep, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset: ; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %gep, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xor_i64: ; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_xor_i64(i64 addrspace(4)* %out, i64 %in) { +define amdgpu_kernel void @atomic_xor_i64(i64* %out, i64 %in) { entry: - %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %out, i64 %in seq_cst + %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_xor_i64_ret: ; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define 
amdgpu_kernel void @atomic_xor_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in) { +define amdgpu_kernel void @atomic_xor_i64_ret(i64* %out, i64* %out2, i64 %in) { entry: - %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %out, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_xor_i64_addr64: ; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} -define amdgpu_kernel void @atomic_xor_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_xor_i64_addr64(i64* %out, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %ptr, i64 %in seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst ret void } ; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64: ; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index) { +define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %tmp0 = atomicrmw volatile xor i64 addrspace(4)* %ptr, i64 %in seq_cst - store i64 %tmp0, i64 addrspace(4)* %out2 + %ptr = getelementptr i64, i64* %out, i64 %index + %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst + store i64 %tmp0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_load_i64_offset: ; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_load_i64_offset(i64 addrspace(4)* %in, i64 
addrspace(4)* %out) { +define amdgpu_kernel void @atomic_load_i64_offset(i64* %in, i64* %out) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %in, i64 4 - %val = load atomic i64, i64 addrspace(4)* %gep seq_cst, align 8 - store i64 %val, i64 addrspace(4)* %out + %gep = getelementptr i64, i64* %in, i64 4 + %val = load atomic i64, i64* %gep seq_cst, align 8 + store i64 %val, i64* %out ret void } ; GCN-LABEL: {{^}}atomic_load_i64: ; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_load_i64(i64 addrspace(4)* %in, i64 addrspace(4)* %out) { +define amdgpu_kernel void @atomic_load_i64(i64* %in, i64* %out) { entry: - %val = load atomic i64, i64 addrspace(4)* %in seq_cst, align 8 - store i64 %val, i64 addrspace(4)* %out + %val = load atomic i64, i64* %in seq_cst, align 8 + store i64 %val, i64* %out ret void } ; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset: ; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64 addrspace(4)* %in, i64 addrspace(4)* %out, i64 %index) { +define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64* %in, i64* %out, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %in, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %val = load atomic i64, i64 addrspace(4)* %gep seq_cst, align 8 - store i64 %val, i64 addrspace(4)* %out + %ptr = getelementptr i64, i64* %in, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %val = load atomic i64, i64* %gep seq_cst, align 8 + store i64 %val, i64* %out ret void } ; GCN-LABEL: {{^}}atomic_load_i64_addr64: ; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void 
@atomic_load_i64_addr64(i64 addrspace(4)* %in, i64 addrspace(4)* %out, i64 %index) { +define amdgpu_kernel void @atomic_load_i64_addr64(i64* %in, i64* %out, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %in, i64 %index - %val = load atomic i64, i64 addrspace(4)* %ptr seq_cst, align 8 - store i64 %val, i64 addrspace(4)* %out + %ptr = getelementptr i64, i64* %in, i64 %index + %val = load atomic i64, i64* %ptr seq_cst, align 8 + store i64 %val, i64* %out ret void } ; GCN-LABEL: {{^}}atomic_store_i64_offset: ; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64 addrspace(4)* %out) { +define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64* %out) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - store atomic i64 %in, i64 addrspace(4)* %gep seq_cst, align 8 + %gep = getelementptr i64, i64* %out, i64 4 + store atomic i64 %in, i64* %gep seq_cst, align 8 ret void } ; GCN-LABEL: {{^}}atomic_store_i64: ; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}] -define amdgpu_kernel void @atomic_store_i64(i64 %in, i64 addrspace(4)* %out) { +define amdgpu_kernel void @atomic_store_i64(i64 %in, i64* %out) { entry: - store atomic i64 %in, i64 addrspace(4)* %out seq_cst, align 8 + store atomic i64 %in, i64* %out seq_cst, align 8 ret void } ; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset: ; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}} -define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(4)* %out, i64 %index) { +define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64* %out, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - store atomic i64 %in, i64 addrspace(4)* %gep seq_cst, align 8 + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, 
i64* %ptr, i64 4 + store atomic i64 %in, i64* %gep seq_cst, align 8 ret void } ; GCN-LABEL: {{^}}atomic_store_i64_addr64: ; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}} -define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64 addrspace(4)* %out, i64 %index) { +define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64* %out, i64 %index) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - store atomic i64 %in, i64 addrspace(4)* %ptr seq_cst, align 8 + %ptr = getelementptr i64, i64* %out, i64 %index + store atomic i64 %in, i64* %ptr seq_cst, align 8 ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_offset: ; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64 addrspace(4)* %out, i64 %in, i64 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64* %out, i64 %in, i64 %old) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst + %gep = getelementptr i64, i64* %out, i64 4 + %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_soffset: ; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64 addrspace(4)* %out, i64 %in, i64 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64* %out, i64 %in, i64 %old) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 9000 - %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst + %gep = getelementptr i64, i64* %out, i64 9000 + %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset: ; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} ; GCN: 
flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]: -define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in, i64 %old) { entry: - %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4 - %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst + %gep = getelementptr i64, i64* %out, i64 4 + %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst %extract0 = extractvalue { i64, i1 } %val, 0 - store i64 %extract0, i64 addrspace(4)* %out2 + store i64 %extract0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset: ; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64 addrspace(4)* %out, i64 %in, i64 %index, i64 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index, i64 %old) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset: ; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]: -define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index, i64 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* 
%out, i64 %index - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i64 4 - %val = cmpxchg volatile i64 addrspace(4)* %gep, i64 %old, i64 %in seq_cst seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %gep = getelementptr i64, i64* %ptr, i64 4 + %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst %extract0 = extractvalue { i64, i1 } %val, 0 - store i64 %extract0, i64 addrspace(4)* %out2 + store i64 %extract0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i64: ; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} -define amdgpu_kernel void @atomic_cmpxchg_i64(i64 addrspace(4)* %out, i64 %in, i64 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i64(i64* %out, i64 %in, i64 %old) { entry: - %val = cmpxchg volatile i64 addrspace(4)* %out, i64 %old, i64 %in seq_cst seq_cst + %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret: ; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} ; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]: -define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64* %out, i64* %out2, i64 %in, i64 %old) { entry: - %val = cmpxchg volatile i64 addrspace(4)* %out, i64 %old, i64 %in seq_cst seq_cst + %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst %extract0 = extractvalue { i64, i1 } %val, 0 - store i64 %extract0, i64 addrspace(4)* %out2 + store i64 %extract0, i64* %out2 ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64: ; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} -define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64 addrspace(4)* %out, i64 %in, i64 %index, i64 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64* %out, i64 %in, i64 %index, i64 %old) { 
entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %val = cmpxchg volatile i64 addrspace(4)* %ptr, i64 %old, i64 %in seq_cst seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst ret void } ; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64: ; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} ; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]: -define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %out2, i64 %in, i64 %index, i64 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) { entry: - %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index - %val = cmpxchg volatile i64 addrspace(4)* %ptr, i64 %old, i64 %in seq_cst seq_cst + %ptr = getelementptr i64, i64* %out, i64 %index + %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst %extract0 = extractvalue { i64, i1 } %val, 0 - store i64 %extract0, i64 addrspace(4)* %out2 + store i64 %extract0, i64* %out2 ret void } Index: test/CodeGen/AMDGPU/floor.ll =================================================================== --- test/CodeGen/AMDGPU/floor.ll +++ test/CodeGen/AMDGPU/floor.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck %s ; CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define amdgpu_ps void @test(<4 x float> inreg %reg0) { Index: test/CodeGen/AMDGPU/fma-combine.ll =================================================================== --- test/CodeGen/AMDGPU/fma-combine.ll +++ test/CodeGen/AMDGPU/fma-combine.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA 
-check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s ; Note: The SI-FMA conversions of type x * (y + 1) --> x * y + x would be ; beneficial even without fp32 denormals, but they do require no-infs-fp-math Index: test/CodeGen/AMDGPU/fma.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fma.f64.ll +++ test/CodeGen/AMDGPU/fma.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI 
-check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.fma.f64(double, double, double) nounwind readnone declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone Index: test/CodeGen/AMDGPU/fma.ll =================================================================== --- test/CodeGen/AMDGPU/fma.ll +++ test/CodeGen/AMDGPU/fma.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.fma.f32(float, float, float) nounwind readnone declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone Index: test/CodeGen/AMDGPU/fmad.ll =================================================================== --- test/CodeGen/AMDGPU/fmad.ll +++ test/CodeGen/AMDGPU/fmad.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} Index: 
test/CodeGen/AMDGPU/fmax.ll =================================================================== --- test/CodeGen/AMDGPU/fmax.ll +++ test/CodeGen/AMDGPU/fmax.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} Index: test/CodeGen/AMDGPU/fmax3.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fmax3.f64.ll +++ test/CodeGen/AMDGPU/fmax3.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare double @llvm.maxnum.f64(double, double) nounwind readnone Index: test/CodeGen/AMDGPU/fmax3.ll =================================================================== --- test/CodeGen/AMDGPU/fmax3.ll +++ test/CodeGen/AMDGPU/fmax3.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 
-mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}test_fmax3_olt_0_f32: ; GCN: buffer_load_dword [[REGC:v[0-9]+]] Index: test/CodeGen/AMDGPU/fmax_legacy.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fmax_legacy.f64.ll +++ test/CodeGen/AMDGPU/fmax_legacy.f64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; Make sure we don't try to form FMAX_LEGACY nodes with f64 declare i32 @llvm.amdgcn.workitem.id.x() #1 Index: test/CodeGen/AMDGPU/fmax_legacy.ll =================================================================== --- test/CodeGen/AMDGPU/fmax_legacy.ll +++ test/CodeGen/AMDGPU/fmax_legacy.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s -; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s +; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FIXME: Should replace unsafe-fp-math with no signed zeros. 
Index: test/CodeGen/AMDGPU/fmaxnum.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fmaxnum.f64.ll +++ test/CodeGen/AMDGPU/fmaxnum.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.maxnum.f64(double, double) #0 declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0 Index: test/CodeGen/AMDGPU/fmaxnum.ll =================================================================== --- test/CodeGen/AMDGPU/fmaxnum.ll +++ test/CodeGen/AMDGPU/fmaxnum.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.maxnum.f32(float, float) #0 declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0 Index: test/CodeGen/AMDGPU/fmed3.ll =================================================================== --- test/CodeGen/AMDGPU/fmed3.ll +++ test/CodeGen/AMDGPU/fmed3.ll @@ -1,9 +1,9 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mattr=+fp-exceptions 
-verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s ; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f32: Index: test/CodeGen/AMDGPU/fmin.ll 
=================================================================== --- test/CodeGen/AMDGPU/fmin.ll +++ test/CodeGen/AMDGPU/fmin.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} Index: test/CodeGen/AMDGPU/fmin3.ll =================================================================== --- test/CodeGen/AMDGPU/fmin3.ll +++ test/CodeGen/AMDGPU/fmin3.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}test_fmin3_olt_0_f32: ; GCN: buffer_load_dword [[REGC:v[0-9]+]] Index: test/CodeGen/AMDGPU/fmin_fmax_legacy.amdgcn.ll =================================================================== --- test/CodeGen/AMDGPU/fmin_fmax_legacy.amdgcn.ll +++ test/CodeGen/AMDGPU/fmin_fmax_legacy.amdgcn.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN %s -; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-NONAN -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck 
-check-prefix=GCN-SAFE -check-prefix=GCN %s +; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-NONAN -check-prefix=GCN %s ; FIXME: Should replace unsafe-fp-math with no signed zeros. Index: test/CodeGen/AMDGPU/fmin_legacy.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fmin_legacy.f64.ll +++ test/CodeGen/AMDGPU/fmin_legacy.f64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() #1 Index: test/CodeGen/AMDGPU/fmin_legacy.ll =================================================================== --- test/CodeGen/AMDGPU/fmin_legacy.ll +++ test/CodeGen/AMDGPU/fmin_legacy.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FIXME: Should replace unsafe-fp-math with no signed zeros. 
Index: test/CodeGen/AMDGPU/fminnum.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fminnum.f64.ll +++ test/CodeGen/AMDGPU/fminnum.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.minnum.f64(double, double) #0 declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0 Index: test/CodeGen/AMDGPU/fminnum.ll =================================================================== --- test/CodeGen/AMDGPU/fminnum.ll +++ test/CodeGen/AMDGPU/fminnum.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.minnum.f32(float, float) #0 declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0 Index: test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll =================================================================== --- test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll +++ 
test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll @@ -1,6 +1,6 @@ -; XUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s +; XUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't Index: test/CodeGen/AMDGPU/fmul.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fmul.f16.ll +++ test/CodeGen/AMDGPU/fmul.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s 
| FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}fmul_f16 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/fmul.ll =================================================================== --- test/CodeGen/AMDGPU/fmul.ll +++ test/CodeGen/AMDGPU/fmul.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}fmul_f32: ; GCN: v_mul_f32 Index: test/CodeGen/AMDGPU/fmul64.ll =================================================================== --- test/CodeGen/AMDGPU/fmul64.ll +++ test/CodeGen/AMDGPU/fmul64.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s ; FUNC-LABEL: {{^}}fmul_f64: ; SI: 
v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} Index: test/CodeGen/AMDGPU/fmuladd.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fmuladd.f16.ll +++ test/CodeGen/AMDGPU/fmuladd.f16.ll @@ -1,12 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s - -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=on 
-verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-FLUSH,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-FLUSH,VI %s + +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI-DENORM-STRICT,VI-DENORM,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI-DENORM-CONTRACT,VI-DENORM,VI %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare half @llvm.fmuladd.f16(half, half, half) #1 Index: test/CodeGen/AMDGPU/fmuladd.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fmuladd.f64.ll +++ test/CodeGen/AMDGPU/fmuladd.f64.ll @@ -1,9 +1,9 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICTSI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s -; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,VI %s ; GCN-LABEL: {{^}}fmuladd_f64: ; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\],
v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} Index: test/CodeGen/AMDGPU/fmuladd.v2f16.ll =================================================================== --- test/CodeGen/AMDGPU/fmuladd.v2f16.ll +++ test/CodeGen/AMDGPU/fmuladd.v2f16.ll @@ -1,12 +1,12 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s - -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck 
-check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s + +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 
%s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1 Index: test/CodeGen/AMDGPU/fnearbyint.ll =================================================================== --- test/CodeGen/AMDGPU/fnearbyint.ll +++ test/CodeGen/AMDGPU/fnearbyint.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s ; This should have the exactly the same output as the test for rint, ; so no need to check anything. 
Index: test/CodeGen/AMDGPU/fneg-combines.ll =================================================================== --- test/CodeGen/AMDGPU/fneg-combines.ll +++ test/CodeGen/AMDGPU/fneg-combines.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s ; -------------------------------------------------------------------------------- ; fadd tests Index: test/CodeGen/AMDGPU/fneg-fabs.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=CIVI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GFX89 -check-prefix=GCN -check-prefix=CIVI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=CIVI %s +; RUN: llc 
-mtriple=amdgcn--amdhsa-amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GFX89 -check-prefix=GCN -check-prefix=CIVI %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx901 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=GCN %s ; GCN-LABEL: {{^}}fneg_fabs_fadd_f16: ; CI: v_cvt_f32_f16_e32 Index: test/CodeGen/AMDGPU/fneg-fabs.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fneg-fabs.f64.ll +++ test/CodeGen/AMDGPU/fneg-fabs.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s ; FIXME: Check something here. Currently it seems fabs + fneg aren't ; into 2 modifiers, although theoretically that should work. 
Index: test/CodeGen/AMDGPU/fneg-fabs.ll =================================================================== --- test/CodeGen/AMDGPU/fneg-fabs.ll +++ test/CodeGen/AMDGPU/fneg-fabs.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32: ; SI-NOT: and Index: test/CodeGen/AMDGPU/fneg.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fneg.f16.ll +++ test/CodeGen/AMDGPU/fneg.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,GFX89 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false 
-march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s ; FIXME: Should be able to do scalar op ; GCN-LABEL: {{^}}s_fneg_f16: Index: test/CodeGen/AMDGPU/fneg.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fneg.f64.ll +++ test/CodeGen/AMDGPU/fneg.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}fneg_f64: ; GCN: v_xor_b32 Index: test/CodeGen/AMDGPU/fneg.ll =================================================================== --- test/CodeGen/AMDGPU/fneg.ll +++ test/CodeGen/AMDGPU/fneg.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck 
-check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}s_fneg_f32: ; R600: -PV Index: test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll =================================================================== --- test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll +++ test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}fold_mul_neg: ; GCN: load_dword [[V:v[0-9]+]] Index: test/CodeGen/AMDGPU/fp-classify.ll =================================================================== --- test/CodeGen/AMDGPU/fp-classify.ll +++ test/CodeGen/AMDGPU/fp-classify.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare float @llvm.fabs.f32(float) #1 declare double @llvm.fabs.f64(double) #1 Index: test/CodeGen/AMDGPU/fp16_to_fp32.ll =================================================================== --- test/CodeGen/AMDGPU/fp16_to_fp32.ll +++ test/CodeGen/AMDGPU/fp16_to_fp32.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | 
FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=EGCM -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=EGCM -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=EGCM -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=EGCM -check-prefix=FUNC %s declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone Index: test/CodeGen/AMDGPU/fp16_to_fp64.ll =================================================================== --- test/CodeGen/AMDGPU/fp16_to_fp64.ll +++ test/CodeGen/AMDGPU/fp16_to_fp64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone Index: test/CodeGen/AMDGPU/fp32_to_fp16.ll 
=================================================================== --- test/CodeGen/AMDGPU/fp32_to_fp16.ll +++ test/CodeGen/AMDGPU/fp32_to_fp16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone Index: test/CodeGen/AMDGPU/fp_to_sint.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fp_to_sint.f64.ll +++ test/CodeGen/AMDGPU/fp_to_sint.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() #1 
declare double @llvm.fabs.f64(double) #1 Index: test/CodeGen/AMDGPU/fp_to_sint.ll =================================================================== --- test/CodeGen/AMDGPU/fp_to_sint.ll +++ test/CodeGen/AMDGPU/fp_to_sint.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC declare float @llvm.fabs.f32(float) #1 Index: test/CodeGen/AMDGPU/fp_to_uint.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fp_to_uint.f64.ll +++ test/CodeGen/AMDGPU/fp_to_uint.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare double @llvm.fabs.f64(double) #1 Index: test/CodeGen/AMDGPU/fp_to_uint.ll =================================================================== --- 
test/CodeGen/AMDGPU/fp_to_uint.ll +++ test/CodeGen/AMDGPU/fp_to_uint.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,FUNC,SI -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,FUNC,VI -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,FUNC,SI +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,FUNC,VI +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC declare float @llvm.fabs.f32(float) #1 Index: test/CodeGen/AMDGPU/fpext-free.ll =================================================================== --- test/CodeGen/AMDGPU/fpext-free.ll +++ test/CodeGen/AMDGPU/fpext-free.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s -; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s -; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=+fp32-denormals 
-verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx803 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx803 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s ; fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) Index: test/CodeGen/AMDGPU/fpext.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fpext.f16.ll +++ test/CodeGen/AMDGPU/fpext.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs 
-enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s ; GCN-LABEL: {{^}}fpext_f16_to_f32 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/fpext.ll =================================================================== --- test/CodeGen/AMDGPU/fpext.ll +++ test/CodeGen/AMDGPU/fpext.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}fpext_f32_to_f64: ; SI: v_cvt_f64_f32_e32 {{v\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} Index: test/CodeGen/AMDGPU/fptosi.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fptosi.f16.ll +++ test/CodeGen/AMDGPU/fptosi.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: 
{{^}}fptosi_f16_to_i16 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/fptoui.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fptoui.f16.ll +++ test/CodeGen/AMDGPU/fptoui.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}fptoui_f16_to_i16 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/fptrunc.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fptrunc.f16.ll +++ test/CodeGen/AMDGPU/fptrunc.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}fptrunc_f32_to_f16: ; GCN: buffer_load_dword v[[A_F32:[0-9]+]] Index: test/CodeGen/AMDGPU/fptrunc.ll =================================================================== --- test/CodeGen/AMDGPU/fptrunc.ll +++ test/CodeGen/AMDGPU/fptrunc.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN-UNSAFE %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN-UNSAFE %s ; FUNC-LABEL: {{^}}fptrunc_f64_to_f32: ; GCN: v_cvt_f32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} Index: test/CodeGen/AMDGPU/fract.f64.ll =================================================================== --- 
test/CodeGen/AMDGPU/fract.f64.ll +++ test/CodeGen/AMDGPU/fract.f64.ll @@ -1,9 +1,9 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=SI-UNSAFE -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=VI-UNSAFE -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=SI-UNSAFE -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global 
-enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=VI-UNSAFE -check-prefix=FUNC %s declare double @llvm.fabs.f64(double) #0 declare double @llvm.floor.f64(double) #0 Index: test/CodeGen/AMDGPU/fract.ll =================================================================== --- test/CodeGen/AMDGPU/fract.ll +++ test/CodeGen/AMDGPU/fract.ll @@ -1,8 +1,8 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SAFE -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn 
-mtriple=amdgcn---amdgiz -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN-UNSAFE -check-prefix=GCN %s declare float @llvm.fabs.f32(float) #0 declare float @llvm.floor.f32(float) #0 Index: test/CodeGen/AMDGPU/frem.ll =================================================================== --- test/CodeGen/AMDGPU/frem.ll +++ test/CodeGen/AMDGPU/frem.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}frem_f32: ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}} Index: test/CodeGen/AMDGPU/fsqrt.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fsqrt.f64.ll +++ 
test/CodeGen/AMDGPU/fsqrt.f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}v_safe_fsqrt_f64: ; GCN: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} Index: test/CodeGen/AMDGPU/fsqrt.ll =================================================================== --- test/CodeGen/AMDGPU/fsqrt.ll +++ test/CodeGen/AMDGPU/fsqrt.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood 
< %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; Run with unsafe-fp-math to make sure nothing tries to turn this into 1 / rsqrt(x) Index: test/CodeGen/AMDGPU/fsub.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fsub.f16.ll +++ test/CodeGen/AMDGPU/fsub.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}fsub_f16: ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/fsub.ll =================================================================== --- test/CodeGen/AMDGPU/fsub.ll +++ test/CodeGen/AMDGPU/fsub.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}v_fsub_f32: ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} Index: test/CodeGen/AMDGPU/fsub64.ll =================================================================== --- test/CodeGen/AMDGPU/fsub64.ll +++ test/CodeGen/AMDGPU/fsub64.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare double @llvm.fabs.f64(double) #0 Index: test/CodeGen/AMDGPU/ftrunc.f64.ll =================================================================== --- test/CodeGen/AMDGPU/ftrunc.f64.ll +++ test/CodeGen/AMDGPU/ftrunc.f64.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI 
-check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s declare double @llvm.trunc.f64(double) nounwind readnone declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone Index: test/CodeGen/AMDGPU/ftrunc.ll =================================================================== --- test/CodeGen/AMDGPU/ftrunc.ll +++ test/CodeGen/AMDGPU/ftrunc.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG --check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG --check-prefix=FUNC %s declare float @llvm.trunc.f32(float) nounwind readnone declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone Index: test/CodeGen/AMDGPU/gep-address-space.ll =================================================================== --- 
test/CodeGen/AMDGPU/gep-address-space.ll +++ test/CodeGen/AMDGPU/gep-address-space.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s define amdgpu_kernel void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { ; CHECK-LABEL: {{^}}use_gep_address_space: Index: test/CodeGen/AMDGPU/global-constant.ll =================================================================== --- test/CodeGen/AMDGPU/global-constant.ll +++ test/CodeGen/AMDGPU/global-constant.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOHSA %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOHSA %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA %s @private1 = private unnamed_addr addrspace(2) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0] @private2 = private unnamed_addr addrspace(2) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0] Index: test/CodeGen/AMDGPU/global-directive.ll 
=================================================================== --- test/CodeGen/AMDGPU/global-directive.ll +++ test/CodeGen/AMDGPU/global-directive.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; Make sure the GlobalDirective isn't merged with the function name Index: test/CodeGen/AMDGPU/global-extload-i16.ll =================================================================== --- test/CodeGen/AMDGPU/global-extload-i16.ll +++ test/CodeGen/AMDGPU/global-extload-i16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; XUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FIXME: cypress is broken because the bigger testcases spill and it's not implemented ; FUNC-LABEL: {{^}}zextload_global_i16_to_i32: Index: test/CodeGen/AMDGPU/global-smrd-unknown.ll 
=================================================================== --- test/CodeGen/AMDGPU/global-smrd-unknown.ll +++ test/CodeGen/AMDGPU/global-smrd-unknown.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -memdep-block-scan-limit=1 -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -memdep-block-scan-limit=1 -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}unknown_memdep_analysis: ; GCN: flat_load_dword Index: test/CodeGen/AMDGPU/global-variable-relocs.ll =================================================================== --- test/CodeGen/AMDGPU/global-variable-relocs.ll +++ test/CodeGen/AMDGPU/global-variable-relocs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji < %s | FileCheck %s @private = private addrspace(1) global [256 x i32] zeroinitializer @internal = internal addrspace(1) global [256 x i32] zeroinitializer Index: test/CodeGen/AMDGPU/global_atomics.ll =================================================================== --- test/CodeGen/AMDGPU/global_atomics.ll +++ test/CodeGen/AMDGPU/global_atomics.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck 
-enable-var-scope -check-prefixes=GCN,VI,SIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}atomic_add_i32_offset: ; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} Index: test/CodeGen/AMDGPU/global_atomics_i64.ll =================================================================== --- test/CodeGen/AMDGPU/global_atomics_i64.ll +++ test/CodeGen/AMDGPU/global_atomics_i64.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}atomic_add_i64_offset: ; CIVI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} Index: test/CodeGen/AMDGPU/gv-const-addrspace.ll =================================================================== --- test/CodeGen/AMDGPU/gv-const-addrspace.ll +++ test/CodeGen/AMDGPU/gv-const-addrspace.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global 
-verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s @b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2 Index: test/CodeGen/AMDGPU/gv-offset-folding.ll =================================================================== --- test/CodeGen/AMDGPU/gv-offset-folding.ll +++ test/CodeGen/AMDGPU/gv-offset-folding.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -relocation-model=static < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -relocation-model=static < %s | FileCheck %s @lds = external addrspace(3) global [4 x i32] Index: test/CodeGen/AMDGPU/half.ll =================================================================== --- test/CodeGen/AMDGPU/half.ll +++ test/CodeGen/AMDGPU/half.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; half args should be promoted to float for SI and lower. Index: test/CodeGen/AMDGPU/hoist-cond.ll =================================================================== --- test/CodeGen/AMDGPU/hoist-cond.ll +++ test/CodeGen/AMDGPU/hoist-cond.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; Check that invariant compare is hoisted out of the loop. ; At the same time condition shall not be serialized into a VGPR and deserialized later Index: test/CodeGen/AMDGPU/hsa-fp-mode.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-fp-mode.ll +++ test/CodeGen/AMDGPU/hsa-fp-mode.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_default_ci: ; GCN: float_mode = 192 Index: test/CodeGen/AMDGPU/hsa-func-align.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-func-align.ll +++ test/CodeGen/AMDGPU/hsa-func-align.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=HSA %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj < %s | llvm-readobj -symbols -s -sd | FileCheck -check-prefix=ELF %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri < %s | FileCheck -check-prefix=HSA %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -filetype=obj < %s | llvm-readobj -symbols -s -sd | FileCheck 
-check-prefix=ELF %s ; ELF: Section { ; ELF: Name: .text Index: test/CodeGen/AMDGPU/hsa-globals.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-globals.ll +++ test/CodeGen/AMDGPU/hsa-globals.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=ASM %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri | FileCheck --check-prefix=ASM %s @linkonce_odr_global_program = linkonce_odr addrspace(1) global i32 0 @linkonce_global_program = linkonce addrspace(1) global i32 0 Index: test/CodeGen/AMDGPU/hsa-group-segment.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-group-segment.ll +++ test/CodeGen/AMDGPU/hsa-group-segment.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri | FileCheck --check-prefix=HSA %s @internal_group = internal addrspace(3) global i32 undef @external_group = addrspace(3) global i32 undef Index: test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll +++ test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s ; CHECK: - Name: test_ro_arg ; CHECK-NEXT: SymbolName: 'test_ro_arg@kd' ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: 'float*' +; CHECK-NEXT: - TypeName: 'float addrspace(5)*' ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer @@ -12,7 +12,7 @@ ; CHECK-NEXT: AccQual: ReadOnly ; CHECK-NEXT: IsConst: true ; CHECK-NEXT: IsRestrict: true -; CHECK-NEXT: - TypeName: 'float*' +; 
CHECK-NEXT: - TypeName: 'float addrspace(5)*' ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer @@ -28,5 +28,5 @@ !0 = !{i32 1, i32 1} !1 = !{!"none", !"none"} -!2 = !{!"float*", !"float*"} +!2 = !{!"float addrspace(5)*", !"float addrspace(5)*"} !3 = !{!"const restrict", !""} Index: test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll +++ test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s ; CHECK: --- ; CHECK: Version: [ 1, 0 ] Index: test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll +++ test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 
--check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx800 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s %struct.A = type { i8, float } %opencl.image1d_t = type opaque @@ -431,7 +431,7 @@ ; CHECK-NEXT: 
ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_struct(%struct.A* byval %a) +define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 { ret void @@ -1032,7 +1032,7 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_arg_ptr_to_ptr(i32* addrspace(1)* %a) +define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 { ret void @@ -1067,7 +1067,7 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global -define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B* byval %a) +define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 { ret void @@ -1078,7 +1078,7 @@ ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: 'global int* __attribute__((ext_vector_type(2)))' +; CHECK-NEXT: - TypeName: 'global int addrspace(5)* __attribute__((ext_vector_type(2)))' ; CHECK-NEXT: Size: 16 ; CHECK-NEXT: Align: 16 ; CHECK-NEXT: ValueKind: ByValue @@ -1263,7 +1263,7 @@ ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @__test_block_invoke_kernel( - <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %arg) #0 + <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110 !kernel_arg_base_type !110 !kernel_arg_type_qual !4 { ret void @@ -1359,7 
+1359,7 @@ !80 = !{!"int **"} !81 = !{i32 1} !82 = !{!"struct B"} -!83 = !{!"global int* __attribute__((ext_vector_type(2)))"} +!83 = !{!"global int addrspace(5)* __attribute__((ext_vector_type(2)))"} !84 = !{!"clk_event_t"} !opencl.ocl.version = !{!90} !90 = !{i32 2, i32 0} Index: test/CodeGen/AMDGPU/hsa-metadata-images.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-images.ll +++ test/CodeGen/AMDGPU/hsa-metadata-images.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s %opencl.image1d_t = type opaque %opencl.image1d_array_t = type opaque Index: test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll +++ 
test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. Index: test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll +++ test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. Index: test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll +++ test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. 
Index: test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll +++ test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s ; CHECK: --- ; CHECK: Version: [ 1, 0 ] Index: test/CodeGen/AMDGPU/hsa-note-no-func.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-note-no-func.ll +++ test/CodeGen/AMDGPU/hsa-note-no-func.ll @@ -1,28 +1,28 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx600 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI600 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx601 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI601 %s -; RUN: llc < %s 
-mtriple=amdgcn--amdhsa -mcpu=gfx700 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI702 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris10 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=polaris11 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx800 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI800 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s -; RUN: llc < %s 
-mtriple=amdgcn--amdhsa -mcpu=gfx804 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI804 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx901 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX901 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx903 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX903 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx600 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI600 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx601 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI601 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx700 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx701 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx702 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI702 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx703 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=mullins | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=hawaii | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kabini | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA 
--check-prefix=HSA-VI801 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=polaris10 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=polaris11 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx800 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI800 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx801 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx802 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx803 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx804 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI804 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx901 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX901 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx902 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx903 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX903 %s ; HSA: .hsa_code_object_version 2,1 ; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU" Index: test/CodeGen/AMDGPU/i1-copy-implicit-def.ll =================================================================== --- 
test/CodeGen/AMDGPU/i1-copy-implicit-def.ll +++ test/CodeGen/AMDGPU/i1-copy-implicit-def.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SILowerI1Copies was not handling IMPLICIT_DEF ; SI-LABEL: {{^}}br_implicit_def: Index: test/CodeGen/AMDGPU/i1-copy-phi.ll =================================================================== --- test/CodeGen/AMDGPU/i1-copy-phi.ll +++ test/CodeGen/AMDGPU/i1-copy-phi.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}br_i1_phi: ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} Index: test/CodeGen/AMDGPU/i8-to-double-to-float.ll =================================================================== --- test/CodeGen/AMDGPU/i8-to-double-to-float.ll +++ test/CodeGen/AMDGPU/i8-to-double-to-float.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} Index: test/CodeGen/AMDGPU/icmp-select-sete-reverse-args.ll =================================================================== --- test/CodeGen/AMDGPU/icmp-select-sete-reverse-args.ll +++ test/CodeGen/AMDGPU/icmp-select-sete-reverse-args.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s 
-march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;Test that a select with reversed True/False values is correctly lowered ;to a SETNE_INT. There should only be one SETNE_INT instruction. Index: test/CodeGen/AMDGPU/icmp.i16.ll =================================================================== --- test/CodeGen/AMDGPU/icmp.i16.ll +++ test/CodeGen/AMDGPU/icmp.i16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s| FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s| FileCheck -check-prefix=GCN -check-prefix=SI %s ;;;==========================================================================;;; ;; 16-bit integer comparisons Index: test/CodeGen/AMDGPU/icmp64.ll =================================================================== --- test/CodeGen/AMDGPU/icmp64.ll +++ test/CodeGen/AMDGPU/icmp64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}test_i64_eq: ; SI: v_cmp_eq_u64 Index: test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll =================================================================== --- test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll +++ test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll @@ -1,5 +1,5 @@ -; 
RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck -check-prefix=ERR %s -; RUN: not llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s 2>&1 | FileCheck -check-prefix=ERR %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=GCN %s ; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v1 to s9 Index: test/CodeGen/AMDGPU/image-attributes.ll =================================================================== --- test/CodeGen/AMDGPU/image-attributes.ll +++ test/CodeGen/AMDGPU/image-attributes.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; === WIDTH ================================================================== ; 9 implicit args = 9 dwords to first image argument. 
@@ -193,14 +193,14 @@ !10 = !{!"kernel_arg_addr_space", i32 1, i32 1} !20 = !{!"kernel_arg_access_qual", !"read_only", !"none"} !21 = !{!"kernel_arg_access_qual", !"read_only", !"none"} -!30 = !{!"kernel_arg_type", !"image2d_t", !"int*"} -!31 = !{!"kernel_arg_type", !"image3d_t", !"int*"} -!40 = !{!"kernel_arg_base_type", !"image2d_t", !"int*"} -!41 = !{!"kernel_arg_base_type", !"image3d_t", !"int*"} +!30 = !{!"kernel_arg_type", !"image2d_t", !"int*"} +!31 = !{!"kernel_arg_type", !"image3d_t", !"int*"} +!40 = !{!"kernel_arg_base_type", !"image2d_t", !"int*"} +!41 = !{!"kernel_arg_base_type", !"image3d_t", !"int*"} !50 = !{!"kernel_arg_type_qual", !"", !""} !12 = !{!"kernel_arg_addr_space", i32 1, i32 0, i32 1, i32 1} !22 = !{!"kernel_arg_access_qual", !"read_only", !"none", !"write_only", !"none"} -!32 = !{!"kernel_arg_type", !"image3d_t", !"sampler_t", !"image2d_t", !"int*"} -!42 = !{!"kernel_arg_base_type", !"image3d_t", !"sampler_t", !"image2d_t", !"int*"} +!32 = !{!"kernel_arg_type", !"image3d_t", !"sampler_t", !"image2d_t", !"int*"} +!42 = !{!"kernel_arg_base_type", !"image3d_t", !"sampler_t", !"image2d_t", !"int*"} !52 = !{!"kernel_arg_type_qual", !"", !"", !"", !""} Index: test/CodeGen/AMDGPU/image-resource-id.ll =================================================================== --- test/CodeGen/AMDGPU/image-resource-id.ll +++ test/CodeGen/AMDGPU/image-resource-id.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; === 1 image arg, read_only =================================================== @@ -342,10 +342,10 @@ !110 = !{!"kernel_arg_addr_space", i32 1, i32 1} !120 = !{!"kernel_arg_access_qual", !"read_only", !"none"} !121 = !{!"kernel_arg_access_qual", !"write_only", !"none"} -!130 =
!{!"kernel_arg_type", !"image2d_t", !"int*"} -!131 = !{!"kernel_arg_type", !"image3d_t", !"int*"} -!140 = !{!"kernel_arg_base_type", !"image2d_t", !"int*"} -!141 = !{!"kernel_arg_base_type", !"image3d_t", !"int*"} +!130 = !{!"kernel_arg_type", !"image2d_t", !"int*"} +!131 = !{!"kernel_arg_type", !"image3d_t", !"int*"} +!140 = !{!"kernel_arg_base_type", !"image2d_t", !"int*"} +!141 = !{!"kernel_arg_base_type", !"image3d_t", !"int*"} !150 = !{!"kernel_arg_type_qual", !"", !""} !4 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, @@ -367,10 +367,10 @@ !112 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1} !122 = !{!"kernel_arg_access_qual", !"read_only", !"read_only", !"none"} !123 = !{!"kernel_arg_access_qual", !"write_only", !"write_only", !"none"} -!132 = !{!"kernel_arg_type", !"image2d_t", !"image2d_t", !"int*"} -!133 = !{!"kernel_arg_type", !"image3d_t", !"image3d_t", !"int*"} -!142 = !{!"kernel_arg_base_type", !"image2d_t", !"image2d_t", !"int*"} -!143 = !{!"kernel_arg_base_type", !"image3d_t", !"image3d_t", !"int*"} +!132 = !{!"kernel_arg_type", !"image2d_t", !"image2d_t", !"int*"} +!133 = !{!"kernel_arg_type", !"image3d_t", !"image3d_t", !"int*"} +!142 = !{!"kernel_arg_base_type", !"image2d_t", !"image2d_t", !"int*"} +!143 = !{!"kernel_arg_base_type", !"image3d_t", !"image3d_t", !"int*"} !152 = !{!"kernel_arg_type_qual", !"", !"", !""} !12 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, @@ -402,8 +402,8 @@ !125 = !{!"kernel_arg_access_qual", !"write_only", !"write_only", !"write_only", !"none"} !126 = !{!"kernel_arg_access_qual", !"write_only", !"read_only", !"read_only", !"none"} !127 = !{!"kernel_arg_access_qual", !"write_only", !"read_only", !"write_only", !"none"} -!134 = !{!"kernel_arg_type", !"image2d_t", !"image3d_t", !"image2d_t", !"int*"} -!135 = !{!"kernel_arg_type", !"image3d_t", !"image2d_t",
!"image3d_t", !"int*"} -!144 = !{!"kernel_arg_base_type", !"image2d_t", !"image3d_t", !"image2d_t", !"int*"} -!145 = !{!"kernel_arg_base_type", !"image3d_t", !"image2d_t", !"image3d_t", !"int*"} +!134 = !{!"kernel_arg_type", !"image2d_t", !"image3d_t", !"image2d_t", !"int*"} +!135 = !{!"kernel_arg_type", !"image3d_t", !"image2d_t", !"image3d_t", !"int*"} +!144 = !{!"kernel_arg_base_type", !"image2d_t", !"image3d_t", !"image2d_t", !"int*"} +!145 = !{!"kernel_arg_base_type", !"image3d_t", !"image2d_t", !"image3d_t", !"int*"} !154 = !{!"kernel_arg_type_qual", !"", !"", !"", !""} Index: test/CodeGen/AMDGPU/imm.ll =================================================================== --- test/CodeGen/AMDGPU/imm.ll +++ test/CodeGen/AMDGPU/imm.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; Use a 64-bit value with lo bits that can be represented as an inline constant ; GCN-LABEL: {{^}}i64_imm_inline_lo: Index: test/CodeGen/AMDGPU/imm16.ll =================================================================== --- test/CodeGen/AMDGPU/imm16.ll +++ test/CodeGen/AMDGPU/imm16.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -;
RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=tonga -mattr=-flat-for-global -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; FIXME: Merge into imm.ll Index: test/CodeGen/AMDGPU/immv216.ll =================================================================== --- test/CodeGen/AMDGPU/immv216.ll +++ test/CodeGen/AMDGPU/immv216.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; FIXME: Merge into imm.ll ; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_v2i16: Index: test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll 
=================================================================== --- test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll +++ test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s ; FIXME: Merge into indirect-addressing-si.ll Index: test/CodeGen/AMDGPU/indirect-addressing-si.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -1,7 +1,7 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=IDXMODE %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s ; Tests for indirect addressing on SI, which is implemented using dynamic ; indexing of vectors. Index: test/CodeGen/AMDGPU/infer-addrpace-pipeline.ll =================================================================== --- test/CodeGen/AMDGPU/infer-addrpace-pipeline.ll +++ test/CodeGen/AMDGPU/infer-addrpace-pipeline.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn--amdhsa -disable-output -disable-verify -debug-pass=Structure -O2 %s 2>&1 | FileCheck -check-prefix=GCN %s +; RUN: opt -mtriple=amdgcn--amdhsa-amdgiz -disable-output -disable-verify -debug-pass=Structure -O2 %s 2>&1 | FileCheck -check-prefix=GCN %s ; GCN: Function Integration/Inlining ; GCN: FunctionPass Manager Index: test/CodeGen/AMDGPU/infinite-loop-evergreen.ll =================================================================== --- test/CodeGen/AMDGPU/infinite-loop-evergreen.ll +++ test/CodeGen/AMDGPU/infinite-loop-evergreen.ll @@ -1,6 +1,6 @@ ; XFAIL: * ; REQUIRES: asserts -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck %s define amdgpu_kernel void @inf_loop_irreducible_cfg() nounwind { entry: Index: test/CodeGen/AMDGPU/infinite-loop.ll =================================================================== --- test/CodeGen/AMDGPU/infinite-loop.ll +++ test/CodeGen/AMDGPU/infinite-loop.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga 
-mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}infinite_loop: ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7 Index: test/CodeGen/AMDGPU/inline-asm.ll =================================================================== --- test/CodeGen/AMDGPU/inline-asm.ll +++ test/CodeGen/AMDGPU/inline-asm.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}inline_asm: ; CHECK: s_endpgm Index: test/CodeGen/AMDGPU/inline-attr.ll =================================================================== --- test/CodeGen/AMDGPU/inline-attr.ll +++ test/CodeGen/AMDGPU/inline-attr.ll @@ -1,6 +1,6 @@ -; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-unsafe-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=UNSAFE %s -; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-nans-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NONANS %s -; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NOINFS %s +; RUN: opt -mtriple=amdgcn--amdhsa-amdgiz -S -O3 -enable-unsafe-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=UNSAFE %s +; RUN: opt -mtriple=amdgcn--amdhsa-amdgiz -S -O3 -enable-no-nans-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NONANS %s +; RUN: opt -mtriple=amdgcn--amdhsa-amdgiz -S -O3 -enable-no-infs-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NOINFS %s ; GCN: define float @foo(float %x) local_unnamed_addr #0 { ; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 { Index: test/CodeGen/AMDGPU/inline-calls.ll 
=================================================================== --- test/CodeGen/AMDGPU/inline-calls.ll +++ test/CodeGen/AMDGPU/inline-calls.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s ; CHECK-NOT: {{^}}func: define internal fastcc i32 @func(i32 %a) { Index: test/CodeGen/AMDGPU/inline-constraints.ll =================================================================== --- test/CodeGen/AMDGPU/inline-constraints.ll +++ test/CodeGen/AMDGPU/inline-constraints.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s ; GCN-LABEL: {{^}}inline_reg_constraints: ; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] Index: test/CodeGen/AMDGPU/inlineasm-16.ll =================================================================== --- test/CodeGen/AMDGPU/inlineasm-16.ll +++ test/CodeGen/AMDGPU/inlineasm-16.ll @@ -1,6 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SICI %s -; RUN: 
not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SICI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SICI %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SICI %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; GCN-LABEL: {{^}}s_input_output_i16: ; SICI: error: couldn't allocate output register for constraint 's' Index: test/CodeGen/AMDGPU/inlineasm-illegal-type.ll =================================================================== --- test/CodeGen/AMDGPU/inlineasm-illegal-type.ll +++ test/CodeGen/AMDGPU/inlineasm-illegal-type.ll @@ -1,5 +1,6 @@ -; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=CI %s -; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; GCN: error: couldn't allocate output register for constraint 's' ; GCN: error: couldn't allocate input reg for constraint 's' Index: test/CodeGen/AMDGPU/inlineasm-packed.ll 
=================================================================== --- test/CodeGen/AMDGPU/inlineasm-packed.ll +++ test/CodeGen/AMDGPU/inlineasm-packed.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; GCN-LABEL: {{^}}inline_asm_input_v2i16: ; GCN: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}} Index: test/CodeGen/AMDGPU/input-mods.ll =================================================================== --- test/CodeGen/AMDGPU/input-mods.ll +++ test/CodeGen/AMDGPU/input-mods.ll @@ -1,5 +1,5 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG -;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s --check-prefix=EG +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman | FileCheck %s --check-prefix=CM ;EG-LABEL: {{^}}test: ;EG: EXP_IEEE * Index: test/CodeGen/AMDGPU/insert_subreg.ll =================================================================== --- test/CodeGen/AMDGPU/insert_subreg.ll +++ test/CodeGen/AMDGPU/insert_subreg.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; Test that INSERT_SUBREG instructions don't have non-register operands after ; instruction selection. 
@@ -8,8 +9,8 @@ ; CHECK-LABEL: test: define amdgpu_kernel void @test(i64 addrspace(1)* %out) { entry: - %tmp0 = alloca [16 x i32] - %tmp1 = ptrtoint [16 x i32]* %tmp0 to i32 + %tmp0 = alloca [16 x i32], addrspace(5) + %tmp1 = ptrtoint [16 x i32] addrspace(5)* %tmp0 to i32 %tmp2 = sext i32 %tmp1 to i64 store i64 %tmp2, i64 addrspace(1)* %out ret void Index: test/CodeGen/AMDGPU/internalize.ll =================================================================== --- test/CodeGen/AMDGPU/internalize.ll +++ test/CodeGen/AMDGPU/internalize.ll @@ -1,5 +1,5 @@ -; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPT %s -; RUN: opt -O0 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPTNONE %s +; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa-amdgiz -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPT %s +; RUN: opt -O0 -S -mtriple=amdgcn-unknown-amdhsa-amdgiz -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPTNONE %s ; OPT-NOT: gvar_unused ; OPTNONE: gvar_unused Index: test/CodeGen/AMDGPU/invalid-addrspacecast.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-addrspacecast.ll +++ test/CodeGen/AMDGPU/invalid-addrspacecast.ll @@ -1,4 +1,5 @@ -; RUN: not llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -mattr=-promote-alloca < %s 2>&1 | FileCheck -check-prefix=ERROR %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; ERROR: error: :0:0: in function use_group_to_global_addrspacecast void (i32 addrspace(3)*): invalid addrspacecast define amdgpu_kernel void 
@use_group_to_global_addrspacecast(i32 addrspace(3)* %ptr) { Index: test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll =================================================================== --- test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll +++ test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GatherAllAliases gives up on trying to analyze cases where the ; pointer may have been loaded from an aliased store, so make sure Index: test/CodeGen/AMDGPU/jump-address.ll =================================================================== --- test/CodeGen/AMDGPU/jump-address.ll +++ test/CodeGen/AMDGPU/jump-address.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; CHECK: JUMP @6 ; CHECK: EXPORT Index: test/CodeGen/AMDGPU/kcache-fold.ll =================================================================== --- test/CodeGen/AMDGPU/kcache-fold.ll +++ test/CodeGen/AMDGPU/kcache-fold.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck %s ; CHECK: {{^}}main1: ; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}} Index: test/CodeGen/AMDGPU/kernarg-stack-alignment.ll =================================================================== --- test/CodeGen/AMDGPU/kernarg-stack-alignment.ll +++ test/CodeGen/AMDGPU/kernarg-stack-alignment.ll @@ -1,4 +1,5 @@ -; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s +target datalayout = 
"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; Test that the alignment of kernel arguments does not impact the ; alignment of the stack @@ -6,39 +7,39 @@ ; CHECK-LABEL: {{^}}no_args: ; CHECK: ScratchSize: 5{{$}} define amdgpu_kernel void @no_args() { - %alloca = alloca i8 - store volatile i8 0, i8* %alloca + %alloca = alloca i8, addrspace(5) + store volatile i8 0, i8 addrspace(5)* %alloca ret void } ; CHECK-LABEL: {{^}}force_align32: ; CHECK: ScratchSize: 5{{$}} define amdgpu_kernel void @force_align32(<8 x i32>) { - %alloca = alloca i8 - store volatile i8 0, i8* %alloca + %alloca = alloca i8, addrspace(5) + store volatile i8 0, i8 addrspace(5)* %alloca ret void } ; CHECK-LABEL: {{^}}force_align64: ; CHECK: ScratchSize: 5{{$}} define amdgpu_kernel void @force_align64(<16 x i32>) { - %alloca = alloca i8 - store volatile i8 0, i8* %alloca + %alloca = alloca i8, addrspace(5) + store volatile i8 0, i8 addrspace(5)* %alloca ret void } ; CHECK-LABEL: {{^}}force_align128: ; CHECK: ScratchSize: 5{{$}} define amdgpu_kernel void @force_align128(<32 x i32>) { - %alloca = alloca i8 - store volatile i8 0, i8* %alloca + %alloca = alloca i8, addrspace(5) + store volatile i8 0, i8 addrspace(5)* %alloca ret void } ; CHECK-LABEL: {{^}}force_align256: ; CHECK: ScratchSize: 5{{$}} define amdgpu_kernel void @force_align256(<64 x i32>) { - %alloca = alloca i8 - store volatile i8 0, i8* %alloca + %alloca = alloca i8, addrspace(5) + store volatile i8 0, i8 addrspace(5)* %alloca ret void } Index: test/CodeGen/AMDGPU/knownbits-recursion.ll =================================================================== --- test/CodeGen/AMDGPU/knownbits-recursion.ll +++ test/CodeGen/AMDGPU/knownbits-recursion.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=GCN %s ; Check we can 
compile this test without infinite loop in the ; DAG.computeKnownBits() due to missing (Depth + 1) argument in Index: test/CodeGen/AMDGPU/large-alloca-compute.ll =================================================================== --- test/CodeGen/AMDGPU/large-alloca-compute.ll +++ test/CodeGen/AMDGPU/large-alloca-compute.ll @@ -1,8 +1,9 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s -; RUN: llc -march=amdgcn -mcpu=carrizo --show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s -; RUN: llc -march=amdgcn -mcpu=gfx900 --show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=ALL %s -; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s -mattr=-flat-for-global | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s -; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=carrizo --show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 --show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa-amdgiz < %s -mattr=-flat-for-global | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa-amdgiz -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s +target datalayout = 
"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; FIXME: align on alloca seems to be ignored for private_segment_alignment @@ -46,14 +47,14 @@ ; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s9 offen ; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s9 offen ; Scratch size = alloca size + emergency stack slot ; ALL: ; ScratchSize: 32772 define amdgpu_kernel void @large_alloca_compute_shader(i32 %x, i32 %y) #0 { - %large = alloca [8192 x i32], align 4 - %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 - store volatile i32 %x, i32* %gep - %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y - %val = load volatile i32, i32* %gep1 + %large = alloca [8192 x i32], align 4, addrspace(5) + %gep = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 8191 + store volatile i32 %x, i32 addrspace(5)* %gep + %gep1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 %y + %val = load volatile i32, i32 addrspace(5)* %gep1 store volatile i32 %val, i32 addrspace(1)* undef ret void } Index: test/CodeGen/AMDGPU/large-alloca-graphics.ll =================================================================== --- test/CodeGen/AMDGPU/large-alloca-graphics.ll +++ test/CodeGen/AMDGPU/large-alloca-graphics.ll @@ -1,6 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s -; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire < %s | FileCheck -check-prefix=GCN 
-check-prefix=CI -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=carrizo -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=ALL %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; ALL-LABEL: {{^}}large_alloca_pixel_shader: ; GCN-DAG: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 @@ -15,11 +16,11 @@ ; ALL: ; ScratchSize: 32772 define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 { - %large = alloca [8192 x i32], align 4 - %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 - store volatile i32 %x, i32* %gep - %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y - %val = load volatile i32, i32* %gep1 + %large = alloca [8192 x i32], align 4, addrspace(5) + %gep = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 8191 + store volatile i32 %x, i32 addrspace(5)* %gep + %gep1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 %y + %val = load volatile i32, i32 addrspace(5)* %gep1 store volatile i32 %val, i32 addrspace(1)* undef ret void } @@ -37,11 +38,11 @@ ; ALL: ; ScratchSize: 32772 define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #0 { - %large = alloca [8192 x i32], align 4 - %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 - store volatile i32 %x, i32* %gep - %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y - %val = load volatile i32, i32* %gep1 + %large = alloca [8192 x i32], align 4, addrspace(5) + %gep = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 8191 + store volatile i32 %x, i32 addrspace(5)* %gep + %gep1 
= getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %large, i32 0, i32 %y + %val = load volatile i32, i32 addrspace(5)* %gep1 store volatile i32 %val, i32 addrspace(1)* undef ret void } Index: test/CodeGen/AMDGPU/large-constant-initializer.ll =================================================================== --- test/CodeGen/AMDGPU/large-constant-initializer.ll +++ test/CodeGen/AMDGPU/large-constant-initializer.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti < %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s ; CHECK: s_endpgm @gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4 Index: test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll =================================================================== --- test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll +++ test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll @@ -1,24 +1,25 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck --check-prefix=SI --check-prefix=ALL %s -; RUN: opt -S -mcpu=tonga -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck --check-prefix=CI --check-prefix=ALL %s +; RUN: opt -S -mtriple=amdgcn-unknown-unknown-amdgiz -amdgpu-promote-alloca < %s | FileCheck --check-prefix=SI --check-prefix=ALL %s +; RUN: opt -S -mcpu=tonga -mtriple=amdgcn-unknown-unknown-amdgiz -amdgpu-promote-alloca < %s | FileCheck --check-prefix=CI --check-prefix=ALL %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; SI-NOT: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] undef, align 4 ; CI: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] undef, align 4 define amdgpu_kernel void 
@promote_alloca_size_63(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { entry: - %stack = alloca [5 x i32], align 4 + %stack = alloca [5 x i32], align 4, addrspace(5) %0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 - store i32 4, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0 + store i32 4, i32 addrspace(5)* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 - %2 = load i32, i32* %arrayidx10, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1 + store i32 5, i32 addrspace(5)* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0 + %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 store i32 %2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 - %3 = load i32, i32* %arrayidx12 + %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1 + %3 = load i32, i32 addrspace(5)* %arrayidx12 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 store i32 %3, i32 addrspace(1)* %arrayidx13 ret void @@ -28,19 +29,19 @@ define amdgpu_kernel void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #1 { entry: - %stack = alloca [5 x i32], align 4 + %stack = alloca [5 x i32], align 4, addrspace(5) %0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 - store i32 4, i32* 
%arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0 + store i32 4, i32 addrspace(5)* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 - %2 = load i32, i32* %arrayidx10, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1 + store i32 5, i32 addrspace(5)* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0 + %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 store i32 %2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 - %3 = load i32, i32* %arrayidx12 + %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1 + %3 = load i32, i32 addrspace(5)* %arrayidx12 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 store i32 %3, i32 addrspace(1)* %arrayidx13 ret void @@ -50,19 +51,19 @@ define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 { entry: - %stack = alloca [5 x i32], align 4 + %stack = alloca [5 x i32], align 4, addrspace(5) %0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 - store i32 4, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0 + store i32 4, i32 addrspace(5)* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [5 x i32], [5 x 
i32]* %stack, i32 0, i32 %1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 - %2 = load i32, i32* %arrayidx10, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1 + store i32 5, i32 addrspace(5)* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0 + %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 store i32 %2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 - %3 = load i32, i32* %arrayidx12 + %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1 + %3 = load i32, i32 addrspace(5)* %arrayidx12 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 store i32 %3, i32 addrspace(1)* %arrayidx13 ret void @@ -73,19 +74,19 @@ ; SI: alloca [5 x i32] define amdgpu_kernel void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 { entry: - %stack = alloca [5 x i32], align 4 + %stack = alloca [5 x i32], align 4, addrspace(5) %0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 - store i32 4, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0 + store i32 4, i32 addrspace(5)* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 - %2 = load i32, i32* %arrayidx10, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1 + store i32 5, i32 addrspace(5)* %arrayidx3, 
align 4 + %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0 + %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 store i32 %2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 - %3 = load i32, i32* %arrayidx12 + %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1 + %3 = load i32, i32 addrspace(5)* %arrayidx12 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 store i32 %3, i32 addrspace(1)* %arrayidx13 ret void @@ -96,19 +97,19 @@ ; SI: alloca [5 x i32] define amdgpu_kernel void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 { entry: - %stack = alloca [5 x i32], align 4 + %stack = alloca [5 x i32], align 4, addrspace(5) %0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 - store i32 4, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0 + store i32 4, i32 addrspace(5)* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 - %2 = load i32, i32* %arrayidx10, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1 + store i32 5, i32 addrspace(5)* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0 + %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 store i32 %2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 - %3 = load i32, i32* %arrayidx12 + %arrayidx12 
= getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1 + %3 = load i32, i32 addrspace(5)* %arrayidx12 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 store i32 %3, i32 addrspace(1)* %arrayidx13 ret void @@ -120,21 +121,21 @@ ; CI-NOT: alloca define amdgpu_kernel void @occupancy_6(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 { entry: - %stack = alloca [42 x i8], align 4 + %stack = alloca [42 x i8], align 4, addrspace(5) %tmp = load i8, i8 addrspace(1)* %in, align 1 %tmp4 = sext i8 %tmp to i64 - %arrayidx1 = getelementptr inbounds [42 x i8], [42 x i8]* %stack, i64 0, i64 %tmp4 - store i8 4, i8* %arrayidx1, align 1 + %arrayidx1 = getelementptr inbounds [42 x i8], [42 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4 + store i8 4, i8 addrspace(5)* %arrayidx1, align 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1 %tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1 %tmp5 = sext i8 %tmp1 to i64 - %arrayidx3 = getelementptr inbounds [42 x i8], [42 x i8]* %stack, i64 0, i64 %tmp5 - store i8 5, i8* %arrayidx3, align 1 - %arrayidx10 = getelementptr inbounds [42 x i8], [42 x i8]* %stack, i64 0, i64 0 - %tmp2 = load i8, i8* %arrayidx10, align 1 + %arrayidx3 = getelementptr inbounds [42 x i8], [42 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5 + store i8 5, i8 addrspace(5)* %arrayidx3, align 1 + %arrayidx10 = getelementptr inbounds [42 x i8], [42 x i8] addrspace(5)* %stack, i64 0, i64 0 + %tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1 store i8 %tmp2, i8 addrspace(1)* %out, align 1 - %arrayidx12 = getelementptr inbounds [42 x i8], [42 x i8]* %stack, i64 0, i64 1 - %tmp3 = load i8, i8* %arrayidx12, align 1 + %arrayidx12 = getelementptr inbounds [42 x i8], [42 x i8] addrspace(5)* %stack, i64 0, i64 1 + %tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1 %arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1 ret 
void @@ -144,21 +145,21 @@ ; ALL: alloca [43 x i8] define amdgpu_kernel void @occupancy_6_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 { entry: - %stack = alloca [43 x i8], align 4 + %stack = alloca [43 x i8], align 4, addrspace(5) %tmp = load i8, i8 addrspace(1)* %in, align 1 %tmp4 = sext i8 %tmp to i64 - %arrayidx1 = getelementptr inbounds [43 x i8], [43 x i8]* %stack, i64 0, i64 %tmp4 - store i8 4, i8* %arrayidx1, align 1 + %arrayidx1 = getelementptr inbounds [43 x i8], [43 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4 + store i8 4, i8 addrspace(5)* %arrayidx1, align 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1 %tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1 %tmp5 = sext i8 %tmp1 to i64 - %arrayidx3 = getelementptr inbounds [43 x i8], [43 x i8]* %stack, i64 0, i64 %tmp5 - store i8 5, i8* %arrayidx3, align 1 - %arrayidx10 = getelementptr inbounds [43 x i8], [43 x i8]* %stack, i64 0, i64 0 - %tmp2 = load i8, i8* %arrayidx10, align 1 + %arrayidx3 = getelementptr inbounds [43 x i8], [43 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5 + store i8 5, i8 addrspace(5)* %arrayidx3, align 1 + %arrayidx10 = getelementptr inbounds [43 x i8], [43 x i8] addrspace(5)* %stack, i64 0, i64 0 + %tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1 store i8 %tmp2, i8 addrspace(1)* %out, align 1 - %arrayidx12 = getelementptr inbounds [43 x i8], [43 x i8]* %stack, i64 0, i64 1 - %tmp3 = load i8, i8* %arrayidx12, align 1 + %arrayidx12 = getelementptr inbounds [43 x i8], [43 x i8] addrspace(5)* %stack, i64 0, i64 1 + %tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1 %arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1 ret void @@ -170,21 +171,21 @@ ; CI-NOT: alloca define amdgpu_kernel void @occupancy_8(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 { entry: - %stack = alloca [32 x i8], align 4 + %stack = alloca [32 x i8], align 4, 
addrspace(5) %tmp = load i8, i8 addrspace(1)* %in, align 1 %tmp4 = sext i8 %tmp to i64 - %arrayidx1 = getelementptr inbounds [32 x i8], [32 x i8]* %stack, i64 0, i64 %tmp4 - store i8 4, i8* %arrayidx1, align 1 + %arrayidx1 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4 + store i8 4, i8 addrspace(5)* %arrayidx1, align 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1 %tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1 %tmp5 = sext i8 %tmp1 to i64 - %arrayidx3 = getelementptr inbounds [32 x i8], [32 x i8]* %stack, i64 0, i64 %tmp5 - store i8 5, i8* %arrayidx3, align 1 - %arrayidx10 = getelementptr inbounds [32 x i8], [32 x i8]* %stack, i64 0, i64 0 - %tmp2 = load i8, i8* %arrayidx10, align 1 + %arrayidx3 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5 + store i8 5, i8 addrspace(5)* %arrayidx3, align 1 + %arrayidx10 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %stack, i64 0, i64 0 + %tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1 store i8 %tmp2, i8 addrspace(1)* %out, align 1 - %arrayidx12 = getelementptr inbounds [32 x i8], [32 x i8]* %stack, i64 0, i64 1 - %tmp3 = load i8, i8* %arrayidx12, align 1 + %arrayidx12 = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %stack, i64 0, i64 1 + %tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1 %arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1 ret void @@ -194,21 +195,21 @@ ; ALL: alloca [33 x i8] define amdgpu_kernel void @occupancy_8_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 { entry: - %stack = alloca [33 x i8], align 4 + %stack = alloca [33 x i8], align 4, addrspace(5) %tmp = load i8, i8 addrspace(1)* %in, align 1 %tmp4 = sext i8 %tmp to i64 - %arrayidx1 = getelementptr inbounds [33 x i8], [33 x i8]* %stack, i64 0, i64 %tmp4 - store i8 4, i8* %arrayidx1, align 1 + %arrayidx1 = getelementptr 
inbounds [33 x i8], [33 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4 + store i8 4, i8 addrspace(5)* %arrayidx1, align 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1 %tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1 %tmp5 = sext i8 %tmp1 to i64 - %arrayidx3 = getelementptr inbounds [33 x i8], [33 x i8]* %stack, i64 0, i64 %tmp5 - store i8 5, i8* %arrayidx3, align 1 - %arrayidx10 = getelementptr inbounds [33 x i8], [33 x i8]* %stack, i64 0, i64 0 - %tmp2 = load i8, i8* %arrayidx10, align 1 + %arrayidx3 = getelementptr inbounds [33 x i8], [33 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5 + store i8 5, i8 addrspace(5)* %arrayidx3, align 1 + %arrayidx10 = getelementptr inbounds [33 x i8], [33 x i8] addrspace(5)* %stack, i64 0, i64 0 + %tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1 store i8 %tmp2, i8 addrspace(1)* %out, align 1 - %arrayidx12 = getelementptr inbounds [33 x i8], [33 x i8]* %stack, i64 0, i64 1 - %tmp3 = load i8, i8* %arrayidx12, align 1 + %arrayidx12 = getelementptr inbounds [33 x i8], [33 x i8] addrspace(5)* %stack, i64 0, i64 1 + %tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1 %arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1 ret void @@ -220,21 +221,21 @@ ; CI-NOT: alloca define amdgpu_kernel void @occupancy_9(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 { entry: - %stack = alloca [28 x i8], align 4 + %stack = alloca [28 x i8], align 4, addrspace(5) %tmp = load i8, i8 addrspace(1)* %in, align 1 %tmp4 = sext i8 %tmp to i64 - %arrayidx1 = getelementptr inbounds [28 x i8], [28 x i8]* %stack, i64 0, i64 %tmp4 - store i8 4, i8* %arrayidx1, align 1 + %arrayidx1 = getelementptr inbounds [28 x i8], [28 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4 + store i8 4, i8 addrspace(5)* %arrayidx1, align 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1 %tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1 %tmp5 = 
sext i8 %tmp1 to i64 - %arrayidx3 = getelementptr inbounds [28 x i8], [28 x i8]* %stack, i64 0, i64 %tmp5 - store i8 5, i8* %arrayidx3, align 1 - %arrayidx10 = getelementptr inbounds [28 x i8], [28 x i8]* %stack, i64 0, i64 0 - %tmp2 = load i8, i8* %arrayidx10, align 1 + %arrayidx3 = getelementptr inbounds [28 x i8], [28 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5 + store i8 5, i8 addrspace(5)* %arrayidx3, align 1 + %arrayidx10 = getelementptr inbounds [28 x i8], [28 x i8] addrspace(5)* %stack, i64 0, i64 0 + %tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1 store i8 %tmp2, i8 addrspace(1)* %out, align 1 - %arrayidx12 = getelementptr inbounds [28 x i8], [28 x i8]* %stack, i64 0, i64 1 - %tmp3 = load i8, i8* %arrayidx12, align 1 + %arrayidx12 = getelementptr inbounds [28 x i8], [28 x i8] addrspace(5)* %stack, i64 0, i64 1 + %tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1 %arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1 ret void @@ -244,21 +245,21 @@ ; ALL: alloca [29 x i8] define amdgpu_kernel void @occupancy_9_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 { entry: - %stack = alloca [29 x i8], align 4 + %stack = alloca [29 x i8], align 4, addrspace(5) %tmp = load i8, i8 addrspace(1)* %in, align 1 %tmp4 = sext i8 %tmp to i64 - %arrayidx1 = getelementptr inbounds [29 x i8], [29 x i8]* %stack, i64 0, i64 %tmp4 - store i8 4, i8* %arrayidx1, align 1 + %arrayidx1 = getelementptr inbounds [29 x i8], [29 x i8] addrspace(5)* %stack, i64 0, i64 %tmp4 + store i8 4, i8 addrspace(5)* %arrayidx1, align 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 1 %tmp1 = load i8, i8 addrspace(1)* %arrayidx2, align 1 %tmp5 = sext i8 %tmp1 to i64 - %arrayidx3 = getelementptr inbounds [29 x i8], [29 x i8]* %stack, i64 0, i64 %tmp5 - store i8 5, i8* %arrayidx3, align 1 - %arrayidx10 = getelementptr inbounds [29 x i8], [29 x i8]* %stack, i64 0, i64 0 - %tmp2 = 
load i8, i8* %arrayidx10, align 1 + %arrayidx3 = getelementptr inbounds [29 x i8], [29 x i8] addrspace(5)* %stack, i64 0, i64 %tmp5 + store i8 5, i8 addrspace(5)* %arrayidx3, align 1 + %arrayidx10 = getelementptr inbounds [29 x i8], [29 x i8] addrspace(5)* %stack, i64 0, i64 0 + %tmp2 = load i8, i8 addrspace(5)* %arrayidx10, align 1 store i8 %tmp2, i8 addrspace(1)* %out, align 1 - %arrayidx12 = getelementptr inbounds [29 x i8], [29 x i8]* %stack, i64 0, i64 1 - %tmp3 = load i8, i8* %arrayidx12, align 1 + %arrayidx12 = getelementptr inbounds [29 x i8], [29 x i8] addrspace(5)* %stack, i64 0, i64 1 + %tmp3 = load i8, i8 addrspace(5)* %arrayidx12, align 1 %arrayidx13 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1 store i8 %tmp3, i8 addrspace(1)* %arrayidx13, align 1 ret void Index: test/CodeGen/AMDGPU/lds-initializer.ll =================================================================== --- test/CodeGen/AMDGPU/lds-initializer.ll +++ test/CodeGen/AMDGPU/lds-initializer.ll @@ -1,5 +1,5 @@ -; RUN: not llc -march=amdgcn -mcpu=tahiti < %s 2>&1 | FileCheck %s -; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s 2>&1 | FileCheck %s ; CHECK: in function load_init_lds_global{{.*}}: unsupported initializer for address space Index: test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll =================================================================== --- test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll +++ test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Make sure that m0 is not reinitialized in the loop. 
Index: test/CodeGen/AMDGPU/lds-oqap-crash.ll =================================================================== --- test/CodeGen/AMDGPU/lds-oqap-crash.ll +++ test/CodeGen/AMDGPU/lds-oqap-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs | FileCheck %s ; The test is for a bug in R600EmitClauseMarkers.cpp where this pass ; was searching for a use of the OQAP register in order to determine Index: test/CodeGen/AMDGPU/lds-output-queue.ll =================================================================== --- test/CodeGen/AMDGPU/lds-output-queue.ll +++ test/CodeGen/AMDGPU/lds-output-queue.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s ; ; This test checks that the lds input queue will is empty at the end of ; the ALU clause. Index: test/CodeGen/AMDGPU/lds-size.ll =================================================================== --- test/CodeGen/AMDGPU/lds-size.ll +++ test/CodeGen/AMDGPU/lds-size.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=ALL -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=ALL -check-prefix=EG %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=ALL -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=ALL -check-prefix=EG %s ; This test makes sure we do not double count global values when they are ; used in different basic blocks. 
Index: test/CodeGen/AMDGPU/lds-zero-initializer.ll =================================================================== --- test/CodeGen/AMDGPU/lds-zero-initializer.ll +++ test/CodeGen/AMDGPU/lds-zero-initializer.ll @@ -1,5 +1,5 @@ -; RUN: not llc -march=amdgcn -mcpu=tahiti < %s 2>&1 | FileCheck %s -; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s 2>&1 | FileCheck %s ; CHECK: in function load_zeroinit_lds_global{{.*}}: unsupported initializer for address space Index: test/CodeGen/AMDGPU/legalizedag-bug-expand-setcc.ll =================================================================== --- test/CodeGen/AMDGPU/legalizedag-bug-expand-setcc.ll +++ test/CodeGen/AMDGPU/legalizedag-bug-expand-setcc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; This tests a bug where LegalizeDAG was not checking the target's ; BooleanContents value and always using one for true, when expanding Index: test/CodeGen/AMDGPU/lit.local.cfg =================================================================== --- test/CodeGen/AMDGPU/lit.local.cfg +++ test/CodeGen/AMDGPU/lit.local.cfg @@ -1,2 +1,3 @@ if not 'AMDGPU' in config.root.targets: config.unsupported = True +config.suffixes = ['.ll'] Index: test/CodeGen/AMDGPU/literals.ll =================================================================== --- test/CodeGen/AMDGPU/literals.ll +++ test/CodeGen/AMDGPU/literals.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; Test using an integer literal constant. 
; Generated ASM should be: Index: test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll +++ test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}kill_gs_const: ; SI-NOT: v_cmpx_le_f32 Index: test/CodeGen/AMDGPU/llvm.SI.load.dword.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.SI.load.dword.ll +++ test/CodeGen/AMDGPU/llvm.SI.load.dword.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s ; Example of a simple geometry shader loading vertex attributes from the ; ESGS ring buffer Index: test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll +++ test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll @@ -1,5 +1,5 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck %s 
+;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}test1: ;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:32 glc slc Index: test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.alignbit(i32, i32, i32) #0 declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll @@ -1,13 +1,13 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2 declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32* nocapture, i32, i32, i32, i1) #2 declare i64 
@llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2 declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64* nocapture, i64, i32, i32, i1) #2 declare i32 @llvm.amdgcn.workitem.id.x() #1 @@ -137,59 +137,59 @@ ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}} -define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 { - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(4)* %out +define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 { + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, i32* %out ret void } ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}} -define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 { - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(4)* %out +define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr) #0 { + %gep = getelementptr i32, i32* %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, i32* %out ret void } ; FUNC-LABEL: {{^}}flat_atomic_dec_noret_i32: ; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}} -define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32 
addrspace(4)* %ptr) nounwind { - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind { + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) ret void } ; FUNC-LABEL: {{^}}flat_atomic_dec_noret_i32_offset: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}} -define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind { + %gep = getelementptr i32, i32* %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) ret void } ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32_offset_addr64: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}} -define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 %id - %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(4)* %out.gep + %gep.tid = getelementptr i32, i32* %ptr, i32 %id + %out.gep = getelementptr i32, i32* %out, i32 %id + %gep = getelementptr i32, i32* %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32* %gep, i32 42, i32 0, i32 0, i1 
false) + store i32 %result, i32* %out.gep ret void } ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32_offset_addr64: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}} -define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32 addrspace(4)* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id - %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i32, i32* %ptr, i32 %id + %gep = getelementptr i32, i32* %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) ret void } @@ -197,9 +197,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}} -define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 { - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(4)* %out +define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 { + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, i64* %out ret void } @@ -207,10 +207,10 @@ ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}} -define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 { - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4 - %result = call 
i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(4)* %out +define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) #0 { + %gep = getelementptr i64, i64* %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, i64* %out ret void } @@ -218,8 +218,8 @@ ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}} -define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64 addrspace(4)* %ptr) nounwind { - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind { + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) ret void } @@ -227,9 +227,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}} -define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64 addrspace(4)* %ptr) nounwind { - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind { + %gep = getelementptr i64, i64* %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) ret void } @@ -237,13 +237,13 @@ ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}} -define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 
addrspace(4)* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id - %out.gep = getelementptr i64, i64 addrspace(4)* %out, i32 %id - %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(4)* %out.gep + %gep.tid = getelementptr i64, i64* %ptr, i32 %id + %out.gep = getelementptr i64, i64* %out, i32 %id + %gep = getelementptr i64, i64* %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, i64* %out.gep ret void } @@ -251,11 +251,11 @@ ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}} -define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id - %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i64, i64* %ptr, i32 %id + %gep = getelementptr i64, i64* %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) ret void } Index: test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll @@ -1,13 +1,13 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck 
-check-prefix=GCN -check-prefix=CI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2 declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32* nocapture, i32, i32, i32, i1) #2 declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2 declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64* nocapture, i64, i32, i32, i1) #2 declare i32 @llvm.amdgcn.workitem.id.x() #1 @@ -238,59 +238,59 @@ ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}} -define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 { - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(4)* %out +define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 { + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, i32* %out ret void } ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset: ; GCN: 
v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}} -define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 { - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(4)* %out +define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 { + %gep = getelementptr i32, i32* %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, i32* %out ret void } ; FUNC-LABEL: {{^}}flat_atomic_inc_noret_i32: ; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}} -define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32 addrspace(4)* %ptr) nounwind { - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %ptr, i32 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind { + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) ret void } ; FUNC-LABEL: {{^}}flat_atomic_inc_noret_i32_offset: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}} -define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { - %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind { + %gep = getelementptr i32, i32* %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) ret void } ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32_offset_addr64: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, 
[[K]] glc{{$}} -define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 %id - %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(4)* %out.gep + %gep.tid = getelementptr i32, i32* %ptr, i32 %id + %out.gep = getelementptr i32, i32* %out, i32 %id + %gep = getelementptr i32, i32* %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, i32* %out.gep ret void } ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32_offset_addr64: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}} -define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32 addrspace(4)* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32 addrspace(4)* %ptr, i32 %id - %gep = getelementptr i32, i32 addrspace(4)* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i32, i32* %ptr, i32 %id + %gep = getelementptr i32, i32* %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) ret void } @@ -313,9 +313,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}} -define amdgpu_kernel void 
@flat_atomic_inc_ret_i64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 { - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(4)* %out +define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 { + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, i64* %out ret void } @@ -323,10 +323,10 @@ ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}} -define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 { - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(4)* %out +define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 { + %gep = getelementptr i64, i64* %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, i64* %out ret void } @@ -334,8 +334,8 @@ ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}} -define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64 addrspace(4)* %ptr) nounwind { - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %ptr, i64 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind { + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) ret void } @@ -343,9 +343,9 @@ ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_inc_x2 
v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}} -define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64 addrspace(4)* %ptr) nounwind { - %gep = getelementptr i64, i64 addrspace(4)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind { + %gep = getelementptr i64, i64* %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) ret void } @@ -353,13 +353,13 @@ ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}} -define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id - %out.gep = getelementptr i64, i64 addrspace(4)* %out, i32 %id - %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(4)* %out.gep + %gep.tid = getelementptr i64, i64* %ptr, i32 %id + %out.gep = getelementptr i64, i64* %out, i32 %id + %gep = getelementptr i64, i64* %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, i64* %out.gep ret void } @@ -367,11 +367,11 @@ ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}} -define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 { +define amdgpu_kernel void 
@flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64 addrspace(4)* %ptr, i32 %id - %gep = getelementptr i64, i64 addrspace(4)* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i64, i64* %ptr, i32 %id + %gep = getelementptr i64, i64* %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) ret void } Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -1,5 +1,6 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=SICI -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=VI +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=SICI +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=VI +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ;CHECK-LABEL: {{^}}test1: ;CHECK: buffer_atomic_swap v0, off, s[0:3], 0 glc Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll @@ -1,5 +1,5 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=SICI -;RUN: llc < %s -march=amdgcn -mcpu=tonga 
-verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=VI +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=SICI +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=VI ;CHECK-LABEL: {{^}}buffer_load: ;CHECK: buffer_load_format_xyzw v[0:3], off, s[0:3], 0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll @@ -1,5 +1,5 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=SICI -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=VI +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=SICI +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=VI ;CHECK-LABEL: {{^}}buffer_load: ;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll @@ -1,5 +1,6 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck %s +target datalayout = 
"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ;CHECK-LABEL: {{^}}buffer_store: ;CHECK: buffer_store_format_xyzw v[0:3], off, s[0:3], 0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll @@ -1,5 +1,6 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck %s +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ;CHECK-LABEL: {{^}}buffer_store: ;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare void @llvm.amdgcn.buffer.wbinvl1() #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll 
=================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=SI %s declare void @llvm.amdgcn.buffer.wbinvl1.sc() #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare void @llvm.amdgcn.buffer.wbinvl1.vol() #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.fabs.f16(half %a) declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b) Index: 
test/CodeGen/AMDGPU/llvm.amdgcn.class.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.class.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.class.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i1 @llvm.amdgcn.class.f32(float, i32) #1 declare i1 @llvm.amdgcn.class.f64(double, i32) #1 Index: test/CodeGen/AMDGPU/llvm.amdgcn.cos.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.cos.f16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.cos.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.amdgcn.cos.f16(half %a) Index: test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s declare float @llvm.amdgcn.cos.f32(float) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll 
=================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare float @llvm.amdgcn.cubeid(float, float, float) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare float @llvm.amdgcn.cubema(float, float, float) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: 
llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare float @llvm.amdgcn.cubesc(float, float, float) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare float @llvm.amdgcn.cubetc(float, float, float) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji 
-verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}s_cvt_pkrtz_v2f16_f32: ; GCN-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x{{b|2c}} Index: test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i64 @llvm.amdgcn.dispatch.id() #1 Index: test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: not llc -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: not llc -mtriple=amdgcn-unknown-unknown-amdgiz -mcpu=kaveri -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; ERROR: in function test{{.*}}: unsupported hsa intrinsic without hsa target Index: test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.amdgcn.div.fixup.f16(half %a, half %b, half %c) Index: test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s declare float @llvm.amdgcn.div.fixup.f32(float, float, float) nounwind readnone declare double @llvm.amdgcn.div.fixup.f64(double, double, double) nounwind readnone Index: test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=SI %s -; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=SI %s +; XUN: llc -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=VI %s ; FIXME: Enable for VI. Index: test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) #1 Index: test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.amdgcn.ds.permute(i32, i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii 
-verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 declare void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1 declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1 Index: test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0 declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s declare float @llvm.amdgcn.fdiv.fast(float, float) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_fmed3_f16: ; GCN: v_med3_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_fmed3: ; GCN: v_med3_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_mul_legacy_f32: Index: test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.amdgcn.fract.f16(half %a) Index: test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s declare float @llvm.amdgcn.fract.f32(float) #0 declare double @llvm.amdgcn.fract.f64(double) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a) Index: test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga 
-mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare float @llvm.fabs.f32(float) #0 declare double @llvm.fabs.f64(double) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.amdgcn.frexp.mant.f16(half %a) Index: test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare float @llvm.fabs.f32(float) #0 declare double @llvm.fabs.f64(double) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll @@ 
-1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s @lds0 = addrspace(3) global [512 x float] undef, align 4 @lds1 = addrspace(3) global [256 x float] undef, align 4 Index: test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.ll @@ -1,5 +1,5 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=SI -;RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=VI +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz 
-mcpu=verde -show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=SI +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=VI ;CHECK-LABEL: {{^}}image_atomic_swap: ;SI: image_atomic_swap v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x04,0x00,0x00] Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s ; GCN-LABEL: {{^}}gather4_v2: ; GCN: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s ; GCN-LABEL: {{^}}getlod: ; GCN: 
image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; GCN-LABEL: {{^}}image_load_v4i32: ; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s ; GCN-LABEL: {{^}}sample: ; GCN: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.ll 
=================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s ; GCN-LABEL: {{^}}sample: ; GCN: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf Index: test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.hsa.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.hsa.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.hsa.ll @@ -1,4 +1,4 @@ -; RUN: not llc -mtriple=amdgcn-amd-amdhsa < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa-amdgiz < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; ERROR: in function test_kernel{{.*}}: non-hsa intrinsic with hsa target define amdgpu_kernel void @test_kernel(i32 addrspace(1)* %out) #1 { Index: test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +target datalayout = 
"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; FIXME: Requires stack object to not assert ; GCN-LABEL: {{^}}test_ps: @@ -8,8 +9,8 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: ; return define amdgpu_ps i32 @test_ps() #1 { - %alloca = alloca i32 - store volatile i32 0, i32* %alloca + %alloca = alloca i32, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %alloca %implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr() %buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)* %value = load volatile i32, i32 addrspace(2)* %buffer_ptr @@ -21,8 +22,8 @@ ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s2 offset:4 ; GCN: s_load_dword s0, s[0:1], 0x0 define amdgpu_cs i32 @test_cs() #1 { - %alloca = alloca i32 - store volatile i32 0, i32* %alloca + %alloca = alloca i32, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %alloca %implicit_buffer_ptr = call i8 addrspace(2)* @llvm.amdgcn.implicit.buffer.ptr() %buffer_ptr = bitcast i8 addrspace(2)* %implicit_buffer_ptr to i32 addrspace(2)* %value = load volatile i32, i32 addrspace(2)* %buffer_ptr Index: test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN ; GCN-LABEL: {{^}}full_mask: ; GCN: s_mov_b64 exec, -1 Index: test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=verde 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s -; RUN: llc -march=amdgcn -mcpu=kabini -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,16BANK %s -; RUN: llc -march=amdgcn -mcpu=stoney -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,16BANK %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=kabini -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,16BANK %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=stoney -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,16BANK %s ; GCN-LABEL: {{^}}v_interp: ; GCN-NOT: s_wqm Index: test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s ; SI-LABEL: {{^}}gs_const: ; SI-NOT: v_cmpx Index: test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false 
-march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.amdgcn.ldexp.f16(half %a, i32 %b) Index: test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare float @llvm.amdgcn.ldexp.f32(float, i32) nounwind readnone declare double @llvm.amdgcn.ldexp.f64(double, i32) nounwind readnone Index: test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.lerp(i32, i32, i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll =================================================================== --- 
test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERR %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERR %s ; ERR: intrinsic not supported on subtarget Index: test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}mbcnt_intrinsics: ; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[LO:v[0-9]+]], -1, 0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-OPT %s -; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI 
-check-prefix=VI-NOOPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-OPT %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOOPT %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; FIXME: The register allocator / scheduler should be able to avoid these hazards. Index: test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.msad.u8(i32, i32, i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}test1: ; CHECK: s_mov_b64 s[0:1], exec Index: test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0 
Index: test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: not llc -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: not llc -mtriple=amdgcn-unknown-unknown-amdgiz -mcpu=kaveri -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; ERROR: in function test{{.*}}: unsupported hsa intrinsic without hsa target Index: test/CodeGen/AMDGPU/llvm.amdgcn.rcp.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.rcp.f16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.rcp.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.amdgcn.rcp.f16(half %a) Index: test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: not llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji 
-verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; ERROR: error: :0:0: in function rcp_legacy_f32 void (float addrspace(1)*, float): intrinsic not supported on subtarget Index: test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.amdgcn.rcp.f32(float) #0 declare double @llvm.amdgcn.rcp.f64(double) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.amdgcn.readfirstlane(i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.amdgcn.readlane(i32, i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs 
< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s declare float @llvm.amdgcn.rsq.clamp.f32(float) #1 declare double @llvm.amdgcn.rsq.clamp.f64(double) #1 Index: test/CodeGen/AMDGPU/llvm.amdgcn.rsq.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.rsq.f16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.rsq.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.amdgcn.rsq.f16(half %a) Index: test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.amdgcn.rsq.legacy(float) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI 
-check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare float @llvm.amdgcn.rsq.f32(float) #0 declare double @llvm.amdgcn.rsq.f64(double) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=NOAUTO %s -; RUN: llc -march=amdgcn -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=AUTO %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=NOAUTO %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=AUTO %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=NOAUTO %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=AUTO %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=NOAUTO %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=+auto-waitcnt-before-barrier 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=AUTO %s ; GCN-LABEL: {{^}}test_barrier: ; GFX8: buffer_store_dword Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare void @llvm.amdgcn.s.dcache.inv() #0 declare void @llvm.amdgcn.s.waitcnt(i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare void @llvm.amdgcn.s.dcache.inv.vol() #0 declare void @llvm.amdgcn.s.waitcnt(i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll 
+++ test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s declare void @llvm.amdgcn.s.dcache.wb() #0 declare void @llvm.amdgcn.s.waitcnt(i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s declare void @llvm.amdgcn.s.dcache.wb.vol() #0 declare void @llvm.amdgcn.s.waitcnt(i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.decperflevel.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.decperflevel.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.decperflevel.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare void @llvm.amdgcn.s.decperflevel(i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i64 @llvm.amdgcn.s.getpc() #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}s_getreg_test: ; GCN: s_getreg_b32 s{{[0-9]+}}, hwreg(HW_REG_LDS_ALLOC, 8, 23) Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.incperflevel.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.incperflevel.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.incperflevel.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare void @llvm.amdgcn.s.incperflevel(i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll +++ 
test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s declare i64 @llvm.amdgcn.s.memrealtime() #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s declare i64 @llvm.amdgcn.s.memtime() #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare void @llvm.amdgcn.s.sleep(i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll =================================================================== --- 
test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}test1: ; CHECK: image_store Index: test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.sad.hi.u8(i32, i32, i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.sad.u16(i32, i32, i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll 
=================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.sad.u8(i32, i32, i32) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}bfe_i32_arg_arg_arg: ; GCN: v_bfe_i32 Index: test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll @@ -1,5 +1,5 @@ -;RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs 
< %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +;RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +;RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}test_interrupt: ; GCN: s_mov_b32 m0, 0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s ; GCN-LABEL: {{^}}set_inactive: Index: test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < 
%s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.sffbh.i32(i32) #1 Index: test/CodeGen/AMDGPU/llvm.amdgcn.sin.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.sin.f16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.sin.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.amdgcn.sin.f16(half %a) Index: test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s declare float @llvm.amdgcn.sin.f32(float) #0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll @@ -1,5 +1,5 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck 
-check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}tbuffer_load: ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll @@ -1,5 +1,6 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; GCN-LABEL: {{^}}tbuffer_store: ; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:12, nfmt:2, 0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare double @llvm.amdgcn.trig.preop.f64(double, i32) nounwind readnone Index: test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}bfe_u32_arg_arg_arg: ; GCN: v_bfe_u32 Index: test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-OPT %s -; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOOPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-OPT %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global 
-verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOOPT %s ; VI-LABEL: {{^}}dpp_test: ; VI: v_mov_b32_e32 v0, s{{[0-9]+}} Index: test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_wave_barrier: ; GCN-DAG: ; wave barrier Index: test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=CHECK %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=CHECK %s ;CHECK-LABEL: {{^}}ret: ;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1 Index: test/CodeGen/AMDGPU/llvm.amdgpu.kilp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgpu.kilp.ll +++ test/CodeGen/AMDGPU/llvm.amdgpu.kilp.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}kilp_gs_const: ; SI: s_mov_b64 exec, 0 Index: test/CodeGen/AMDGPU/llvm.ceil.f16.ll 
=================================================================== --- test/CodeGen/AMDGPU/llvm.ceil.f16.ll +++ test/CodeGen/AMDGPU/llvm.ceil.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.ceil.f16(half %a) declare <2 x half> @llvm.ceil.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.cos.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.cos.f16.ll +++ test/CodeGen/AMDGPU/llvm.cos.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.cos.f16(half %a) declare <2 x half> @llvm.cos.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.cos.ll 
=================================================================== --- test/CodeGen/AMDGPU/llvm.cos.ll +++ test/CodeGen/AMDGPU/llvm.cos.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=amdgcn | FileCheck %s -check-prefix=SI -check-prefix=FUNC -; RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s -check-prefix=SI -check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga | FileCheck %s -check-prefix=SI -check-prefix=FUNC +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC ;FUNC-LABEL: test ;EG: MULADD_IEEE * Index: test/CodeGen/AMDGPU/llvm.exp2.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.exp2.f16.ll +++ test/CodeGen/AMDGPU/llvm.exp2.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.exp2.f16(half %a) declare <2 x half> @llvm.exp2.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.exp2.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.exp2.ll +++ test/CodeGen/AMDGPU/llvm.exp2.ll @@ -1,7 
+1,7 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC -;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC -;RUN: llc < %s -march=amdgcn | FileCheck %s --check-prefix=SI --check-prefix=FUNC -;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=SI --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz | FileCheck %s --check-prefix=SI --check-prefix=FUNC +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga | FileCheck %s --check-prefix=SI --check-prefix=FUNC ;FUNC-LABEL: {{^}}test: ;EG: EXP_IEEE Index: test/CodeGen/AMDGPU/llvm.floor.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.floor.f16.ll +++ test/CodeGen/AMDGPU/llvm.floor.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.floor.f16(half %a) declare <2 x half> @llvm.floor.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.fma.f16.ll =================================================================== --- 
test/CodeGen/AMDGPU/llvm.fma.f16.ll +++ test/CodeGen/AMDGPU/llvm.fma.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.fma.f16(half %a, half %b, half %c) declare <2 x half> @llvm.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) Index: test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll +++ test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll @@ -1,7 +1,7 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s 
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s declare half @llvm.fmuladd.f16(half %a, half %b, half %c) declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) Index: test/CodeGen/AMDGPU/llvm.log2.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.log2.f16.ll +++ test/CodeGen/AMDGPU/llvm.log2.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=VI %s declare half @llvm.log2.f16(half %a) declare <2 x half> @llvm.log2.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.log2.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.log2.ll +++ test/CodeGen/AMDGPU/llvm.log2.ll @@ -1,7 +1,7 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC -;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC -;RUN: llc < %s -march=amdgcn -mcpu=tahiti | FileCheck %s --check-prefix=SI --check-prefix=FUNC -;RUN: llc < %s -march=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=SI --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti | FileCheck %s --check-prefix=SI --check-prefix=FUNC +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga | FileCheck %s --check-prefix=SI --check-prefix=FUNC ;FUNC-LABEL: {{^}}test: ;EG: LOG_IEEE Index: test/CodeGen/AMDGPU/llvm.maxnum.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.maxnum.f16(half %a, half %b) declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b) Index: test/CodeGen/AMDGPU/llvm.memcpy.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.memcpy.ll +++ test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind Index: test/CodeGen/AMDGPU/llvm.minnum.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.minnum.f16(half %a, half %b) declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b) Index: test/CodeGen/AMDGPU/llvm.pow.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.pow.ll +++ test/CodeGen/AMDGPU/llvm.pow.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;CHECK-LABEL: test1: ;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, Index: test/CodeGen/AMDGPU/llvm.r600.cube.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.r600.cube.ll +++ test/CodeGen/AMDGPU/llvm.r600.cube.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck %s ; CHECK-LABEL: {{^}}cube: ; CHECK: CUBE T{{[0-9]}}.X Index: test/CodeGen/AMDGPU/llvm.r600.dot4.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.r600.dot4.ll +++ test/CodeGen/AMDGPU/llvm.r600.dot4.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s declare float @llvm.r600.dot4(<4 x float>, <4 x float>) nounwind readnone Index: test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll +++ test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG %s ; EG-LABEL: 
{{^}}test_group_barrier: ; EG: GROUP_BARRIER Index: test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll +++ test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}local_size_x: Index: test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll +++ test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s declare float @llvm.r600.recipsqrt.clamped.f32(float) nounwind readnone Index: test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll =================================================================== 
--- test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll +++ test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s declare float @llvm.r600.recipsqrt.ieee.f32(float) nounwind readnone Index: test/CodeGen/AMDGPU/llvm.r600.tex.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.r600.tex.ll +++ test/CodeGen/AMDGPU/llvm.r600.tex.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN Index: test/CodeGen/AMDGPU/llvm.rint.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.rint.f16.ll +++ test/CodeGen/AMDGPU/llvm.rint.f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=VI -check-prefix=GFX89 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji 
-mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=VI -check-prefix=GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s declare half @llvm.rint.f16(half %a) declare <2 x half> @llvm.rint.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.rint.f64.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.rint.f64.ll +++ test/CodeGen/AMDGPU/llvm.rint.f64.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}rint_f64: ; CI: v_rndne_f64_e32 Index: test/CodeGen/AMDGPU/llvm.rint.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.rint.ll +++ test/CodeGen/AMDGPU/llvm.rint.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 
-check-prefix=FUNC +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC ; FUNC-LABEL: {{^}}rint_f32: ; R600: RNDNE Index: test/CodeGen/AMDGPU/llvm.round.f64.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.round.f64.ll +++ test/CodeGen/AMDGPU/llvm.round.f64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}round_f64: ; SI: s_endpgm Index: test/CodeGen/AMDGPU/llvm.round.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.round.ll +++ test/CodeGen/AMDGPU/llvm.round.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global < 
%s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}round_f32: ; GCN-DAG: s_load_dword [[SX:s[0-9]+]] Index: test/CodeGen/AMDGPU/llvm.sin.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.sin.f16.ll +++ test/CodeGen/AMDGPU/llvm.sin.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.sin.f16(half %a) declare <2 x half> @llvm.sin.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.sin.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.sin.ll +++ test/CodeGen/AMDGPU/llvm.sin.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: sin_f32 ; EG: MULADD_IEEE * Index: 
test/CodeGen/AMDGPU/llvm.sqrt.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.sqrt.f16.ll +++ test/CodeGen/AMDGPU/llvm.sqrt.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.sqrt.f16(half %a) declare <2 x half> @llvm.sqrt.v2f16(<2 x half> %a) Index: test/CodeGen/AMDGPU/llvm.trunc.f16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.trunc.f16.ll +++ test/CodeGen/AMDGPU/llvm.trunc.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare half @llvm.trunc.f16(half %a) declare <2 x half> @llvm.trunc.v2f16(<2 x 
half> %a) Index: test/CodeGen/AMDGPU/load-constant-f64.ll =================================================================== --- test/CodeGen/AMDGPU/load-constant-f64.ll +++ test/CodeGen/AMDGPU/load-constant-f64.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn-amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}constant_load_f64: ; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}] Index: test/CodeGen/AMDGPU/load-constant-i32.ll =================================================================== --- test/CodeGen/AMDGPU/load-constant-i32.ll +++ test/CodeGen/AMDGPU/load-constant-i32.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG 
-check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}constant_load_i32: ; GCN: s_load_dword s{{[0-9]+}} Index: test/CodeGen/AMDGPU/load-constant-i64.ll =================================================================== --- test/CodeGen/AMDGPU/load-constant-i64.ll +++ test/CodeGen/AMDGPU/load-constant-i64.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=VI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck 
-check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}constant_load_i64: Index: test/CodeGen/AMDGPU/load-global-f32.ll =================================================================== --- test/CodeGen/AMDGPU/load-global-f32.ll +++ test/CodeGen/AMDGPU/load-global-f32.ll @@ -1,9 +1,9 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck 
-check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}global_load_f32: ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}} Index: test/CodeGen/AMDGPU/load-global-f64.ll =================================================================== --- test/CodeGen/AMDGPU/load-global-f64.ll +++ test/CodeGen/AMDGPU/load-global-f64.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}global_load_f64: ; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] Index: test/CodeGen/AMDGPU/load-global-i32.ll =================================================================== --- test/CodeGen/AMDGPU/load-global-i32.ll +++ test/CodeGen/AMDGPU/load-global-i32.ll @@ -1,7 +1,7 @@ -; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}global_load_i32: Index: test/CodeGen/AMDGPU/load-global-i64.ll =================================================================== --- test/CodeGen/AMDGPU/load-global-i64.ll +++ test/CodeGen/AMDGPU/load-global-i64.ll @@ -1,9 +1,9 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck 
-check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}global_load_i64: ; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] Index: test/CodeGen/AMDGPU/load-hi16.ll =================================================================== --- test/CodeGen/AMDGPU/load-hi16.ll +++ test/CodeGen/AMDGPU/load-hi16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz 
-mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s ; GCN-LABEL: {{^}}load_local_hi_v2i16_undeflo: ; GCN: s_waitcnt @@ -222,9 +222,9 @@ ; VI: flat_load_ushort v{{[0-9]+}} ; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, ; VI: v_or_b32_sdwa -define void @load_flat_hi_v2i16_reglo_vreg(i16 addrspace(4)* %in, i16 %reg) #0 { +define void @load_flat_hi_v2i16_reglo_vreg(i16* %in, i16 %reg) #0 { entry: - %load = load i16, i16 addrspace(4)* %in + %load = load i16, i16* %in %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef @@ -242,9 +242,9 @@ ; VI: flat_load_ushort v{{[0-9]+}} ; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, ; VI: v_or_b32_sdwa -define void @load_flat_hi_v2f16_reglo_vreg(half addrspace(4)* %in, half %reg) #0 { +define void @load_flat_hi_v2f16_reglo_vreg(half* %in, half %reg) #0 { entry: - %load = load half, half addrspace(4)* %in + %load = load half, half* %in %build0 = insertelement <2 x half> undef, half %reg, i32 0 %build1 = insertelement <2 x half> %build0, half %load, i32 1 store <2 x half> %build1, <2 x half> addrspace(1)* undef @@ -262,9 +262,9 @@ ; VI: flat_load_ubyte v{{[0-9]+}} ; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, ; VI: v_or_b32_sdwa -define void @load_flat_hi_v2i16_reglo_vreg_zexti8(i8 addrspace(4)* %in, i16 %reg) #0 { +define void @load_flat_hi_v2i16_reglo_vreg_zexti8(i8* %in, i16 %reg) #0 { entry: - %load = load i8, i8 addrspace(4)* %in + %load = load i8, i8* %in %ext = zext i8 %load to i16 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1 @@ -283,9 +283,9 @@ ; VI: flat_load_sbyte v{{[0-9]+}} ; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, ; VI: v_or_b32_sdwa -define void @load_flat_hi_v2i16_reglo_vreg_sexti8(i8 addrspace(4)* 
%in, i16 %reg) #0 { +define void @load_flat_hi_v2i16_reglo_vreg_sexti8(i8* %in, i16 %reg) #0 { entry: - %load = load i8, i8 addrspace(4)* %in + %load = load i8, i8* %in %ext = sext i8 %load to i16 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1 @@ -302,10 +302,10 @@ ; GFX9-NEXT: s_setpc_b64 ; VI: buffer_load_ushort v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4094{{$}} -define void @load_private_hi_v2i16_reglo_vreg(i16* %in, i16 %reg) #0 { +define void @load_private_hi_v2i16_reglo_vreg(i16 addrspace(5)* %in, i16 %reg) #0 { entry: - %gep = getelementptr inbounds i16, i16* %in, i64 2047 - %load = load i16, i16* %gep + %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2047 + %load = load i16, i16 addrspace(5)* %gep %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef @@ -321,10 +321,10 @@ ; GFX9-NEXT: s_setpc_b64 ; VI: buffer_load_ushort v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4094{{$}} -define void @load_private_hi_v2f16_reglo_vreg(half* %in, half %reg) #0 { +define void @load_private_hi_v2f16_reglo_vreg(half addrspace(5)* %in, half %reg) #0 { entry: - %gep = getelementptr inbounds half, half* %in, i64 2047 - %load = load half, half* %gep + %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2047 + %load = load half, half addrspace(5)* %gep %build0 = insertelement <2 x half> undef, half %reg, i32 0 %build1 = insertelement <2 x half> %build0, half %load, i32 1 store <2 x half> %build1, <2 x half> addrspace(1)* undef @@ -340,9 +340,9 @@ ; GFX9-NEXT: s_setpc_b64 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}} -define void @load_private_hi_v2i16_reglo_vreg_nooff(i16* %in, i16 %reg) #0 { +define void @load_private_hi_v2i16_reglo_vreg_nooff(i16 addrspace(5)* %in, i16 %reg) #0 { entry: - %load = load volatile i16, i16* inttoptr (i32 4094 to i16*) 
+ %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*) %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0 %build1 = insertelement <2 x i16> %build0, i16 %load, i32 1 store <2 x i16> %build1, <2 x i16> addrspace(1)* undef @@ -358,9 +358,9 @@ ; GFX9-NEXT: s_setpc_b64 ; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}} -define void @load_private_hi_v2f16_reglo_vreg_nooff(half* %in, half %reg) #0 { +define void @load_private_hi_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, half %reg) #0 { entry: - %load = load volatile half, half* inttoptr (i32 4094 to half*) + %load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*) %build0 = insertelement <2 x half> undef, half %reg, i32 0 %build1 = insertelement <2 x half> %build0, half %load, i32 1 store <2 x half> %build1, <2 x half> addrspace(1)* undef @@ -376,10 +376,10 @@ ; GFX9-NEXT: s_setpc_b64 ; VI: buffer_load_ubyte v{{[0-9]+}}, v0, s[0:3], s4 offen offset:2047{{$}} -define void @load_private_hi_v2i16_reglo_vreg_zexti8(i8* %in, i16 %reg) #0 { +define void @load_private_hi_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* %in, i16 %reg) #0 { entry: - %gep = getelementptr inbounds i8, i8* %in, i64 2047 - %load = load i8, i8* %gep + %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 2047 + %load = load i8, i8 addrspace(5)* %gep %ext = zext i8 %load to i16 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1 @@ -396,10 +396,10 @@ ; GFX9-NEXT: s_setpc_b64 ; VI: buffer_load_sbyte v{{[0-9]+}}, v0, s[0:3], s4 offen offset:2047{{$}} -define void @load_private_hi_v2i16_reglo_vreg_sexti8(i8* %in, i16 %reg) #0 { +define void @load_private_hi_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* %in, i16 %reg) #0 { entry: - %gep = getelementptr inbounds i8, i8* %in, i64 2047 - %load = load i8, i8* %gep + %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 2047 + %load = load i8, i8 
addrspace(5)* %gep %ext = sext i8 %load to i16 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1 @@ -416,9 +416,9 @@ ; GFX9-NEXT: s_setpc_b64 ; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}} -define void @load_private_hi_v2i16_reglo_vreg_nooff_zexti8(i8* %in, i16 %reg) #0 { +define void @load_private_hi_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i16 %reg) #0 { entry: - %load = load volatile i8, i8* inttoptr (i32 4094 to i8*) + %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*) %ext = zext i8 %load to i16 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1 @@ -435,9 +435,9 @@ ; GFX9-NEXT: s_setpc_b64 ; VI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}} -define void @load_private_hi_v2i16_reglo_vreg_nooff_sexti8(i8* %in, i16 %reg) #0 { +define void @load_private_hi_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i16 %reg) #0 { entry: - %load = load volatile i8, i8* inttoptr (i32 4094 to i8*) + %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*) %ext = sext i8 %load to i16 %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0 %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 1 @@ -454,9 +454,9 @@ ; GFX9-NEXT: s_setpc_b64 ; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}} -define void @load_private_hi_v2f16_reglo_vreg_nooff_zexti8(i8* %in, half %reg) #0 { +define void @load_private_hi_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, half %reg) #0 { entry: - %load = load volatile i8, i8* inttoptr (i32 4094 to i8*) + %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*) %ext = zext i8 %load to i16 %bc.ext = bitcast i16 %ext to half %build0 = insertelement <2 x half> undef, half %reg, i32 0 Index: test/CodeGen/AMDGPU/load-input-fold.ll 
=================================================================== --- test/CodeGen/AMDGPU/load-input-fold.ll +++ test/CodeGen/AMDGPU/load-input-fold.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=cayman +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) { main_body: Index: test/CodeGen/AMDGPU/load-local-f32.ll =================================================================== --- test/CodeGen/AMDGPU/load-local-f32.ll +++ test/CodeGen/AMDGPU/load-local-f32.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}load_f32_local: ; GCN: s_mov_b32 m0 Index: test/CodeGen/AMDGPU/load-local-f64.ll =================================================================== --- test/CodeGen/AMDGPU/load-local-f64.ll +++ test/CodeGen/AMDGPU/load-local-f64.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | 
FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}local_load_f64: ; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}} Index: test/CodeGen/AMDGPU/load-local-i32.ll =================================================================== --- test/CodeGen/AMDGPU/load-local-i32.ll +++ test/CodeGen/AMDGPU/load-local-i32.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}local_load_i32: Index: test/CodeGen/AMDGPU/load-local-i64.ll =================================================================== --- test/CodeGen/AMDGPU/load-local-i64.ll +++ test/CodeGen/AMDGPU/load-local-i64.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: 
llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}local_load_i64: ; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}} Index: test/CodeGen/AMDGPU/load-weird-sizes.ll =================================================================== --- test/CodeGen/AMDGPU/load-weird-sizes.ll +++ test/CodeGen/AMDGPU/load-weird-sizes.ll @@ -1,8 +1,8 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=CM 
-check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=CM -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}load_i24: ; SI-DAG: {{flat|buffer}}_load_ubyte Index: test/CodeGen/AMDGPU/local-64.ll =================================================================== --- test/CodeGen/AMDGPU/local-64.ll +++ test/CodeGen/AMDGPU/local-64.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck 
--check-prefix=CI --check-prefix=BOTH %s ; BOTH-LABEL: {{^}}local_i32_load ; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 Index: test/CodeGen/AMDGPU/local-atomics.ll =================================================================== --- test/CodeGen/AMDGPU/local-atomics.ll +++ test/CodeGen/AMDGPU/local-atomics.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32: ; EG: LDS_WRXCHG_RET * Index: test/CodeGen/AMDGPU/local-atomics64.ll =================================================================== --- test/CodeGen/AMDGPU/local-atomics64.ll +++ test/CodeGen/AMDGPU/local-atomics64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global 
-verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s ; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64: ; GCN: ds_wrxchg_rtn_b64 Index: test/CodeGen/AMDGPU/local-memory.amdgcn.ll =================================================================== --- test/CodeGen/AMDGPU/local-memory.amdgcn.ll +++ test/CodeGen/AMDGPU/local-memory.amdgcn.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s @local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4 Index: test/CodeGen/AMDGPU/local-memory.ll =================================================================== --- test/CodeGen/AMDGPU/local-memory.ll +++ test/CodeGen/AMDGPU/local-memory.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s @local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4 Index: test/CodeGen/AMDGPU/local-memory.r600.ll =================================================================== --- test/CodeGen/AMDGPU/local-memory.r600.ll +++ test/CodeGen/AMDGPU/local-memory.r600.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s @local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4 Index: test/CodeGen/AMDGPU/local-stack-slot-offset.ll =================================================================== --- test/CodeGen/AMDGPU/local-stack-slot-offset.ll +++ test/CodeGen/AMDGPU/local-stack-slot-offset.ll @@ -1,5 +1,6 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -mattr=-promote-alloca -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+vgpr-spilling -mattr=-promote-alloca -verify-machineinstrs | FileCheck %s -check-prefix=CHECK +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+vgpr-spilling -mattr=-promote-alloca -verify-machineinstrs | FileCheck %s -check-prefix=CHECK +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -mattr=+vgpr-spilling -mattr=-promote-alloca -verify-machineinstrs | FileCheck %s -check-prefix=CHECK +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" 
; Allocate two stack slots of 2052 bytes each requiring a total of 4104 bytes. ; Extracting the last element of each does not fit into the offset field of @@ -13,22 +14,22 @@ ; CHECK: buffer_load_dword define amdgpu_gs float @main(float %v1, float %v2, i32 %idx1, i32 %idx2) { main_body: - %m1 = alloca [513 x float] - %m2 = alloca [513 x float] + %m1 = alloca [513 x float], addrspace(5) + %m2 = alloca [513 x float], addrspace(5) - %gep1.store = getelementptr [513 x float], [513 x float]* %m1, i32 0, i32 %idx1 - store float %v1, float* %gep1.store + %gep1.store = getelementptr [513 x float], [513 x float] addrspace(5)* %m1, i32 0, i32 %idx1 + store float %v1, float addrspace(5)* %gep1.store - %gep2.store = getelementptr [513 x float], [513 x float]* %m2, i32 0, i32 %idx2 - store float %v2, float* %gep2.store + %gep2.store = getelementptr [513 x float], [513 x float] addrspace(5)* %m2, i32 0, i32 %idx2 + store float %v2, float addrspace(5)* %gep2.store ; This used to use a base reg equal to 0. - %gep1.load = getelementptr [513 x float], [513 x float]* %m1, i32 0, i32 0 - %out1 = load float, float* %gep1.load + %gep1.load = getelementptr [513 x float], [513 x float] addrspace(5)* %m1, i32 0, i32 0 + %out1 = load float, float addrspace(5)* %gep1.load ; This used to attempt to re-use the base reg at 0, generating an out-of-bounds instruction offset. 
- %gep2.load = getelementptr [513 x float], [513 x float]* %m2, i32 0, i32 512 - %out2 = load float, float* %gep2.load + %gep2.load = getelementptr [513 x float], [513 x float] addrspace(5)* %m2, i32 0, i32 512 + %out2 = load float, float addrspace(5)* %gep2.load %r = fadd float %out1, %out2 ret float %r Index: test/CodeGen/AMDGPU/loop-address.ll =================================================================== --- test/CodeGen/AMDGPU/loop-address.ll +++ test/CodeGen/AMDGPU/loop-address.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood < %s | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck %s ;CHECK: ALU_PUSH ;CHECK: LOOP_START_DX10 @11 Index: test/CodeGen/AMDGPU/loop-idiom.ll =================================================================== --- test/CodeGen/AMDGPU/loop-idiom.ll +++ test/CodeGen/AMDGPU/loop-idiom.ll @@ -1,6 +1,7 @@ -; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s -; RUN: opt -basicaa -loop-idiom -S < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s -; RUN: opt -basicaa -loop-idiom -S < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s +; RUN: opt -basicaa -loop-idiom -S < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: opt -basicaa -loop-idiom -S < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; Make sure loop-idiom doesn't 
create memcpy or memset. There are no library @@ -11,15 +12,15 @@ ; SI-NOT: {{^}}llvm.memcpy define amdgpu_kernel void @no_memcpy(i8 addrspace(3)* %in, i32 %size) { entry: - %dest = alloca i8, i32 32 + %dest = alloca i8, i32 32, addrspace(5) br label %for.body for.body: %0 = phi i32 [0, %entry], [%4, %for.body] %1 = getelementptr i8, i8 addrspace(3)* %in, i32 %0 - %2 = getelementptr i8, i8* %dest, i32 %0 + %2 = getelementptr i8, i8 addrspace(5)* %dest, i32 %0 %3 = load i8, i8 addrspace(3)* %1 - store i8 %3, i8* %2 + store i8 %3, i8 addrspace(5)* %2 %4 = add i32 %0, 1 %5 = icmp eq i32 %4, %size br i1 %5, label %for.end, label %for.body @@ -35,13 +36,13 @@ ; SI-NOT: {{^}}memset_pattern16: define amdgpu_kernel void @no_memset(i32 %size) { entry: - %dest = alloca i8, i32 32 + %dest = alloca i8, i32 32, addrspace(5) br label %for.body for.body: %0 = phi i32 [0, %entry], [%2, %for.body] - %1 = getelementptr i8, i8* %dest, i32 %0 - store i8 0, i8* %1 + %1 = getelementptr i8, i8 addrspace(5)* %dest, i32 %0 + store i8 0, i8 addrspace(5)* %1 %2 = add i32 %0, 1 %3 = icmp eq i32 %2, %size br i1 %3, label %for.end, label %for.body Index: test/CodeGen/AMDGPU/loop_break.ll =================================================================== --- test/CodeGen/AMDGPU/loop_break.ll +++ test/CodeGen/AMDGPU/loop_break.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: opt -mtriple=amdgcn---amdgiz -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Uses llvm.amdgcn.break Index: test/CodeGen/AMDGPU/lshl64-to-32.ll =================================================================== --- test/CodeGen/AMDGPU/lshl64-to-32.ll +++ test/CodeGen/AMDGPU/lshl64-to-32.ll @@ -1,4 +1,4 @@ -; 
RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}zext_shl64_to_32: ; GCN: s_lshl_b32 Index: test/CodeGen/AMDGPU/lshr.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/lshr.v2i16.ll +++ test/CodeGen/AMDGPU/lshr.v2i16.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s ; GCN-LABEL: {{^}}s_lshr_v2i16: ; GFX9: s_load_dword [[LHS:s[0-9]+]] Index: test/CodeGen/AMDGPU/mad-combine.ll =================================================================== --- test/CodeGen/AMDGPU/mad-combine.ll +++ test/CodeGen/AMDGPU/mad-combine.ll @@ -1,12 +1,12 @@ ; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma. 
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s ; Make sure we don't form mad with denormals -; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | 
FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare float @llvm.fabs.f32(float) #0 Index: test/CodeGen/AMDGPU/mad-mix-hi.ll =================================================================== --- test/CodeGen/AMDGPU/mad-mix-hi.ll +++ test/CodeGen/AMDGPU/mad-mix-hi.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: ; GFX9: s_waitcnt Index: test/CodeGen/AMDGPU/mad-mix-lo.ll =================================================================== --- test/CodeGen/AMDGPU/mad-mix-lo.ll +++ test/CodeGen/AMDGPU/mad-mix-lo.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope 
-check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s ; GCN-LABEL: mixlo_simple: ; GCN: s_waitcnt Index: test/CodeGen/AMDGPU/mad-mix.ll =================================================================== --- test/CodeGen/AMDGPU/mad-mix.ll +++ test/CodeGen/AMDGPU/mad-mix.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo: ; GFX9: v_mad_mix_f32 v0, v0, v1, v2 ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c] Index: test/CodeGen/AMDGPU/mad24-get-global-id.ll =================================================================== --- test/CodeGen/AMDGPU/mad24-get-global-id.ll +++ test/CodeGen/AMDGPU/mad24-get-global-id.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; 
If the workgroup id range is restricted, we should be able to use ; mad24 for the usual indexing pattern. Index: test/CodeGen/AMDGPU/mad_int24.ll =================================================================== --- test/CodeGen/AMDGPU/mad_int24.ll +++ test/CodeGen/AMDGPU/mad_int24.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC ; FUNC-LABEL: {{^}}i32_mad24: ; Signed 24-bit multiply is not supported on pre-Cayman GPUs. 
Index: test/CodeGen/AMDGPU/mad_uint24.ll =================================================================== --- test/CodeGen/AMDGPU/mad_uint24.ll +++ test/CodeGen/AMDGPU/mad_uint24.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC -; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC declare i32 @llvm.r600.read.tidig.x() nounwind readnone Index: test/CodeGen/AMDGPU/madak.ll =================================================================== --- test/CodeGen/AMDGPU/madak.ll +++ test/CodeGen/AMDGPU/madak.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; XUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s ; FIXME: Enable VI Index: test/CodeGen/AMDGPU/madmk.ll =================================================================== --- test/CodeGen/AMDGPU/madmk.ll +++ test/CodeGen/AMDGPU/madmk.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; XUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; FIXME: None of these trigger madmk emission anymore. 
It is still ; possible, but requires the correct registers to be used which is Index: test/CodeGen/AMDGPU/max-literals.ll =================================================================== --- test/CodeGen/AMDGPU/max-literals.ll +++ test/CodeGen/AMDGPU/max-literals.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; CHECK-LABEL: {{^}}main: ; CHECK: ADD * Index: test/CodeGen/AMDGPU/max.i16.ll =================================================================== --- test/CodeGen/AMDGPU/max.i16.ll +++ test/CodeGen/AMDGPU/max.i16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VIPLUS %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=VIPLUS %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VIPLUS %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=VIPLUS %s ; FIXME: Need to handle non-uniform case for function below (load without gep). 
; GCN-LABEL: {{^}}v_test_imax_sge_i16: Index: test/CodeGen/AMDGPU/max.ll =================================================================== --- test/CodeGen/AMDGPU/max.ll +++ test/CodeGen/AMDGPU/max.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}v_test_imax_sge_i32: Index: test/CodeGen/AMDGPU/max3.ll =================================================================== --- test/CodeGen/AMDGPU/max3.ll +++ test/CodeGen/AMDGPU/max3.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}v_test_imax3_sgt_i32: ; GCN: v_max3_i32 Index: test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll +++ test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 
-verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd--amdgiz -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}system_monotonic_monotonic ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} @@ -7,10 +7,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @system_monotonic_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in monotonic monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic ret void } @@ -20,10 +20,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @system_acquire_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acquire monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic ret void } @@ -33,10 +33,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @system_release_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in release monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic ret void } @@ -46,10 +46,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @system_acq_rel_monotonic( - i32 
addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acq_rel monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic ret void } @@ -59,10 +59,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @system_seq_cst_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic ret void } @@ -72,10 +72,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @system_acquire_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acquire acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire ret void } @@ -85,10 +85,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @system_release_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in release acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire ret void } @@ -98,10 +98,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @system_acq_rel_acquire( 
- i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acq_rel acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire ret void } @@ -111,10 +111,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @system_seq_cst_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire ret void } @@ -124,10 +124,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @system_seq_cst_seq_cst( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst ret void } @@ -137,10 +137,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_monotonic_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic ret void } @@ -150,10 +150,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: 
buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_acquire_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic ret void } @@ -163,10 +163,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_release_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic ret void } @@ -176,10 +176,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_acq_rel_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic ret void } @@ -189,10 +189,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_seq_cst_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in 
syncscope("singlethread") seq_cst monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic ret void } @@ -202,10 +202,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_acquire_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire ret void } @@ -215,10 +215,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_release_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire ret void } @@ -228,10 +228,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_acq_rel_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire ret void } @@ -241,10 +241,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void 
@singlethread_seq_cst_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire ret void } @@ -254,10 +254,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_seq_cst_seq_cst( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst ret void } @@ -267,10 +267,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_monotonic_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic ret void } @@ -280,10 +280,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_acquire_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = 
cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic ret void } @@ -293,10 +293,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_release_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") release monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic ret void } @@ -306,10 +306,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_acq_rel_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic ret void } @@ -319,10 +319,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_seq_cst_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic ret void } @@ -332,10 +332,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_acquire_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 
addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire ret void } @@ -345,10 +345,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_release_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") release acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire ret void } @@ -358,10 +358,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_acq_rel_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire ret void } @@ -371,10 +371,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_seq_cst_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire ret void } @@ -384,10 +384,10 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define 
amdgpu_kernel void @agent_seq_cst_seq_cst( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst ret void } @@ -397,10 +397,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_monotonic_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic ret void } @@ -410,10 +410,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_acquire_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic ret void } @@ -423,10 +423,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_release_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic + %gep = getelementptr i32, i32* %out, i32 4 
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic ret void } @@ -436,10 +436,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_acq_rel_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic ret void } @@ -449,10 +449,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_seq_cst_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic ret void } @@ -462,10 +462,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_acquire_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire ret void } @@ -475,10 +475,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_release_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - 
%gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire ret void } @@ -488,10 +488,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_acq_rel_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire ret void } @@ -501,10 +501,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_seq_cst_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire ret void } @@ -514,10 +514,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_seq_cst_seq_cst( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst ret void } @@ -527,10 +527,10 @@ ; CHECK-NOT: 
s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_monotonic_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic ret void } @@ -540,10 +540,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_acquire_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic ret void } @@ -553,10 +553,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_release_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic ret void } @@ -566,10 +566,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_acq_rel_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in 
syncscope("wavefront") acq_rel monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic ret void } @@ -579,10 +579,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_seq_cst_monotonic( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic ret void } @@ -592,10 +592,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_acquire_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire ret void } @@ -605,10 +605,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_release_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") release acquire ret void } @@ -618,10 +618,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_acq_rel_acquire( - i32 
addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire ret void } @@ -631,10 +631,10 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_seq_cst_acquire( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire ret void } @@ -644,9 +644,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_seq_cst_seq_cst( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst ret void } Index: test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll +++ test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=GCN -check-prefix=GFX6 %s -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC 
-check-prefix=GCN -check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=GCN -check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd--amdgiz -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=GCN -check-prefix=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd--amdgiz -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=GCN -check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=GCN -check-prefix=GFX8 %s ; FUNC-LABEL: {{^}}system_acquire ; GCN: BB#0 Index: test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll +++ test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd--amdgiz -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}system_monotonic ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} @@ -7,9 +7,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @system_monotonic( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in monotonic + %val = atomicrmw volatile xchg i32* %out, i32 %in monotonic ret void } @@ -19,9 +19,9 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @system_acquire( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* 
%out, i32 %in acquire + %val = atomicrmw volatile xchg i32* %out, i32 %in acquire ret void } @@ -31,9 +31,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @system_release( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in release + %val = atomicrmw volatile xchg i32* %out, i32 %in release ret void } @@ -43,9 +43,9 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @system_acq_rel( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in acq_rel + %val = atomicrmw volatile xchg i32* %out, i32 %in acq_rel ret void } @@ -55,9 +55,9 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @system_seq_cst( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in seq_cst + %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst ret void } @@ -67,9 +67,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_monotonic( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") monotonic + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") monotonic ret void } @@ -79,9 +79,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_acquire( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") acquire + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acquire ret void } @@ -91,9 +91,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: 
buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_release( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") release + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") release ret void } @@ -103,9 +103,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_acq_rel( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") acq_rel + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") acq_rel ret void } @@ -115,9 +115,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @singlethread_seq_cst( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("singlethread") seq_cst + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread") seq_cst ret void } @@ -127,9 +127,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_monotonic( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") monotonic + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") monotonic ret void } @@ -139,9 +139,9 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_acquire( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") acquire + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acquire ret void } @@ -151,9 +151,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol 
define amdgpu_kernel void @agent_release( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") release + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") release ret void } @@ -163,9 +163,9 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_acq_rel( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") acq_rel + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acq_rel ret void } @@ -175,9 +175,9 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: buffer_wbinvl1_vol define amdgpu_kernel void @agent_seq_cst( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("agent") seq_cst + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") seq_cst ret void } @@ -187,9 +187,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_monotonic( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") monotonic + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") monotonic ret void } @@ -199,9 +199,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_acquire( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") acquire + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acquire ret void } @@ -211,9 +211,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_release( - i32 
addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") release + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") release ret void } @@ -223,9 +223,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_acq_rel( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") acq_rel + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acq_rel ret void } @@ -235,9 +235,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @workgroup_seq_cst( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("workgroup") seq_cst + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") seq_cst ret void } @@ -247,9 +247,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_monotonic( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") monotonic + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") monotonic ret void } @@ -259,9 +259,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_acquire( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") acquire + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acquire ret void } @@ -271,9 +271,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_release( - i32 addrspace(4)* %out, i32 %in) 
{ + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") release + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") release ret void } @@ -283,9 +283,9 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_acq_rel( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") acq_rel + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") acq_rel ret void } @@ -295,8 +295,8 @@ ; CHECK-NOT: s_waitcnt vmcnt(0){{$}} ; CHECK-NOT: buffer_wbinvl1_vol define amdgpu_kernel void @wavefront_seq_cst( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope("wavefront") seq_cst + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") seq_cst ret void } Index: test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll +++ test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll @@ -1,5 +1,5 @@ -; RUN: not llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s 2>&1 | FileCheck %s -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -mtriple=amdgcn-amd--amdgiz -mcpu=gfx803 -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -verify-machineinstrs < %s 2>&1 | FileCheck %s ; CHECK: error: <unknown>:0:0: in function invalid_fence void (): Unsupported synchronization scope define amdgpu_kernel void @invalid_fence() { @@ -8,36 +8,36 @@ ret void } -; CHECK: error: <unknown>:0:0: in function invalid_load void (i32 addrspace(4)*, i32 addrspace(4)*): Unsupported synchronization scope +; CHECK:
error: <unknown>:0:0: in function invalid_load void (i32*, i32*): Unsupported synchronization scope define amdgpu_kernel void @invalid_load( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("invalid") seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("invalid") seq_cst, align 4 + store i32 %val, i32* %out ret void } -; CHECK: error: <unknown>:0:0: in function invalid_store void (i32, i32 addrspace(4)*): Unsupported synchronization scope +; CHECK: error: <unknown>:0:0: in function invalid_store void (i32, i32*): Unsupported synchronization scope define amdgpu_kernel void @invalid_store( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("invalid") seq_cst, align 4 + store atomic i32 %in, i32* %out syncscope("invalid") seq_cst, align 4 ret void } -; CHECK: error: <unknown>:0:0: in function invalid_cmpxchg void (i32 addrspace(4)*, i32, i32): Unsupported synchronization scope +; CHECK: error: <unknown>:0:0: in function invalid_cmpxchg void (i32*, i32, i32): Unsupported synchronization scope define amdgpu_kernel void @invalid_cmpxchg( - i32 addrspace(4)* %out, i32 %in, i32 %old) { + i32* %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 - %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope("invalid") seq_cst seq_cst + %gep = getelementptr i32, i32* %out, i32 4 + %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("invalid") seq_cst seq_cst ret void } -; CHECK: error: <unknown>:0:0: in function invalid_rmw void (i32 addrspace(4)*, i32): Unsupported synchronization scope +; CHECK: error: <unknown>:0:0: in function invalid_rmw void (i32*, i32): Unsupported synchronization scope define amdgpu_kernel void @invalid_rmw( - i32 addrspace(4)* %out, i32 %in) { + i32* %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in
%in syncscope("invalid") seq_cst + %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("invalid") seq_cst ret void } Index: test/CodeGen/AMDGPU/memory-legalizer-load.ll =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-load.ll +++ test/CodeGen/AMDGPU/memory-legalizer-load.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd--amdgiz -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd--amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s declare i32 @llvm.amdgcn.workitem.id.x() @@ -12,10 +12,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @system_unordered( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in unordered, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in unordered, align 4 + store i32 %val, i32* %out ret void } @@ -26,10 +26,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword 
v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @system_monotonic( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in monotonic, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in monotonic, align 4 + store i32 %val, i32* %out ret void } @@ -40,10 +40,10 @@ ; GCN-NEXT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @system_acquire( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in acquire, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in acquire, align 4 + store i32 %val, i32* %out ret void } @@ -54,10 +54,10 @@ ; GCN-NEXT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @system_seq_cst( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in seq_cst, align 4 + store i32 %val, i32* %out ret void } @@ -68,10 +68,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @singlethread_unordered( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") unordered, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4 + store i32 %val, i32* %out ret void } @@ -82,10 +82,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @singlethread_monotonic( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* 
%in syncscope("singlethread") monotonic, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4 + store i32 %val, i32* %out ret void } @@ -96,10 +96,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @singlethread_acquire( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") acquire, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4 + store i32 %val, i32* %out ret void } @@ -110,10 +110,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @singlethread_seq_cst( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4 + store i32 %val, i32* %out ret void } @@ -124,10 +124,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @agent_unordered( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") unordered, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("agent") unordered, align 4 + store i32 %val, i32* %out ret void } @@ -138,10 +138,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @agent_monotonic( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") monotonic, align 4 - store i32 %val, 
i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4 + store i32 %val, i32* %out ret void } @@ -152,10 +152,10 @@ ; GCN-NEXT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @agent_acquire( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") acquire, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("agent") acquire, align 4 + store i32 %val, i32* %out ret void } @@ -166,10 +166,10 @@ ; GCN-NEXT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @agent_seq_cst( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4 + store i32 %val, i32* %out ret void } @@ -180,10 +180,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @workgroup_unordered( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") unordered, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4 + store i32 %val, i32* %out ret void } @@ -194,10 +194,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @workgroup_monotonic( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") monotonic, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("workgroup") monotonic, 
align 4 + store i32 %val, i32* %out ret void } @@ -208,10 +208,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @workgroup_acquire( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") acquire, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4 + store i32 %val, i32* %out ret void } @@ -222,10 +222,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @workgroup_seq_cst( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4 + store i32 %val, i32* %out ret void } @@ -236,10 +236,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @wavefront_unordered( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") unordered, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4 + store i32 %val, i32* %out ret void } @@ -250,10 +250,10 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @wavefront_monotonic( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") monotonic, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4 + store i32 %val, i32* %out ret void } @@ -264,10 +264,10 
@@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @wavefront_acquire( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") acquire, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4 + store i32 %val, i32* %out ret void } @@ -278,42 +278,42 @@ ; GCN-NOT: buffer_wbinvl1_vol ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @wavefront_seq_cst( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out + %val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4 + store i32 %val, i32* %out ret void } ; GCN-LABEL: {{^}}nontemporal_private_0 ; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} define amdgpu_kernel void @nontemporal_private_0( - i32* %in, i32 addrspace(4)* %out) { + i32 addrspace(5)* %in, i32* %out) { entry: - %val = load i32, i32* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out + %val = load i32, i32 addrspace(5)* %in, align 4, !nontemporal !0 + store i32 %val, i32* %out ret void } ; GCN-LABEL: {{^}}nontemporal_private_1 ; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} define amdgpu_kernel void @nontemporal_private_1( - i32* %in, i32 addrspace(4)* %out) { + i32 addrspace(5)* %in, i32* %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid - %val = load i32, i32* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out + %val.gep = getelementptr inbounds i32, i32 addrspace(5)* %in, i32 %tid + %val = load 
i32, i32 addrspace(5)* %val.gep, align 4, !nontemporal !0 + store i32 %val, i32* %out ret void } ; GCN-LABEL: {{^}}nontemporal_global_0 ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}} define amdgpu_kernel void @nontemporal_global_0( - i32 addrspace(1)* %in, i32 addrspace(4)* %out) { + i32 addrspace(1)* %in, i32* %out) { entry: %val = load i32, i32 addrspace(1)* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out + store i32 %val, i32* %out ret void } @@ -321,56 +321,56 @@ ; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} ; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], off glc slc{{$}} define amdgpu_kernel void @nontemporal_global_1( - i32 addrspace(1)* %in, i32 addrspace(4)* %out) { + i32 addrspace(1)* %in, i32* %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out + store i32 %val, i32* %out ret void } ; GCN-LABEL: {{^}}nontemporal_local_0 ; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @nontemporal_local_0( - i32 addrspace(3)* %in, i32 addrspace(4)* %out) { + i32 addrspace(3)* %in, i32* %out) { entry: %val = load i32, i32 addrspace(3)* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out + store i32 %val, i32* %out ret void } ; GCN-LABEL: {{^}}nontemporal_local_1 ; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @nontemporal_local_1( - i32 addrspace(3)* %in, i32 addrspace(4)* %out) { + i32 addrspace(3)* %in, i32* %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %val.gep = getelementptr inbounds i32, i32 addrspace(3)* %in, i32 %tid %val = load i32, i32 addrspace(3)* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out + store i32 %val, i32* %out ret void } ; GCN-LABEL: {{^}}nontemporal_flat_0 ; 
GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} define amdgpu_kernel void @nontemporal_flat_0( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load i32, i32 addrspace(4)* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out + %val = load i32, i32* %in, align 4, !nontemporal !0 + store i32 %val, i32* %out ret void } ; GCN-LABEL: {{^}}nontemporal_flat_1 ; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} define amdgpu_kernel void @nontemporal_flat_1( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tid - %val = load i32, i32 addrspace(4)* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out + %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid + %val = load i32, i32* %val.gep, align 4, !nontemporal !0 + store i32 %val, i32* %out ret void } Index: test/CodeGen/AMDGPU/memory-legalizer-store.ll =================================================================== --- test/CodeGen/AMDGPU/memory-legalizer-store.ll +++ test/CodeGen/AMDGPU/memory-legalizer-store.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd--amdgiz -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 
-verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd--amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s declare i32 @llvm.amdgcn.workitem.id.x() @@ -9,9 +9,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @system_unordered( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out unordered, align 4 + store atomic i32 %in, i32* %out unordered, align 4 ret void } @@ -19,9 +19,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @system_monotonic( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out monotonic, align 4 + store atomic i32 %in, i32* %out monotonic, align 4 ret void } @@ -29,9 +29,9 @@ ; GCN: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @system_release( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out release, align 4 + store atomic i32 %in, i32* %out release, align 4 ret void } @@ -39,9 +39,9 @@ ; GCN: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @system_seq_cst( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out seq_cst, align 4 + store atomic i32 %in, i32* %out seq_cst, align 4 ret void } @@ -49,9 +49,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void 
@singlethread_unordered( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") unordered, align 4 + store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4 ret void } @@ -59,9 +59,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @singlethread_monotonic( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") monotonic, align 4 + store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4 ret void } @@ -69,9 +69,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @singlethread_release( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") release, align 4 + store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4 ret void } @@ -79,9 +79,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @singlethread_seq_cst( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") seq_cst, align 4 + store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4 ret void } @@ -89,9 +89,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @agent_unordered( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") unordered, align 4 + store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4 ret void } @@ -99,9 +99,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: 
flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @agent_monotonic( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") monotonic, align 4 + store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4 ret void } @@ -109,9 +109,9 @@ ; GCN: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @agent_release( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") release, align 4 + store atomic i32 %in, i32* %out syncscope("agent") release, align 4 ret void } @@ -119,9 +119,9 @@ ; GCN: s_waitcnt vmcnt(0){{$}} ; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @agent_seq_cst( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") seq_cst, align 4 + store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4 ret void } @@ -129,9 +129,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @workgroup_unordered( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") unordered, align 4 + store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4 ret void } @@ -139,9 +139,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @workgroup_monotonic( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") monotonic, align 4 + store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4 ret void } @@ -149,9 +149,9 @@ ; GCN-NOT: 
s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @workgroup_release( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") release, align 4 + store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4 ret void } @@ -159,9 +159,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @workgroup_seq_cst( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") seq_cst, align 4 + store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4 ret void } @@ -169,9 +169,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @wavefront_unordered( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") unordered, align 4 + store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4 ret void } @@ -179,9 +179,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @wavefront_monotonic( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") monotonic, align 4 + store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4 ret void } @@ -189,9 +189,9 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @wavefront_release( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") release, align 4 + store atomic i32 %in, i32* %out syncscope("wavefront") 
release, align 4 ret void } @@ -199,31 +199,31 @@ ; GCN-NOT: s_waitcnt vmcnt(0){{$}} ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} define amdgpu_kernel void @wavefront_seq_cst( - i32 %in, i32 addrspace(4)* %out) { + i32 %in, i32* %out) { entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") seq_cst, align 4 + store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4 ret void } ; GCN-LABEL: {{^}}nontemporal_private_0 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} define amdgpu_kernel void @nontemporal_private_0( - i32 addrspace(4)* %in, i32* %out) { + i32* %in, i32 addrspace(5)* %out) { entry: - %val = load i32, i32 addrspace(4)* %in, align 4 - store i32 %val, i32* %out, !nontemporal !0 + %val = load i32, i32* %in, align 4 + store i32 %val, i32 addrspace(5)* %out, !nontemporal !0 ret void } ; GCN-LABEL: {{^}}nontemporal_private_1 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} define amdgpu_kernel void @nontemporal_private_1( - i32 addrspace(4)* %in, i32* %out) { + i32* %in, i32 addrspace(5)* %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(4)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid - store i32 %val, i32* %out.gep, !nontemporal !0 + %val = load i32, i32* %in, align 4 + %out.gep = getelementptr inbounds i32, i32 addrspace(5)* %out, i32 %tid + store i32 %val, i32 addrspace(5)* %out.gep, !nontemporal !0 ret void } @@ -231,9 +231,9 @@ ; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}} define amdgpu_kernel void @nontemporal_global_0( - i32 addrspace(4)* %in, i32 addrspace(1)* %out) { + i32* %in, i32 addrspace(1)* %out) { entry: - %val = load i32, i32 addrspace(4)* %in, align 4 + %val = load i32, i32* %in, 
align 4 store i32 %val, i32 addrspace(1)* %out, !nontemporal !0 ret void } @@ -242,10 +242,10 @@ ; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} ; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}} define amdgpu_kernel void @nontemporal_global_1( - i32 addrspace(4)* %in, i32 addrspace(1)* %out) { + i32* %in, i32 addrspace(1)* %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(4)* %in, align 4 + %val = load i32, i32* %in, align 4 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid store i32 %val, i32 addrspace(1)* %out.gep, !nontemporal !0 ret void @@ -254,9 +254,9 @@ ; GCN-LABEL: {{^}}nontemporal_local_0 ; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @nontemporal_local_0( - i32 addrspace(4)* %in, i32 addrspace(3)* %out) { + i32* %in, i32 addrspace(3)* %out) { entry: - %val = load i32, i32 addrspace(4)* %in, align 4 + %val = load i32, i32* %in, align 4 store i32 %val, i32 addrspace(3)* %out, !nontemporal !0 ret void } @@ -264,10 +264,10 @@ ; GCN-LABEL: {{^}}nontemporal_local_1 ; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @nontemporal_local_1( - i32 addrspace(4)* %in, i32 addrspace(3)* %out) { + i32* %in, i32 addrspace(3)* %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(4)* %in, align 4 + %val = load i32, i32* %in, align 4 %out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid store i32 %val, i32 addrspace(3)* %out.gep, !nontemporal !0 ret void @@ -276,22 +276,22 @@ ; GCN-LABEL: {{^}}nontemporal_flat_0 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} define amdgpu_kernel void @nontemporal_flat_0( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: - %val = load i32, i32 addrspace(4)* %in, align 4 - store i32 %val, i32 addrspace(4)* %out, !nontemporal !0 + %val = load i32, 
i32* %in, align 4 + store i32 %val, i32* %out, !nontemporal !0 ret void } ; GCN-LABEL: {{^}}nontemporal_flat_1 ; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} define amdgpu_kernel void @nontemporal_flat_1( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { + i32* %in, i32* %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(4)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(4)* %out, i32 %tid - store i32 %val, i32 addrspace(4)* %out.gep, !nontemporal !0 + %val = load i32, i32* %in, align 4 + %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid + store i32 %val, i32* %out.gep, !nontemporal !0 ret void } Index: test/CodeGen/AMDGPU/merge-store-crash.ll =================================================================== --- test/CodeGen/AMDGPU/merge-store-crash.ll +++ test/CodeGen/AMDGPU/merge-store-crash.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; This is used to crash in LiveIntervalAnalysis via SILoadStoreOptimizer ; while fixing up the merge of two ds_write instructions. 
Index: test/CodeGen/AMDGPU/merge-store-usedef.ll =================================================================== --- test/CodeGen/AMDGPU/merge-store-usedef.ll +++ test/CodeGen/AMDGPU/merge-store-usedef.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}test1: ; CHECK: ds_write_b32 Index: test/CodeGen/AMDGPU/merge-stores.ll =================================================================== --- test/CodeGen/AMDGPU/merge-stores.ll +++ test/CodeGen/AMDGPU/merge-stores.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s ; This test is mostly to test DAG store merging, so disable the vectorizer. ; Run with devices with different unaligned load restrictions. 
Index: test/CodeGen/AMDGPU/min3.ll =================================================================== --- test/CodeGen/AMDGPU/min3.ll +++ test/CodeGen/AMDGPU/min3.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}v_test_imin3_slt_i32: ; GCN: v_min3_i32 Index: test/CodeGen/AMDGPU/missing-store.ll =================================================================== --- test/CodeGen/AMDGPU/missing-store.ll +++ test/CodeGen/AMDGPU/missing-store.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s @ptr_load = addrspace(3) global i32 addrspace(2)* undef, align 8 Index: test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll =================================================================== --- test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll +++ test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn-unknown-amdhsa-amdgiz -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN %s ; Check that when mubuf addr64 instruction is handled in moveToVALU ; from the pointer, dead register writes are not emitted. 
Index: test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll =================================================================== --- test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll +++ test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; XUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; XUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; FIXME: broken on VI because flat instructions need to be emitted ; instead of addr64 equivalent of the _OFFSET variants. Index: test/CodeGen/AMDGPU/move-to-valu-worklist.ll =================================================================== --- test/CodeGen/AMDGPU/move-to-valu-worklist.ll +++ test/CodeGen/AMDGPU/move-to-valu-worklist.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s ; In moveToVALU(), move to vector ALU is performed, all instrs in ; the use chain will be visited. 
We do not want the same node to be @@ -13,7 +13,7 @@ ; GCN-NEXT: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @in_worklist_once() #0 { bb: - %tmp = load i64, i64* undef + %tmp = load i64, i64 addrspace(5)* undef br label %bb1 bb1: ; preds = %bb1, %bb Index: test/CodeGen/AMDGPU/movreld-bug.ll =================================================================== --- test/CodeGen/AMDGPU/movreld-bug.ll +++ test/CodeGen/AMDGPU/movreld-bug.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}main: ; GCN: v_movreld_b32_e32 v0, Index: test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll =================================================================== --- test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll +++ test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK ; Test that buffer_load_format with VGPR resource descriptor is properly ; legalized. 
Index: test/CodeGen/AMDGPU/mubuf.ll =================================================================== --- test/CodeGen/AMDGPU/mubuf.ll +++ test/CodeGen/AMDGPU/mubuf.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.amdgcn.workitem.id.x() readnone Index: test/CodeGen/AMDGPU/mul.ll =================================================================== --- test/CodeGen/AMDGPU/mul.ll +++ test/CodeGen/AMDGPU/mul.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s -check-prefix=FUNC +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG %s -check-prefix=FUNC ; mul24 and mad24 are affected Index: test/CodeGen/AMDGPU/mul_int24.ll =================================================================== --- test/CodeGen/AMDGPU/mul_int24.ll +++ test/CodeGen/AMDGPU/mul_int24.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_smul24_i32: ; GCN-NOT: bfe Index: test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll =================================================================== --- test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll +++ test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone Index: test/CodeGen/AMDGPU/mul_uint24-r600.ll 
=================================================================== --- test/CodeGen/AMDGPU/mul_uint24-r600.ll +++ test/CodeGen/AMDGPU/mul_uint24-r600.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_umul24_i32: ; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W Index: test/CodeGen/AMDGPU/multi-divergent-exit-region.ll =================================================================== --- test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: opt -mtriple=amdgcn---amdgiz -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Add an extra verifier runs. There were some cases where invalid IR ; was produced but happened to be fixed by the later passes. 
Index: test/CodeGen/AMDGPU/multilevel-break.ll =================================================================== --- test/CodeGen/AMDGPU/multilevel-break.ll +++ test/CodeGen/AMDGPU/multilevel-break.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -mtriple=amdgcn-- -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; OPT-LABEL: {{^}}define amdgpu_vs void @multi_else_break( ; OPT: main_body: Index: test/CodeGen/AMDGPU/nested-loop-conditions.ll =================================================================== --- test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: opt -mtriple=amdgcn---amdgiz -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; After structurizing, there are 3 levels of loops. 
The i1 phi ; conditions mutually depend on each other, so it isn't safe to delete Index: test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll =================================================================== --- test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll +++ test/CodeGen/AMDGPU/no-initializer-constant-addrspace.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -filetype=obj < %s | llvm-readobj -relocations -symbols | FileCheck %s -check-prefix=GCN -; RUN: llc -march=amdgcn -mcpu=tonga -filetype=obj < %s | llvm-readobj -relocations -symbols | FileCheck %s -check-prefix=GCN -; RUN: llc -march=r600 -mcpu=cypress -filetype=obj < %s | llvm-readobj -relocations -symbols | FileCheck %s -check-prefix=EG +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -filetype=obj < %s | llvm-readobj -relocations -symbols | FileCheck %s -check-prefix=GCN +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -filetype=obj < %s | llvm-readobj -relocations -symbols | FileCheck %s -check-prefix=GCN +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -filetype=obj < %s | llvm-readobj -relocations -symbols | FileCheck %s -check-prefix=EG ; GCN: R_AMDGPU_REL32 extern_const_addrspace ; EG: R_AMDGPU_ABS32 extern_const_addrspace Index: test/CodeGen/AMDGPU/no-shrink-extloads.ll =================================================================== --- test/CodeGen/AMDGPU/no-shrink-extloads.ll +++ test/CodeGen/AMDGPU/no-shrink-extloads.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/AMDGPU/nop-data.ll =================================================================== --- 
test/CodeGen/AMDGPU/nop-data.ll +++ test/CodeGen/AMDGPU/nop-data.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - -mcpu=fiji | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - -mcpu=fiji | FileCheck %s ; CHECK: kernel0: ; CHECK-NEXT: s_endpgm Index: test/CodeGen/AMDGPU/omod.ll =================================================================== --- test/CodeGen/AMDGPU/omod.ll +++ test/CodeGen/AMDGPU/omod.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; IEEE bit enabled for compute kernel, no shouldn't use. ; GCN-LABEL: {{^}}v_omod_div2_f32_enable_ieee_signed_zeros: Index: test/CodeGen/AMDGPU/opencl-image-metadata.ll =================================================================== --- test/CodeGen/AMDGPU/opencl-image-metadata.ll +++ test/CodeGen/AMDGPU/opencl-image-metadata.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s ; Make sure the OpenCL Image lowering pass doesn't crash when argument metadata ; is not in expected order. 
Index: test/CodeGen/AMDGPU/operand-folding.ll =================================================================== --- test/CodeGen/AMDGPU/operand-folding.ll +++ test/CodeGen/AMDGPU/operand-folding.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}fold_sgpr: ; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s Index: test/CodeGen/AMDGPU/operand-spacing.ll =================================================================== --- test/CodeGen/AMDGPU/operand-spacing.ll +++ test/CodeGen/AMDGPU/operand-spacing.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s ; Make sure there isn't an extra space between the instruction name and first operands. 
Index: test/CodeGen/AMDGPU/or.ll =================================================================== --- test/CodeGen/AMDGPU/or.ll +++ test/CodeGen/AMDGPU/or.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}or_v2i32: Index: test/CodeGen/AMDGPU/over-max-lds-size.ll =================================================================== --- test/CodeGen/AMDGPU/over-max-lds-size.ll +++ test/CodeGen/AMDGPU/over-max-lds-size.ll @@ -1,6 +1,6 @@ -; RUN: not llc -march=amdgcn -mcpu=tahiti < %s 2>&1 | FileCheck -check-prefix=ERROR %s -; RUN: not llc -march=amdgcn -mcpu=hawaii < %s 2>&1 | FileCheck -check-prefix=ERROR %s -; RUN: not llc -march=amdgcn -mcpu=fiji < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji < %s 2>&1 | FileCheck 
-check-prefix=ERROR %s ; ERROR: error: local memory limit exceeded (400000) in use_huge_lds Index: test/CodeGen/AMDGPU/pack.v2f16.ll =================================================================== --- test/CodeGen/AMDGPU/pack.v2f16.ll +++ test/CodeGen/AMDGPU/pack.v2f16.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; GCN-LABEL: {{^}}s_pack_v2f16: Index: test/CodeGen/AMDGPU/pack.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/pack.v2i16.ll +++ test/CodeGen/AMDGPU/pack.v2i16.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc 
-mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; GCN-LABEL: {{^}}s_pack_v2i16: Index: test/CodeGen/AMDGPU/packed-op-sel.ll =================================================================== --- test/CodeGen/AMDGPU/packed-op-sel.ll +++ test/CodeGen/AMDGPU/packed-op-sel.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}fma_vector_vector_scalar_lo: ; GCN: ds_read_b32 [[VEC0:v[0-9]+]] Index: test/CodeGen/AMDGPU/packetizer.ll =================================================================== --- test/CodeGen/AMDGPU/packetizer.ll +++ test/CodeGen/AMDGPU/packetizer.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman | FileCheck %s ; CHECK: {{^}}test: ; CHECK: BIT_ALIGN_INT T{{[0-9]}}.X Index: test/CodeGen/AMDGPU/parallelorifcollapse.ll =================================================================== --- test/CodeGen/AMDGPU/parallelorifcollapse.ll +++ test/CodeGen/AMDGPU/parallelorifcollapse.ll @@ -1,5 +1,6 @@ ; Function Attrs: nounwind -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 
-mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; ; CFG flattening should use parallel-or to generate branch conditions and ; then merge if-regions with the same bodies. @@ -14,23 +15,23 @@ ; CHECK-NEXT: OR_INT define amdgpu_kernel void @_Z9chk1D_512v() #0 { entry: - %a0 = alloca i32, align 4 - %b0 = alloca i32, align 4 - %c0 = alloca i32, align 4 - %d0 = alloca i32, align 4 - %a1 = alloca i32, align 4 - %b1 = alloca i32, align 4 - %c1 = alloca i32, align 4 - %d1 = alloca i32, align 4 - %data = alloca i32, align 4 - %0 = load i32, i32* %a0, align 4 - %1 = load i32, i32* %b0, align 4 + %a0 = alloca i32, align 4, addrspace(5) + %b0 = alloca i32, align 4, addrspace(5) + %c0 = alloca i32, align 4, addrspace(5) + %d0 = alloca i32, align 4, addrspace(5) + %a1 = alloca i32, align 4, addrspace(5) + %b1 = alloca i32, align 4, addrspace(5) + %c1 = alloca i32, align 4, addrspace(5) + %d1 = alloca i32, align 4, addrspace(5) + %data = alloca i32, align 4, addrspace(5) + %0 = load i32, i32 addrspace(5)* %a0, align 4 + %1 = load i32, i32 addrspace(5)* %b0, align 4 %cmp = icmp ne i32 %0, %1 br i1 %cmp, label %land.lhs.true, label %if.else land.lhs.true: ; preds = %entry - %2 = load i32, i32* %c0, align 4 - %3 = load i32, i32* %d0, align 4 + %2 = load i32, i32 addrspace(5)* %c0, align 4 + %3 = load i32, i32 addrspace(5)* %d0, align 4 %cmp1 = icmp ne i32 %2, %3 br i1 %cmp1, label %if.then, label %if.else @@ -38,18 +39,18 @@ br label %if.end if.else: ; preds = %land.lhs.true, %entry - store i32 1, i32* %data, align 4 + store i32 1, i32 addrspace(5)* %data, align 4 br label %if.end if.end: ; preds = %if.else, %if.then - %4 = load i32, i32* %a1, align 4 - %5 = load i32, i32* %b1, align 4 + %4 = load i32, i32 addrspace(5)* %a1, align 4 + %5 = load i32, i32 addrspace(5)* %b1, align 4 %cmp2 = icmp ne i32 %4, 
%5 br i1 %cmp2, label %land.lhs.true3, label %if.else6 land.lhs.true3: ; preds = %if.end - %6 = load i32, i32* %c1, align 4 - %7 = load i32, i32* %d1, align 4 + %6 = load i32, i32 addrspace(5)* %c1, align 4 + %7 = load i32, i32 addrspace(5)* %d1, align 4 %cmp4 = icmp ne i32 %6, %7 br i1 %cmp4, label %if.then5, label %if.else6 @@ -57,7 +58,7 @@ br label %if.end7 if.else6: ; preds = %land.lhs.true3, %if.end - store i32 1, i32* %data, align 4 + store i32 1, i32 addrspace(5)* %data, align 4 br label %if.end7 if.end7: ; preds = %if.else6, %if.then5 Index: test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll =================================================================== --- test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -1,4 +1,5 @@ -; RUN: llc -O0 -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VGPR -check-prefix=GCN %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VGPR -check-prefix=GCN %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; FIXME: we should disable sdwa peephole because dead-code elimination, that ; runs after peephole, ruins this test (different register numbers) Index: test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll =================================================================== --- test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll +++ test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -verify-coalescing < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -verify-coalescing < %s ; The original and requires materializing a 64-bit immediate for ; s_and_b64. 
This is split into 2 x v_and_i32, part of the immediate Index: test/CodeGen/AMDGPU/predicate-dp4.ll =================================================================== --- test/CodeGen/AMDGPU/predicate-dp4.ll +++ test/CodeGen/AMDGPU/predicate-dp4.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=cayman +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman ; CHECK-LABEL: {{^}}main: ; CHECK: PRED_SETE_INT * Pred, Index: test/CodeGen/AMDGPU/predicates.ll =================================================================== --- test/CodeGen/AMDGPU/predicates.ll +++ test/CodeGen/AMDGPU/predicates.ll @@ -1,4 +1,4 @@ -; RUN: llc -spec-exec-max-speculation-cost=0 -march=r600 -r600-ir-structurize=0 -mcpu=redwood < %s | FileCheck %s +; RUN: llc -spec-exec-max-speculation-cost=0 -march=r600 -mtriple=r600---amdgiz -r600-ir-structurize=0 -mcpu=redwood < %s | FileCheck %s ; These tests make sure the compiler is optimizing branches using predicates ; when it is legal to do so. Index: test/CodeGen/AMDGPU/private-access-no-objects.ll =================================================================== --- test/CodeGen/AMDGPU/private-access-no-objects.ll +++ test/CodeGen/AMDGPU/private-access-no-objects.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI -check-prefix=OPT %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=OPTNONE %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN 
-check-prefix=VI -check-prefix=OPT %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI -check-prefix=OPT %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=iceland -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=OPTNONE %s ; There are no stack objects, but still a private memory access. The ; private access regiters need to be correctly initialized anyway, and @@ -19,7 +19,7 @@ ; OPTNONE-NOT: s_mov_b32 ; OPTNONE: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s5 offen{{$}} define amdgpu_kernel void @store_to_undef() #0 { - store volatile i32 0, i32* undef + store volatile i32 0, i32 addrspace(5)* undef ret void } @@ -29,7 +29,7 @@ ; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}} ; OPT: buffer_store_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}} define amdgpu_kernel void @store_to_inttoptr() #0 { - store volatile i32 0, i32* inttoptr (i32 124 to i32*) + store volatile i32 0, i32 addrspace(5)* inttoptr (i32 124 to i32 addrspace(5)*) ret void } @@ -39,7 +39,7 @@ ; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}} ; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}} define amdgpu_kernel void @load_from_undef() #0 { - %ld = load volatile i32, i32* undef + %ld = load volatile i32, i32 addrspace(5)* undef ret void } @@ -49,7 +49,7 @@ ; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s5{{$}} ; OPT: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}} define amdgpu_kernel void @load_from_inttoptr() #0 { - %ld = load volatile i32, i32* inttoptr (i32 124 to i32*) + %ld = load volatile i32, i32 addrspace(5)* inttoptr (i32 124 
to i32 addrspace(5)*) ret void } Index: test/CodeGen/AMDGPU/private-memory-atomics.ll =================================================================== --- test/CodeGen/AMDGPU/private-memory-atomics.ll +++ test/CodeGen/AMDGPU/private-memory-atomics.ll @@ -1,31 +1,32 @@ -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti < %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; This works because promote allocas pass replaces these with LDS atomics. ; Private atomics have no real use, but at least shouldn't crash on it. define amdgpu_kernel void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind { entry: - %tmp = alloca [2 x i32] - %tmp1 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 0 - %tmp2 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 1 - store i32 0, i32* %tmp1 - store i32 1, i32* %tmp2 - %tmp3 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in - %tmp4 = atomicrmw add i32* %tmp3, i32 7 acq_rel + %tmp = alloca [2 x i32], addrspace(5) + %tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0 + %tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1 + store i32 0, i32 addrspace(5)* %tmp1 + store i32 1, i32 addrspace(5)* %tmp2 + %tmp3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in + %tmp4 = atomicrmw add i32 addrspace(5)* %tmp3, i32 7 acq_rel store i32 %tmp4, i32 addrspace(1)* %out ret void } define amdgpu_kernel void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind { entry: - %tmp = alloca [2 x i32] - %tmp1 = getelementptr 
inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 0 - %tmp2 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 1 - store i32 0, i32* %tmp1 - store i32 1, i32* %tmp2 - %tmp3 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in - %tmp4 = cmpxchg i32* %tmp3, i32 0, i32 1 acq_rel monotonic + %tmp = alloca [2 x i32], addrspace(5) + %tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0 + %tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1 + store i32 0, i32 addrspace(5)* %tmp1 + store i32 1, i32 addrspace(5)* %tmp2 + %tmp3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in + %tmp4 = cmpxchg i32 addrspace(5)* %tmp3, i32 0, i32 1 acq_rel monotonic %val = extractvalue { i32, i1 } %tmp4, 0 store i32 %val, i32 addrspace(1)* %out ret void Index: test/CodeGen/AMDGPU/promote-alloca-addrspacecast.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-addrspacecast.ll +++ test/CodeGen/AMDGPU/promote-alloca-addrspacecast.ll @@ -1,19 +1,20 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa-amdgiz -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; The types of the users of the addrspacecast should not be changed. 
; CHECK-LABEL: @invalid_bitcast_addrspace( ; CHECK: getelementptr inbounds [256 x [1 x i32]], [256 x [1 x i32]] addrspace(3)* @invalid_bitcast_addrspace.data, i32 0, i32 %14 ; CHECK: bitcast [1 x i32] addrspace(3)* %{{[0-9]+}} to half addrspace(3)* -; CHECK: addrspacecast half addrspace(3)* %tmp to half addrspace(4)* -; CHECK: bitcast half addrspace(4)* %tmp1 to <2 x i16> addrspace(4)* +; CHECK: addrspacecast half addrspace(3)* %tmp to half* +; CHECK: bitcast half* %tmp1 to <2 x i16>* define amdgpu_kernel void @invalid_bitcast_addrspace() #0 { entry: - %data = alloca [1 x i32], align 4 - %tmp = bitcast [1 x i32]* %data to half* - %tmp1 = addrspacecast half* %tmp to half addrspace(4)* - %tmp2 = bitcast half addrspace(4)* %tmp1 to <2 x i16> addrspace(4)* - %tmp3 = load <2 x i16>, <2 x i16> addrspace(4)* %tmp2, align 2 + %data = alloca [1 x i32], align 4, addrspace(5) + %tmp = bitcast [1 x i32] addrspace(5)* %data to half addrspace(5)* + %tmp1 = addrspacecast half addrspace(5)* %tmp to half* + %tmp2 = bitcast half* %tmp1 to <2 x i16>* + %tmp3 = load <2 x i16>, <2 x i16>* %tmp2, align 2 %tmp4 = bitcast <2 x i16> %tmp3 to <2 x half> ret void } Index: test/CodeGen/AMDGPU/promote-alloca-array-allocation.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-array-allocation.ll +++ test/CodeGen/AMDGPU/promote-alloca-array-allocation.ll @@ -1,47 +1,48 @@ -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-promote-alloca < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa-amdgiz -amdgpu-promote-alloca < %s | FileCheck %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; Make sure this allocates the correct size if the alloca has a non-0 ; number of elements. 
; CHECK-LABEL: @array_alloca( -; CHECK: %stack = alloca i32, i32 5, align 4 +; CHECK: %stack = alloca i32, i32 5, align 4, addrspace(5) define amdgpu_kernel void @array_alloca(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { entry: - %stack = alloca i32, i32 5, align 4 + %stack = alloca i32, i32 5, align 4, addrspace(5) %ld0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %stack, i32 %ld0 - store i32 4, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 %ld0 + store i32 4, i32 addrspace(5)* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %ld1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %stack, i32 %ld1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds i32, i32* %stack, i32 0 - %ld2 = load i32, i32* %arrayidx10, align 4 + %arrayidx3 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 %ld1 + store i32 5, i32 addrspace(5)* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 0 + %ld2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 store i32 %ld2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds i32, i32* %stack, i32 1 - %ld3 = load i32, i32* %arrayidx12 + %arrayidx12 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 1 + %ld3 = load i32, i32 addrspace(5)* %arrayidx12 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 store i32 %ld3, i32 addrspace(1)* %arrayidx13 ret void } ; CHECK-LABEL: @array_alloca_dynamic( -; CHECK: %stack = alloca i32, i32 %size, align 4 +; CHECK: %stack = alloca i32, i32 %size, align 4, addrspace(5) define amdgpu_kernel void @array_alloca_dynamic(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %size) #0 { entry: - %stack = alloca i32, i32 %size, align 4 + %stack = alloca i32, i32 %size, 
align 4, addrspace(5) %ld0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %stack, i32 %ld0 - store i32 4, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 %ld0 + store i32 4, i32 addrspace(5)* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %ld1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %stack, i32 %ld1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds i32, i32* %stack, i32 0 - %ld2 = load i32, i32* %arrayidx10, align 4 + %arrayidx3 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 %ld1 + store i32 5, i32 addrspace(5)* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 0 + %ld2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 store i32 %ld2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds i32, i32* %stack, i32 1 - %ld3 = load i32, i32* %arrayidx12 + %arrayidx12 = getelementptr inbounds i32, i32 addrspace(5)* %stack, i32 1 + %ld3 = load i32, i32 addrspace(5)* %arrayidx12 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 store i32 %ld3, i32 addrspace(1)* %arrayidx13 ret void Index: test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll +++ test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll @@ -1,4 +1,5 @@ -; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s 2>&1 | FileCheck %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; FIXME: Error is misleading because it's not an indirect call. 
@@ -7,20 +8,20 @@ ; Make sure that AMDGPUPromoteAlloca doesn't crash if the called ; function is a constantexpr cast of a function. -declare void @foo(float*) #0 +declare void @foo(float addrspace(5)*) #0 declare void @foo.varargs(...) #0 ; XCHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo define amdgpu_kernel void @crash_call_constexpr_cast() #0 { - %alloca = alloca i32 - call void bitcast (void (float*)* @foo to void (i32*)*)(i32* %alloca) #0 + %alloca = alloca i32, addrspace(5) + call void bitcast (void (float addrspace(5)*)* @foo to void (i32 addrspace(5)*)*)(i32 addrspace(5)* %alloca) #0 ret void } ; XCHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo.varargs define amdgpu_kernel void @crash_call_constexpr_cast_varargs() #0 { - %alloca = alloca i32 - call void bitcast (void (...)* @foo.varargs to void (i32*)*)(i32* %alloca) #0 + %alloca = alloca i32, addrspace(5) + call void bitcast (void (...)* @foo.varargs to void (i32 addrspace(5)*)*)(i32 addrspace(5)* %alloca) #0 ret void } Index: test/CodeGen/AMDGPU/promote-alloca-globals.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-globals.ll +++ test/CodeGen/AMDGPU/promote-alloca-globals.ll @@ -1,5 +1,6 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s +; RUN: opt -S -mtriple=amdgcn-unknown-unknown-amdgiz -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" @global_array0 = internal unnamed_addr addrspace(3) global [750 x [10 x i32]] undef, align 4 @@ -12,19 +13,19 @@ 
define amdgpu_kernel void @promote_alloca_size_256(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: - %stack = alloca [10 x i32], align 4 + %stack = alloca [10 x i32], align 4, addrspace(5) %tmp = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* %stack, i32 0, i32 %tmp - store i32 4, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(5)* %stack, i32 0, i32 %tmp + store i32 4, i32 addrspace(5)* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [10 x i32], [10 x i32]* %stack, i32 0, i32 %tmp1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32]* %stack, i32 0, i32 0 - %tmp2 = load i32, i32* %arrayidx10, align 4 + %arrayidx3 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1 + store i32 5, i32 addrspace(5)* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(5)* %stack, i32 0, i32 0 + %tmp2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 store i32 %tmp2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds [10 x i32], [10 x i32]* %stack, i32 0, i32 1 - %tmp3 = load i32, i32* %arrayidx12 + %arrayidx12 = getelementptr inbounds [10 x i32], [10 x i32] addrspace(5)* %stack, i32 0, i32 1 + %tmp3 = load i32, i32 addrspace(5)* %arrayidx12 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 store i32 %tmp3, i32 addrspace(1)* %arrayidx13 %v0 = getelementptr inbounds [750 x [10 x i32]], [750 x [10 x i32]] addrspace(3)* @global_array0, i32 0, i32 0, i32 0 Index: test/CodeGen/AMDGPU/promote-alloca-lifetime.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-lifetime.ll +++ 
test/CodeGen/AMDGPU/promote-alloca-lifetime.ll @@ -1,7 +1,8 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-promote-alloca %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa-amdgiz -amdgpu-promote-alloca %s | FileCheck -check-prefix=OPT %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" -declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0 -declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0 +declare void @llvm.lifetime.start.p0i8(i64, i8 addrspace(5)* nocapture) #0 +declare void @llvm.lifetime.end.p0i8(i64, i8 addrspace(5)* nocapture) #0 ; OPT-LABEL: @use_lifetime_promotable_lds( ; OPT-NOT: alloca i32 @@ -9,13 +10,13 @@ ; OPT: store i32 %tmp3, i32 addrspace(3)* define amdgpu_kernel void @use_lifetime_promotable_lds(i32 addrspace(1)* %arg) #2 { bb: - %tmp = alloca i32, align 4 - %tmp1 = bitcast i32* %tmp to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* %tmp1) + %tmp = alloca i32, align 4, addrspace(5) + %tmp1 = bitcast i32 addrspace(5)* %tmp to i8 addrspace(5)* + call void @llvm.lifetime.start.p0i8(i64 4, i8 addrspace(5)* %tmp1) %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 %tmp3 = load i32, i32 addrspace(1)* %tmp2 - store i32 %tmp3, i32* %tmp - call void @llvm.lifetime.end.p0i8(i64 4, i8* %tmp1) + store i32 %tmp3, i32 addrspace(5)* %tmp + call void @llvm.lifetime.end.p0i8(i64 4, i8 addrspace(5)* %tmp1) ret void } Index: test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll +++ test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll @@ -1,26 +1,27 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa-amdgiz -mcpu=kaveri 
-amdgpu-promote-alloca < %s | FileCheck %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" -declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0 -declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p1i8.i32(i8 addrspace(5)* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0 +declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(5)* nocapture, i32, i32, i1) #0 -declare void @llvm.memmove.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0 -declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memmove.p0i8.p1i8.i32(i8 addrspace(5)* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0 +declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(5)* nocapture, i32, i32, i1) #0 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8 addrspace(5)* nocapture, i8, i32, i32, i1) #0 -declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) #1 +declare i32 @llvm.objectsize.i32.p0i8(i8 addrspace(5)*, i1, i1) #1 ; CHECK-LABEL: @promote_with_memcpy( ; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}} ; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) ; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false) define amdgpu_kernel void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %alloca = alloca [17 x i32], align 4 - %alloca.bc = bitcast [17 x i32]* %alloca to i8* + %alloca = alloca 
[17 x i32], align 4, addrspace(5) + %alloca.bc = bitcast [17 x i32] addrspace(5)* %alloca to i8 addrspace(5)* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %out.bc, i8* %alloca.bc, i32 68, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p1i8.i32(i8 addrspace(5)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(5)* %alloca.bc, i32 68, i32 4, i1 false) ret void } @@ -29,12 +30,12 @@ ; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) ; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false) define amdgpu_kernel void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %alloca = alloca [17 x i32], align 4 - %alloca.bc = bitcast [17 x i32]* %alloca to i8* + %alloca = alloca [17 x i32], align 4, addrspace(5) + %alloca.bc = bitcast [17 x i32] addrspace(5)* %alloca to i8 addrspace(5)* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memmove.p0i8.p1i8.i32(i8* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) - call void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* %out.bc, i8* %alloca.bc, i32 68, i32 4, i1 false) + call void @llvm.memmove.p0i8.p1i8.i32(i8 addrspace(5)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) + call void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(5)* %alloca.bc, i32 68, i32 4, i1 false) ret void } @@ -42,11 +43,11 @@ ; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memset.alloca, i32 
0, i32 %{{[0-9]+}} ; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %alloca.bc, i8 7, i32 68, i32 4, i1 false) define amdgpu_kernel void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %alloca = alloca [17 x i32], align 4 - %alloca.bc = bitcast [17 x i32]* %alloca to i8* + %alloca = alloca [17 x i32], align 4, addrspace(5) + %alloca.bc = bitcast [17 x i32] addrspace(5)* %alloca to i8 addrspace(5)* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memset.p0i8.i32(i8* %alloca.bc, i8 7, i32 68, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8 addrspace(5)* %alloca.bc, i8 7, i32 68, i32 4, i1 false) ret void } @@ -54,9 +55,9 @@ ; CHECK: [[PTR:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_objectsize.alloca, i32 0, i32 %{{[0-9]+}} ; CHECK: call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %alloca.bc, i1 false, i1 false) define amdgpu_kernel void @promote_with_objectsize(i32 addrspace(1)* %out) #0 { - %alloca = alloca [17 x i32], align 4 - %alloca.bc = bitcast [17 x i32]* %alloca to i8* - %size = call i32 @llvm.objectsize.i32.p0i8(i8* %alloca.bc, i1 false, i1 false) + %alloca = alloca [17 x i32], align 4, addrspace(5) + %alloca.bc = bitcast [17 x i32] addrspace(5)* %alloca to i8 addrspace(5)* + %size = call i32 @llvm.objectsize.i32.p0i8(i8 addrspace(5)* %alloca.bc, i1 false, i1 false) store i32 %size, i32 addrspace(1)* %out ret void } Index: test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll +++ test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=kaveri < %s | FileCheck -check-prefix=GCN 
%s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; This shows that the amount of LDS estimate is sensitive to the order ; of the LDS globals. @@ -32,19 +33,19 @@ ; GCN: workgroup_group_segment_byte_size = 2340 define amdgpu_kernel void @promote_alloca_size_order_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 { entry: - %stack = alloca [5 x i32], align 4 + %stack = alloca [5 x i32], align 4, addrspace(5) %tmp0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0 - store i32 4, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp0 + store i32 4, i32 addrspace(5)* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 - %tmp2 = load i32, i32* %arrayidx10, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1 + store i32 5, i32 addrspace(5)* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0 + %tmp2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 store i32 %tmp2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 - %tmp3 = load i32, i32* %arrayidx12 + %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1 + %tmp3 = load i32, i32 addrspace(5)* %arrayidx12 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 store i32 %tmp3, i32 
addrspace(1)* %arrayidx13 @@ -64,19 +65,19 @@ ; GCN: workgroup_group_segment_byte_size = 2352 define amdgpu_kernel void @promote_alloca_size_order_1(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 { entry: - %stack = alloca [5 x i32], align 4 + %stack = alloca [5 x i32], align 4, addrspace(5) %tmp0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0 - store i32 4, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp0 + store i32 4, i32 addrspace(5)* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 - %tmp2 = load i32, i32* %arrayidx10, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1 + store i32 5, i32 addrspace(5)* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0 + %tmp2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 store i32 %tmp2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 - %tmp3 = load i32, i32* %arrayidx12 + %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1 + %tmp3 = load i32, i32 addrspace(5)* %arrayidx12 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 store i32 %tmp3, i32 addrspace(1)* %arrayidx13 @@ -102,19 +103,19 @@ ; GCN: workgroup_group_segment_byte_size = 1060 define amdgpu_kernel void @promote_alloca_align_pad_guess_over_limit(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in, i32 %idx) #0 { entry: - 
%stack = alloca [5 x i32], align 4 + %stack = alloca [5 x i32], align 4, addrspace(5) %tmp0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0 - store i32 4, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp0 + store i32 4, i32 addrspace(5)* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 - %tmp2 = load i32, i32* %arrayidx10, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp1 + store i32 5, i32 addrspace(5)* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0 + %tmp2 = load i32, i32 addrspace(5)* %arrayidx10, align 4 store i32 %tmp2, i32 addrspace(1)* %out, align 4 - %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 - %tmp3 = load i32, i32* %arrayidx12 + %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1 + %tmp3 = load i32, i32 addrspace(5)* %arrayidx12 %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 store i32 %tmp3, i32 addrspace(1)* %arrayidx13 Index: test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll +++ test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll @@ -1,4 +1,5 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa-amdgiz -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s +target 
datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; This normally would be fixed by instcombine to be compare to the GEP ; indices @@ -9,10 +10,10 @@ ; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b ; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, %ptr1 define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { - %alloca = alloca [16 x i32], align 4 - %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a - %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b - %cmp = icmp eq i32* %ptr0, %ptr1 + %alloca = alloca [16 x i32], align 4, addrspace(5) + %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a + %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b + %cmp = icmp eq i32 addrspace(5)* %ptr0, %ptr1 %zext = zext i1 %cmp to i32 store volatile i32 %zext, i32 addrspace(1)* %out ret void @@ -23,9 +24,9 @@ ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a ; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, null define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { - %alloca = alloca [16 x i32], align 4 - %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a - %cmp = icmp eq i32* %ptr0, null + %alloca = alloca [16 x i32], align 4, addrspace(5) + %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a + %cmp = icmp eq i32 addrspace(5)* %ptr0, null %zext = zext i1 %cmp to i32 store volatile i32 %zext, i32 addrspace(1)* %out ret void @@ -36,29 +37,29 @@ ; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a ; 
CHECK: %cmp = icmp eq i32 addrspace(3)* null, %ptr0 define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { - %alloca = alloca [16 x i32], align 4 - %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a - %cmp = icmp eq i32* null, %ptr0 + %alloca = alloca [16 x i32], align 4, addrspace(5) + %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a + %cmp = icmp eq i32 addrspace(5)* null, %ptr0 %zext = zext i1 %cmp to i32 store volatile i32 %zext, i32 addrspace(1)* %out ret void } ; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr( -; CHECK: %alloca = alloca [16 x i32], align 4 -; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a -; CHECK: %ptr1 = call i32* @get_unknown_pointer() -; CHECK: %cmp = icmp eq i32* %ptr0, %ptr1 +; CHECK: %alloca = alloca [16 x i32], align 4, addrspace(5) +; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a +; CHECK: %ptr1 = call i32 addrspace(5)* @get_unknown_pointer() +; CHECK: %cmp = icmp eq i32 addrspace(5)* %ptr0, %ptr1 define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { - %alloca = alloca [16 x i32], align 4 - %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a - %ptr1 = call i32* @get_unknown_pointer() - %cmp = icmp eq i32* %ptr0, %ptr1 + %alloca = alloca [16 x i32], align 4, addrspace(5) + %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a + %ptr1 = call i32 addrspace(5)* @get_unknown_pointer() + %cmp = icmp eq i32 addrspace(5)* %ptr0, %ptr1 %zext = zext i1 %cmp to i32 store volatile i32 %zext, i32 addrspace(1)* %out ret void } -declare i32* @get_unknown_pointer() #0 +declare i32 addrspace(5)* @get_unknown_pointer() #0 attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" } Index: 
test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll +++ test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll @@ -1,4 +1,5 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa-amdgiz -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; CHECK-LABEL: @branch_ptr_var_same_alloca( @@ -15,20 +16,20 @@ ; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4 define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 { entry: - %alloca = alloca [64 x i32], align 4 + %alloca = alloca [64 x i32], align 4, addrspace(5) br i1 undef, label %if, label %else if: - %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a + %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a br label %endif else: - %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %b + %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %b br label %endif endif: - %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ] - store i32 0, i32* %phi.ptr, align 4 + %phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %if ], [ %arrayidx1, %else ] + store i32 0, i32 addrspace(5)* %phi.ptr, align 4 ret void } @@ -36,16 +37,16 @@ ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ] define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 { entry: - %alloca = alloca [64 x i32], align 4 + %alloca = alloca [64 x i32], align 4, addrspace(5) br i1 undef, label %if, label %endif if: - %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* 
%alloca, i32 0, i32 %a + %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a br label %endif endif: - %phi.ptr = phi i32* [ %arrayidx0, %if ], [ null, %entry ] - store i32 0, i32* %phi.ptr, align 4 + %phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %if ], [ null, %entry ] + store i32 0, i32 addrspace(5)* %phi.ptr, align 4 ret void } @@ -53,16 +54,16 @@ ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ null, %entry ], [ %arrayidx0, %if ] define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 { entry: - %alloca = alloca [64 x i32], align 4 + %alloca = alloca [64 x i32], align 4, addrspace(5) br i1 undef, label %if, label %endif if: - %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a + %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a br label %endif endif: - %phi.ptr = phi i32* [ null, %entry ], [ %arrayidx0, %if ] - store i32 0, i32* %phi.ptr, align 4 + %phi.ptr = phi i32 addrspace(5)* [ null, %entry ], [ %arrayidx0, %if ] + store i32 0, i32 addrspace(5)* %phi.ptr, align 4 ret void } @@ -75,44 +76,44 @@ ; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4 define amdgpu_kernel void @one_phi_value(i32 %a) #0 { entry: - %alloca = alloca [64 x i32], align 4 - %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a + %alloca = alloca [64 x i32], align 4, addrspace(5) + %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a br label %exit exit: - %phi.ptr = phi i32* [ %arrayidx0, %entry ] - store i32 0, i32* %phi.ptr, align 4 + %phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %entry ] + store i32 0, i32 addrspace(5)* %phi.ptr, align 4 ret void } ; CHECK-LABEL: @branch_ptr_alloca_unknown_obj( -; CHECK: %alloca = alloca [64 x i32], align 4 +; CHECK: %alloca = alloca [64 x i32], align 4, addrspace(5) ; CHECK: if: -; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], 
[64 x i32]* %alloca, i32 0, i32 %a +; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a ; CHECK: else: -; CHECK: %arrayidx1 = call i32* @get_unknown_pointer() +; CHECK: %arrayidx1 = call i32 addrspace(5)* @get_unknown_pointer() ; CHECK: endif: -; CHECK: %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ] -; CHECK: store i32 0, i32* %phi.ptr, align 4 +; CHECK: %phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %if ], [ %arrayidx1, %else ] +; CHECK: store i32 0, i32 addrspace(5)* %phi.ptr, align 4 define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 { entry: - %alloca = alloca [64 x i32], align 4 + %alloca = alloca [64 x i32], align 4, addrspace(5) br i1 undef, label %if, label %else if: - %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a + %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 %a br label %endif else: - %arrayidx1 = call i32* @get_unknown_pointer() + %arrayidx1 = call i32 addrspace(5)* @get_unknown_pointer() br label %endif endif: - %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ] - store i32 0, i32* %phi.ptr, align 4 + %phi.ptr = phi i32 addrspace(5)* [ %arrayidx0, %if ], [ %arrayidx1, %else ] + store i32 0, i32 addrspace(5)* %phi.ptr, align 4 ret void } @@ -122,7 +123,7 @@ ; int i = 0; ; #pragma nounroll -; for (int* p = &alloca[2], *e = &alloca[48]; p != e; ++p, ++i) +; for (int addrspace(5)* p = &alloca[2], *e = &alloca[48]; p != e; ++p, ++i) ; { ; *p = i; ; } @@ -132,13 +133,13 @@ ; GetUnderlyingObjects when looking at the icmp user. 
; CHECK-LABEL: @ptr_induction_var_same_alloca( -; CHECK: %alloca = alloca [64 x i32], align 4 -; CHECK: phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ] +; CHECK: %alloca = alloca [64 x i32], align 4, addrspace(5) +; CHECK: phi i32 addrspace(5)* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ] define amdgpu_kernel void @ptr_induction_var_same_alloca() #0 { entry: - %alloca = alloca [64 x i32], align 4 - %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2 - %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 48 + %alloca = alloca [64 x i32], align 4, addrspace(5) + %arrayidx = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 2 + %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 48 br label %for.body for.cond.cleanup: ; preds = %for.body @@ -146,38 +147,38 @@ for.body: ; preds = %for.body, %entry %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %p.08 = phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ] - store i32 %i.09, i32* %p.08, align 4 - %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1 + %p.08 = phi i32 addrspace(5)* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ] + store i32 %i.09, i32 addrspace(5)* %p.08, align 4 + %incdec.ptr = getelementptr inbounds i32, i32 addrspace(5)* %p.08, i32 1 %inc = add nuw nsw i32 %i.09, 1 - %cmp = icmp eq i32* %incdec.ptr, %arrayidx1 + %cmp = icmp eq i32 addrspace(5)* %incdec.ptr, %arrayidx1 br i1 %cmp, label %for.cond.cleanup, label %for.body } -; extern int* get_unknown_pointer(void); +; extern int addrspace(5)* get_unknown_pointer(void); ; kernel void ptr_induction_var_alloca_unknown(void) ; { ; int alloca[64]; ; int i = 0; ; -; for (int* p = &alloca[2], *e = get_unknown_pointer(); p != e; ++p, ++i) +; for (int addrspace(5)* p = &alloca[2], *e = get_unknown_pointer(); p != e; ++p, ++i) ; { ; *p = i; ; } ; } ; CHECK-LABEL: @ptr_induction_var_alloca_unknown( -; 
CHECK: %alloca = alloca [64 x i32], align 4 -; CHECK: %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ] -; CHECK: %cmp = icmp eq i32* %incdec.ptr, %call +; CHECK: %alloca = alloca [64 x i32], align 4, addrspace(5) +; CHECK: %p.08 = phi i32 addrspace(5)* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ] +; CHECK: %cmp = icmp eq i32 addrspace(5)* %incdec.ptr, %call define amdgpu_kernel void @ptr_induction_var_alloca_unknown() #0 { entry: - %alloca = alloca [64 x i32], align 4 - %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2 - %call = tail call i32* @get_unknown_pointer() #2 - %cmp.7 = icmp eq i32* %arrayidx, %call + %alloca = alloca [64 x i32], align 4, addrspace(5) + %arrayidx = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %alloca, i32 0, i32 2 + %call = tail call i32 addrspace(5)* @get_unknown_pointer() #2 + %cmp.7 = icmp eq i32 addrspace(5)* %arrayidx, %call br i1 %cmp.7, label %for.cond.cleanup, label %for.body.preheader for.body.preheader: ; preds = %entry @@ -191,14 +192,14 @@ for.body: ; preds = %for.body, %for.body.preheader %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ] - store i32 %i.09, i32* %p.08, align 4 - %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1 + %p.08 = phi i32 addrspace(5)* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ] + store i32 %i.09, i32 addrspace(5)* %p.08, align 4 + %incdec.ptr = getelementptr inbounds i32, i32 addrspace(5)* %p.08, i32 1 %inc = add nuw nsw i32 %i.09, 1 - %cmp = icmp eq i32* %incdec.ptr, %call + %cmp = icmp eq i32 addrspace(5)* %incdec.ptr, %call br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body } -declare i32* @get_unknown_pointer() #0 +declare i32 addrspace(5)* @get_unknown_pointer() #0 attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" } Index: 
test/CodeGen/AMDGPU/promote-alloca-volatile.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-volatile.ll +++ test/CodeGen/AMDGPU/promote-alloca-volatile.ll @@ -1,27 +1,28 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-promote-alloca < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa-amdgiz -amdgpu-promote-alloca < %s | FileCheck %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; CHECK-LABEL: @volatile_load( ; CHECK: alloca [4 x i32] -; CHECK: load volatile i32, i32* +; CHECK: load volatile i32, i32 addrspace(5)* define amdgpu_kernel void @volatile_load(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: - %stack = alloca [4 x i32], align 4 + %stack = alloca [4 x i32], align 4, addrspace(5) %tmp = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32]* %stack, i32 0, i32 %tmp - %load = load volatile i32, i32* %arrayidx1 + %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 %tmp + %load = load volatile i32, i32 addrspace(5)* %arrayidx1 store i32 %load, i32 addrspace(1)* %out ret void } ; CHECK-LABEL: @volatile_store( ; CHECK: alloca [4 x i32] -; CHECK: store volatile i32 %tmp, i32* +; CHECK: store volatile i32 %tmp, i32 addrspace(5)* define amdgpu_kernel void @volatile_store(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: - %stack = alloca [4 x i32], align 4 + %stack = alloca [4 x i32], align 4, addrspace(5) %tmp = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32]* %stack, i32 0, i32 %tmp - store volatile i32 %tmp, i32* %arrayidx1 + %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %stack, i32 0, i32 %tmp + store volatile i32 %tmp, i32 addrspace(5)* 
%arrayidx1 ret void } @@ -32,11 +33,11 @@ ; CHECK: load volatile double define amdgpu_kernel void @volatile_and_non_volatile_load(double addrspace(1)* nocapture %arg, i32 %arg1) #0 { bb: - %tmp = alloca double, align 8 - store double 0.000000e+00, double* %tmp, align 8 + %tmp = alloca double, align 8, addrspace(5) + store double 0.000000e+00, double addrspace(5)* %tmp, align 8 - %tmp4 = load double, double* %tmp, align 8 - %tmp5 = load volatile double, double* %tmp, align 8 + %tmp4 = load double, double addrspace(5)* %tmp, align 8 + %tmp5 = load volatile double, double addrspace(5)* %tmp, align 8 store double %tmp4, double addrspace(1)* %arg ret void Index: test/CodeGen/AMDGPU/pv-packing.ll =================================================================== --- test/CodeGen/AMDGPU/pv-packing.ll +++ test/CodeGen/AMDGPU/pv-packing.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman | FileCheck %s ;CHECK: DOT4 T{{[0-9]\.X}} ;CHECK: MULADD_IEEE * T{{[0-9]\.W}} Index: test/CodeGen/AMDGPU/pv.ll =================================================================== --- test/CodeGen/AMDGPU/pv.ll +++ test/CodeGen/AMDGPU/pv.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 < %s | FileCheck %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz < %s | FileCheck %s ; CHECK: DOT4 * T{{[0-9]\.W}} (MASKED) ; CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X Index: test/CodeGen/AMDGPU/r600-constant-array-fixup.ll =================================================================== --- test/CodeGen/AMDGPU/r600-constant-array-fixup.ll +++ test/CodeGen/AMDGPU/r600-constant-array-fixup.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj -march=r600 -mcpu=cypress -verify-machineinstrs < %s | llvm-readobj -relocations -symbols | FileCheck %s +; RUN: llc -filetype=obj -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | llvm-readobj -relocations -symbols | FileCheck %s @arr = internal unnamed_addr 
addrspace(2) constant [4 x i32] [i32 4, i32 5, i32 6, i32 7], align 4 Index: test/CodeGen/AMDGPU/r600-encoding.ll =================================================================== --- test/CodeGen/AMDGPU/r600-encoding.ll +++ test/CodeGen/AMDGPU/r600-encoding.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=redwood | FileCheck --check-prefix=EG %s -; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rs880 | FileCheck --check-prefix=R600 %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -show-mc-encoding -mcpu=redwood | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -show-mc-encoding -mcpu=rs880 | FileCheck --check-prefix=R600 %s ; The earliest R600 GPUs have a slightly different encoding than the rest of ; the VLIW4/5 GPUs. Index: test/CodeGen/AMDGPU/r600-export-fix.ll =================================================================== --- test/CodeGen/AMDGPU/r600-export-fix.ll +++ test/CodeGen/AMDGPU/r600-export-fix.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=cedar | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cedar | FileCheck %s ;CHECK: EXPORT T{{[0-9]}}.XYZW ;CHECK: EXPORT T{{[0-9]}}.0000 Index: test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll =================================================================== --- test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll +++ test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cayman < %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s define amdgpu_ps void @main(<4 x float> inreg %arg, <4 x float> inreg %arg1) { main_body: Index: test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll =================================================================== --- test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll +++ test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cypress 
-start-after safe-stack %s -o - | FileCheck %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -start-after safe-stack %s -o - | FileCheck %s ; Don't crash ; CHECK: MAX_UINT Index: test/CodeGen/AMDGPU/r600.global_atomics.ll =================================================================== --- test/CodeGen/AMDGPU/r600.global_atomics.ll +++ test/CodeGen/AMDGPU/r600.global_atomics.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; TODO: Add _RTN versions and merge with the GCN test Index: test/CodeGen/AMDGPU/r600.private-memory.ll =================================================================== --- test/CodeGen/AMDGPU/r600.private-memory.ll +++ test/CodeGen/AMDGPU/r600.private-memory.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" declare i32 @llvm.r600.read.tidig.x() nounwind readnone @@ -12,13 +13,13 @@ define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) { entry: - %0 = alloca [2 x i32] - %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0 - %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1 - store i32 0, i32* %1 - store i32 1, i32* %2 - %3 = getelementptr [2 x i32], [2 x i32]* %0, 
i32 0, i32 %in - %4 = load i32, i32* %3 + %0 = alloca [2 x i32], addrspace(5) + %1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0 + %2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1 + store i32 0, i32 addrspace(5)* %1 + store i32 1, i32 addrspace(5)* %2 + %3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in + %4 = load i32, i32 addrspace(5)* %3 %5 = call i32 @llvm.r600.read.tidig.x() %6 = add i32 %4, %5 store i32 %6, i32 addrspace(1)* %out Index: test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll =================================================================== --- test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll +++ test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}tgid_x: ; EG: MEM_RAT_CACHELESS STORE_RAW T1.X @@ -55,7 +55,7 @@ } ; FUNC-LABEL: {{^}}test_implicit: -; 36 prepended implicit bytes + 4(out pointer) + 4*4 = 56 +; 36 prepended implicit bytes + 4(out pointer) + 4*4 = 56 ; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 56 define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 { %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr() Index: test/CodeGen/AMDGPU/r600cfg.ll =================================================================== --- test/CodeGen/AMDGPU/r600cfg.ll +++ test/CodeGen/AMDGPU/r600cfg.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) { main_body: Index: test/CodeGen/AMDGPU/rcp-pattern.ll =================================================================== --- test/CodeGen/AMDGPU/rcp-pattern.ll +++
test/CodeGen/AMDGPU/rcp-pattern.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}rcp_pat_f32: ; GCN: s_load_dword [[SRC:s[0-9]+]] Index: test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll =================================================================== --- test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll +++ test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll @@ -1,4 +1,4 @@ -; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck %s ; CHECK: invalid register "flat_scratch_lo" for subtarget. 
Index: test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll =================================================================== --- test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll +++ test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll @@ -1,4 +1,4 @@ -; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s 2>&1 | FileCheck %s ; CHECK: invalid type for register "exec". Index: test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll =================================================================== --- test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll +++ test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll @@ -1,4 +1,4 @@ -; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s 2>&1 | FileCheck %s ; CHECK: invalid type for register "m0". Index: test/CodeGen/AMDGPU/read_register.ll =================================================================== --- test/CodeGen/AMDGPU/read_register.ll +++ test/CodeGen/AMDGPU/read_register.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.read_register.i32(metadata) #0 declare i64 @llvm.read_register.i64(metadata) #0 Index: test/CodeGen/AMDGPU/readcyclecounter.ll =================================================================== --- test/CodeGen/AMDGPU/readcyclecounter.ll +++ test/CodeGen/AMDGPU/readcyclecounter.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck 
-check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s declare i64 @llvm.readcyclecounter() #0 Index: test/CodeGen/AMDGPU/reduce-load-width-alignment.ll =================================================================== --- test/CodeGen/AMDGPU/reduce-load-width-alignment.ll +++ test/CodeGen/AMDGPU/reduce-load-width-alignment.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}reduce_i64_load_align_4_width_to_i32: ; GCN: buffer_load_dword [[VAL:v[0-9]+]] Index: test/CodeGen/AMDGPU/reduce-store-width-alignment.ll =================================================================== --- test/CodeGen/AMDGPU/reduce-store-width-alignment.ll +++ test/CodeGen/AMDGPU/reduce-store-width-alignment.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < 
%s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}store_v2i32_as_v4i16_align_4: ; GCN: s_load_dwordx2 Index: test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll =================================================================== --- test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll +++ test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -o /dev/null < %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -o /dev/null < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -o /dev/null < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -o /dev/null < %s ; The register coalescer introduces a verifier error which later ; results in a crash during scheduling. Index: test/CodeGen/AMDGPU/reorder-stores.ll =================================================================== --- test/CodeGen/AMDGPU/reorder-stores.ll +++ test/CodeGen/AMDGPU/reorder-stores.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store: ; SI: buffer_load_dwordx4 Index: test/CodeGen/AMDGPU/ret.ll =================================================================== --- test/CodeGen/AMDGPU/ret.ll +++ test/CodeGen/AMDGPU/ret.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < 
%s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}vgpr: ; GCN: v_mov_b32_e32 v1, v0 Index: test/CodeGen/AMDGPU/ret_jump.ll =================================================================== --- test/CodeGen/AMDGPU/ret_jump.ll +++ test/CodeGen/AMDGPU/ret_jump.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; This should end with an no-op sequence of exec mask manipulations ; Mask should be in original state after executed unreachable block Index: test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll =================================================================== --- test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll +++ test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-any-address-space-out-arguments -amdgpu-rewrite-out-arguments < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa-amdgiz -amdgpu-any-address-space-out-arguments -amdgpu-rewrite-out-arguments < %s | FileCheck %s ; CHECK: %void_one_out_non_private_arg_i32_1_use = type { i32 } ; CHECK: %bitcast_pointer_as1 = type { <3 x i32> } Index: test/CodeGen/AMDGPU/rotl.i64.ll =================================================================== --- test/CodeGen/AMDGPU/rotl.i64.ll +++ test/CodeGen/AMDGPU/rotl.i64.ll @@ -1,5 +1,5 @@ -; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s ; BOTH-LABEL: {{^}}s_rotl_i64: ; BOTH-DAG: s_lshl_b64 Index: test/CodeGen/AMDGPU/rotl.ll =================================================================== --- test/CodeGen/AMDGPU/rotl.ll +++ test/CodeGen/AMDGPU/rotl.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}rotl_i32: ; R600: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x Index: test/CodeGen/AMDGPU/rotr.i64.ll =================================================================== --- test/CodeGen/AMDGPU/rotr.i64.ll +++ test/CodeGen/AMDGPU/rotr.i64.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s -; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=BOTH %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=BOTH %s ; BOTH-LABEL: {{^}}s_rotr_i64: ; BOTH-DAG: s_sub_i32 Index: test/CodeGen/AMDGPU/rotr.ll =================================================================== --- test/CodeGen/AMDGPU/rotr.ll +++ test/CodeGen/AMDGPU/rotr.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}rotr_i32: ; R600: BIT_ALIGN_INT Index: test/CodeGen/AMDGPU/rsq.ll =================================================================== --- test/CodeGen/AMDGPU/rsq.ll +++ test/CodeGen/AMDGPU/rsq.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE 
-check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.sqrt.f32(float) nounwind readnone Index: test/CodeGen/AMDGPU/rv7x0_count3.ll =================================================================== --- test/CodeGen/AMDGPU/rv7x0_count3.ll +++ test/CodeGen/AMDGPU/rv7x0_count3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rv710 | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -show-mc-encoding -mcpu=rv710 | FileCheck %s ; CHECK: TEX 9 @6 ; encoding: [0x06,0x00,0x00,0x00,0x00,0x04,0x88,0x80] define amdgpu_vs void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) { Index: test/CodeGen/AMDGPU/s_addk_i32.ll =================================================================== --- test/CodeGen/AMDGPU/s_addk_i32.ll +++ test/CodeGen/AMDGPU/s_addk_i32.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}s_addk_i32_k0: ; SI: s_load_dword [[VAL:s[0-9]+]] Index: test/CodeGen/AMDGPU/s_movk_i32.ll =================================================================== --- test/CodeGen/AMDGPU/s_movk_i32.ll +++ 
test/CodeGen/AMDGPU/s_movk_i32.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}s_movk_i32_k0: ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}} Index: test/CodeGen/AMDGPU/s_mulk_i32.ll =================================================================== --- test/CodeGen/AMDGPU/s_mulk_i32.ll +++ test/CodeGen/AMDGPU/s_mulk_i32.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}s_mulk_i32_k0: ; SI: s_load_dword [[VAL:s[0-9]+]] Index: test/CodeGen/AMDGPU/saddo.ll =================================================================== --- test/CodeGen/AMDGPU/saddo.ll +++ test/CodeGen/AMDGPU/saddo.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; 
RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs< %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs< %s declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone Index: test/CodeGen/AMDGPU/salu-to-valu.ll =================================================================== --- test/CodeGen/AMDGPU/salu-to-valu.ll +++ test/CodeGen/AMDGPU/salu-to-valu.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI --check-prefix=GCN-HSA %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa-amdgiz -mcpu=bonaire 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI --check-prefix=GCN-HSA %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 @@ -17,7 +17,7 @@ ; Make sure we aren't using VGPRs for the source operand of s_mov_b64 ; GCN-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v -; Make sure we aren't using VGPR's for the srsrc operand of BUFFER_LOAD_* +; Make sure we aren't using VGPR's for the srsrc operand of BUFFER_LOAD_* ; instructions ; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 ; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 Index: test/CodeGen/AMDGPU/sampler-resource-id.ll =================================================================== --- test/CodeGen/AMDGPU/sampler-resource-id.ll +++ test/CodeGen/AMDGPU/sampler-resource-id.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_0: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] @@ -46,20 +46,20 @@ !0 = !{void (i32, i32 addrspace(1)*)* @test_0, !10, !20, !30, !40, !50} !10 = !{!"kernel_arg_addr_space", i32 0, i32 1} !20 = !{!"kernel_arg_access_qual", !"none", !"none"} -!30 = !{!"kernel_arg_type", !"sampler_t", !"int*"} -!40 = !{!"kernel_arg_base_type", !"sampler_t", !"int*"} +!30 = !{!"kernel_arg_type", !"sampler_t", !"int addrspace(5)*"} +!40 = !{!"kernel_arg_base_type", !"sampler_t", !"int addrspace(5)*"} !50 = !{!"kernel_arg_type_qual", !"", !""} !1 = !{void (i32, i32, i32 addrspace(1)*)* @test_1, !11, !21, !31, !41, !51} !11 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 1} !21 = !{!"kernel_arg_access_qual", !"none", !"none", !"none"} -!31 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"int*"} -!41 = !{!"kernel_arg_base_type", !"sampler_t",
!"sampler_t", !"int*"} +!31 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"int addrspace(5)*"} +!41 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"int addrspace(5)*"} !51 = !{!"kernel_arg_type_qual", !"", !"", !""} !2 = !{void (i32, i32, i32, i32 addrspace(1)*)* @test_2, !12, !22, !32, !42, !52} !12 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 1} !22 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none"} -!32 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int*"} -!42 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int*"} +!32 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int addrspace(5)*"} +!42 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int addrspace(5)*"} !52 = !{!"kernel_arg_type_qual", !"", !"", !"", !""} Index: test/CodeGen/AMDGPU/scalar_to_vector.ll =================================================================== --- test/CodeGen/AMDGPU/scalar_to_vector.ll +++ test/CodeGen/AMDGPU/scalar_to_vector.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; XXX - Why the packing? 
; GCN-LABEL: {{^}}scalar_to_vector_v2i32: Index: test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll +++ test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs < %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs < %s ; REQUIRES: asserts define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { Index: test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll +++ test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs < %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs < %s ; REQUIRES: asserts define amdgpu_kernel void @main() #0 { Index: test/CodeGen/AMDGPU/schedule-fs-loop.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-fs-loop.ll +++ test/CodeGen/AMDGPU/schedule-fs-loop.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs < %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs < %s ; REQUIRES: asserts define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { Index: test/CodeGen/AMDGPU/schedule-global-loads.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-global-loads.ll +++ test/CodeGen/AMDGPU/schedule-global-loads.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck 
-check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s ; FIXME: This currently doesn't do a great job of clustering the ; loads, which end up with extra moves between them. Right now, it Index: test/CodeGen/AMDGPU/schedule-if-2.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-if-2.ll +++ test/CodeGen/AMDGPU/schedule-if-2.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs ;REQUIRES: asserts define amdgpu_kernel void @main() { Index: test/CodeGen/AMDGPU/schedule-if.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-if.ll +++ test/CodeGen/AMDGPU/schedule-if.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs ;REQUIRES: asserts define amdgpu_kernel void @main() { Index: test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll +++ test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI -check-prefix=GCN %s +; RUN: llc 
-march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=VI -check-prefix=GCN %s ; FUNC-LABEL: {{^}}cluster_arg_loads: ; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9 Index: test/CodeGen/AMDGPU/schedule-regpressure-limit.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-regpressure-limit.ll +++ test/CodeGen/AMDGPU/schedule-regpressure-limit.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck %s ; We expect a two digit VGPR usage here, not a three digit. 
; CHECK: NumVgprs: {{[0-9][0-9]$}} Index: test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll +++ test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tahiti -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=fiji -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s ; SI: NumSgprs: {{[1-9]$}} ; SI: NumVgprs: {{[1-9]$}} Index: test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll +++ test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -march=amdgcn -verify-machineinstrs -mattr=+vgpr-spilling < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+vgpr-spilling < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -O0 -march=amdgcn 
-mtriple=amdgcn---amdgiz -verify-machineinstrs -mattr=+vgpr-spilling < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs -mattr=+vgpr-spilling < %s | FileCheck -check-prefix=GCN %s declare void @llvm.amdgcn.s.barrier() nounwind convergent Index: test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll +++ test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -stress-sched -verify-misched ;REQUIRES: asserts define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) { Index: test/CodeGen/AMDGPU/scheduler-subrange-crash.ll =================================================================== --- test/CodeGen/AMDGPU/scheduler-subrange-crash.ll +++ test/CodeGen/AMDGPU/scheduler-subrange-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck %s ; REQUIRES: asserts ; ; This test used to crash with the following assertion: Index: test/CodeGen/AMDGPU/scratch-buffer.ll =================================================================== --- test/CodeGen/AMDGPU/scratch-buffer.ll +++ test/CodeGen/AMDGPU/scratch-buffer.ll @@ -1,5 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz 
-mcpu=tonga < %s | FileCheck -check-prefix=GCN %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; When a frame index offset is more than 12-bits, make sure we don't store ; it in mubuf's offset field. @@ -15,26 +16,26 @@ define amdgpu_kernel void @legal_offset_fi(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) { entry: - %scratch0 = alloca [8192 x i32] - %scratch1 = alloca [8192 x i32] + %scratch0 = alloca [8192 x i32], addrspace(5) + %scratch1 = alloca [8192 x i32], addrspace(5) - %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 0 - store i32 1, i32* %scratchptr0 + %scratchptr0 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 0 + store i32 1, i32 addrspace(5)* %scratchptr0 - %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 0 - store i32 2, i32* %scratchptr1 + %scratchptr1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 0 + store i32 2, i32 addrspace(5)* %scratchptr1 %cmp = icmp eq i32 %cond, 0 br i1 %cmp, label %if, label %else if: - %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset - %if_value = load i32, i32* %if_ptr + %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset + %if_value = load i32, i32 addrspace(5)* %if_ptr br label %done else: - %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset - %else_value = load i32, i32* %else_ptr + %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset + %else_value = load i32, i32 addrspace(5)* %else_ptr br label %done done: @@ -55,29 +56,29 @@ define amdgpu_kernel void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) { 
entry: - %scratch0 = alloca [8192 x i32] - %scratch1 = alloca [8192 x i32] + %scratch0 = alloca [8192 x i32], addrspace(5) + %scratch1 = alloca [8192 x i32], addrspace(5) %offset0 = load i32, i32 addrspace(1)* %offsets - %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %offset0 - store i32 %offset0, i32* %scratchptr0 + %scratchptr0 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %offset0 + store i32 %offset0, i32 addrspace(5)* %scratchptr0 %offsetptr1 = getelementptr i32, i32 addrspace(1)* %offsets, i32 1 %offset1 = load i32, i32 addrspace(1)* %offsetptr1 - %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %offset1 - store i32 %offset1, i32* %scratchptr1 + %scratchptr1 = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %offset1 + store i32 %offset1, i32 addrspace(5)* %scratchptr1 %cmp = icmp eq i32 %cond, 0 br i1 %cmp, label %if, label %else if: - %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset - %if_value = load i32, i32* %if_ptr + %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset + %if_value = load i32, i32 addrspace(5)* %if_ptr br label %done else: - %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset - %else_value = load i32, i32* %else_ptr + %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset + %else_value = load i32, i32 addrspace(5)* %else_ptr br label %done done: @@ -90,10 +91,10 @@ ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:16{{$}} define amdgpu_kernel void @neg_vaddr_offset(i32 %offset) { entry: - %array = alloca [8192 x i32] + %array = alloca [8192 x i32], addrspace(5) %ptr_offset = add i32 %offset, 4 - %ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 %ptr_offset - store i32 0, i32* %ptr + %ptr = 
getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %ptr_offset + store i32 0, i32 addrspace(5)* %ptr ret void } @@ -101,11 +102,11 @@ ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:20 define amdgpu_kernel void @pos_vaddr_offset(i32 addrspace(1)* %out, i32 %offset) { entry: - %array = alloca [8192 x i32] - %ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 4 - store i32 0, i32* %ptr - %load_ptr = getelementptr [8192 x i32], [8192 x i32]* %array, i32 0, i32 %offset - %val = load i32, i32* %load_ptr + %array = alloca [8192 x i32], addrspace(5) + %ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 4 + store i32 0, i32 addrspace(5)* %ptr + %load_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %array, i32 0, i32 %offset + %val = load i32, i32 addrspace(5)* %load_ptr store i32 %val, i32 addrspace(1)* %out ret void } Index: test/CodeGen/AMDGPU/scripts/chaddr.sh =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/scripts/chaddr.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# +# script to convert .ll from old address space mapping +# to new address space mapping for AMDGPU. +# +# addr space change: +# 0 -> 5 +# 4 -> 0 +# + +if [[ $# -ne 2 ]]; then + echo "Convert .ll from old address space mapping to new address space mapping for AMDGPU" + echo "Usage: chaddr.sh file newfile" + exit +fi + +#set -x + +file=$1 +newf=$2 + +if grep amdgiz $file >/dev/null; then + exit +fi + +sed -e 's:\([]a-zA-Z0-9_][]a-zA-Z0-9_]*\)\*:\1 addrspace(5)*:g' \ + -e 's: addrspace(4)\*:*:g' \ + -e 's: addrspace(0)\*: addrspace(5)*:g' \ + -e 's:-mtriple=\([^ ]*\):-mtriple=\1-amdgiz:g' \ + -e 's:-march=\([^ ]*\):-march=\1 -mtriple=\1---amdgiz:g' \ + -e 's:-mtriple=\([^ ]*\) \(.*\) -mtriple=\([^ ]*\): \2 -mtriple=\3:g' \ + -e 's: = alloca \(.*\): = alloca \1, addrspace(5):g' \ + $file >$newf + +if ! 
grep mtriple $newf >/dev/null; then + sed -i -e 's:-march=\([^ ]*\):-march=\1 -mtriple=\1---amdgiz:g' $newf +fi + +## add missing datalayout +if grep alloca $file >/dev/null && ! grep datalayout $file >/dev/null; then + line=$(cat -n $file | grep ';.*RUN'| tail -1 |while read a b; do echo $a; done) + let "line=line+1" + sed -i "${line}i"'target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"' $newf +fi Index: test/CodeGen/AMDGPU/sdiv.ll =================================================================== --- test/CodeGen/AMDGPU/sdiv.ll +++ test/CodeGen/AMDGPU/sdiv.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; The code generated by sdiv is long and complex and may frequently change. ; The goal of this test is to make sure the ISel doesn't fail. 
Index: test/CodeGen/AMDGPU/sdivrem24.ll =================================================================== --- test/CodeGen/AMDGPU/sdivrem24.ll +++ test/CodeGen/AMDGPU/sdivrem24.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}sdiv24_i8: ; SI: v_cvt_f32_i32 Index: test/CodeGen/AMDGPU/sdivrem64.ll =================================================================== --- test/CodeGen/AMDGPU/sdivrem64.ll +++ test/CodeGen/AMDGPU/sdivrem64.ll @@ -1,6 +1,6 @@ -;RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s -;RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s -;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s ;FUNC-LABEL: {{^}}s_test_sdiv: ;EG: RECIP_UINT Index: 
test/CodeGen/AMDGPU/sdwa-peephole.ll =================================================================== --- test/CodeGen/AMDGPU/sdwa-peephole.ll +++ test/CodeGen/AMDGPU/sdwa-peephole.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=NOSDWA -check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=SDWA -check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=SDWA -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -amdgpu-sdwa-peephole=0 -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=NOSDWA -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=SDWA -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -amdgpu-sdwa-peephole -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=SDWA -check-prefix=GCN %s ; GCN-LABEL: {{^}}add_shr_i32: ; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll =================================================================== --- test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll +++ test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < 
%s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; -------------------------------------------------------------------------------- ; Don't fold if fneg can fold into the source Index: test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll =================================================================== --- test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll +++ test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}add_select_fabs_fabs_f32: ; GCN: buffer_load_dword [[X:v[0-9]+]] Index: test/CodeGen/AMDGPU/select-i1.ll =================================================================== --- test/CodeGen/AMDGPU/select-i1.ll +++ test/CodeGen/AMDGPU/select-i1.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global 
-verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FIXME: This should go in existing select.ll test, except the current testcase there is broken on SI Index: test/CodeGen/AMDGPU/select-opt.ll =================================================================== --- test/CodeGen/AMDGPU/select-opt.ll +++ test/CodeGen/AMDGPU/select-opt.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Make sure to test with f32 and i32 compares. If we have to use float ; compares, we always have multiple condition registers. If we can do Index: test/CodeGen/AMDGPU/select-vectors.ll =================================================================== --- test/CodeGen/AMDGPU/select-vectors.ll +++ test/CodeGen/AMDGPU/select-vectors.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck 
-check-prefix=GCN -check-prefix=GFX9 %s ; Test expansion of scalar selects on vectors. ; Evergreen not enabled since it seems to be having problems with doubles. Index: test/CodeGen/AMDGPU/select.f16.ll =================================================================== --- test/CodeGen/AMDGPU/select.f16.ll +++ test/CodeGen/AMDGPU/select.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}select_f16: ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/select.ll =================================================================== --- test/CodeGen/AMDGPU/select.ll +++ test/CodeGen/AMDGPU/select.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; Normally icmp + select is optimized to select_cc, when this happens the Index: test/CodeGen/AMDGPU/select64.ll =================================================================== --- test/CodeGen/AMDGPU/select64.ll +++ test/CodeGen/AMDGPU/select64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s +; RUN: llc < %s 
-march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck %s ; CHECK-LABEL: {{^}}select0: ; i64 select should be split into two i32 selects, and we shouldn't need Index: test/CodeGen/AMDGPU/selectcc-cnd.ll =================================================================== --- test/CodeGen/AMDGPU/selectcc-cnd.ll +++ test/CodeGen/AMDGPU/selectcc-cnd.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;CHECK-NOT: SETE ;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x, Index: test/CodeGen/AMDGPU/selectcc-cnde-int.ll =================================================================== --- test/CodeGen/AMDGPU/selectcc-cnde-int.ll +++ test/CodeGen/AMDGPU/selectcc-cnde-int.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;CHECK-NOT: SETE_INT ;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x, Index: test/CodeGen/AMDGPU/selectcc-icmp-select-float.ll =================================================================== --- test/CodeGen/AMDGPU/selectcc-icmp-select-float.ll +++ test/CodeGen/AMDGPU/selectcc-icmp-select-float.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; Note additional optimizations may cause this SGT to be replaced with a -; CND* instruction. +; CND* instruction. 
; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, literal.x, ; CHECK-NEXT: -1 ; Test a selectcc with i32 LHS/RHS and float True/False Index: test/CodeGen/AMDGPU/selectcc-opt.ll =================================================================== --- test/CodeGen/AMDGPU/selectcc-opt.ll +++ test/CodeGen/AMDGPU/selectcc-opt.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_a: @@ -56,7 +56,7 @@ ret void } -; Test a CND*_INT instruction with float true/false values +; Test a CND*_INT instruction with float true/false values ; EG-LABEL: {{^}}test_c: ; EG: CND{{[GTE]+}}_INT define amdgpu_kernel void @test_c(float addrspace(1)* %out, i32 %in) { Index: test/CodeGen/AMDGPU/selectcc.ll =================================================================== --- test/CodeGen/AMDGPU/selectcc.ll +++ test/CodeGen/AMDGPU/selectcc.ll @@ -1,6 +1,6 @@ -; RUN: llc -verify-machineinstrs -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -verify-machineinstrs -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC 
%s +; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}selectcc_i64: ; EG: XOR_INT Index: test/CodeGen/AMDGPU/selected-stack-object.ll =================================================================== --- test/CodeGen/AMDGPU/selected-stack-object.ll +++ test/CodeGen/AMDGPU/selected-stack-object.ll @@ -2,7 +2,7 @@ ; XFAIL: * ; REQUIRES: asserts -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck %s ; See also local-stack-slot-bug.ll ; This fails because a stack object is created during instruction selection. Index: test/CodeGen/AMDGPU/set-dx10.ll =================================================================== --- test/CodeGen/AMDGPU/set-dx10.ll +++ test/CodeGen/AMDGPU/set-dx10.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; These tests check that floating point comparisons which are used by select ; to store integer true (-1) and false (0) values are lowered to one of the -; SET*DX10 instructions. +; SET*DX10 instructions.
; CHECK: {{^}}fcmp_une_select_fptosi: ; CHECK: LSHR Index: test/CodeGen/AMDGPU/setcc-equivalent.ll =================================================================== --- test/CodeGen/AMDGPU/setcc-equivalent.ll +++ test/CodeGen/AMDGPU/setcc-equivalent.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG %s ; EG-LABEL: {{^}}and_setcc_setcc_i32: ; EG: AND_INT Index: test/CodeGen/AMDGPU/setcc-fneg-constant.ll =================================================================== --- test/CodeGen/AMDGPU/setcc-fneg-constant.ll +++ test/CodeGen/AMDGPU/setcc-fneg-constant.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s ; Test fcmp pred (fneg x), c -> fcmp (swapped pred) x, -c combine. 
Index: test/CodeGen/AMDGPU/setcc-opt.ll =================================================================== --- test/CodeGen/AMDGPU/setcc-opt.ll +++ test/CodeGen/AMDGPU/setcc-opt.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0: ; GCN-NOT: v_cmp Index: test/CodeGen/AMDGPU/setcc-sext.ll =================================================================== --- test/CodeGen/AMDGPU/setcc-sext.ll +++ test/CodeGen/AMDGPU/setcc-sext.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}setcc_sgt_true_sext: ; GCN: v_cmp_le_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/setcc64.ll =================================================================== --- test/CodeGen/AMDGPU/setcc64.ll +++ test/CodeGen/AMDGPU/setcc64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s| FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s| FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; XXX: Merge this into setcc, once R600 supports 64-bit operations Index: test/CodeGen/AMDGPU/seto.ll =================================================================== --- test/CodeGen/AMDGPU/seto.ll +++ test/CodeGen/AMDGPU/seto.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}main: ; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]] Index: test/CodeGen/AMDGPU/setuo.ll =================================================================== --- test/CodeGen/AMDGPU/setuo.ll +++ test/CodeGen/AMDGPU/setuo.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}main: ; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]] Index: test/CodeGen/AMDGPU/sext-eliminate.ll =================================================================== --- test/CodeGen/AMDGPU/sext-eliminate.ll +++ 
test/CodeGen/AMDGPU/sext-eliminate.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}sext_in_reg_i1_i32_add: Index: test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll =================================================================== --- test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll +++ test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s ; ; EG-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount: ; EG: MEM_{{.*}} MSKOR [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]] Index: test/CodeGen/AMDGPU/sgpr-control-flow.ll =================================================================== --- test/CodeGen/AMDGPU/sgpr-control-flow.ll +++ test/CodeGen/AMDGPU/sgpr-control-flow.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; ; ; Most SALU instructions ignore control flow, so we need to make sure Index: test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll =================================================================== --- test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll +++ test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false 
-march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; Copy VGPR -> SGPR used twice as an instruction operand, which is then ; used in an REG_SEQUENCE that also needs to be handled. Index: test/CodeGen/AMDGPU/sgpr-copy.ll =================================================================== --- test/CodeGen/AMDGPU/sgpr-copy.ll +++ test/CodeGen/AMDGPU/sgpr-copy.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}phi1: ; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0 Index: test/CodeGen/AMDGPU/sgprcopies.ll =================================================================== --- test/CodeGen/AMDGPU/sgprcopies.ll +++ test/CodeGen/AMDGPU/sgprcopies.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck -check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}checkTwoBlocksWithUniformBranch ; GCN: BB0_2 Index: test/CodeGen/AMDGPU/shared-op-cycle.ll =================================================================== --- test/CodeGen/AMDGPU/shared-op-cycle.ll +++ test/CodeGen/AMDGPU/shared-op-cycle.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 
-mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; CHECK: {{^}}main: ; CHECK: MULADD_IEEE * Index: test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll =================================================================== --- test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll +++ test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Extract the high bit of the 1st quarter ; GCN-LABEL: {{^}}v_uextract_bit_31_i128: Index: test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll =================================================================== --- test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll +++ test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; FIXME: Fails with -enable-var-scope ; Make sure 64-bit BFE pattern does a 32-bit BFE on the relevant half. 
Index: test/CodeGen/AMDGPU/shift-i64-opts.ll =================================================================== --- test/CodeGen/AMDGPU/shift-i64-opts.ll +++ test/CodeGen/AMDGPU/shift-i64-opts.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=FAST64 -check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=SLOW64 -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s | FileCheck -check-prefix=FAST64 -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire < %s | FileCheck -check-prefix=SLOW64 -check-prefix=GCN %s ; lshr (i64 x), c: c > 32 => reg_sequence lshr (i32 hi_32(x)), (c - 32), 0 Index: test/CodeGen/AMDGPU/shl-add-to-add-shl.ll =================================================================== --- test/CodeGen/AMDGPU/shl-add-to-add-shl.ll +++ test/CodeGen/AMDGPU/shl-add-to-add-shl.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji < %s | FileCheck %s ; Check transformation shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1) ; Only one shift if expected, GEP shall not produce a separate shift Index: test/CodeGen/AMDGPU/shl.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/shl.v2i16.ll +++ test/CodeGen/AMDGPU/shl.v2i16.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | 
FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s ; GCN-LABEL: {{^}}s_shl_v2i16: ; GFX9: s_load_dword [[LHS:s[0-9]+]] Index: test/CodeGen/AMDGPU/shl_add_constant.ll =================================================================== --- test/CodeGen/AMDGPU/shl_add_constant.ll +++ test/CodeGen/AMDGPU/shl_add_constant.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() #1 Index: test/CodeGen/AMDGPU/shl_add_ptr.ll =================================================================== --- test/CodeGen/AMDGPU/shl_add_ptr.ll +++ test/CodeGen/AMDGPU/shl_add_ptr.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck 
-check-prefix=SI %s ; Test that doing a shift of a pointer with a constant add will be ; folded into the constant offset addressing mode even if the add has Index: test/CodeGen/AMDGPU/shrink-add-sub-constant.ll =================================================================== --- test/CodeGen/AMDGPU/shrink-add-sub-constant.ll +++ test/CodeGen/AMDGPU/shrink-add-sub-constant.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; Test that add/sub with a constant is swapped to sub/add with negated ; constant to minimize code size. Index: test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll =================================================================== --- test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll +++ test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: opt -mtriple=amdgcn---amdgiz -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; OPT-LABEL: @annotate_unreachable_noloop( Index: test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll =================================================================== --- test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll +++ test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -S -structurizecfg 
-si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: opt -mtriple=amdgcn---amdgiz -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; OPT-LABEL: @annotate_unreachable( Index: test/CodeGen/AMDGPU/si-annotate-cf.ll =================================================================== --- test/CodeGen/AMDGPU/si-annotate-cf.ll +++ test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s ; FUNC-LABEL: {{^}}break_inserted_outside_of_loop: Index: test/CodeGen/AMDGPU/si-annotate-cfg-loop-assert.ll =================================================================== --- test/CodeGen/AMDGPU/si-annotate-cfg-loop-assert.ll +++ test/CodeGen/AMDGPU/si-annotate-cfg-loop-assert.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}test: ; CHECK s_and_saveexec_b64 Index: test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll =================================================================== --- test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll +++ 
test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll @@ -1,4 +1,4 @@ -; RUN: llc -o - %s -march=amdgcn -mcpu=verde -verify-machineinstrs -stop-after expand-isel-pseudos | FileCheck %s +; RUN: llc -o - %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs -stop-after expand-isel-pseudos | FileCheck %s ; This test verifies that the instruction selection will add the implicit ; register operands in the correct order when modifying the opcode of an ; instruction to V_ADD_I32_e32. Index: test/CodeGen/AMDGPU/si-lod-bias.ll =================================================================== --- test/CodeGen/AMDGPU/si-lod-bias.ll +++ test/CodeGen/AMDGPU/si-lod-bias.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; This shader has the potential to generated illegal VGPR to SGPR copies if ; the wrong register class is used for the REG_SEQUENCE instructions. 
Index: test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll =================================================================== --- test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll +++ test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}if_with_kill: ; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]], Index: test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll =================================================================== --- test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll +++ test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator: ; GCN: v_cmp_eq_u32 Index: test/CodeGen/AMDGPU/si-scheduler.ll =================================================================== --- test/CodeGen/AMDGPU/si-scheduler.ll +++ test/CodeGen/AMDGPU/si-scheduler.ll @@ -3,7 +3,7 @@ ; The only way the subtarget knows that the si machine scheduler is being used ; is to specify -mattr=si-scheduler. If we just pass --misched=si, the backend ; won't know what scheduler we are using. -; RUN: llc -march=amdgcn --misched=si -mattr=si-scheduler < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz --misched=si -mattr=si-scheduler < %s | FileCheck %s ; The test checks the "si" machine scheduler pass works correctly. 
Index: test/CodeGen/AMDGPU/si-sgpr-spill.ll =================================================================== --- test/CodeGen/AMDGPU/si-sgpr-spill.ll +++ test/CodeGen/AMDGPU/si-sgpr-spill.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=TOVGPR %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling,-mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=TOVGPR %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+vgpr-spilling,-mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; These tests check that the compiler won't crash when it needs to spill ; SGPRs. Index: test/CodeGen/AMDGPU/si-spill-cf.ll =================================================================== --- test/CodeGen/AMDGPU/si-spill-cf.ll +++ test/CodeGen/AMDGPU/si-spill-cf.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn < %s -verify-machineinstrs | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s -verify-machineinstrs | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s -verify-machineinstrs | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s -verify-machineinstrs | FileCheck -check-prefix=SI %s ; If this occurs it is likely due to reordering and the restore was ; originally supposed to happen before SI_END_CF. 
Index: test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll =================================================================== --- test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll +++ test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s ; Make sure this doesn't crash. ; ALL-LABEL: {{^}}test: Index: test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll =================================================================== --- test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) Index: test/CodeGen/AMDGPU/si-vector-hang.ll =================================================================== --- test/CodeGen/AMDGPU/si-vector-hang.ll +++ test/CodeGen/AMDGPU/si-vector-hang.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s ; CHECK: {{^}}test_8_min_char: ; CHECK: buffer_store_byte Index: test/CodeGen/AMDGPU/sign_extend.ll =================================================================== --- test/CodeGen/AMDGPU/sign_extend.ll +++ test/CodeGen/AMDGPU/sign_extend.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s ; GCN-LABEL: {{^}}s_sext_i1_to_i32: ; GCN: v_cndmask_b32_e64 Index: 
test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll =================================================================== --- test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll +++ test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck %s ; Check we can compile this bugpoint-reduced test without an ; infinite loop in TLI.SimplifyDemandedBits() due to failure Index: test/CodeGen/AMDGPU/sint_to_fp.f64.ll =================================================================== --- test/CodeGen/AMDGPU/sint_to_fp.f64.ll +++ test/CodeGen/AMDGPU/sint_to_fp.f64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/AMDGPU/sint_to_fp.i64.ll =================================================================== --- test/CodeGen/AMDGPU/sint_to_fp.i64.ll +++ test/CodeGen/AMDGPU/sint_to_fp.i64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s ; FIXME: This should be merged with sint_to_fp.ll, but s_sint_to_fp_v2i64 crashes on r600 Index: test/CodeGen/AMDGPU/sint_to_fp.ll =================================================================== --- 
test/CodeGen/AMDGPU/sint_to_fp.ll +++ test/CodeGen/AMDGPU/sint_to_fp.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}s_sint_to_fp_i32_to_f32: ; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{s[0-9]+$}} Index: test/CodeGen/AMDGPU/sitofp.f16.ll =================================================================== --- test/CodeGen/AMDGPU/sitofp.f16.ll +++ test/CodeGen/AMDGPU/sitofp.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}sitofp_i16_to_f16 ; GCN: 
buffer_load_{{sshort|ushort}} v[[A_I16:[0-9]+]] Index: test/CodeGen/AMDGPU/skip-if-dead.ll =================================================================== --- test/CodeGen/AMDGPU/skip-if-dead.ll +++ test/CodeGen/AMDGPU/skip-if-dead.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos: ; CHECK-NEXT: ; BB#0: Index: test/CodeGen/AMDGPU/smed3.ll =================================================================== --- test/CodeGen/AMDGPU/smed3.ll +++ test/CodeGen/AMDGPU/smed3.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s declare i32 @llvm.amdgcn.workitem.id.x() #0 Index: test/CodeGen/AMDGPU/sminmax.ll =================================================================== --- test/CodeGen/AMDGPU/sminmax.ll +++ test/CodeGen/AMDGPU/sminmax.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}s_abs_i32: ; GCN: s_abs_i32 Index: test/CodeGen/AMDGPU/sminmax.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz 
-mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s ; GCN-LABEL: {{^}}s_abs_v2i16: ; GFX9: s_load_dword [[VAL:s[0-9]+]] Index: test/CodeGen/AMDGPU/smrd-vccz-bug.ll =================================================================== --- test/CodeGen/AMDGPU/smrd-vccz-bug.ll +++ test/CodeGen/AMDGPU/smrd-vccz-bug.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOVCCZ-BUG %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOVCCZ-BUG %s ; GCN-FUNC: {{^}}vccz_workaround: ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0 Index: test/CodeGen/AMDGPU/smrd.ll =================================================================== --- test/CodeGen/AMDGPU/smrd.ll +++ test/CodeGen/AMDGPU/smrd.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SIVI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=CI 
-check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=SIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=SIVI %s ; SMRD load with an immediate offset. ; GCN-LABEL: {{^}}smrd0: Index: test/CodeGen/AMDGPU/sopk-compares.ll =================================================================== --- test/CodeGen/AMDGPU/sopk-compares.ll +++ test/CodeGen/AMDGPU/sopk-compares.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; Since this intrinsic is exposed as a constant after isel, use it to ; defeat the DAG's compare with constant canonicalizations. 
Index: test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll =================================================================== --- test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll +++ test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=TONGA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=TONGA %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; On Tonga and Iceland, limited SGPR availability means care must be taken to ; allocate scratch registers correctly. Check that this test compiles without Index: test/CodeGen/AMDGPU/spill-cfg-position.ll =================================================================== --- test/CodeGen/AMDGPU/spill-cfg-position.ll +++ test/CodeGen/AMDGPU/spill-cfg-position.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s ; Inline spiller can decide to move a spill as early as possible in the basic block. 
; It will skip phis and label, but we also need to make sure it skips instructions Index: test/CodeGen/AMDGPU/spill-m0.ll =================================================================== --- test/CodeGen/AMDGPU/spill-m0.ll +++ test/CodeGen/AMDGPU/spill-m0.ll @@ -1,8 +1,8 @@ -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=1 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck 
-check-prefix=TOVMEM -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=1 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=GCN %s ; XXX - Why does it like to use vcc? Index: test/CodeGen/AMDGPU/spill-scavenge-offset.ll =================================================================== --- test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -enable-misched=0 -post-RA-scheduler=0 < %s | FileCheck %s -; RUN: llc -regalloc=basic -march=amdgcn -mcpu=tonga -enable-misched=0 -post-RA-scheduler=0 < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -enable-misched=0 -post-RA-scheduler=0 < %s | FileCheck %s +; RUN: llc -regalloc=basic -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -enable-misched=0 -post-RA-scheduler=0 < %s | FileCheck %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; ; There is something about Tonga that causes this test to spend a lot of time ; in the default register allocator. Index: test/CodeGen/AMDGPU/spill-to-smem-m0.ll =================================================================== --- test/CodeGen/AMDGPU/spill-to-smem-m0.ll +++ test/CodeGen/AMDGPU/spill-to-smem-m0.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs -stop-before=prologepilog < %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs -stop-before=prologepilog < %s ; Spill to SMEM clobbers M0. Check that the implicit-def dead operand is present ; in the pseudo instructions. 
Index: test/CodeGen/AMDGPU/spill-wide-sgpr.ll =================================================================== --- test/CodeGen/AMDGPU/spill-wide-sgpr.ll +++ test/CodeGen/AMDGPU/spill-wide-sgpr.ll @@ -1,6 +1,6 @@ -; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VGPR %s -; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s -; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VMEM %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VGPR %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=VMEM %s ; ALL-LABEL: {{^}}spill_sgpr_x2: ; SMEM: s_add_u32 m0, s3, 0x100{{$}} Index: test/CodeGen/AMDGPU/split-scalar-i64-add.ll =================================================================== --- test/CodeGen/AMDGPU/split-scalar-i64-add.ll +++ test/CodeGen/AMDGPU/split-scalar-i64-add.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() readnone Index: test/CodeGen/AMDGPU/split-smrd.ll =================================================================== --- test/CodeGen/AMDGPU/split-smrd.ll +++ 
test/CodeGen/AMDGPU/split-smrd.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; FIXME: Move this to sgpr-copy.ll when this is fixed on VI. ; Make sure that when we split an smrd instruction in order to move it to Index: test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll =================================================================== --- test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll +++ test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -enable-amdgpu-aa=0 -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt < %s | FileCheck -check-prefix=GCN %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" @sPrivateStorage = internal addrspace(3) global [256 x [8 x <4 x i64>]] undef Index: test/CodeGen/AMDGPU/srem.ll =================================================================== --- test/CodeGen/AMDGPU/srem.ll +++ test/CodeGen/AMDGPU/srem.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 Index: test/CodeGen/AMDGPU/srl.ll =================================================================== --- test/CodeGen/AMDGPU/srl.ll +++ test/CodeGen/AMDGPU/srl.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s +; XUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() #0 Index: test/CodeGen/AMDGPU/ssubo.ll =================================================================== --- test/CodeGen/AMDGPU/ssubo.ll +++ test/CodeGen/AMDGPU/ssubo.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | 
FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs< %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs< %s declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone Index: test/CodeGen/AMDGPU/store-barrier.ll =================================================================== --- test/CodeGen/AMDGPU/store-barrier.ll +++ test/CodeGen/AMDGPU/store-barrier.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck %s ; This test is for a bug in the machine scheduler where stores without ; an underlying object would be moved across the barrier. 
In this Index: test/CodeGen/AMDGPU/store-hi16.ll =================================================================== --- test/CodeGen/AMDGPU/store-hi16.ll +++ test/CodeGen/AMDGPU/store-hi16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s ; GCN-LABEL: {{^}}store_global_hi_v2i16: ; GCN: s_waitcnt @@ -187,11 +187,11 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_flat_hi_v2i16(i16 addrspace(4)* %out, i32 %arg) #0 { +define void @store_flat_hi_v2i16(i16* %out, i32 %arg) #0 { entry: %value = bitcast i32 %arg to <2 x i16> %hi = extractelement <2 x i16> %value, i32 1 - store i16 %hi, i16 addrspace(4)* %out + store i16 %hi, i16* %out ret void } @@ -205,11 +205,11 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_flat_hi_v2f16(half addrspace(4)* %out, i32 %arg) #0 { +define void @store_flat_hi_v2f16(half* %out, i32 %arg) #0 { entry: %value = bitcast i32 %arg to <2 x half> %hi = extractelement <2 x half> %value, i32 1 - store half %hi, half addrspace(4)* %out + store half %hi, half* %out ret void } @@ -223,11 +223,11 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_flat_hi_i32_shift(i16 addrspace(4)* %out, i32 %value) #0 { +define void @store_flat_hi_i32_shift(i16* %out, i32 %value) #0 { entry: %hi32 = lshr i32 %value, 16 %hi = trunc i32 %hi32 to i16 - store i16 %hi, i16 addrspace(4)* %out + store i16 %hi, i16* %out ret void } @@ -241,12 +241,12 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_flat_hi_v2i16_i8(i8 addrspace(4)* %out, i32 %arg) #0 { +define void 
@store_flat_hi_v2i16_i8(i8* %out, i32 %arg) #0 { entry: %value = bitcast i32 %arg to <2 x i16> %hi = extractelement <2 x i16> %value, i32 1 %trunc = trunc i16 %hi to i8 - store i8 %trunc, i8 addrspace(4)* %out + store i8 %trunc, i8* %out ret void } @@ -260,11 +260,11 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_flat_hi_i8_shift(i8 addrspace(4)* %out, i32 %value) #0 { +define void @store_flat_hi_i8_shift(i8* %out, i32 %value) #0 { entry: %hi32 = lshr i32 %value, 16 %hi = trunc i32 %hi32 to i8 - store i8 %hi, i8 addrspace(4)* %out + store i8 %hi, i8* %out ret void } @@ -278,12 +278,12 @@ ; VI: flat_store_short v[0:1], v2{{$}} ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_flat_hi_v2i16_max_offset(i16 addrspace(4)* %out, i32 %arg) #0 { +define void @store_flat_hi_v2i16_max_offset(i16* %out, i32 %arg) #0 { entry: %value = bitcast i32 %arg to <2 x i16> %hi = extractelement <2 x i16> %value, i32 1 - %gep = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 2047 - store i16 %hi, i16 addrspace(4)* %gep + %gep = getelementptr inbounds i16, i16* %out, i64 2047 + store i16 %hi, i16* %gep ret void } @@ -296,12 +296,12 @@ ; VI: flat_store_short v[0:1], v2{{$}} ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_flat_hi_v2i16_neg_offset(i16 addrspace(4)* %out, i32 %arg) #0 { +define void @store_flat_hi_v2i16_neg_offset(i16* %out, i32 %arg) #0 { entry: %value = bitcast i32 %arg to <2 x i16> %hi = extractelement <2 x i16> %value, i32 1 - %gep = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 -1023 - store i16 %hi, i16 addrspace(4)* %gep + %gep = getelementptr inbounds i16, i16* %out, i64 -1023 + store i16 %hi, i16* %gep ret void } @@ -315,13 +315,13 @@ ; VI: flat_store_byte v[0:1], v2{{$}} ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_flat_hi_v2i16_i8_max_offset(i8 addrspace(4)* %out, i32 %arg) #0 { +define void @store_flat_hi_v2i16_i8_max_offset(i8* %out, i32 %arg) #0 { entry: %value = 
bitcast i32 %arg to <2 x i16> %hi = extractelement <2 x i16> %value, i32 1 %trunc = trunc i16 %hi to i8 - %gep = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 4095 - store i8 %trunc, i8 addrspace(4)* %gep + %gep = getelementptr inbounds i8, i8* %out, i64 4095 + store i8 %trunc, i8* %gep ret void } @@ -335,13 +335,13 @@ ; VI: flat_store_byte v[0:1], v2{{$}} ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_flat_hi_v2i16_i8_neg_offset(i8 addrspace(4)* %out, i32 %arg) #0 { +define void @store_flat_hi_v2i16_i8_neg_offset(i8* %out, i32 %arg) #0 { entry: %value = bitcast i32 %arg to <2 x i16> %hi = extractelement <2 x i16> %value, i32 1 %trunc = trunc i16 %hi to i8 - %gep = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 -4095 - store i8 %trunc, i8 addrspace(4)* %gep + %gep = getelementptr inbounds i8, i8* %out, i64 -4095 + store i8 %trunc, i8* %gep ret void } @@ -355,12 +355,12 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_private_hi_v2i16(i16* %out, i32 %arg) #0 { +define void @store_private_hi_v2i16(i16 addrspace(5)* %out, i32 %arg) #0 { entry: ; FIXME: ABI for pre-gfx9 %value = bitcast i32 %arg to <2 x i16> %hi = extractelement <2 x i16> %value, i32 1 - store i16 %hi, i16* %out + store i16 %hi, i16 addrspace(5)* %out ret void } @@ -374,12 +374,12 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_private_hi_v2f16(half* %out, i32 %arg) #0 { +define void @store_private_hi_v2f16(half addrspace(5)* %out, i32 %arg) #0 { entry: ; FIXME: ABI for pre-gfx9 %value = bitcast i32 %arg to <2 x half> %hi = extractelement <2 x half> %value, i32 1 - store half %hi, half* %out + store half %hi, half addrspace(5)* %out ret void } @@ -393,11 +393,11 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_private_hi_i32_shift(i16* %out, i32 %value) #0 { +define void @store_private_hi_i32_shift(i16 addrspace(5)* %out, i32 %value) #0 { entry: %hi32 = lshr i32 %value, 16 %hi = trunc i32 %hi32 to i16 - store 
i16 %hi, i16* %out + store i16 %hi, i16 addrspace(5)* %out ret void } @@ -411,12 +411,12 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_private_hi_v2i16_i8(i8* %out, i32 %arg) #0 { +define void @store_private_hi_v2i16_i8(i8 addrspace(5)* %out, i32 %arg) #0 { entry: %value = bitcast i32 %arg to <2 x i16> %hi = extractelement <2 x i16> %value, i32 1 %trunc = trunc i16 %hi to i8 - store i8 %trunc, i8* %out + store i8 %trunc, i8 addrspace(5)* %out ret void } @@ -430,11 +430,11 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_private_hi_i8_shift(i8* %out, i32 %value) #0 { +define void @store_private_hi_i8_shift(i8 addrspace(5)* %out, i32 %value) #0 { entry: %hi32 = lshr i32 %value, 16 %hi = trunc i32 %hi32 to i8 - store i8 %hi, i8* %out + store i8 %hi, i8 addrspace(5)* %out ret void } @@ -447,12 +447,12 @@ ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 -define void @store_private_hi_v2i16_max_offset(i16* %out, i32 %arg) #0 { +define void @store_private_hi_v2i16_max_offset(i16 addrspace(5)* %out, i32 %arg) #0 { entry: %value = bitcast i32 %arg to <2 x i16> %hi = extractelement <2 x i16> %value, i32 1 - %gep = getelementptr inbounds i16, i16* %out, i64 2047 - store i16 %hi, i16* %gep + %gep = getelementptr inbounds i16, i16 addrspace(5)* %out, i64 2047 + store i16 %hi, i16 addrspace(5)* %gep ret void } @@ -473,7 +473,7 @@ ; FIXME: ABI for pre-gfx9 %value = bitcast i32 %arg to <2 x i16> %hi = extractelement <2 x i16> %value, i32 1 - store volatile i16 %hi, i16* null + store volatile i16 %hi, i16 addrspace(5)* null ret void } @@ -493,7 +493,7 @@ %value = bitcast i32 %arg to <2 x i16> %hi = extractelement <2 x i16> %value, i32 1 %trunc = trunc i16 %hi to i8 - store volatile i8 %trunc, i8* null + store volatile i8 %trunc, i8 addrspace(5)* null ret void } Index: test/CodeGen/AMDGPU/store-v3i64.ll =================================================================== --- test/CodeGen/AMDGPU/store-v3i64.ll +++ 
test/CodeGen/AMDGPU/store-v3i64.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}global_store_v3i64: ; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 Index: test/CodeGen/AMDGPU/store_typed.ll =================================================================== --- test/CodeGen/AMDGPU/store_typed.ll +++ test/CodeGen/AMDGPU/store_typed.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck --check-prefix=CM --check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck --check-prefix=CM --check-prefix=FUNC %s ; store to rat 0 ; FUNC-LABEL: {{^}}store_typed_rat0: Index: test/CodeGen/AMDGPU/structurize.ll =================================================================== --- test/CodeGen/AMDGPU/structurize.ll +++ test/CodeGen/AMDGPU/structurize.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood -r600-ir-structurize=0 | FileCheck %s +; RUN: llc < %s -march=r600 
-mtriple=r600---amdgiz -mcpu=redwood -r600-ir-structurize=0 | FileCheck %s ; Test case for a crash in the AMDILCFGStructurizer from a CFG like this: ; ; entry Index: test/CodeGen/AMDGPU/structurize1.ll =================================================================== --- test/CodeGen/AMDGPU/structurize1.ll +++ test/CodeGen/AMDGPU/structurize1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood -r600-if-convert=0 < %s | FileCheck %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -r600-if-convert=0 < %s | FileCheck %s ; This tests for abug where the AMDILCFGStructurizer was crashing on loops ; like this: Index: test/CodeGen/AMDGPU/sub.i16.ll =================================================================== --- test/CodeGen/AMDGPU/sub.i16.ll +++ test/CodeGen/AMDGPU/sub.i16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s ; FIXME: Need to handle non-uniform case for function below (load without gep). 
; GCN-LABEL: {{^}}v_test_sub_i16: Index: test/CodeGen/AMDGPU/sub.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/sub.v2i16.ll +++ test/CodeGen/AMDGPU/sub.v2i16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_sub_v2i16: Index: test/CodeGen/AMDGPU/subreg-coalescer-crash.ll =================================================================== --- test/CodeGen/AMDGPU/subreg-coalescer-crash.ll +++ test/CodeGen/AMDGPU/subreg-coalescer-crash.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL:{{^}}row_filter_C1_D0: define amdgpu_kernel void @row_filter_C1_D0() #0 { Index: test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll =================================================================== --- test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll +++ 
test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -o - %s | FileCheck %s ; Don't crash when the use of an undefined value is only detected by the ; register coalescer because it is hidden with subregister insert/extract. target triple="amdgcn--" Index: test/CodeGen/AMDGPU/subreg-eliminate-dead.ll =================================================================== --- test/CodeGen/AMDGPU/subreg-eliminate-dead.ll +++ test/CodeGen/AMDGPU/subreg-eliminate-dead.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=amdgcn---amdgiz -verify-machineinstrs -o - %s | FileCheck %s ; LiveRangeEdit::eliminateDeadDef did not update LiveInterval sub ranges ; properly. Index: test/CodeGen/AMDGPU/swizzle-export.ll =================================================================== --- test/CodeGen/AMDGPU/swizzle-export.ll +++ test/CodeGen/AMDGPU/swizzle-export.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck --check-prefix=EG %s ;EG: {{^}}main: ;EG: EXPORT T{{[0-9]+}}.XYXX Index: test/CodeGen/AMDGPU/syncscopes.ll =================================================================== --- test/CodeGen/AMDGPU/syncscopes.ll +++ test/CodeGen/AMDGPU/syncscopes.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-before=si-debugger-insert-nops < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -stop-before=si-debugger-insert-nops < %s | FileCheck --check-prefix=GCN %s ; GCN-LABEL: name: syncscopes ; GCN: FLAT_STORE_DWORD killed %vgpr1_vgpr2, killed %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out) @@ -6,14 +6,14 @@ ; GCN: 
FLAT_STORE_DWORD killed %vgpr7_vgpr8, killed %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out) define void @syncscopes( i32 %agent, - i32 addrspace(4)* %agent_out, + i32* %agent_out, i32 %workgroup, - i32 addrspace(4)* %workgroup_out, + i32* %workgroup_out, i32 %wavefront, - i32 addrspace(4)* %wavefront_out) { + i32* %wavefront_out) { entry: - store atomic i32 %agent, i32 addrspace(4)* %agent_out syncscope("agent") seq_cst, align 4 - store atomic i32 %workgroup, i32 addrspace(4)* %workgroup_out syncscope("workgroup") seq_cst, align 4 - store atomic i32 %wavefront, i32 addrspace(4)* %wavefront_out syncscope("wavefront") seq_cst, align 4 + store atomic i32 %agent, i32* %agent_out syncscope("agent") seq_cst, align 4 + store atomic i32 %workgroup, i32* %workgroup_out syncscope("workgroup") seq_cst, align 4 + store atomic i32 %wavefront, i32* %wavefront_out syncscope("wavefront") seq_cst, align 4 ret void } Index: test/CodeGen/AMDGPU/tail-call-cgp.ll =================================================================== --- test/CodeGen/AMDGPU/tail-call-cgp.ll +++ test/CodeGen/AMDGPU/tail-call-cgp.ll @@ -1,7 +1,7 @@ -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -codegenprepare %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa-amdgiz -codegenprepare %s | FileCheck %s -define internal fastcc void @callee(i32* nocapture %p, i32 %a) #0 { - store volatile i32 %a, i32* %p, align 4 +define internal fastcc void @callee(i32 addrspace(5)* nocapture %p, i32 %a) #0 { + store volatile i32 %a, i32 addrspace(5)* %p, align 4 ret void } @@ -9,13 +9,13 @@ ; CHECK: tail call fastcc void @callee( ; CHECK-NEXT: ret void ; CHECK: ret void -define void @func_caller(i32* nocapture %p, i32 %a, i32 %b) #0 { +define void @func_caller(i32 addrspace(5)* nocapture %p, i32 %a, i32 %b) #0 { entry: %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %bb, label %ret bb: - tail call fastcc void @callee(i32* %p, i32 %a) + tail call 
fastcc void @callee(i32 addrspace(5)* %p, i32 %a) br label %ret ret: @@ -27,13 +27,13 @@ ; CHECK-NEXT: br label %ret ; CHECK: ret void -define amdgpu_kernel void @kernel_caller(i32* nocapture %p, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @kernel_caller(i32 addrspace(5)* nocapture %p, i32 %a, i32 %b) #0 { entry: %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %bb, label %ret bb: - tail call fastcc void @callee(i32* %p, i32 %a) + tail call fastcc void @callee(i32 addrspace(5)* %p, i32 %a) br label %ret ret: Index: test/CodeGen/AMDGPU/target-cpu.ll =================================================================== --- test/CodeGen/AMDGPU/target-cpu.ll +++ test/CodeGen/AMDGPU/target-cpu.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" declare i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() #1 @@ -81,10 +82,10 @@ ; CHECK: ; LDSByteSize: 5120 define amdgpu_kernel void @promote_alloca_enabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #5 { entry: - %stack = alloca [5 x i32], align 4 + %stack = alloca [5 x i32], align 4, addrspace(5) %tmp = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp - %load = load i32, i32* %arrayidx1 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp + %load = load i32, i32 addrspace(5)* %arrayidx1 store i32 %load, i32 addrspace(1)* %out ret void } @@ -95,10 +96,10 @@ ; CHECK: ScratchSize: 24 define amdgpu_kernel void @promote_alloca_disabled(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #6 { entry: - %stack = alloca [5 x i32], align 4 + %stack = alloca [5 x i32], align 4, 
addrspace(5) %tmp = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp - %load = load i32, i32* %arrayidx1 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %tmp + %load = load i32, i32 addrspace(5)* %arrayidx1 store i32 %load, i32 addrspace(1)* %out ret void } Index: test/CodeGen/AMDGPU/tex-clause-antidep.ll =================================================================== --- test/CodeGen/AMDGPU/tex-clause-antidep.ll +++ test/CodeGen/AMDGPU/tex-clause-antidep.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;CHECK: TEX ;CHECK-NEXT: ALU Index: test/CodeGen/AMDGPU/texture-input-merge.ll =================================================================== --- test/CodeGen/AMDGPU/texture-input-merge.ll +++ test/CodeGen/AMDGPU/texture-input-merge.ll @@ -1,4 +1,4 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ;CHECK-NOT: MOV Index: test/CodeGen/AMDGPU/trap.ll =================================================================== --- test/CodeGen/AMDGPU/trap.ll +++ test/CodeGen/AMDGPU/trap.ll @@ -1,18 +1,18 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-TRAP %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-TRAP %s -; RUN: llc -mtriple=amdgcn--amdhsa -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s -; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s -; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc 
-mtriple=amdgcn--amdhsa-amdgiz -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s ; enable trap handler feature -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d-amdgiz -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d-amdgiz -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s ; disable trap handler feature -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d-amdgiz -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d-amdgiz -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN 
-check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s declare void @llvm.trap() #0 declare void @llvm.debugtrap() #0 Index: test/CodeGen/AMDGPU/trunc-bitcast-vector.ll =================================================================== --- test/CodeGen/AMDGPU/trunc-bitcast-vector.ll +++ test/CodeGen/AMDGPU/trunc-bitcast-vector.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s ; CHECK-LABEL: {{^}}trunc_i64_bitcast_v2i32: ; CHECK: buffer_load_dword v Index: test/CodeGen/AMDGPU/trunc-cmp-constant.ll =================================================================== --- test/CodeGen/AMDGPU/trunc-cmp-constant.ll +++ test/CodeGen/AMDGPU/trunc-cmp-constant.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global 
-verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/AMDGPU/trunc-store-f64-to-f16.ll =================================================================== --- test/CodeGen/AMDGPU/trunc-store-f64-to-f16.ll +++ test/CodeGen/AMDGPU/trunc-store-f64-to-f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}global_truncstore_f64_to_f16: ; GCN: s_endpgm Index: test/CodeGen/AMDGPU/trunc-store-i1.ll =================================================================== --- test/CodeGen/AMDGPU/trunc-store-i1.ll +++ test/CodeGen/AMDGPU/trunc-store-i1.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}global_truncstore_i32_to_i1: Index: test/CodeGen/AMDGPU/trunc-store.ll =================================================================== --- test/CodeGen/AMDGPU/trunc-store.ll +++ test/CodeGen/AMDGPU/trunc-store.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}truncstore_arg_v16i32_to_v16i8: ; SI: buffer_store_dwordx4 Index: test/CodeGen/AMDGPU/trunc.ll =================================================================== --- test/CodeGen/AMDGPU/trunc.ll +++ test/CodeGen/AMDGPU/trunc.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs< %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs< %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone Index: test/CodeGen/AMDGPU/tti-unroll-prefs.ll =================================================================== --- test/CodeGen/AMDGPU/tti-unroll-prefs.ll +++ test/CodeGen/AMDGPU/tti-unroll-prefs.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-unroll -S -mtriple=amdgcn-- -mcpu=tahiti %s | FileCheck %s +; RUN: opt -loop-unroll -S -mtriple=amdgcn---amdgiz -mcpu=tahiti %s | FileCheck %s ; This IR comes from this OpenCL C code: ; Index: test/CodeGen/AMDGPU/uaddo.ll =================================================================== --- test/CodeGen/AMDGPU/uaddo.ll +++ test/CodeGen/AMDGPU/uaddo.ll @@ -1,6 +1,6 @@ -; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s ; FUNC-LABEL: {{^}}s_uaddo_i64_zext: ; GCN: s_add_u32 Index: test/CodeGen/AMDGPU/udiv.ll =================================================================== --- test/CodeGen/AMDGPU/udiv.ll +++ test/CodeGen/AMDGPU/udiv.ll @@ -1,9 +1,9 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa 
-mcpu=fiji -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=+fp32-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}udiv_i32: ; EG-NOT: SETGE_INT Index: test/CodeGen/AMDGPU/udivrem.ll =================================================================== --- test/CodeGen/AMDGPU/udivrem.ll +++ test/CodeGen/AMDGPU/udivrem.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_udivrem: ; EG: RECIP_UINT Index: test/CodeGen/AMDGPU/udivrem24.ll =================================================================== --- test/CodeGen/AMDGPU/udivrem24.ll +++ test/CodeGen/AMDGPU/udivrem24.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global 
-verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}udiv24_i8: ; SI: v_cvt_f32_ubyte Index: test/CodeGen/AMDGPU/udivrem64.ll =================================================================== --- test/CodeGen/AMDGPU/udivrem64.ll +++ test/CodeGen/AMDGPU/udivrem64.ll @@ -1,6 +1,6 @@ -;RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s -;RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s -;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s ;FUNC-LABEL: {{^}}test_udiv: ;EG: RECIP_UINT Index: test/CodeGen/AMDGPU/uint_to_fp.f64.ll =================================================================== --- test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck 
-check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/AMDGPU/uint_to_fp.i64.ll =================================================================== --- test/CodeGen/AMDGPU/uint_to_fp.i64.ll +++ test/CodeGen/AMDGPU/uint_to_fp.i64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s ; FIXME: This should be merged with uint_to_fp.ll, but s_uint_to_fp_v2i64 crashes on r600 Index: test/CodeGen/AMDGPU/uint_to_fp.ll =================================================================== --- test/CodeGen/AMDGPU/uint_to_fp.ll +++ test/CodeGen/AMDGPU/uint_to_fp.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < 
%s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}s_uint_to_fp_i32_to_f32: ; SI: v_cvt_f32_u32_e32 Index: test/CodeGen/AMDGPU/uitofp.f16.ll =================================================================== --- test/CodeGen/AMDGPU/uitofp.f16.ll +++ test/CodeGen/AMDGPU/uitofp.f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}uitofp_i16_to_f16 ; GCN: buffer_load_ushort v[[A_I16:[0-9]+]] Index: test/CodeGen/AMDGPU/umed3.ll =================================================================== --- test/CodeGen/AMDGPU/umed3.ll +++ test/CodeGen/AMDGPU/umed3.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=SI %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s declare i32 @llvm.amdgcn.workitem.id.x() #0 Index: test/CodeGen/AMDGPU/unaligned-load-store.ll =================================================================== --- test/CodeGen/AMDGPU/unaligned-load-store.ll +++ test/CodeGen/AMDGPU/unaligned-load-store.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s -; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s ; SI-LABEL: {{^}}local_unaligned_load_store_i16: ; SI: ds_read_u8 Index: test/CodeGen/AMDGPU/undefined-subreg-liverange.ll =================================================================== --- test/CodeGen/AMDGPU/undefined-subreg-liverange.ll +++ test/CodeGen/AMDGPU/undefined-subreg-liverange.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; We may have subregister live ranges that 
are undefined on some paths. The ; verifier should not complain about this. Index: test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll =================================================================== --- test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll +++ test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll @@ -1,6 +1,6 @@ -; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s -; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s -; XUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=COMMON %s +; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s +; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s +; XUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=COMMON %s ; SI hits an assertion at -O0, evergreen hits a not implemented unreachable. 
Index: test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll =================================================================== --- test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll +++ test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; This used to raise an assertion due to how the choice between uniform and ; non-uniform branches was determined. Index: test/CodeGen/AMDGPU/uniform-cfg.ll =================================================================== --- test/CodeGen/AMDGPU/uniform-cfg.ll +++ test/CodeGen/AMDGPU/uniform-cfg.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}uniform_if_scc: ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0 Index: test/CodeGen/AMDGPU/uniform-crash.ll =================================================================== --- test/CodeGen/AMDGPU/uniform-crash.ll +++ 
test/CodeGen/AMDGPU/uniform-crash.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s ; GCN-LABEL: {{^}}icmp_2_users: ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 1 Index: test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll =================================================================== --- test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll +++ test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=verde < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde < %s | FileCheck %s ; Test a simple uniform loop that lives inside non-uniform control flow. Index: test/CodeGen/AMDGPU/unify-metadata.ll =================================================================== --- test/CodeGen/AMDGPU/unify-metadata.ll +++ test/CodeGen/AMDGPU/unify-metadata.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn--amdhsa -amdgpu-unify-metadata -S < %s | FileCheck -check-prefix=ALL %s +; RUN: opt -mtriple=amdgcn--amdhsa-amdgiz -amdgpu-unify-metadata -S < %s | FileCheck -check-prefix=ALL %s ; This test check that we have a singe metadata value after linking several ; modules for records such as opencl.ocl.version, llvm.ident and similar. 
Index: test/CodeGen/AMDGPU/unigine-liveness-crash.ll =================================================================== --- test/CodeGen/AMDGPU/unigine-liveness-crash.ll +++ test/CodeGen/AMDGPU/unigine-liveness-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck %s ; ; This test used to crash with the following assertion: ; llc: include/llvm/ADT/IntervalMap.h:632: unsigned int llvm::IntervalMapImpl::LeafNode >::insertFrom(unsigned int &, unsigned int, KeyT, KeyT, ValT) [KeyT = llvm::SlotIndex, ValT = llvm::LiveInterval *, N = 8, Traits = llvm::IntervalMapInfo]: Assertion `(i == Size || Traits::stopLess(b, start(i))) && "Overlapping insert"' failed. Index: test/CodeGen/AMDGPU/unroll.ll =================================================================== --- test/CodeGen/AMDGPU/unroll.ll +++ test/CodeGen/AMDGPU/unroll.ll @@ -1,5 +1,6 @@ -; RUN: opt -mtriple=amdgcn-- -loop-unroll -simplifycfg -sroa %s -S -o - | FileCheck %s -; RUN: opt -mtriple=r600-- -loop-unroll -simplifycfg -sroa %s -S -o - | FileCheck %s +; RUN: opt -mtriple=amdgcn---amdgiz -loop-unroll -simplifycfg -sroa %s -S -o - | FileCheck %s +; RUN: opt -mtriple=r600---amdgiz -loop-unroll -simplifycfg -sroa %s -S -o - | FileCheck %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; This test contains a simple loop that initializes an array declared in @@ -11,7 +12,7 @@ ; CHECK: store i32 5, i32 addrspace(1)* %out define amdgpu_kernel void @private_memory(i32 addrspace(1)* %out) { entry: - %0 = alloca [32 x i32] + %0 = alloca [32 x i32], addrspace(5) br label %loop.header loop.header: @@ -19,8 +20,8 @@ br label %loop.body loop.body: - %ptr = getelementptr [32 x i32], [32 x i32]* %0, i32 0, i32 %counter - store i32 %counter, i32* %ptr + %ptr = 
getelementptr [32 x i32], [32 x i32] addrspace(5)* %0, i32 0, i32 %counter + store i32 %counter, i32 addrspace(5)* %ptr br label %loop.inc loop.inc: @@ -29,8 +30,8 @@ br i1 %1, label %exit, label %loop.header exit: - %2 = getelementptr [32 x i32], [32 x i32]* %0, i32 0, i32 5 - %3 = load i32, i32* %2 + %2 = getelementptr [32 x i32], [32 x i32] addrspace(5)* %0, i32 0, i32 5 + %3 = load i32, i32 addrspace(5)* %2 store i32 %3, i32 addrspace(1)* %out ret void } @@ -74,7 +75,7 @@ ; CHECK-NEXT: getelementptr ; CHECK-NEXT: store ; CHECK-NOT: br -define amdgpu_kernel void @unroll_for_if(i32* %a) { +define amdgpu_kernel void @unroll_for_if(i32 addrspace(5)* %a) { entry: br label %for.body @@ -86,8 +87,8 @@ if.then: ; preds = %for.body %0 = sext i32 %i1 to i64 - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %0 - store i32 0, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %a, i64 %0 + store i32 0, i32 addrspace(5)* %arrayidx, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then Index: test/CodeGen/AMDGPU/unsupported-cc.ll =================================================================== --- test/CodeGen/AMDGPU/unsupported-cc.ll +++ test/CodeGen/AMDGPU/unsupported-cc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; These tests are for condition codes that are not supported by the hardware Index: test/CodeGen/AMDGPU/urem.ll =================================================================== --- test/CodeGen/AMDGPU/urem.ll +++ test/CodeGen/AMDGPU/urem.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; The code generated by urem is long and complex and may frequently ; change. The goal of this test is to make sure the ISel doesn't fail Index: test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll =================================================================== --- test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll +++ test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s declare float @llvm.fma.f32(float, float, float) #1 declare double @llvm.fma.f64(double, double, double) #1 Index: test/CodeGen/AMDGPU/usubo.ll =================================================================== --- test/CodeGen/AMDGPU/usubo.ll +++ test/CodeGen/AMDGPU/usubo.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s -; RUN: 
llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s ; FUNC-LABEL: {{^}}s_usubo_i64_zext: ; GCN: s_sub_u32 Index: test/CodeGen/AMDGPU/v1i64-kernel-arg.ll =================================================================== --- test/CodeGen/AMDGPU/v1i64-kernel-arg.ll +++ test/CodeGen/AMDGPU/v1i64-kernel-arg.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck %s ; CHECK-LABEL: {{^}}kernel_arg_i64: define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { Index: test/CodeGen/AMDGPU/v_cndmask.ll =================================================================== --- test/CodeGen/AMDGPU/v_cndmask.ll +++ test/CodeGen/AMDGPU/v_cndmask.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck 
-check-prefix=GCN -check-prefix=VI %s declare i32 @llvm.amdgcn.workitem.id.x() #1 Index: test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll =================================================================== --- test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll +++ test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.cvt.pk.u8.f32(float, i32, i32) #0 Index: test/CodeGen/AMDGPU/v_mac.ll =================================================================== --- test/CodeGen/AMDGPU/v_mac.ll +++ test/CodeGen/AMDGPU/v_mac.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false 
-march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s ; GCN-LABEL: {{^}}mac_vvv: ; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}} Index: test/CodeGen/AMDGPU/v_mac_f16.ll =================================================================== --- test/CodeGen/AMDGPU/v_mac_f16.ll +++ test/CodeGen/AMDGPU/v_mac_f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}mac_f16: ; GCN: {{buffer|flat}}_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/v_madak_f16.ll =================================================================== --- test/CodeGen/AMDGPU/v_madak_f16.ll +++ test/CodeGen/AMDGPU/v_madak_f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck 
-check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}madak_f16 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] Index: test/CodeGen/AMDGPU/valu-i1.ll =================================================================== --- test/CodeGen/AMDGPU/valu-i1.ll +++ test/CodeGen/AMDGPU/valu-i1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone Index: test/CodeGen/AMDGPU/vector-extract-insert.ll =================================================================== --- test/CodeGen/AMDGPU/vector-extract-insert.ll +++ test/CodeGen/AMDGPU/vector-extract-insert.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Test that when extracting the same unknown vector index from an ; insertelement the dynamic indexing is folded away. 
Index: test/CodeGen/AMDGPU/vectorize-global-local.ll =================================================================== --- test/CodeGen/AMDGPU/vectorize-global-local.ll +++ test/CodeGen/AMDGPU/vectorize-global-local.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-DAG: flat_load_dwordx4 ; CHECK-DAG: flat_load_dwordx4 ; CHECK-DAG: flat_load_dwordx4 Index: test/CodeGen/AMDGPU/vertex-fetch-encoding.ll =================================================================== --- test/CodeGen/AMDGPU/vertex-fetch-encoding.ll +++ test/CodeGen/AMDGPU/vertex-fetch-encoding.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=cypress | FileCheck --check-prefix=EG --check-prefix=FUNC %s -; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=barts | FileCheck --check-prefix=EG --check-prefix=FUNC %s -; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=cayman | FileCheck --check-prefix=CM --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -show-mc-encoding -mcpu=cypress | FileCheck --check-prefix=EG --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -show-mc-encoding -mcpu=barts | FileCheck --check-prefix=EG --check-prefix=FUNC %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -show-mc-encoding -mcpu=cayman | FileCheck --check-prefix=CM --check-prefix=FUNC %s ; FUNC-LABEL: {{^}}vtx_fetch32: ; EG: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00 Index: test/CodeGen/AMDGPU/vi-removed-intrinsics.ll =================================================================== --- test/CodeGen/AMDGPU/vi-removed-intrinsics.ll +++ test/CodeGen/AMDGPU/vi-removed-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: not llc 
-march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: not llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; ERROR: error: foo.cl:1:42: in function rsq_legacy_f32 void (float addrspace(1)*, float): intrinsic not supported on subtarget Index: test/CodeGen/AMDGPU/vop-shrink.ll =================================================================== --- test/CodeGen/AMDGPU/vop-shrink.ll +++ test/CodeGen/AMDGPU/vop-shrink.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; Test that we correctly commute a sub instruction ; FUNC-LABEL: {{^}}sub_rev: Index: test/CodeGen/AMDGPU/vselect.ll =================================================================== --- test/CodeGen/AMDGPU/vselect.ll +++ test/CodeGen/AMDGPU/vselect.ll @@ -1,6 +1,6 @@ -;RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=VI --check-prefix=FUNC %s -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s +;RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=VI --check-prefix=FUNC %s +;RUN: llc < %s -march=r600 -mtriple=r600---amdgiz 
-mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s ; FUNC-LABEL: {{^}}test_select_v2i32: Index: test/CodeGen/AMDGPU/vselect64.ll =================================================================== --- test/CodeGen/AMDGPU/vselect64.ll +++ test/CodeGen/AMDGPU/vselect64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; XXX: Merge this test into vselect.ll once SI supports 64-bit select. ; CHECK-LABEL: {{^}}test_select_v4i64: Index: test/CodeGen/AMDGPU/vtx-fetch-branch.ll =================================================================== --- test/CodeGen/AMDGPU/vtx-fetch-branch.ll +++ test/CodeGen/AMDGPU/vtx-fetch-branch.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood %s -o - | FileCheck %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood %s -o - | FileCheck %s ; This tests for a bug where vertex fetch clauses right before an ENDIF ; instruction where being emitted after the ENDIF. 
We were using ALU_POP_AFTER Index: test/CodeGen/AMDGPU/vtx-schedule.ll =================================================================== --- test/CodeGen/AMDGPU/vtx-schedule.ll +++ test/CodeGen/AMDGPU/vtx-schedule.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s ; This test is for a scheduler bug where VTX_READ instructions that used ; the result of another VTX_READ instruction were being grouped in the Index: test/CodeGen/AMDGPU/wait.ll =================================================================== --- test/CodeGen/AMDGPU/wait.ll +++ test/CodeGen/AMDGPU/wait.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=DEFAULT -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=DEFAULT -; RUN: llc -march=amdgcn --misched=ilpmax -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=ILPMAX -; RUN: llc -march=amdgcn --misched=ilpmax -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=ILPMAX +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=DEFAULT +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=DEFAULT +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz --misched=ilpmax -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=ILPMAX +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz --misched=ilpmax -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=ILPMAX ; The ilpmax scheduler is used for the second test to get the ordering we want for the test. 
; DEFAULT-LABEL: {{^}}main: Index: test/CodeGen/AMDGPU/waitcnt-flat.ll =================================================================== --- test/CodeGen/AMDGPU/waitcnt-flat.ll +++ test/CodeGen/AMDGPU/waitcnt-flat.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; If flat_store_dword and flat_load_dword use different registers for the data ; operand, this test is not broken. 
It just means it is no longer testing @@ -10,13 +10,13 @@ ; XGCN: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[DATA:v[0-9]+]] ; XGCN: s_waitcnt vmcnt(0) lgkmcnt(0) ; XGCN: flat_load_dword [[DATA]], v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @test(i32 addrspace(4)* %out, i32 %in) { - store volatile i32 0, i32 addrspace(4)* %out - %val = load volatile i32, i32 addrspace(4)* %out +define amdgpu_kernel void @test(i32* %out, i32 %in) { + store volatile i32 0, i32* %out + %val = load volatile i32, i32* %out ret void } ; Make sure lgkmcnt isn't used for global_* instructions ; GCN-LABEL: {{^}}test_waitcnt_type_flat_global: ; GFX9: global_load_dword [[LD:v[0-9]+]] ; GFX9-NEXT: s_waitcnt vmcnt(0){{$}} Index: test/CodeGen/AMDGPU/waitcnt-looptest.ll =================================================================== --- test/CodeGen/AMDGPU/waitcnt-looptest.ll +++ test/CodeGen/AMDGPU/waitcnt-looptest.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefix=GCN %s ; Check that the waitcnt insertion algorithm correctly propagates wait counts ; from before a loop to the loop header.
@@ -17,8 +17,8 @@ define amdgpu_kernel void @testKernel(i32 addrspace(1)* nocapture %arg) local_unnamed_addr #0 { bb: - store <2 x float> , <2 x float> addrspace(4)* bitcast (float addrspace(4)* getelementptr ([100 x float], [100 x float] addrspace(4)* addrspacecast ([100 x float] addrspace(1)* @data_generic to [100 x float] addrspace(4)*), i64 0, i64 4) to <2 x float> addrspace(4)*), align 4 - store <2 x float> , <2 x float> addrspace(4)* bitcast (float addrspace(4)* getelementptr ([100 x float], [100 x float] addrspace(4)* addrspacecast ([100 x float] addrspace(1)* @data_reference to [100 x float] addrspace(4)*), i64 0, i64 4) to <2 x float> addrspace(4)*), align 4 + store <2 x float> , <2 x float>* bitcast (float* getelementptr ([100 x float], [100 x float]* addrspacecast ([100 x float] addrspace(1)* @data_generic to [100 x float]*), i64 0, i64 4) to <2 x float>*), align 4 + store <2 x float> , <2 x float>* bitcast (float* getelementptr ([100 x float], [100 x float]* addrspacecast ([100 x float] addrspace(1)* @data_reference to [100 x float]*), i64 0, i64 4) to <2 x float>*), align 4 br label %bb18 bb1: ; preds = %bb18 Index: test/CodeGen/AMDGPU/widen-vselect-and-mask.ll =================================================================== --- test/CodeGen/AMDGPU/widen-vselect-and-mask.ll +++ test/CodeGen/AMDGPU/widen-vselect-and-mask.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; Check that DAGTypeLegalizer::WidenVSELECTAndMask doesn't try to ; create vselects with i64 condition masks. 
Index: test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll =================================================================== --- test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll +++ test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-codegenprepare < %s | FileCheck -check-prefix=OPT %s declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 Index: test/CodeGen/AMDGPU/wqm.ll =================================================================== --- test/CodeGen/AMDGPU/wqm.ll +++ test/CodeGen/AMDGPU/wqm.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=VI %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; Check that WQM isn't triggered by image load/store intrinsics. 
; @@ -657,17 +658,17 @@ ; CHECK: buffer_store_dwordx4 define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind { entry: - %array = alloca [32 x i32], align 4 + %array = alloca [32 x i32], align 4, addrspace(5) call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0) - %s.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 0 - store volatile i32 %a, i32* %s.gep, align 4 + %s.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 0 + store volatile i32 %a, i32 addrspace(5)* %s.gep, align 4 call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 1, i32 0, i1 0, i1 0) - %c.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 %idx - %c = load i32, i32* %c.gep, align 4 + %c.gep = getelementptr [32 x i32], [32 x i32] addrspace(5)* %array, i32 0, i32 %idx + %c = load i32, i32 addrspace(5)* %c.gep, align 4 %c.bc = bitcast i32 %c to float %t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0) Index: test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll =================================================================== --- test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll +++ test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll @@ -1,6 +1,6 @@ ; XFAIL: * ; REQUIRES: asserts -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -verify-machineinstrs < %s ; write_register doesn't prevent us from illegally trying to write a ; vgpr value into a scalar register, but I don't think there's much we Index: test/CodeGen/AMDGPU/write_register.ll =================================================================== --- test/CodeGen/AMDGPU/write_register.ll +++ 
test/CodeGen/AMDGPU/write_register.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s declare void @llvm.write_register.i32(metadata, i32) #0 declare void @llvm.write_register.i64(metadata, i64) #0 Index: test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll =================================================================== --- test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll +++ test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood -mtriple=r600-- < %s | FileCheck %s +; RUN: llc -march=r600 -mcpu=redwood -mtriple=r600---amdgiz < %s | FileCheck %s ; We want all MULLO_INT inst to be last in their instruction group ;CHECK: {{^}}fill3d: Index: test/CodeGen/AMDGPU/xfail.r600.bitcast.ll =================================================================== --- test/CodeGen/AMDGPU/xfail.r600.bitcast.ll +++ test/CodeGen/AMDGPU/xfail.r600.bitcast.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; XFAIL: * ; This is the failing part of the r600 bitacts tests Index: test/CodeGen/AMDGPU/xnor.ll =================================================================== --- test/CodeGen/AMDGPU/xnor.ll +++ test/CodeGen/AMDGPU/xnor.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX600 %s -; RUN: llc -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX700 %s -; RUN: llc -march=amdgcn -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX800 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck 
--check-prefix=GCN --check-prefix=GFX900 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX600 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX700 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX800 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX900 %s ; GCN-LABEL: {{^}}scalar_xnor_i32_one_use ; GCN: s_xnor_b32 Index: test/CodeGen/AMDGPU/xor.ll =================================================================== --- test/CodeGen/AMDGPU/xor.ll +++ test/CodeGen/AMDGPU/xor.ll @@ -1,6 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}xor_v2i32: Index: test/CodeGen/AMDGPU/zero_extend.ll 
=================================================================== --- test/CodeGen/AMDGPU/zero_extend.ll +++ test/CodeGen/AMDGPU/zero_extend.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI -; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI +; RUN: llc < %s -march=r600 -mtriple=r600---amdgiz -mcpu=redwood | FileCheck %s --check-prefix=R600 ; R600: {{^}}s_mad_zext_i32_to_i64: ; R600: MEM_RAT_CACHELESS STORE_RAW Index: test/CodeGen/AMDGPU/zext-i64-bit-operand.ll =================================================================== --- test/CodeGen/AMDGPU/zext-i64-bit-operand.ll +++ test/CodeGen/AMDGPU/zext-i64-bit-operand.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}zext_or_operand_i64: ; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} Index: test/CodeGen/AMDGPU/zext-lid.ll =================================================================== --- test/CodeGen/AMDGPU/zext-lid.ll +++ test/CodeGen/AMDGPU/zext-lid.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn < %s | FileCheck %s -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-intrinsics < %s | FileCheck -check-prefix=OPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-lower-intrinsics < %s | FileCheck -check-prefix=OPT %s ; CHECK-NOT: and_b32