Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1058,7 +1058,8 @@ if (RemoveIncompatibleFunctions) addPass(createAMDGPURemoveIncompatibleFunctionsPass(TM)); - addPass(createAMDGPUAttributorPass()); + if (TM->getOptLevel() > CodeGenOpt::None) + addPass(createAMDGPUAttributorPass()); // FIXME: This pass adds 2 hacky attributes that can be replaced with an // analysis, and should be removed. Index: llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll @@ -7,7 +7,7 @@ define amdgpu_kernel void @stack_write_fi() { ; CHECK-LABEL: stack_write_fi: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_add_u32 s0, s0, s7 +; CHECK-NEXT: s_add_u32 s0, s0, s17 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 ; CHECK-NEXT: s_mov_b32 s5, 0 ; CHECK-NEXT: s_mov_b32 s4, 0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=HSA %s ; HSA-LABEL: name: default_kernel ; HSA: liveins: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s define amdgpu_kernel void @i8_arg(ptr addrspace(1) nocapture %out, i8 %in) nounwind { ; HSA-VI-LABEL: name: i8_arg @@ -1427,7 +1427,7 @@ ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: 
[[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -1441,7 +1441,7 @@ ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s8) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -1462,7 +1462,7 @@ ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -1476,7 +1476,7 @@ ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s16) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -1500,7 +1500,7 @@ ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -1517,7 +1517,7 @@ ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) 
= G_PTR_ADD [[COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -1541,7 +1541,7 @@ ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -1558,7 +1558,7 @@ ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -1582,7 +1582,7 @@ ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -1599,7 +1599,7 @@ ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) - ; 
LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -1623,7 +1623,7 @@ ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 @@ -1640,7 +1640,7 @@ ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 @@ -1731,7 +1731,7 @@ ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6) + ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 6) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg @@ -1745,7 +1745,7 @@ ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) - ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6) + ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 6) ; 
LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load i32, ptr addrspace(6) %in.byref @@ -1838,8 +1838,8 @@ ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4) - ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4) + ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in0.byref, addrspace 4) + ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32) from %ir.in1.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) @@ -1859,8 +1859,8 @@ ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (s32) from %ir.in0.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load (s32) from %ir.in1.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in0.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32) from %ir.in1.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) @@ -1882,7 +1882,7 @@ ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) + ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0 @@ -1893,7 +1893,7 @@ ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from 
%ir.in.byref, addrspace 4) + ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %in = load i32, ptr addrspace(4) %in.byref Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -30,8 +30,8 @@ ; CHECK-NEXT: bb.2.atomicrmw.start: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %8(s64), %bb.2, [[C1]](s64), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %6(s32), %bb.2 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %16(s64), %bb.2, [[C1]](s64), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %14(s32), %bb.2 ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]] ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll @@ -7,6 +7,9 @@ define amdgpu_kernel void @constant_fold_vector_add() { ; CHECK-LABEL: name: constant_fold_vector_add ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -4,6 +4,9 @@ define amdgpu_kernel void @asm_convergent() convergent{ ; CHECK-LABEL: name: asm_convergent ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !0 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_barrier", ""() convergent, !srcloc !0 @@ -13,6 +16,9 @@ define amdgpu_kernel void @asm_simple_memory_clobber() { ; CHECK-LABEL: name: asm_simple_memory_clobber ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !0 ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !0 ; CHECK-NEXT: S_ENDPGM 0 @@ -24,6 +30,9 @@ define amdgpu_kernel void 
@asm_simple_vgpr_clobber() { ; CHECK-LABEL: name: asm_simple_vgpr_clobber ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !0 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"(), !srcloc !0 @@ -33,6 +42,9 @@ define amdgpu_kernel void @asm_simple_sgpr_clobber() { ; CHECK-LABEL: name: asm_simple_sgpr_clobber ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !0 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_mov_b32 s0, 7", "~{s0}"(), !srcloc !0 @@ -42,6 +54,9 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() { ; CHECK-LABEL: name: asm_simple_agpr_clobber ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !0 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "; def a0", "~{a0}"(), !srcloc !0 @@ -51,9 +66,9 @@ define i32 @asm_vgpr_early_clobber() { ; CHECK-LABEL: name: asm_vgpr_early_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1769483 /* regdef-ec:VGPR_32 */, def early-clobber %0, 1769483 /* regdef-ec:VGPR_32 */, def early-clobber %1, !0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1769483 /* regdef-ec:VGPR_32 */, def early-clobber %8, 1769483 /* regdef-ec:VGPR_32 */, def early-clobber %9, !0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -79,8 +94,8 @@ define i32 @test_single_vgpr_output() nounwind { ; CHECK-LABEL: name: test_single_vgpr_output ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: @@ -91,8 +106,8 @@ define i32 @test_single_sgpr_output_s32() nounwind { ; CHECK-LABEL: name: test_single_sgpr_output_s32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: @@ -104,9 +119,9 @@ define float @test_multiple_register_outputs_same() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM 
&"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %0, 1769482 /* regdef:VGPR_32 */, def %1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %8, 1769482 /* regdef:VGPR_32 */, def %9 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 @@ -121,9 +136,9 @@ define double @test_multiple_register_outputs_mixed() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %0, 3080202 /* regdef:VReg_64 */, def %1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %8, 3080202 /* regdef:VReg_64 */, def %9 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %9 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -151,9 +166,12 @@ define amdgpu_kernel void @test_input_vgpr_imm() { ; CHECK-LABEL: name: test_input_vgpr_imm ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[C]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:VGPR_32 */, [[COPY]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[C]](s32) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:VGPR_32 */, [[COPY1]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "v_mov_b32 v0, $0", "v"(i32 42) ret void @@ -162,9 +180,12 @@ define amdgpu_kernel void @test_input_sgpr_imm() { ; CHECK-LABEL: name: test_input_sgpr_imm ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[C]](s32) - ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:SReg_32 */, [[COPY]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[C]](s32) + ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:SReg_32 */, [[COPY1]] ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_mov_b32 s0, $0", "s"(i32 42) ret void @@ -173,6 +194,9 @@ define amdgpu_kernel void @test_input_imm() { ; CHECK-LABEL: name: test_input_imm ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, $0", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42 ; CHECK-NEXT: INLINEASM &"s_mov_b64 s[0:1], $0", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42 ; CHECK-NEXT: S_ENDPGM 0 @@ -188,8 +212,8 @@ ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %1, 1769481 /* reguse:VGPR_32 */, [[COPY1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %9, 1769481 /* reguse:VGPR_32 */, [[COPY1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: @@ -203,8 +227,8 @@ ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1769482 /* regdef:VGPR_32 */, def %1, 262158 /* mem:m */, [[COPY]](p3) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1769482 /* regdef:VGPR_32 */, def %9, 262158 /* mem:m */, [[COPY]](p3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(ptr addrspace(3) elementtype(i32) %a) @@ -220,8 +244,8 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) - ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 + ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %11 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %and = and i32 %a, 1 @@ -232,14 +256,14 @@ define i32 @test_sgpr_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %2 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %10 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %4, 1900553 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %4 + ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %12, 1900553 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %12 ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: @@ -261,10 +285,10 @@ ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def %3, 1769482 /* regdef:VGPR_32 */, def %4, 1769482 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %3 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5 + ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1769482 /* regdef:VGPR_32 */, def %11, 1769482 /* regdef:VGPR_32 */, def %12, 1769482 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %11 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %12 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %13 ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) @@ -282,11 +306,11 @@ define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1900554 /* regdef:SReg_32 */, def %8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1769482 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: @@ -298,6 +322,9 @@ define amdgpu_kernel void @asm_constraint_n_n() { ; CHECK-LABEL: name: asm_constraint_n_n ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr8_sgpr9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: INLINEASM &"s_trap ${0:n}", 1 /* sideeffect attdialect */, 13 /* imm */, 10 ; CHECK-NEXT: S_ENDPGM 0 tail call void asm sideeffect "s_trap ${0:n}", "n"(i32 10) #1 Index: llvm/test/CodeGen/AMDGPU/attributor-noopt.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/attributor-noopt.ll @@ -0,0 +1,34 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=OPT %s +; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=NOOPT %s + +; Check that AMDGPUAttributor is not run with -O0. 
+; OPT: .amdhsa_user_sgpr_private_segment_buffer 1 +; OPT: .amdhsa_user_sgpr_dispatch_ptr 0 +; OPT: .amdhsa_user_sgpr_queue_ptr 0 +; OPT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +; OPT: .amdhsa_user_sgpr_dispatch_id 0 +; OPT: .amdhsa_user_sgpr_flat_scratch_init 0 +; OPT: .amdhsa_user_sgpr_private_segment_size 0 +; OPT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; OPT: .amdhsa_system_sgpr_workgroup_id_x 1 +; OPT: .amdhsa_system_sgpr_workgroup_id_y 0 +; OPT: .amdhsa_system_sgpr_workgroup_id_z 0 +; OPT: .amdhsa_system_sgpr_workgroup_info 0 +; OPT: .amdhsa_system_vgpr_workitem_id 0 + +; NOOPT: .amdhsa_user_sgpr_private_segment_buffer 1 +; NOOPT: .amdhsa_user_sgpr_dispatch_ptr 1 +; NOOPT: .amdhsa_user_sgpr_queue_ptr 1 +; NOOPT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 +; NOOPT: .amdhsa_user_sgpr_dispatch_id 1 +; NOOPT: .amdhsa_user_sgpr_flat_scratch_init 0 +; NOOPT: .amdhsa_user_sgpr_private_segment_size 0 +; NOOPT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +; NOOPT: .amdhsa_system_sgpr_workgroup_id_x 1 +; NOOPT: .amdhsa_system_sgpr_workgroup_id_y 1 +; NOOPT: .amdhsa_system_sgpr_workgroup_id_z 1 +; NOOPT: .amdhsa_system_sgpr_workgroup_info 0 +; NOOPT: .amdhsa_system_vgpr_workitem_id 2 +define amdgpu_kernel void @foo() { + ret void +} Index: llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -30,10 +30,10 @@ ; ; GCN_DBG-LABEL: test_loop: ; GCN_DBG: ; %bb.0: ; %entry -; GCN_DBG-NEXT: s_load_dword s2, s[0:1], 0x9 +; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v0, s2, 0 -; GCN_DBG-NEXT: s_load_dword s1, s[0:1], 0xa +; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 +; GCN_DBG-NEXT: s_load_dword s1, s[4:5], 0xa ; GCN_DBG-NEXT: s_mov_b32 s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s2, -1 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) @@ -103,13 +103,13 @@ ; GCN-NEXT: ds_write_b32 v0, v1 ; GCN-NEXT: s_add_i32 s0, s0, 4 ; GCN-NEXT: s_mov_b64 vcc, vcc -; GCN-NEXT: s_cbranch_vccnz .LBB1_1 +; GCN-NEXT: s_cbranch_vccnz .LBB1_1 ; GCN-NEXT: ; %bb.2: ; %DummyReturnBlock ; GCN-NEXT: s_endpgm ; ; GCN_DBG-LABEL: loop_const_true: ; GCN_DBG: ; %bb.0: ; %entry -; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9 +; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) ; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 @@ -174,7 +174,7 @@ ; ; GCN_DBG-LABEL: loop_const_false: ; GCN_DBG: ; %bb.0: ; %entry -; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9 +; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) ; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 @@ -240,7 +240,7 @@ ; ; GCN_DBG-LABEL: loop_const_undef: ; GCN_DBG: ; %bb.0: ; %entry -; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9 +; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) ; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 @@ -318,7 +318,7 @@ ; ; GCN_DBG-LABEL: loop_arg_0: ; GCN_DBG: ; %bb.0: ; %entry -; GCN_DBG-NEXT: s_load_dword s0, s[0:1], 0x9 +; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) ; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 ; GCN_DBG-NEXT: v_mov_b32_e32 v1, 0 Index: llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll =================================================================== --- /dev/null +++ 
llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll @@ -0,0 +1,22 @@ +; REQUIRES: asserts +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=OPT %s +; RUN: not llc --crash -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s + +; AMDGPUAttributor deletes the function "by accident" so it's never +; codegened with optimizations. + +; OPT: .text +; OPT-NEXT: .section ".note.GNU-stack" +; OPT-NEXT: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" +; OPT-NEXT: .amdgpu_metadata +; OPT-NEXT: --- +; OPT-NEXT: amdhsa.kernels: [] +; OPT-NEXT: amdhsa.target: amdgcn-amd-amdhsa--gfx900 +; OPT-NEXT: amdhsa.version: +; OPT-NEXT: - 1 +; OPT-NEXT: - 1 +; OPT-NEXT: ... +define internal i32 @func() { + ret i32 0 +} + Index: llvm/test/CodeGen/AMDGPU/collapse-endcf.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -42,18 +42,18 @@ ; ; GCN-O0-LABEL: simple_nested_if: ; GCN-O0: ; %bb.0: ; %bb -; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GCN-O0-NEXT: s_mov_b32 s10, -1 -; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000 -; GCN-O0-NEXT: s_add_u32 s8, s8, s3 -; GCN-O0-NEXT: s_addc_u32 s9, s9, 0 -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GCN-O0-NEXT: s_mov_b32 s14, -1 +; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000 +; GCN-O0-NEXT: s_add_u32 s12, s12, s11 +; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 +; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 ; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) ; GCN-O0-NEXT: v_writelane_b32 v1, s0, 0 ; GCN-O0-NEXT: v_writelane_b32 v1, s1, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v2, v0 -; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b32 s0, 1 ; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec @@ -63,7 +63,7 @@ ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB0_4 ; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then -; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_readlane_b32 s4, v1, 0 ; GCN-O0-NEXT: v_readlane_b32 s5, v1, 1 ; GCN-O0-NEXT: s_mov_b32 s2, 0xf000 @@ -90,7 +90,7 @@ ; GCN-O0-NEXT: s_cbranch_execz .LBB0_3 ; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 @@ -187,18 +187,18 @@ ; ; GCN-O0-LABEL: uncollapsable_nested_if: ; GCN-O0: ; %bb.0: ; %bb -; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GCN-O0-NEXT: s_mov_b32 s10, -1 -; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000 -; GCN-O0-NEXT: s_add_u32 s8, s8, s3 -; GCN-O0-NEXT: s_addc_u32 s9, s9, 0 -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GCN-O0-NEXT: s_mov_b32 s14, -1 +; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000 +; GCN-O0-NEXT: s_add_u32 s12, s12, s11 +; GCN-O0-NEXT: 
s_addc_u32 s13, s13, 0 +; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 ; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) ; GCN-O0-NEXT: v_writelane_b32 v1, s0, 0 ; GCN-O0-NEXT: v_writelane_b32 v1, s1, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v2, v0 -; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b32 s0, 1 ; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec @@ -208,7 +208,7 @@ ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB1_3 ; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then -; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_readlane_b32 s4, v1, 0 ; GCN-O0-NEXT: v_readlane_b32 s5, v1, 1 ; GCN-O0-NEXT: s_mov_b32 s2, 0xf000 @@ -235,7 +235,7 @@ ; GCN-O0-NEXT: s_cbranch_execz .LBB1_4 ; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 @@ -261,7 +261,7 @@ ; GCN-O0-NEXT: s_branch .LBB1_5 ; GCN-O0-NEXT: .LBB1_4: ; %bb.inner.end ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_readlane_b32 s2, v1, 4 ; GCN-O0-NEXT: v_readlane_b32 s3, v1, 5 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[2:3] @@ -367,19 +367,19 @@ ; ; GCN-O0-LABEL: nested_if_if_else: ; GCN-O0: ; %bb.0: ; %bb -; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GCN-O0-NEXT: s_mov_b32 s10, -1 -; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000 -; GCN-O0-NEXT: s_add_u32 s8, s8, s3 -; GCN-O0-NEXT: s_addc_u32 s9, s9, 0 -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GCN-O0-NEXT: s_mov_b32 s14, -1 +; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000 +; GCN-O0-NEXT: s_add_u32 s12, s12, s11 +; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 +; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 ; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) ; GCN-O0-NEXT: s_mov_b64 s[2:3], s[0:1] ; GCN-O0-NEXT: v_writelane_b32 v1, s2, 0 ; GCN-O0-NEXT: v_writelane_b32 v1, s3, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v2, v0 -; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b32 s2, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s4, 0 ; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 @@ -405,7 +405,7 @@ ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB2_6 ; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then -; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 2 ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[0:1], v0, s0 @@ -428,7 +428,7 @@ ; GCN-O0-NEXT: s_cbranch_execz .LBB2_5 ; GCN-O0-NEXT: ; %bb.3: ; %bb.then ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: 
buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 @@ -449,7 +449,7 @@ ; GCN-O0-NEXT: s_branch .LBB2_5 ; GCN-O0-NEXT: .LBB2_4: ; %bb.else ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 2 @@ -573,15 +573,15 @@ ; ; GCN-O0-LABEL: nested_if_else_if: ; GCN-O0: ; %bb.0: ; %bb -; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GCN-O0-NEXT: s_mov_b32 s10, -1 -; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000 -; GCN-O0-NEXT: s_add_u32 s8, s8, s3 -; GCN-O0-NEXT: s_addc_u32 s9, s9, 0 -; GCN-O0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GCN-O0-NEXT: s_mov_b32 s14, -1 +; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000 +; GCN-O0-NEXT: s_add_u32 s12, s12, s11 +; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 +; GCN-O0-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; GCN-O0-NEXT: v_mov_b32_e32 v2, v0 -; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b32 s0, 0 ; GCN-O0-NEXT: ; implicit-def: $sgpr0 ; GCN-O0-NEXT: v_mov_b32_e32 v4, 0 @@ -601,9 +601,9 @@ ; GCN-O0-NEXT: v_addc_u32_e64 v2, s[2:3], v2, v6, s[2:3] ; GCN-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec ; GCN-O0-NEXT: v_mov_b32_e32 v6, v2 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v5, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:8 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b32 s1, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 ; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 @@ -632,9 +632,9 @@ ; GCN-O0-NEXT: s_cbranch_execz .LBB3_8 ; GCN-O0-NEXT: ; %bb.2: ; %bb.outer.then ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 ; GCN-O0-NEXT: s_mov_b32 s4, s2 @@ -656,8 +656,8 @@ ; GCN-O0-NEXT: s_cbranch_execz .LBB3_7 ; GCN-O0-NEXT: ; %bb.3: ; %bb.inner.then ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: 
buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 ; GCN-O0-NEXT: s_mov_b32 s4, s2 @@ -671,9 +671,9 @@ ; GCN-O0-NEXT: buffer_store_dword v0, v[2:3], s[0:3], 0 addr64 offset:8 ; GCN-O0-NEXT: s_branch .LBB3_7 ; GCN-O0-NEXT: .LBB3_4: ; %bb.outer.else -; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s1, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s0, 0 ; GCN-O0-NEXT: s_mov_b32 s2, s0 @@ -695,8 +695,8 @@ ; GCN-O0-NEXT: s_cbranch_execz .LBB3_6 ; GCN-O0-NEXT: ; %bb.5: ; %bb.inner.then2 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 ; GCN-O0-NEXT: s_mov_b32 s4, s2 @@ -783,18 +783,18 @@ ; ; GCN-O0-LABEL: s_endpgm_unsafe_barrier: ; GCN-O0: ; %bb.0: ; %bb -; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GCN-O0-NEXT: s_mov_b32 s10, -1 -; GCN-O0-NEXT: s_mov_b32 s11, 0xe8f000 -; GCN-O0-NEXT: s_add_u32 s8, s8, s3 -; GCN-O0-NEXT: s_addc_u32 s9, s9, 0 -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GCN-O0-NEXT: s_mov_b32 s14, -1 +; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000 +; GCN-O0-NEXT: s_add_u32 s12, s12, s11 +; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 +; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 ; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) ; GCN-O0-NEXT: v_writelane_b32 v1, s0, 0 ; GCN-O0-NEXT: v_writelane_b32 v1, s1, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v2, v0 -; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b32 s0, 1 ; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec @@ -805,7 +805,7 @@ ; GCN-O0-NEXT: s_cbranch_execz .LBB4_2 ; GCN-O0-NEXT: ; %bb.1: ; %bb.then ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0 ; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1 ; GCN-O0-NEXT: s_mov_b32 s2, 0xf000 Index: llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll +++ llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll @@ -43,9 +43,9 @@ ; GCN-LABEL: name: uniform_opt_lshr_and_cmp ; GCN: bb.0.entry: ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; GCN-NEXT: liveins: $sgpr0_sgpr1 + ; GCN-NEXT: liveins: $sgpr4_sgpr5 ; 
GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4) ; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset, addrspace 4) ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] Index: llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -11,10 +11,10 @@ ; GCN-LABEL: name: extract_w_offset_vgpr ; GCN: bb.0.entry: ; GCN-NEXT: successors: %bb.1(0x80000000) - ; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1 + ; GCN-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0 - ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4) + ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4) ; GCN-NEXT: renamable $sgpr6 = COPY renamable $sgpr1 ; GCN-NEXT: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 ; GCN-NEXT: renamable $sgpr4 = S_MOV_B32 61440 @@ -56,22 +56,22 @@ ; GCN-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr2 ; GCN-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr1 ; GCN-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr0 - ; GCN-NEXT: undef %28.sub0:vreg_512 = COPY [[COPY1]] - ; GCN-NEXT: %28.sub1:vreg_512 = COPY [[COPY2]] - ; GCN-NEXT: %28.sub2:vreg_512 = COPY [[COPY3]] - ; GCN-NEXT: %28.sub3:vreg_512 = COPY [[COPY4]] - ; GCN-NEXT: %28.sub4:vreg_512 = COPY [[COPY5]] - ; GCN-NEXT: %28.sub5:vreg_512 = COPY [[COPY6]] - ; GCN-NEXT: %28.sub6:vreg_512 = COPY [[COPY7]] - ; GCN-NEXT: %28.sub7:vreg_512 = COPY [[COPY8]] - ; GCN-NEXT: %28.sub8:vreg_512 = COPY [[COPY9]] - ; GCN-NEXT: %28.sub9:vreg_512 = COPY [[COPY10]] - ; GCN-NEXT: %28.sub10:vreg_512 = COPY [[COPY11]] - ; GCN-NEXT: %28.sub11:vreg_512 = COPY [[COPY12]] - ; GCN-NEXT: %28.sub12:vreg_512 = COPY [[COPY13]] - ; GCN-NEXT: %28.sub13:vreg_512 = COPY [[COPY14]] - ; GCN-NEXT: %28.sub14:vreg_512 = COPY [[COPY15]] - ; GCN-NEXT: %28.sub15:vreg_512 = COPY [[COPY16]] + ; GCN-NEXT: undef %35.sub0:vreg_512 = COPY [[COPY1]] + ; GCN-NEXT: %35.sub1:vreg_512 = COPY [[COPY2]] + ; GCN-NEXT: %35.sub2:vreg_512 = COPY [[COPY3]] + ; GCN-NEXT: %35.sub3:vreg_512 = COPY [[COPY4]] + ; GCN-NEXT: %35.sub4:vreg_512 = COPY [[COPY5]] + ; GCN-NEXT: %35.sub5:vreg_512 = COPY [[COPY6]] + ; GCN-NEXT: %35.sub6:vreg_512 = COPY [[COPY7]] + ; GCN-NEXT: %35.sub7:vreg_512 = COPY [[COPY8]] + ; GCN-NEXT: %35.sub8:vreg_512 = COPY [[COPY9]] + ; GCN-NEXT: %35.sub9:vreg_512 = COPY [[COPY10]] + ; GCN-NEXT: %35.sub10:vreg_512 = COPY [[COPY11]] + ; GCN-NEXT: %35.sub11:vreg_512 = COPY [[COPY12]] + ; GCN-NEXT: %35.sub12:vreg_512 = COPY [[COPY13]] + ; GCN-NEXT: %35.sub13:vreg_512 = COPY [[COPY14]] + ; GCN-NEXT: %35.sub14:vreg_512 = COPY [[COPY15]] + ; GCN-NEXT: %35.sub15:vreg_512 = COPY [[COPY16]] ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MOV_B64 
$exec ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF @@ -81,12 +81,12 @@ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5) - ; GCN-NEXT: dead %45:vgpr_32 = COPY [[DEF]] + ; GCN-NEXT: dead [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[DEF]] ; GCN-NEXT: renamable $sgpr2 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec ; GCN-NEXT: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, [[COPY]](s32), implicit $exec ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 %28, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec - ; GCN-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]] + ; GCN-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 %35, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec + ; GCN-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]] ; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1 ; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5) ; GCN-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc Index: llvm/test/CodeGen/AMDGPU/llc-pipeline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -51,9 +51,6 @@ ; GCN-O0-NEXT: Scalarize Masked Memory Intrinsics ; GCN-O0-NEXT: Expand reduction intrinsics ; GCN-O0-NEXT: AMDGPU Remove Incompatible Functions -; GCN-O0-NEXT: AMDGPU Attributor -; GCN-O0-NEXT: FunctionPass Manager -; GCN-O0-NEXT: Cycle Info Analysis ; GCN-O0-NEXT: CallGraph Construction ; GCN-O0-NEXT: Call Graph SCC Pass Manager ; GCN-O0-NEXT: AMDGPU Annotate Kernel Features Index: llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll +++ llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -3,7 +3,7 @@ ; GCN-LABEL: {{^}}test_debug_value: ; NOOPT: .loc 1 1 42 prologue_end ; /tmp/test_debug_value.cl:1:42 -; NOOPT-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; NOOPT-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 ; NOOPT-NEXT: .Ltmp ; NOOPT-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- $sgpr4_sgpr5 Index: llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll +++ llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll @@ -27,23 +27,23 @@ ; CHECK-LABEL: csr_vgpr_spill_fp_callee: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, s33 +; CHECK-NEXT: s_mov_b32 s18, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 -; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; 
 ; CHECK-NEXT: s_add_i32 s32, s32, 0x400
 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
 ; CHECK-NEXT: v_writelane_b32 v1, s30, 0
 ; CHECK-NEXT: v_writelane_b32 v1, s31, 1
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, callee_has_fp@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, callee_has_fp@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1]
-; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, callee_has_fp@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, callee_has_fp@rel32@hi+12
+; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
+; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
+; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
+; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT: ;;#ASMSTART
 ; CHECK-NEXT: ; clobber csr v40
 ; CHECK-NEXT: ;;#ASMEND
@@ -54,7 +54,7 @@
 ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
 ; CHECK-NEXT: s_mov_b64 exec, s[4:5]
 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
-; CHECK-NEXT: s_mov_b32 s33, s6
+; CHECK-NEXT: s_mov_b32 s33, s18
 ; CHECK-NEXT: s_waitcnt vmcnt(0)
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
 bb:
@@ -67,18 +67,28 @@
 ; CHECK-LABEL: kernel_call:
 ; CHECK: ; %bb.0: ; %bb
 ; CHECK-NEXT: s_mov_b32 s32, 0
-; CHECK-NEXT: s_add_u32 flat_scratch_lo, s4, s7
-; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
-; CHECK-NEXT: s_add_u32 s0, s0, s7
+; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; CHECK-NEXT: s_add_u32 s0, s0, s17
 ; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, csr_vgpr_spill_fp_callee@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, csr_vgpr_spill_fp_callee@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1]
-; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v3, s16, 0
+; CHECK-NEXT: s_mov_b32 s13, s15
+; CHECK-NEXT: s_mov_b32 s12, s14
+; CHECK-NEXT: v_readlane_b32 s14, v3, 0
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, csr_vgpr_spill_fp_callee@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, csr_vgpr_spill_fp_callee@rel32@hi+12
+; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
+; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
+; CHECK-NEXT: s_mov_b32 s15, 20
+; CHECK-NEXT: v_lshlrev_b32_e64 v2, s15, v2
+; CHECK-NEXT: s_mov_b32 s15, 10
+; CHECK-NEXT: v_lshlrev_b32_e64 v1, s15, v1
+; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
+; CHECK-NEXT: ; implicit-def: $sgpr15
+; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
+; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT: s_endpgm
 bb:
   tail call fastcc void @csr_vgpr_spill_fp_callee()
@@ -90,23 +100,23 @@
 ; CHECK-LABEL: csr_vgpr_spill_fp_tailcall_callee:
 ; CHECK: ; %bb.0: ; %bb
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
 ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: s_mov_b64 exec, s[16:17]
 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; CHECK-NEXT: v_writelane_b32 v1, s33, 0
 ; CHECK-NEXT: ;;#ASMSTART
 ; CHECK-NEXT: ; clobber csr v40
 ; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, callee_has_fp@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, callee_has_fp@rel32@hi+12
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, callee_has_fp@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, callee_has_fp@rel32@hi+12
 ; CHECK-NEXT: v_readlane_b32 s33, v1, 0
 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; CHECK-NEXT: s_xor_saveexec_b64 s[6:7], -1
+; CHECK-NEXT: s_xor_saveexec_b64 s[18:19], -1
 ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[6:7]
-; CHECK-NEXT: s_setpc_b64 s[4:5]
+; CHECK-NEXT: s_mov_b64 exec, s[18:19]
+; CHECK-NEXT: s_setpc_b64 s[16:17]
 bb:
   call void asm sideeffect "; clobber csr v40", "~{v40}"()
   tail call fastcc void @callee_has_fp()
@@ -117,18 +127,28 @@
 ; CHECK-LABEL: kernel_tailcall:
 ; CHECK: ; %bb.0: ; %bb
 ; CHECK-NEXT: s_mov_b32 s32, 0
-; CHECK-NEXT: s_add_u32 flat_scratch_lo, s4, s7
-; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
-; CHECK-NEXT: s_add_u32 s0, s0, s7
+; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; CHECK-NEXT: s_add_u32 s0, s0, s17
 ; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, csr_vgpr_spill_fp_tailcall_callee@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, csr_vgpr_spill_fp_tailcall_callee@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1]
-; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v3, s16, 0
+; CHECK-NEXT: s_mov_b32 s13, s15
+; CHECK-NEXT: s_mov_b32 s12, s14
+; CHECK-NEXT: v_readlane_b32 s14, v3, 0
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, csr_vgpr_spill_fp_tailcall_callee@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, csr_vgpr_spill_fp_tailcall_callee@rel32@hi+12
+; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
+; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
+; CHECK-NEXT: s_mov_b32 s15, 20
+; CHECK-NEXT: v_lshlrev_b32_e64 v2, s15, v2
+; CHECK-NEXT: s_mov_b32 s15, 10
+; CHECK-NEXT: v_lshlrev_b32_e64 v1, s15, v1
+; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
+; CHECK-NEXT: ; implicit-def: $sgpr15
+; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
+; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT: s_endpgm
 bb:
   tail call fastcc void @csr_vgpr_spill_fp_tailcall_callee()
@@ -152,29 +172,29 @@
 ; CHECK-LABEL: caller_save_vgpr_spill_fp_tail_call:
 ; CHECK: ; %bb.0: ; %entry
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s6, s33
+; CHECK-NEXT: s_mov_b32 s18, s33
 ; CHECK-NEXT: s_mov_b32 s33, s32
-; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
 ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: s_mov_b64 exec, s[16:17]
 ; CHECK-NEXT: s_add_i32 s32, s32, 0x400
 ; CHECK-NEXT: v_writelane_b32 v1, s30, 0
 ; CHECK-NEXT: v_writelane_b32 v1, s31, 1
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, tail_call@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, tail_call@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1]
-; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, tail_call@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, tail_call@rel32@hi+12
+; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
+; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
+; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
+; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT: v_readlane_b32 s31, v1, 1
 ; CHECK-NEXT: v_readlane_b32 s30, v1, 0
 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
 ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
 ; CHECK-NEXT: s_mov_b64 exec, s[4:5]
 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
-; CHECK-NEXT: s_mov_b32 s33, s6
+; CHECK-NEXT: s_mov_b32 s33, s18
 ; CHECK-NEXT: s_waitcnt vmcnt(0)
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
 entry:
@@ -186,29 +206,29 @@
 ; CHECK-LABEL: caller_save_vgpr_spill_fp:
 ; CHECK: ; %bb.0: ; %entry
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: s_mov_b32 s7, s33
+; CHECK-NEXT: s_mov_b32 s19, s33
 ; CHECK-NEXT: s_mov_b32 s33, s32
-; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
 ; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: s_mov_b64 exec, s[16:17]
 ; CHECK-NEXT: s_add_i32 s32, s32, 0x400
 ; CHECK-NEXT: v_writelane_b32 v2, s30, 0
 ; CHECK-NEXT: v_writelane_b32 v2, s31, 1
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, caller_save_vgpr_spill_fp_tail_call@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, caller_save_vgpr_spill_fp_tail_call@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1]
-; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, caller_save_vgpr_spill_fp_tail_call@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, caller_save_vgpr_spill_fp_tail_call@rel32@hi+12
+; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
+; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
+; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
+; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT: v_readlane_b32 s31, v2, 1
 ; CHECK-NEXT: v_readlane_b32 s30, v2, 0
 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
 ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
 ; CHECK-NEXT: s_mov_b64 exec, s[4:5]
 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
-; CHECK-NEXT: s_mov_b32 s33, s7
+; CHECK-NEXT: s_mov_b32 s33, s19
 ; CHECK-NEXT: s_waitcnt vmcnt(0)
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
 entry:
@@ -220,18 +240,28 @@
 ; CHECK-LABEL: kernel:
 ; CHECK: ; %bb.0: ; %entry
 ; CHECK-NEXT: s_mov_b32 s32, 0
-; CHECK-NEXT: s_add_u32 flat_scratch_lo, s4, s7
-; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
-; CHECK-NEXT: s_add_u32 s0, s0, s7
+; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
+; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
+; CHECK-NEXT: s_add_u32 s0, s0, s17
 ; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: s_getpc_b64 s[4:5]
-; CHECK-NEXT: s_add_u32 s4, s4, caller_save_vgpr_spill_fp@rel32@lo+4
-; CHECK-NEXT: s_addc_u32 s5, s5, caller_save_vgpr_spill_fp@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1]
-; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v3, s16, 0
+; CHECK-NEXT: s_mov_b32 s13, s15
+; CHECK-NEXT: s_mov_b32 s12, s14
+; CHECK-NEXT: v_readlane_b32 s14, v3, 0
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, caller_save_vgpr_spill_fp@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, caller_save_vgpr_spill_fp@rel32@hi+12
+; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
+; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
+; CHECK-NEXT: s_mov_b32 s15, 20
+; CHECK-NEXT: v_lshlrev_b32_e64 v2, s15, v2
+; CHECK-NEXT: s_mov_b32 s15, 10
+; CHECK-NEXT: v_lshlrev_b32_e64 v1, s15, v1
+; CHECK-NEXT: v_or3_b32 v31, v0, v1, v2
+; CHECK-NEXT: ; implicit-def: $sgpr15
+; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
+; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT: s_endpgm
 entry:
   %call = call i32 @caller_save_vgpr_spill_fp()
Index: llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -11,7 +11,7 @@
 define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, i32 %in) #0 {
 ; GCN-LABEL: spill_sgprs_to_multiple_vgprs:
 ; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ; def s[4:11]
 ; GCN-NEXT: ;;#ASMEND
@@ -442,7 +442,7 @@
 define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %in) #1 {
 ; GCN-LABEL: split_sgpr_spill_2_vgprs:
 ; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ; def s[4:19]
 ; GCN-NEXT: ;;#ASMEND
@@ -667,9 +667,9 @@
 ; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
 ; GCN-NEXT: s_mov_b32 s54, -1
 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
-; GCN-NEXT: s_add_u32 s52, s52, s3
+; GCN-NEXT: s_add_u32 s52, s52, s11
 ; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: s_load_dword s0, s[0:1], 0xb
+; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ;;#ASMEND
 ; GCN-NEXT: ;;#ASMSTART
@@ -902,9 +902,9 @@
 ; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
 ; GCN-NEXT: s_mov_b32 s54, -1
 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
-; GCN-NEXT: s_add_u32 s52, s52, s3
+; GCN-NEXT: s_add_u32 s52, s52, s11
 ; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
+; GCN-NEXT: s_load_dword s0, s[4:5], 0x9
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ;;#ASMEND
 ; GCN-NEXT: ;;#ASMSTART
Index: llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
+++ llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
@@ -7,9 +7,9 @@
 define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %in) #1 {
 ; GCN-LABEL: partial_no_vgprs_last_sgpr_spill:
 ; GCN: ; %bb.0:
-; GCN-NEXT: s_add_u32 s0, s0, s7
+; GCN-NEXT: s_add_u32 s0, s0, s15
 ; GCN-NEXT: s_addc_u32 s1, s1, 0
-; GCN-NEXT: s_load_dword s4, s[4:5], 0x2
+; GCN-NEXT: s_load_dword s4, s[8:9], 0x2
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ;;#ASMEND
 ; GCN-NEXT: ;;#ASMSTART
Index: llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
+++ llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
@@ -16,11 +16,11 @@
 ; GCN-LABEL: spill_sgpr_with_no_lower_vgpr_available:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s6, s33
+; GCN-NEXT: s_mov_b32 s18, s33
 ; GCN-NEXT: s_mov_b32 s33, s32
-; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, s[4:5]
+; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
+; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[16:17]
 ; GCN-NEXT: s_add_i32 s32, s32, 0x7400
 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill
@@ -135,21 +135,23 @@
 ; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 ; 4-byte Folded Spill
 ; GCN-NEXT: v_writelane_b32 v255, s30, 0
 ; GCN-NEXT: v_writelane_b32 v255, s31, 1
+; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
 ; GCN-NEXT: v_mov_b32_e32 v0, 0
 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:444
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_getpc_b64 s[4:5]
-; GCN-NEXT: s_add_u32 s4, s4, child_function@gotpcrel32@lo+4
-; GCN-NEXT: s_addc_u32 s5, s5, child_function@gotpcrel32@hi+12
-; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GCN-NEXT: s_mov_b64 s[10:11], s[2:3]
-; GCN-NEXT: s_mov_b64 s[8:9], s[0:1]
-; GCN-NEXT: s_mov_b64 s[0:1], s[8:9]
-; GCN-NEXT: s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT: s_getpc_b64 s[16:17]
+; GCN-NEXT: s_add_u32 s16, s16, child_function@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s17, s17, child_function@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
+; GCN-NEXT: s_mov_b64 s[22:23], s[2:3]
+; GCN-NEXT: s_mov_b64 s[20:21], s[0:1]
+; GCN-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GCN-NEXT: s_mov_b64 s[2:3], s[22:23]
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
 ; GCN-NEXT: v_readlane_b32 s31, v255, 1
 ; GCN-NEXT: v_readlane_b32 s30, v255, 0
 ; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 ; 4-byte Folded Reload
@@ -264,10 +266,10 @@
 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload
 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload
 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00
-; GCN-NEXT: s_mov_b32 s33, s6
+; GCN-NEXT: s_mov_b32 s33, s18
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 %alloca = alloca i32, align 4, addrspace(5)
@@ -308,11 +310,11 @@
 ; GCN-LABEL: spill_to_lowest_available_vgpr:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s6, s33
+; GCN-NEXT: s_mov_b32 s18, s33
 ; GCN-NEXT: s_mov_b32 s33, s32
-; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, s[4:5]
+; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
+; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[16:17]
 ; GCN-NEXT: s_add_i32 s32, s32, 0x7400
 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill
@@ -426,21 +428,23 @@
 ; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s33 ; 4-byte Folded Spill
 ; GCN-NEXT: v_writelane_b32 v254, s30, 0
 ; GCN-NEXT: v_writelane_b32 v254, s31, 1
+; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
 ; GCN-NEXT: v_mov_b32_e32 v0, 0
 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:440
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_getpc_b64 s[4:5]
-; GCN-NEXT: s_add_u32 s4, s4, child_function@gotpcrel32@lo+4
-; GCN-NEXT: s_addc_u32 s5, s5, child_function@gotpcrel32@hi+12
-; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GCN-NEXT: s_mov_b64 s[10:11], s[2:3]
-; GCN-NEXT: s_mov_b64 s[8:9], s[0:1]
-; GCN-NEXT: s_mov_b64 s[0:1], s[8:9]
-; GCN-NEXT: s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT: s_getpc_b64 s[16:17]
+; GCN-NEXT: s_add_u32 s16, s16, child_function@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s17, s17, child_function@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
+; GCN-NEXT: s_mov_b64 s[22:23], s[2:3]
+; GCN-NEXT: s_mov_b64 s[20:21], s[0:1]
+; GCN-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GCN-NEXT: s_mov_b64 s[2:3], s[22:23]
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
 ; GCN-NEXT: v_readlane_b32 s31, v254, 1
 ; GCN-NEXT: v_readlane_b32 s30, v254, 0
 ; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s33 ; 4-byte Folded Reload
@@ -554,10 +558,10 @@
 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
 ; GCN-NEXT: s_mov_b64 exec, s[4:5]
 ; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00
-; GCN-NEXT: s_mov_b32 s33, s6
+; GCN-NEXT: s_mov_b32 s33, s18
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 %alloca = alloca i32, align 4, addrspace(5)
@@ -1000,15 +1004,17 @@
 ; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
 ; GCN-NEXT: v_mov_b32_e32 v0, 0
 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:444
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_getpc_b64 s[4:5]
-; GCN-NEXT: s_add_u32 s4, s4, child_function@gotpcrel32@lo+4
-; GCN-NEXT: s_addc_u32 s5, s5, child_function@gotpcrel32@hi+12
-; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT: s_getpc_b64 s[16:17]
+; GCN-NEXT: s_add_u32 s16, s16, child_function@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s17, s17, child_function@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
 ; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
@@ -1121,7 +1127,7 @@
 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_setpc_b64 s[16:17]
 %alloca = alloca i32, align 4, addrspace(5)
 store volatile i32 0, ptr addrspace(5) %alloca
@@ -1506,7 +1512,7 @@
 ; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s6, s33
+; GCN-NEXT: s_mov_b32 s18, s33
 ; GCN-NEXT: s_mov_b32 s33, s32
 ; GCN-NEXT: s_add_i32 s32, s32, 0x7400
 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
@@ -1621,31 +1627,31 @@
 ; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 s[14:15], exec
+; GCN-NEXT: s_mov_b64 s[26:27], exec
 ; GCN-NEXT: s_mov_b64 exec, 1
 ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:456
 ; GCN-NEXT: v_writelane_b32 v1, s30, 0
 ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:456
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_mov_b64 exec, s[14:15]
-; GCN-NEXT: s_mov_b64 s[12:13], exec
+; GCN-NEXT: s_mov_b64 exec, s[26:27]
+; GCN-NEXT: s_mov_b64 s[24:25], exec
 ; GCN-NEXT: s_mov_b64 exec, 1
 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456
 ; GCN-NEXT: v_writelane_b32 v0, s31, 0
 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_mov_b64 exec, s[12:13]
-; GCN-NEXT: s_getpc_b64 s[4:5]
-; GCN-NEXT: s_add_u32 s4, s4, child_function_ipra@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s5, s5, child_function_ipra@rel32@hi+12
-; GCN-NEXT: s_mov_b64 s[10:11], s[2:3]
-; GCN-NEXT: s_mov_b64 s[8:9], s[0:1]
-; GCN-NEXT: s_mov_b64 s[0:1], s[8:9]
-; GCN-NEXT: s_mov_b64 s[2:3], s[10:11]
-; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GCN-NEXT: s_mov_b64 s[8:9], exec
+; GCN-NEXT: s_mov_b64 exec, s[24:25]
+; GCN-NEXT: s_getpc_b64 s[16:17]
+; GCN-NEXT: s_add_u32 s16, s16, child_function_ipra@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s17, s17, child_function_ipra@rel32@hi+12
+; GCN-NEXT: s_mov_b64 s[22:23], s[2:3]
+; GCN-NEXT: s_mov_b64 s[20:21], s[0:1]
+; GCN-NEXT: s_mov_b64 s[0:1], s[20:21]
+; GCN-NEXT: s_mov_b64 s[2:3], s[22:23]
+; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GCN-NEXT: s_mov_b64 s[6:7], exec
 ; GCN-NEXT: s_mov_b64 exec, 1
 ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:456
 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload
@@ -1653,7 +1659,7 @@
 ; GCN-NEXT: v_readlane_b32 s31, v1, 0
 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:456
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_mov_b64 exec, s[8:9]
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
 ; GCN-NEXT: s_mov_b64 s[4:5], exec
 ; GCN-NEXT: s_mov_b64 exec, 1
 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456
@@ -1776,7 +1782,7 @@
 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload
 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
 ; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00
-; GCN-NEXT: s_mov_b32 s33, s6
+; GCN-NEXT: s_mov_b32 s33, s18
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 call void @child_function_ipra()
@@ -2049,10 +2055,10 @@
 ; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra_tail_call:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_getpc_b64 s[4:5]
-; GCN-NEXT: s_add_u32 s4, s4, child_function_ipra_tail_call@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s5, s5, child_function_ipra_tail_call@rel32@hi+12
-; GCN-NEXT: s_setpc_b64 s[4:5]
+; GCN-NEXT: s_getpc_b64 s[16:17]
+; GCN-NEXT: s_add_u32 s16, s16, child_function_ipra_tail_call@rel32@lo+4
+; GCN-NEXT: s_addc_u32 s17, s17, child_function_ipra_tail_call@rel32@hi+12
+; GCN-NEXT: s_setpc_b64 s[16:17]
 tail call void @child_function_ipra_tail_call()
 ret void
}
Index: llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll
+++ llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll
@@ -9,7 +9,7 @@
 define amdgpu_kernel void @test_sopk_size(i32 %var.mode) {
 ; GFX10-LABEL: test_sopk_size:
 ; GFX10: ; %bb.0:
-; GFX10: s_load_b32 s0, s[0:1], 0x0
+; GFX10: s_load_b32 s0, s[4:5], 0x0
 ; GFX10: s_mov_b32 s1, 3
 ; GFX10: s_setreg_b32 hwreg(HW_REG_MODE, 0, 2), s1
 ; GFX10: s_waitcnt lgkmcnt(0)
Index: llvm/test/CodeGen/AMDGPU/spill-m0.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/spill-m0.ll
+++ llvm/test/CodeGen/AMDGPU/spill-m0.ll
@@ -80,7 +80,7 @@
 ; Force save and restore of m0 during SMEM spill
 ; GCN-LABEL: {{^}}m0_unavailable_spill:
-; GCN: s_load_dword [[REG0:s[0-9]+]], s[0:1], {{0x[0-9]+}}
+; GCN: s_load_dword [[REG0:s[0-9]+]], s[4:5], {{0x[0-9]+}}
 ; GCN: ; def m0, 1
Index: llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
+++ llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
@@ -11,7 +11,7 @@
 define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() {
 ; CHECK-LABEL: __omp_offloading_16_dd2df_main_l9:
 ; CHECK: ; %bb.0: ; %bb
-; CHECK-NEXT: s_add_u32 s0, s0, s7
+; CHECK-NEXT: s_add_u32 s0, s0, s15
 ; CHECK-NEXT: s_addc_u32 s1, s1, 0
 ; CHECK-NEXT: v_mov_b32_e32 v2, v0
 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
Index: llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
+++ llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
@@ -105,22 +105,24 @@
 ; GFX9-LABEL: {{^}}call:
 define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
 ; GFX9-DAG: s_load_dword [[ARG:s[0-9]+]]
-; GFX9-O0-DAG: s_mov_b32 s4, 0{{$}}
-; GFX9-O0-DAG: v_mov_b32_e32 v2, [[ARG]]
+; GFX9-O0-DAG: s_mov_b32 s3, 0{{$}}
+; GFX9-O0-DAG: v_mov_b32_e32 v{{[0-9]+}}, [[ARG]]
 ; GFX9-O3: v_mov_b32_e32 v2, [[ARG]]
 ; GFX9-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, s3
 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0
 ; GFX9-NEXT: s_not_b64 exec, exec
 %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0)
-; GFX9: v_mov_b32_e32 v0, v2
+; GFX9-O0: v_mov_b32_e32 v0, v6
+; GFX9-O3: v_mov_b32_e32 v0, v2
 ; GFX9: s_swappc_b64
 %tmp134 = call i32 @called(i32 %tmp107)
-; GFX9: v_mov_b32_e32 v1, v0
+; GFX9-O3: v_mov_b32_e32 v1, v0
 ; GFX9-O3: v_add_u32_e32 v1, v1, v2
-; GFX9-O0: v_add_u32_e64 v1, v1, v2
+; GFX9-O0: v_mov_b32_e32 v3, v0
+; GFX9-O0: v_add_u32_e64 v3, v3, v6
 %tmp136 = add i32 %tmp134, %tmp107
 %tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136)
 ; GFX9: buffer_store_dword v0
@@ -298,22 +300,24 @@
 ; GFX9-LABEL: {{^}}strict_wwm_call:
 define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
 ; GFX9-DAG: s_load_dword [[ARG:s[0-9]+]]
-; GFX9-O0-DAG: s_mov_b32 s4, 0{{$}}
-; GFX9-O0-DAG: v_mov_b32_e32 v2, [[ARG]]
+; GFX9-O0-DAG: s_mov_b32 s3, 0{{$}}
+; GFX9-O0-DAG: v_mov_b32_e32 v6, [[ARG]]
 ; GFX9-O3: v_mov_b32_e32 v2, [[ARG]]
 ; GFX9-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, s3
 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0
 ; GFX9-NEXT: s_not_b64 exec, exec
 %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0)
-; GFX9: v_mov_b32_e32 v0, v2
+; GFX9-O3: v_mov_b32_e32 v0, v2
+; GFX9-O0: v_mov_b32_e32 v0, v6
 ; GFX9: s_swappc_b64
 %tmp134 = call i32 @strict_wwm_called(i32 %tmp107)
-; GFX9: v_mov_b32_e32 v1, v0
+; GFX9-O3: v_mov_b32_e32 v1, v0
 ; GFX9-O3: v_add_u32_e32 v1, v1, v2
-; GFX9-O0: v_add_u32_e64 v1, v1, v2
+; GFX9-O0: v_mov_b32_e32 v3, v0
+; GFX9-O0: v_add_u32_e64 v3, v3, v6
 %tmp136 = add i32 %tmp134, %tmp107
 %tmp137 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp136)
 ; GFX9: buffer_store_dword v0