diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -173,12 +173,6 @@ "fast", "fast register allocator", createFastVGPRRegisterAllocator); } -static cl::opt EnableSROA( - "amdgpu-sroa", - cl::desc("Run SROA after promote alloca pass"), - cl::ReallyHidden, - cl::init(true)); - static cl::opt EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), @@ -1011,8 +1005,6 @@ if (TM.getOptLevel() > CodeGenOpt::None) { addPass(createAMDGPUPromoteAlloca()); - if (EnableSROA) - addPass(createSROAPass()); if (isPassEnabled(EnableScalarIRPasses)) addStraightLineScalarOptimizationPasses(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; This is a copy of sibling-call.ll, but stops after the IRTranslator. define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll --- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll +++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}store_fi_lifetime: ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -2,10 +2,10 @@ ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-SICIVI %s ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-SICIVI %s ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s -; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICIVI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICIVI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-scalarize-global-loads=false -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SICIVI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICIVI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICIVI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-scalarize-global-loads=false -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SICIVI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-scalarize-global-loads=false < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" diff --git a/llvm/test/CodeGen/AMDGPU/extload-private.ll b/llvm/test/CodeGen/AMDGPU/extload-private.ll --- a/llvm/test/CodeGen/AMDGPU/extload-private.ll +++ b/llvm/test/CodeGen/AMDGPU/extload-private.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}load_i8_sext_private: ; SI: buffer_load_sbyte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll --- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-MUBUF,MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca,+enable-flat-scratch -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLATSCR %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MUBUF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-MUBUF,MUBUF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca,+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLATSCR %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11 %s ; Test that non-entry function frame indices are expanded properly to diff --git a/llvm/test/CodeGen/AMDGPU/ipra.ll b/llvm/test/CodeGen/AMDGPU/ipra.ll --- a/llvm/test/CodeGen/AMDGPU/ipra.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Kernels are not called, so there is no call preserved mask. ; GCN-LABEL: {{^}}kernel: diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -189,7 +189,6 @@ ; GCN-O1-NEXT: Expand Atomic instructions ; GCN-O1-NEXT: AMDGPU Promote Alloca ; GCN-O1-NEXT: Dominator Tree Construction -; GCN-O1-NEXT: SROA ; GCN-O1-NEXT: Cycle Info Analysis ; GCN-O1-NEXT: Uniformity Analysis ; GCN-O1-NEXT: AMDGPU IR optimizations @@ -461,7 +460,6 @@ ; GCN-O1-OPTS-NEXT: Expand Atomic instructions ; GCN-O1-OPTS-NEXT: AMDGPU Promote Alloca ; GCN-O1-OPTS-NEXT: Dominator Tree Construction -; GCN-O1-OPTS-NEXT: SROA ; GCN-O1-OPTS-NEXT: Natural Loop Information ; GCN-O1-OPTS-NEXT: Split GEPs to a variadic base and a constant offset for better CSE ; GCN-O1-OPTS-NEXT: Scalar Evolution Analysis @@ -755,7 +753,6 @@ ; GCN-O2-NEXT: Expand Atomic instructions ; GCN-O2-NEXT: AMDGPU Promote Alloca ; GCN-O2-NEXT: Dominator Tree Construction -; GCN-O2-NEXT: SROA ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Split GEPs to a variadic base and a constant offset for better CSE ; GCN-O2-NEXT: Scalar Evolution Analysis @@ -1059,7 +1056,6 @@ ; GCN-O3-NEXT: Expand Atomic instructions ; GCN-O3-NEXT: AMDGPU Promote Alloca ; GCN-O3-NEXT: Dominator Tree Construction -; GCN-O3-NEXT: SROA ; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Split GEPs to a variadic base and a constant offset for better CSE ; GCN-O3-NEXT: Scalar Evolution Analysis diff --git a/llvm/test/CodeGen/AMDGPU/load-hi16.ll b/llvm/test/CodeGen/AMDGPU/load-hi16.ll --- a/llvm/test/CodeGen/AMDGPU/load-hi16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-hi16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900 %s -; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s -; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX803 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900-FLATSCR %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900 %s +; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX803 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900-FLATSCR %s define <2 x i16> @load_local_lo_hi_v2i16_multi_use_lo(ptr addrspace(3) noalias %in) #0 { ; GFX900-LABEL: load_local_lo_hi_v2i16_multi_use_lo: @@ -2705,4 +2705,3 @@ } attributes #0 = { nounwind } - diff --git a/llvm/test/CodeGen/AMDGPU/load-lo16.ll b/llvm/test/CodeGen/AMDGPU/load-lo16.ll --- a/llvm/test/CodeGen/AMDGPU/load-lo16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-lo16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GFX900-MUBUF %s -; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck --check-prefix=GFX906 %s -; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck --check-prefix=GFX803 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs --mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX900,GFX900-FLATSCR %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GFX900-MUBUF %s +; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck --check-prefix=GFX906 %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck --check-prefix=GFX803 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs --mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX900,GFX900-FLATSCR %s define <2 x i16> @load_local_lo_v2i16_undeflo(ptr addrspace(3) %in) #0 { ; GFX900-LABEL: load_local_lo_v2i16_undeflo: diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; Test calls when called by other callable functions rather than ; kernels. diff --git a/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll b/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll --- a/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll +++ b/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck %s ; ; CFG flattening should use parallel-and mode to generate branch conditions and ; then merge if-regions with the same bodies. diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll --- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll +++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll @@ -63,8 +63,8 @@ ; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 } ; REMARK-NEXT: Function: test_kernel ; REMARK-NEXT: Args: -; REMARK-NEXT: - String: ' Dynamic Stack: -; REMARK-NEXT: - DynamicStack: 'False' +; REMARK-NEXT: - String: ' Dynamic Stack: +; REMARK-NEXT: - DynamicStack: 'False' ; REMARK-NEXT: .. ; REMARK-NEXT: --- !Analysis ; REMARK-NEXT: Pass: kernel-resource-usage @@ -179,7 +179,7 @@ ; STDERR-NEXT: remark: foo.cl:74:0: SGPRs: 39 ; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: 32 ; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: 10 -; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 64 +; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 144 ; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True ; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: 8 ; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0 @@ -187,7 +187,7 @@ ; STDERR-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0 declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, i1 immarg) - + define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 { %alloca = alloca <10 x i64>, align 16, addrspace(5) call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 40, i1 false) diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s target datalayout = "A5" ; FIXME: Why is this commuted only sometimes? diff --git a/llvm/test/CodeGen/AMDGPU/store-hi16.ll b/llvm/test/CodeGen/AMDGPU/store-hi16.ll --- a/llvm/test/CodeGen/AMDGPU/store-hi16.ll +++ b/llvm/test/CodeGen/AMDGPU/store-hi16.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX9-MUBUF %s -; RxN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX803,NO-D16-HI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX9-FLATSCR %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX9-MUBUF %s +; RxN: llc -march=amdgcn -mcpu=gfx906 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX803,NO-D16-HI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX9-FLATSCR %s ; GCN-LABEL: {{^}}store_global_hi_v2i16: ; GCN: s_waitcnt diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_asm.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_asm.ll --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_asm.ll +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_asm.ll @@ -1,31 +1,31 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck %s define i64 @i64_test(i64 %i) nounwind readnone { - %loc = alloca i64 - %j = load i64, i64 * %loc + %loc = alloca i64, addrspace(5) + %j = load i64, ptr addrspace(5) %loc %r = add i64 %i, %j ret i64 %r } define i64 @i32_test(i32 %i) nounwind readnone { - %loc = alloca i32 - %j = load i32, i32 * %loc + %loc = alloca i32, addrspace(5) + %j = load i32, ptr addrspace(5) %loc %r = add i32 %i, %j %ext = zext i32 %r to i64 ret i64 %ext } define i64 @i16_test(i16 %i) nounwind readnone { - %loc = alloca i16 - %j = load i16, i16 * %loc + %loc = alloca i16, addrspace(5) + %j = load i16, ptr addrspace(5) %loc %r = add i16 %i, %j %ext = zext i16 %r to i64 ret i64 %ext } define i64 @i8_test(i8 %i) nounwind readnone { - %loc = alloca i8 - %j = load i8, i8 * %loc + %loc = alloca i8, addrspace(5) + %j = load i8, ptr addrspace(5) %loc %r = add i8 %i, %j %ext = zext i8 %r to i64 ret i64 %ext diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_asm.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_asm.ll.expected --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_asm.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_asm.ll.expected @@ -5,10 +5,15 @@ ; CHECK-LABEL: i64_test: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 +; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] - %loc = alloca i64 - %j = load i64, i64 * %loc + %loc = alloca i64, addrspace(5) + %j = load i64, ptr addrspace(5) %loc %r = add i64 %i, %j ret i64 %r } @@ -17,11 +22,13 @@ ; CHECK-LABEL: i32_test: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %loc = alloca i32 - %j = load i32, i32 * %loc + %loc = alloca i32, addrspace(5) + %j = load i32, ptr addrspace(5) %loc %r = add i32 %i, %j %ext = zext i32 %r to i64 ret i64 %ext @@ -31,11 +38,14 @@ ; CHECK-LABEL: i16_test: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: buffer_load_ushort v1, off, s[0:3], s32 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %loc = alloca i16 - %j = load i16, i16 * %loc + %loc = alloca i16, addrspace(5) + %j = load i16, ptr addrspace(5) %loc %r = add i16 %i, %j %ext = zext i16 %r to i64 ret i64 %ext @@ -45,11 +55,14 @@ ; CHECK-LABEL: i8_test: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_and_b32_e32 v0, 0xff, v0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_setpc_b64 s[30:31] - %loc = alloca i8 - %j = load i8, i8 * %loc + %loc = alloca i8, addrspace(5) + %j = load i8, ptr addrspace(5) %loc %r = add i8 %i, %j %ext = zext i8 %r to i64 ret i64 %ext diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected @@ -70,10 +70,40 @@ ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s4, s33 +; CHECK-NEXT: s_mov_b32 s8, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_addk_i32 s32, 0x600 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, 1 +; CHECK-NEXT: v_mov_b32_e32 v1, 2 +; CHECK-NEXT: v_mov_b32_e32 v2, 3 +; CHECK-NEXT: v_mov_b32_e32 v3, 4 +; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s33 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12 +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16 +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[6:7] +; CHECK-NEXT: s_cbranch_execz .LBB0_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12 +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16 +; CHECK-NEXT: .LBB0_2: ; %Flow +; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; CHECK-NEXT: s_cbranch_execz .LBB0_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: v_mov_b32_e32 v0, 1 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_mov_b32 s33, s4 +; CHECK-NEXT: s_addk_i32 s32, 0xfa00 +; CHECK-NEXT: s_mov_b32 s33, s8 +; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] ; ; CHECK-LABEL: main: @@ -84,16 +114,31 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s6, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_addk_i32 s32, 0x600 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, x@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, x@rel32@hi+12 ; CHECK-NEXT: v_mov_b32_e32 v2, 1 +; CHECK-NEXT: v_mov_b32_e32 v3, 2 +; CHECK-NEXT: v_mov_b32_e32 v4, 3 +; CHECK-NEXT: v_mov_b32_e32 v5, 4 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:8 +; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:12 +; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:16 ; CHECK-NEXT: v_mov_b32_e32 v0, s4 ; CHECK-NEXT: v_mov_b32_e32 v1, s5 ; CHECK-NEXT: flat_store_dword v[0:1], v2 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:8 +; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:12 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:16 +; CHECK-NEXT: s_addk_i32 s32, 0xfa00 ; CHECK-NEXT: s_mov_b32 s33, s6 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected @@ -11,10 +11,40 @@ ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s4, s33 +; CHECK-NEXT: s_mov_b32 s8, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_addk_i32 s32, 0x600 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, 1 +; CHECK-NEXT: v_mov_b32_e32 v1, 2 +; CHECK-NEXT: v_mov_b32_e32 v2, 3 +; CHECK-NEXT: v_mov_b32_e32 v3, 4 +; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s33 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12 +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16 +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[6:7] +; CHECK-NEXT: s_cbranch_execz .LBB0_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12 +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16 +; CHECK-NEXT: .LBB0_2: ; %Flow +; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; CHECK-NEXT: s_cbranch_execz .LBB0_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: v_mov_b32_e32 v0, 1 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_mov_b32 s33, s4 +; CHECK-NEXT: s_addk_i32 s32, 0xfa00 +; CHECK-NEXT: s_mov_b32 s33, s8 +; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] %1 = alloca i32, align 4, addrspace(5) %2 = alloca i32, align 4, addrspace(5) @@ -61,16 +91,31 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s6, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_addk_i32 s32, 0x600 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, x@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, x@rel32@hi+12 ; CHECK-NEXT: v_mov_b32_e32 v2, 1 +; CHECK-NEXT: v_mov_b32_e32 v3, 2 +; CHECK-NEXT: v_mov_b32_e32 v4, 3 +; CHECK-NEXT: v_mov_b32_e32 v5, 4 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:8 +; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:12 +; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:16 ; CHECK-NEXT: v_mov_b32_e32 v0, s4 ; CHECK-NEXT: v_mov_b32_e32 v1, s5 ; CHECK-NEXT: flat_store_dword v[0:1], v2 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:8 +; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:12 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:16 +; CHECK-NEXT: s_addk_i32 s32, 0xfa00 ; CHECK-NEXT: s_mov_b32 s33, s6 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_isel.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_isel.ll.expected --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_isel.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_isel.ll.expected @@ -3,12 +3,20 @@ define i64 @i64_test(i64 %i) nounwind readnone { ; CHECK-LABEL: i64_test: -; CHECK: SelectionDAG has 9 nodes: +; CHECK: SelectionDAG has 25 nodes: ; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t11: ch,glue = CopyToReg t0, Register:i32 $vgpr0, IMPLICIT_DEF:i32 -; CHECK-NEXT: t17: i32 = V_MOV_B32_e32 TargetConstant:i32<0> -; CHECK-NEXT: t13: ch,glue = CopyToReg t11, Register:i32 $vgpr1, t17, t11:1 -; CHECK-NEXT: t14: ch = SI_RETURN Register:i32 $vgpr0, Register:i32 $vgpr1, t13, t13:1 +; CHECK-NEXT: t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg # D:1 t0, Register:i32 %1 +; CHECK-NEXT: t49: i64 = REG_SEQUENCE # D:1 TargetConstant:i32<53>, t2, TargetConstant:i32<3>, t4, TargetConstant:i32<11> +; CHECK-NEXT: t26: i32,ch = BUFFER_LOAD_DWORD_OFFEN TargetFrameIndex:i32<0>, Register:v4i32 $sgpr0_sgpr1_sgpr2_sgpr3, TargetConstant:i32<0>, TargetConstant:i32<0>, TargetConstant:i32<0>, TargetConstant:i1<0>, t0 +; CHECK-NEXT: t29: i32,ch = BUFFER_LOAD_DWORD_OFFEN TargetFrameIndex:i32<0>, Register:v4i32 $sgpr0_sgpr1_sgpr2_sgpr3, TargetConstant:i32<0>, TargetConstant:i32<4>, TargetConstant:i32<0>, TargetConstant:i1<0>, t0 +; CHECK-NEXT: t32: v2i32 = REG_SEQUENCE # D:1 TargetConstant:i32<53>, t26, TargetConstant:i32<3>, t29, TargetConstant:i32<11> +; CHECK-NEXT: t10: i64 = V_ADD_U64_PSEUDO # D:1 t49, t32 +; CHECK-NEXT: t23: i32 = EXTRACT_SUBREG # D:1 t10, TargetConstant:i32<3> +; CHECK-NEXT: t16: ch,glue = CopyToReg # D:1 t0, Register:i32 $vgpr0, t23 +; CHECK-NEXT: t38: i32 = EXTRACT_SUBREG # D:1 t10, TargetConstant:i32<11> +; CHECK-NEXT: t18: ch,glue = CopyToReg # D:1 t16, Register:i32 $vgpr1, t38, t16:1 +; CHECK-NEXT: t19: ch = SI_RETURN # D:1 Register:i32 $vgpr0, Register:i32 $vgpr1, t18, t18:1 ; CHECK-EMPTY: %loc = alloca i64, addrspace(5) %j = load i64, ptr addrspace(5) %loc @@ -18,12 +26,15 @@ define i64 @i32_test(i32 %i) nounwind readnone { ; CHECK-LABEL: i32_test: -; CHECK: SelectionDAG has 8 nodes: -; CHECK-NEXT: t5: i32 = V_MOV_B32_e32 TargetConstant:i32<0> +; CHECK: SelectionDAG has 15 nodes: ; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t7: ch,glue = CopyToReg t0, Register:i32 $vgpr0, t5 -; CHECK-NEXT: t9: ch,glue = CopyToReg t7, Register:i32 $vgpr1, t5, t7:1 -; CHECK-NEXT: t10: ch = SI_RETURN Register:i32 $vgpr0, Register:i32 $vgpr1, t9, t9:1 +; CHECK-NEXT: t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %0 +; CHECK-NEXT: t6: i32,ch = BUFFER_LOAD_DWORD_OFFEN TargetFrameIndex:i32<0>, Register:v4i32 $sgpr0_sgpr1_sgpr2_sgpr3, TargetConstant:i32<0>, TargetConstant:i32<0>, TargetConstant:i32<0>, TargetConstant:i1<0>, t0 +; CHECK-NEXT: t7: i32,i1 = V_ADD_CO_U32_e64 # D:1 t2, t6, TargetConstant:i1<0> +; CHECK-NEXT: t14: ch,glue = CopyToReg # D:1 t0, Register:i32 $vgpr0, t7 +; CHECK-NEXT: t22: i32 = V_MOV_B32_e32 TargetConstant:i32<0> +; CHECK-NEXT: t16: ch,glue = CopyToReg # D:1 t14, Register:i32 $vgpr1, t22, t14:1 +; CHECK-NEXT: t17: ch = SI_RETURN # D:1 Register:i32 $vgpr0, Register:i32 $vgpr1, t16, t16:1 ; CHECK-EMPTY: %loc = alloca i32, addrspace(5) %j = load i32, ptr addrspace(5) %loc @@ -34,12 +45,17 @@ define i64 @i16_test(i16 %i) nounwind readnone { ; CHECK-LABEL: i16_test: -; CHECK: SelectionDAG has 8 nodes: -; CHECK-NEXT: t5: i32 = V_MOV_B32_e32 TargetConstant:i32<0> +; CHECK: SelectionDAG has 18 nodes: ; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t7: ch,glue = CopyToReg t0, Register:i32 $vgpr0, t5 -; CHECK-NEXT: t9: ch,glue = CopyToReg t7, Register:i32 $vgpr1, t5, t7:1 -; CHECK-NEXT: t10: ch = SI_RETURN Register:i32 $vgpr0, Register:i32 $vgpr1, t9, t9:1 +; CHECK-NEXT: t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %0 +; CHECK-NEXT: t19: i32,ch = BUFFER_LOAD_USHORT_OFFEN TargetFrameIndex:i32<0>, Register:v4i32 $sgpr0_sgpr1_sgpr2_sgpr3, TargetConstant:i32<0>, TargetConstant:i32<0>, TargetConstant:i32<0>, TargetConstant:i1<0>, t0 +; CHECK-NEXT: t20: i32,i1 = V_ADD_CO_U32_e64 # D:1 t2, t19, TargetConstant:i1<0> +; CHECK-NEXT: t24: i32 = S_MOV_B32 TargetConstant:i32<65535> +; CHECK-NEXT: t25: i32 = V_AND_B32_e64 # D:1 t20, t24 +; CHECK-NEXT: t15: ch,glue = CopyToReg # D:1 t0, Register:i32 $vgpr0, t25 +; CHECK-NEXT: t31: i32 = V_MOV_B32_e32 TargetConstant:i32<0> +; CHECK-NEXT: t17: ch,glue = CopyToReg # D:1 t15, Register:i32 $vgpr1, t31, t15:1 +; CHECK-NEXT: t18: ch = SI_RETURN # D:1 Register:i32 $vgpr0, Register:i32 $vgpr1, t17, t17:1 ; CHECK-EMPTY: %loc = alloca i16, addrspace(5) %j = load i16, ptr addrspace(5) %loc @@ -50,12 +66,17 @@ define i64 @i8_test(i8 %i) nounwind readnone { ; CHECK-LABEL: i8_test: -; CHECK: SelectionDAG has 8 nodes: -; CHECK-NEXT: t5: i32 = V_MOV_B32_e32 TargetConstant:i32<0> +; CHECK: SelectionDAG has 18 nodes: ; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t7: ch,glue = CopyToReg t0, Register:i32 $vgpr0, t5 -; CHECK-NEXT: t9: ch,glue = CopyToReg t7, Register:i32 $vgpr1, t5, t7:1 -; CHECK-NEXT: t10: ch = SI_RETURN Register:i32 $vgpr0, Register:i32 $vgpr1, t9, t9:1 +; CHECK-NEXT: t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %0 +; CHECK-NEXT: t19: i32,ch = BUFFER_LOAD_UBYTE_OFFEN TargetFrameIndex:i32<0>, Register:v4i32 $sgpr0_sgpr1_sgpr2_sgpr3, TargetConstant:i32<0>, TargetConstant:i32<0>, TargetConstant:i32<0>, TargetConstant:i1<0>, t0 +; CHECK-NEXT: t20: i32,i1 = V_ADD_CO_U32_e64 # D:1 t2, t19, TargetConstant:i1<0> +; CHECK-NEXT: t24: i32 = S_MOV_B32 TargetConstant:i32<255> +; CHECK-NEXT: t25: i32 = V_AND_B32_e64 # D:1 t20, t24 +; CHECK-NEXT: t15: ch,glue = CopyToReg # D:1 t0, Register:i32 $vgpr0, t25 +; CHECK-NEXT: t31: i32 = V_MOV_B32_e32 TargetConstant:i32<0> +; CHECK-NEXT: t17: ch,glue = CopyToReg # D:1 t15, Register:i32 $vgpr1, t31, t15:1 +; CHECK-NEXT: t18: ch = SI_RETURN # D:1 Register:i32 $vgpr0, Register:i32 $vgpr1, t17, t17:1 ; CHECK-EMPTY: %loc = alloca i8, addrspace(5) %j = load i8, ptr addrspace(5) %loc