Index: clang/test/CodeGen/LoongArch/inline-asm-gcc-regs.c =================================================================== --- clang/test/CodeGen/LoongArch/inline-asm-gcc-regs.c +++ clang/test/CodeGen/LoongArch/inline-asm-gcc-regs.c @@ -4,98 +4,98 @@ /// Check GCC register names and alias can be used in register variable definition. // CHECK-LABEL: @test_r0 -// CHECK: call void asm sideeffect "", "{$r0}"(i32 undef) +// CHECK: call void asm sideeffect "", "{$r0}"(i32 0) void test_r0() { register int a asm ("$r0"); asm ("" :: "r" (a)); } // CHECK-LABEL: @test_r12 -// CHECK: call void asm sideeffect "", "{$r12}"(i32 undef) +// CHECK: call void asm sideeffect "", "{$r12}"(i32 0) void test_r12() { register int a asm ("$r12"); asm ("" :: "r" (a)); } // CHECK-LABEL: @test_r31 -// CHECK: call void asm sideeffect "", "{$r31}"(i32 undef) +// CHECK: call void asm sideeffect "", "{$r31}"(i32 0) void test_r31() { register int a asm ("$r31"); asm ("" :: "r" (a)); } // CHECK-LABEL: @test_zero -// CHECK: call void asm sideeffect "", "{$r0}"(i32 undef) +// CHECK: call void asm sideeffect "", "{$r0}"(i32 0) void test_zero() { register int a asm ("$zero"); asm ("" :: "r" (a)); } // CHECK-LABEL: @test_a0 -// CHECK: call void asm sideeffect "", "{$r4}"(i32 undef) +// CHECK: call void asm sideeffect "", "{$r4}"(i32 0) void test_a0() { register int a asm ("$a0"); asm ("" :: "r" (a)); } // CHECK-LABEL: @test_t1 -// CHECK: call void asm sideeffect "", "{$r13}"(i32 undef) +// CHECK: call void asm sideeffect "", "{$r13}"(i32 0) void test_t1() { register int a asm ("$t1"); asm ("" :: "r" (a)); } // CHECK-LABEL: @test_fp -// CHECK: call void asm sideeffect "", "{$r22}"(i32 undef) +// CHECK: call void asm sideeffect "", "{$r22}"(i32 0) void test_fp() { register int a asm ("$fp"); asm ("" :: "r" (a)); } // CHECK-LABEL: @test_s2 -// CHECK: call void asm sideeffect "", "{$r25}"(i32 undef) +// CHECK: call void asm sideeffect "", "{$r25}"(i32 0) void test_s2() { register int a asm ("$s2"); asm 
("" :: "r" (a)); } // CHECK-LABEL: @test_f0 -// CHECK: call void asm sideeffect "", "{$f0}"(float undef) +// CHECK: call void asm sideeffect "", "{$f0}"(float 0.000000e+00) void test_f0() { register float a asm ("$f0"); asm ("" :: "f" (a)); } // CHECK-LABEL: @test_f14 -// CHECK: call void asm sideeffect "", "{$f14}"(float undef) +// CHECK: call void asm sideeffect "", "{$f14}"(float 0.000000e+00) void test_f14() { register float a asm ("$f14"); asm ("" :: "f" (a)); } // CHECK-LABEL: @test_f31 -// CHECK: call void asm sideeffect "", "{$f31}"(float undef) +// CHECK: call void asm sideeffect "", "{$f31}"(float 0.000000e+00) void test_f31() { register float a asm ("$f31"); asm ("" :: "f" (a)); } // CHECK-LABEL: @test_fa0 -// CHECK: call void asm sideeffect "", "{$f0}"(float undef) +// CHECK: call void asm sideeffect "", "{$f0}"(float 0.000000e+00) void test_fa0() { register float a asm ("$fa0"); asm ("" :: "f" (a)); } // CHECK-LABEL: @test_ft1 -// CHECK: call void asm sideeffect "", "{$f9}"(float undef) +// CHECK: call void asm sideeffect "", "{$f9}"(float 0.000000e+00) void test_ft1() { register float a asm ("$ft1"); asm ("" :: "f" (a)); } // CHECK-LABEL: @test_fs2 -// CHECK: call void asm sideeffect "", "{$f26}"(float undef) +// CHECK: call void asm sideeffect "", "{$f26}"(float 0.000000e+00) void test_fs2() { register float a asm ("$fs2"); asm ("" :: "f" (a)); Index: clang/test/CodeGenCXX/return.cpp =================================================================== --- clang/test/CodeGenCXX/return.cpp +++ clang/test/CodeGenCXX/return.cpp @@ -13,14 +13,14 @@ // CHECK-OPT: unreachable // -fno-strict-return should not emit trap + unreachable but it should return - // an undefined value instead. + // an undefined value instead. At opt, this is optimized to 0. 
// CHECK-NOSTRICT: alloca // CHECK-NOSTRICT-NEXT: load // CHECK-NOSTRICT-NEXT: ret i32 // CHECK-NOSTRICT-NEXT: } - // CHECK-NOSTRICT-OPT: ret i32 undef + // CHECK-NOSTRICT-OPT: ret i32 0 } enum Enum { Index: clang/test/CodeGenOpenCL/overload.cl =================================================================== --- clang/test/CodeGenOpenCL/overload.cl +++ clang/test/CodeGenOpenCL/overload.cl @@ -21,18 +21,18 @@ generic int *generic *gengen; generic int *local *genloc; generic int *global *genglob; - // CHECK-DAG: call spir_func void @_Z3fooPU3AS1iS0_(i32 addrspace(1)* noundef undef, i32 addrspace(1)* noundef undef) + // CHECK-DAG: call spir_func void @_Z3fooPU3AS1iS0_(i32 addrspace(1)* noundef null, i32 addrspace(1)* noundef null) foo(a, b); - // CHECK-DAG: call spir_func void @_Z3fooPU3AS4iS0_(i32 addrspace(4)* noundef undef, i32 addrspace(4)* noundef undef) + // CHECK-DAG: tail call spir_func void @_Z3fooPU3AS4iS0_(i32 addrspace(4)* noundef addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*), i32 addrspace(4)* noundef null) foo(b, c); - // CHECK-DAG: call spir_func void @_Z3fooPU3AS4iS0_(i32 addrspace(4)* noundef undef, i32 addrspace(4)* noundef undef) + // CHECK-DAG: tail call spir_func void @_Z3fooPU3AS4iS0_(i32 addrspace(4)* noundef addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*), i32 addrspace(4)* noundef addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*)) foo(a, d); - // CHECK-DAG: call spir_func void @_Z3barPU3AS4PU3AS4iS2_(i32 addrspace(4)* addrspace(4)* noundef undef, i32 addrspace(4)* addrspace(4)* noundef undef) + // CHECK-DAG: tail call spir_func void @_Z3barPU3AS4PU3AS4iS2_(i32 addrspace(4)* addrspace(4)* noundef null, i32 addrspace(4)* addrspace(4)* noundef addrspacecast (i32 addrspace(4)* addrspace(3)* null to i32 addrspace(4)* addrspace(4)*)) bar(gengen, genloc); - // CHECK-DAG: call spir_func void @_Z3barPU3AS4PU3AS4iS2_(i32 addrspace(4)* addrspace(4)* noundef undef, i32 addrspace(4)* addrspace(4)* noundef undef) + // 
CHECK-DAG: tail call spir_func void @_Z3barPU3AS4PU3AS4iS2_(i32 addrspace(4)* addrspace(4)* noundef null, i32 addrspace(4)* addrspace(4)* noundef addrspacecast (i32 addrspace(4)* addrspace(1)* null to i32 addrspace(4)* addrspace(4)*)) bar(gengen, genglob); - // CHECK-DAG: call spir_func void @_Z3barPU3AS1PU3AS4iS2_(i32 addrspace(4)* addrspace(1)* noundef undef, i32 addrspace(4)* addrspace(1)* noundef undef) + // CHECK-DAG: tail call spir_func void @_Z3barPU3AS1PU3AS4iS2_(i32 addrspace(4)* addrspace(1)* noundef null, i32 addrspace(4)* addrspace(1)* noundef null) bar(genglob, genglob); } Index: llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp =================================================================== --- llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -409,8 +410,8 @@ addAssumeNonNull(AC, LI); LI->replaceAllUsesWith(ReplVal); - LI->eraseFromParent(); LBI.deleteValue(LI); + LI->eraseFromParent(); } // Finally, after the scan, check to see if the store is all that is left. @@ -429,8 +430,8 @@ } } // Remove the (now dead) store and alloca. - Info.OnlyStore->eraseFromParent(); LBI.deleteValue(Info.OnlyStore); + Info.OnlyStore->eraseFromParent(); AI->eraseFromParent(); return true; @@ -474,6 +475,8 @@ // binary search. llvm::sort(StoresByIndex, less_first()); + Value *FI = nullptr; + // Walk all of the loads from this alloca, replacing them with the nearest // store above them, if any. for (User *U : make_early_inc_range(AI->users())) { @@ -490,10 +493,19 @@ less_first()); Value *ReplVal; if (I == StoresByIndex.begin()) { - if (StoresByIndex.empty()) + if (StoresByIndex.empty()) { // If there are no stores, the load takes the undef value. 
- ReplVal = UndefValue::get(LI->getType()); - else + if (LI->use_empty()) + ReplVal = UndefValue::get(LI->getType()); + else { + // use a frozen undef value so that multiple loads of this alloca + // will compare properly. + if (!FI) + FI = IRBuilder<>(AI).CreateFreeze( + UndefValue::get(AI->getAllocatedType()), AI->getName() + ".fr"); + ReplVal = FI; + } + } else // There is no store before this load, bail out (load may be affected // by the following stores - see main comment). return false; @@ -515,8 +527,8 @@ ReplVal = PoisonValue::get(LI->getType()); LI->replaceAllUsesWith(ReplVal); - LI->eraseFromParent(); LBI.deleteValue(LI); + LI->eraseFromParent(); } // Remove the (now dead) stores and alloca. @@ -529,8 +541,8 @@ ConvertDebugDeclareToDebugValue(DII, SI, DIB); } } - SI->eraseFromParent(); LBI.deleteValue(SI); + SI->eraseFromParent(); } AI->eraseFromParent(); @@ -955,6 +967,13 @@ if (AI == AllocaLookup.end()) continue; + if (!LI->use_empty() && isa<UndefValue>(IncomingVals[AI->second])) + // Freeze the undef value so that if there are multiple loads of this + // alloca, they will still compare properly. 
+ IncomingVals[AI->second] = IRBuilder<>(Src).CreateFreeze( + UndefValue::get(Src->getAllocatedType()), + Src->getName() + ".fr"); + Value *V = IncomingVals[AI->second]; // If the load was marked as nonnull we don't want to lose Index: llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll +++ llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll @@ -38,12 +38,17 @@ ; GCN-NOT: v_movrel ; GCN-NOT: buffer_ -; GCN-NOT: v_cmp_ ; GCN-NOT: v_cndmask_ -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 -; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[ONE]] -; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[ONE]] -; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[ONE]] +; GCN: v_cndmask_b32_e64 [[ONE:v[0-9]+]], 2, 1, vcc +; GCN: v_cmp_gt_u32_e32 vcc, 3, v1 +; GCN: v_cndmask_b32_e32 v0, 0, [[ONE]], vcc +; GCN: v_cmp_ne_u32_e32 vcc, 3, [[ONE]] +; GCN: v_cndmask_b32_e32 v3, 1.0, [[ONE]], vcc +; GCN: v_cmp_ne_u32_e32 vcc, 2, [[ONE]] +; GCN: v_cndmask_b32_e32 v2, 1.0, [[ONE]], vcc +; GCN: v_cmp_ne_u32_e32 vcc, 1, [[ONE]] +; GCN: v_cndmask_b32_e32 v1, 1.0, [[ONE]], vcc +; GCN: v_cmp_ne_u32_e32 vcc, 0, [[ONE]] ; GCN: store_dwordx4 v{{.+}}, ; OPT: %gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %alloca, i32 0, i32 %sel2 @@ -103,7 +108,7 @@ ; OPT-LABEL: define amdgpu_kernel void @half4_alloca_load4 ; GCN-NOT: buffer_ -; GCN: s_mov_b64 s[{{[0-9:]+}}], 0xffff +; GCN: s_mov_b64 s[{{[0-9:]+}}], 0xffff ; OPT: %gep = getelementptr inbounds <4 x half>, <4 x half> addrspace(5)* %alloca, i32 0, i32 %sel2 ; OPT: %0 = load <4 x half>, <4 x half> addrspace(5)* %alloca @@ -162,7 +167,7 @@ ; OPT-LABEL: define amdgpu_kernel void @short4_alloca_load4 ; GCN-NOT: buffer_ -; GCN: s_mov_b64 s[{{[0-9:]+}}], 0xffff +; GCN: s_mov_b64 s[{{[0-9:]+}}], 0xffff ; OPT: %gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(5)* %alloca, i32 0, i32 %sel2 ; OPT: %0 = load <4 x i16>, <4 x i16> 
addrspace(5)* %alloca @@ -191,7 +196,6 @@ ; OPT-LABEL: define i64 @ptr_alloca_bitcast ; GCN-NOT: buffer_ -; GCN: v_mov_b32_e32 v1, 0 ; OPT: %private_iptr = alloca <2 x i32>, align 8, addrspace(5) ; OPT: %cast = bitcast <2 x i32> addrspace(5)* %private_iptr to i64 addrspace(5)* Index: llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll +++ llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll @@ -5,7 +5,7 @@ ; OPT-LABEL: @alloca_8xi64_max1024( ; OPT-NOT: alloca -; OPT: <8 x i64> +; OPT-NOT: <8 x i64> ; LIMIT32: alloca ; LIMIT32-NOT: <8 x i64> define amdgpu_kernel void @alloca_8xi64_max1024(i64 addrspace(1)* %out, i32 %index) #0 { @@ -37,7 +37,7 @@ ; OPT-LABEL: @alloca_16xi64_max512( ; OPT-NOT: alloca -; OPT: <16 x i64> +; OPT-NOT: <16 x i64> ; LIMIT32: alloca ; LIMIT32-NOT: <16 x i64> define amdgpu_kernel void @alloca_16xi64_max512(i64 addrspace(1)* %out, i32 %index) #1 { @@ -85,7 +85,7 @@ ; OPT-LABEL: @alloca_9xi128_max256( ; OPT-NOT: alloca -; OPT: <9 x i128> +; OPT-NOT: <9 x i128> ; LIMIT32: alloca ; LIMIT32-NOT: <9 x i128> define amdgpu_kernel void @alloca_9xi128_max256(i128 addrspace(1)* %out, i32 %index) #2 { @@ -101,7 +101,7 @@ ; OPT-LABEL: @alloca_16xi128_max256( ; OPT-NOT: alloca -; OPT: <16 x i128> +; OPT-NOT: <16 x i128> ; LIMIT32: alloca ; LIMIT32-NOT: <16 x i128> define amdgpu_kernel void @alloca_16xi128_max256(i128 addrspace(1)* %out, i32 %index) #2 { @@ -133,7 +133,7 @@ ; OPT-LABEL: @alloca_9xi64_max256( ; OPT-NOT: alloca -; OPT: <9 x i64> +; OPT-NOT: <9 x i64> ; LIMIT32: alloca ; LIMIT32-NOT: <9 x i64> define amdgpu_kernel void @alloca_9xi64_max256(i64 addrspace(1)* %out, i32 %index) #2 { Index: llvm/test/Transforms/Mem2Reg/pr24179.ll =================================================================== --- llvm/test/Transforms/Mem2Reg/pr24179.ll +++ llvm/test/Transforms/Mem2Reg/pr24179.ll @@ -39,9 +39,10 @@ define void @test2() { ; 
CHECK-LABEL: @test2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze i32 undef ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[C:%.*]] = call i1 @use(i32 undef) +; CHECK-NEXT: [[C:%.*]] = call i1 @use(i32 [[FR1]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void Index: llvm/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll =================================================================== --- llvm/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll +++ llvm/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll @@ -95,9 +95,10 @@ define float* @no_store_single_load() { ; CHECK-LABEL: @no_store_single_load( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ne float* undef, null +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze float* +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne float* [[FR1]], null ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP0]]) -; CHECK-NEXT: ret float* undef +; CHECK-NEXT: ret float* [[FR1]] ; entry: %buf = alloca float* @@ -108,15 +109,17 @@ define float* @no_store_multiple_loads(i1 %c) { ; CHECK-LABEL: @no_store_multiple_loads( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[FR2:%.*\.fr.*]] = freeze float* +; CHECK-NEXT: [[FR3:%.*\.fr.*]] = freeze float* ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ne float* undef, null +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne float* [[FR2]], null ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP0]]) -; CHECK-NEXT: ret float* undef +; CHECK-NEXT: ret float* [[FR2]] ; CHECK: else: -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne float* undef, null +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne float* [[FR3]], null ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) -; CHECK-NEXT: ret float* undef +; CHECK-NEXT: ret float* [[FR3]] ; entry: %buf = alloca float* Index: llvm/test/Transforms/PhaseOrdering/X86/nancvt.ll =================================================================== --- 
llvm/test/Transforms/PhaseOrdering/X86/nancvt.ll +++ llvm/test/Transforms/PhaseOrdering/X86/nancvt.ll @@ -42,7 +42,7 @@ ; CHECK-NEXT: store volatile i32 2147228864, i32* @var, align 4 ; CHECK-NEXT: store volatile i32 2147228864, i32* @var, align 4 ; CHECK-NEXT: store volatile i32 2147228864, i32* @var, align 4 -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret i32 0 ; entry: %retval = alloca i32, align 4 Index: llvm/test/Transforms/SROA/address-spaces.ll =================================================================== --- llvm/test/Transforms/SROA/address-spaces.ll +++ llvm/test/Transforms/SROA/address-spaces.ll @@ -50,6 +50,7 @@ define void @copy_struct([5 x i64] %in.coerce, ptr addrspace(1) align 4 %ptr) { ; CHECK-LABEL: @copy_struct( ; CHECK-NEXT: for.end: +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze i32 undef ; CHECK-NEXT: [[IN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE:%.*]], 0 ; CHECK-NEXT: [[IN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 1 ; CHECK-NEXT: [[IN_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 2 @@ -60,7 +61,7 @@ ; CHECK-NEXT: [[IN_SROA_4_20_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PTR]], i16 4 ; CHECK-NEXT: store i64 [[IN_COERCE_FCA_3_EXTRACT]], ptr addrspace(1) [[IN_SROA_4_20_PTR_SROA_IDX]], align 4 ; CHECK-NEXT: [[IN_SROA_5_20_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PTR]], i16 12 -; CHECK-NEXT: store i32 undef, ptr addrspace(1) [[IN_SROA_5_20_PTR_SROA_IDX]], align 4 +; CHECK-NEXT: store i32 [[FR1]], ptr addrspace(1) [[IN_SROA_5_20_PTR_SROA_IDX]], align 4 ; CHECK-NEXT: ret void ; for.end: Index: llvm/test/Transforms/SROA/addrspacecast.ll =================================================================== --- llvm/test/Transforms/SROA/addrspacecast.ll +++ llvm/test/Transforms/SROA/addrspacecast.ll @@ -255,8 +255,9 @@ define void @select_addrspacecast_gv(i1 %a, i1 %b) { ; CHECK-LABEL: @select_addrspacecast_gv( +; CHECK-NEXT: 
[[FR1:%.*\.fr.*]] = freeze i64 undef ; CHECK-NEXT: [[COND_SROA_SPECULATE_LOAD_FALSE:%.*]] = load i64, ptr addrspace(1) @gv, align 8 -; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 [[B:%.*]], i64 undef, i64 [[COND_SROA_SPECULATE_LOAD_FALSE]] +; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 [[B:%.*]], i64 [[FR1]], i64 [[COND_SROA_SPECULATE_LOAD_FALSE]] ; CHECK-NEXT: ret void ; %c = alloca i64, align 8 @@ -270,8 +271,9 @@ define void @select_addrspacecast_gv_constexpr(i1 %a, i1 %b) { ; CHECK-LABEL: @select_addrspacecast_gv_constexpr( +; CHECK-NEXT: [[FR2:%.*\.fr.*]] = freeze i64 undef ; CHECK-NEXT: [[COND_SROA_SPECULATE_LOAD_FALSE:%.*]] = load i64, ptr addrspace(2) addrspacecast (ptr addrspace(1) @gv to ptr addrspace(2)), align 8 -; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 [[B:%.*]], i64 undef, i64 [[COND_SROA_SPECULATE_LOAD_FALSE]] +; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 [[B:%.*]], i64 [[FR2]], i64 [[COND_SROA_SPECULATE_LOAD_FALSE]] ; CHECK-NEXT: ret void ; %c = alloca i64, align 8 @@ -285,7 +287,9 @@ define i8 @select_addrspacecast_i8(i1 %c) { ; CHECK-LABEL: @select_addrspacecast_i8( -; CHECK-NEXT: [[RET_SROA_SPECULATED:%.*]] = select i1 [[C:%.*]], i8 undef, i8 undef +; CHECK-NEXT: [[FR3:%.*\.fr.*]] = freeze i8 undef +; CHECK-NEXT: [[FR4:%.*\.fr.*]] = freeze i8 undef +; CHECK-NEXT: [[RET_SROA_SPECULATED:%.*]] = select i1 [[C:%.*]], i8 [[FR3]], i8 [[FR4]] ; CHECK-NEXT: ret i8 [[RET_SROA_SPECULATED]] ; %a = alloca i8 Index: llvm/test/Transforms/SROA/alloca-address-space.ll =================================================================== --- llvm/test/Transforms/SROA/alloca-address-space.ll +++ llvm/test/Transforms/SROA/alloca-address-space.ll @@ -49,6 +49,7 @@ define void @copy_struct([5 x i64] %in.coerce, ptr addrspace(1) align 4 %ptr) { ; CHECK-LABEL: @copy_struct( ; CHECK-NEXT: for.end: +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze i32 undef ; CHECK-NEXT: [[IN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [5 x i64] 
[[IN_COERCE:%.*]], 0 ; CHECK-NEXT: [[IN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 1 ; CHECK-NEXT: [[IN_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 2 @@ -59,7 +60,7 @@ ; CHECK-NEXT: [[IN_SROA_4_20_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PTR]], i16 4 ; CHECK-NEXT: store i64 [[IN_COERCE_FCA_3_EXTRACT]], ptr addrspace(1) [[IN_SROA_4_20_PTR_SROA_IDX]], align 4 ; CHECK-NEXT: [[IN_SROA_5_20_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PTR]], i16 12 -; CHECK-NEXT: store i32 undef, ptr addrspace(1) [[IN_SROA_5_20_PTR_SROA_IDX]], align 4 +; CHECK-NEXT: store i32 [[FR1]], ptr addrspace(1) [[IN_SROA_5_20_PTR_SROA_IDX]], align 4 ; CHECK-NEXT: ret void ; for.end: Index: llvm/test/Transforms/SROA/basictest.ll =================================================================== --- llvm/test/Transforms/SROA/basictest.ll +++ llvm/test/Transforms/SROA/basictest.ll @@ -1087,7 +1087,8 @@ ; via integers can be promoted away. ; CHECK-LABEL: @PR14059.1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double undef to i64 +; CHECK-NEXT: [[FR3:%.*\.fr.*]] = freeze double undef +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double [[FR3]] to i64 ; CHECK-NEXT: [[X_SROA_0_I_0_INSERT_MASK:%.*]] = and i64 [[TMP0]], -4294967296 ; CHECK-NEXT: [[X_SROA_0_I_0_INSERT_INSERT:%.*]] = or i64 [[X_SROA_0_I_0_INSERT_MASK]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[X_SROA_0_I_0_INSERT_INSERT]] to double @@ -1296,8 +1297,9 @@ ; alloca (relying on the alloc size padding) doesn't trigger an assert. 
; CHECK-LABEL: @PR14572.2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[FR5:%.*\.fr.*]] = freeze i8 undef ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <3 x i8> [[X:%.*]] to i24 -; CHECK-NEXT: [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i8 undef to i32 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i8 [[FR5]] to i32 ; CHECK-NEXT: [[A_SROA_2_0_INSERT_SHIFT:%.*]] = shl i32 [[A_SROA_2_0_INSERT_EXT]], 24 ; CHECK-NEXT: [[A_SROA_2_0_INSERT_MASK:%.*]] = and i32 undef, 16777215 ; CHECK-NEXT: [[A_SROA_2_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_2_0_INSERT_MASK]], [[A_SROA_2_0_INSERT_SHIFT]] @@ -1423,7 +1425,8 @@ define void @PR15805(i1 %a, i1 %b) { ; CHECK-LABEL: @PR15805( -; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 [[B:%.*]], i64 undef, i64 undef +; CHECK-NEXT: [[FR6:%.*\.fr.*]] = freeze i64 undef +; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 [[B:%.*]], i64 [[FR6]], i64 [[FR6]] ; CHECK-NEXT: ret void ; @@ -1440,9 +1443,10 @@ ; order in which the uses of the alloca are visited. ; ; CHECK-LABEL: @PR15805.1( +; CHECK-NEXT: [[FR7:%.*\.fr.*]] = freeze i64 undef ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 [[A:%.*]], i64 undef, i64 undef +; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 [[A:%.*]], i64 [[FR7]], i64 [[FR7]] ; CHECK-NEXT: br i1 [[C2:%.*]], label [[LOOP:%.*]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -1663,9 +1667,10 @@ ; CHECK-LABEL: @PR22093( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i16, align 4 +; CHECK-NEXT: [[FR8:%.*\.fr.*]] = freeze i16 undef ; CHECK-NEXT: store volatile i16 42, ptr [[A_SROA_0]], align 4 ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_LOAD:%.*]] = load i16, ptr [[A_SROA_0]], align 4 -; CHECK-NEXT: [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i16 undef to i32 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i16 [[FR8]] to i32 ; CHECK-NEXT: [[A_SROA_3_0_INSERT_SHIFT:%.*]] = shl i32 [[A_SROA_3_0_INSERT_EXT]], 16 ; CHECK-NEXT: [[A_SROA_3_0_INSERT_MASK:%.*]] = 
and i32 undef, 65535 ; CHECK-NEXT: [[A_SROA_3_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_3_0_INSERT_MASK]], [[A_SROA_3_0_INSERT_SHIFT]] @@ -1699,10 +1704,11 @@ ; CHECK-LABEL: @PR22093.2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i16, align 8 +; CHECK-NEXT: [[FR9:%.*\.fr.*]] = freeze i16 undef ; CHECK-NEXT: [[A_SROA_31:%.*]] = alloca i8, align 4 ; CHECK-NEXT: store volatile i16 42, ptr [[A_SROA_0]], align 8 ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_LOAD:%.*]] = load i16, ptr [[A_SROA_0]], align 8 -; CHECK-NEXT: [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i16 undef to i32 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i16 [[FR9]] to i32 ; CHECK-NEXT: [[A_SROA_3_0_INSERT_SHIFT:%.*]] = shl i32 [[A_SROA_3_0_INSERT_EXT]], 16 ; CHECK-NEXT: [[A_SROA_3_0_INSERT_MASK:%.*]] = and i32 undef, 65535 ; CHECK-NEXT: [[A_SROA_3_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_3_0_INSERT_MASK]], [[A_SROA_3_0_INSERT_SHIFT]] @@ -1883,6 +1889,7 @@ define void @test29(i32 %num, i32 %tid) { ; CHECK-LABEL: @test29( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[FR11:%.*\.fr.*]] = freeze i32 undef ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[NUM:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP1]], label [[BB1:%.*]], label [[BB7:%.*]] ; CHECK: bb1: @@ -1896,7 +1903,7 @@ ; CHECK: bb3: ; CHECK-NEXT: br label [[BB5]] ; CHECK: bb4: -; CHECK-NEXT: store i32 undef, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: store i32 [[FR11]], ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: br label [[BB5]] ; CHECK: bb5: ; CHECK-NEXT: [[SUB]] = add i32 [[I_02]], -1 Index: llvm/test/Transforms/SROA/phi-and-select.ll =================================================================== --- llvm/test/Transforms/SROA/phi-and-select.ll +++ llvm/test/Transforms/SROA/phi-and-select.ll @@ -295,6 +295,7 @@ ; a PHI that can be speculated. 
; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze i32 undef ; CHECK-NEXT: [[TEST:%.*]] = icmp ne i32 [[B:%.*]], 0 ; CHECK-NEXT: br i1 [[TEST]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: @@ -303,7 +304,7 @@ ; CHECK: else: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[PHI_SROA_SPECULATED:%.*]] = phi i32 [ undef, [[ELSE]] ], [ [[PHI_SROA_SPECULATE_LOAD_THEN]], [[THEN]] ] +; CHECK-NEXT: [[PHI_SROA_SPECULATED:%.*]] = phi i32 [ [[FR1]], [[ELSE]] ], [ [[PHI_SROA_SPECULATE_LOAD_THEN]], [[THEN]] ] ; CHECK-NEXT: ret i32 [[PHI_SROA_SPECULATED]] ; @@ -328,10 +329,11 @@ ; Same as @test8 but for a select rather than a PHI node. ; CHECK-LABEL: @test9( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[FR2:%.*\.fr.*]] = freeze i32 undef ; CHECK-NEXT: store i32 0, ptr [[PTR:%.*]], align 4 ; CHECK-NEXT: [[TEST:%.*]] = icmp ne i32 [[B:%.*]], 0 ; CHECK-NEXT: [[LOADED_SROA_SPECULATE_LOAD_FALSE:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: [[LOADED_SROA_SPECULATED:%.*]] = select i1 [[TEST]], i32 undef, i32 [[LOADED_SROA_SPECULATE_LOAD_FALSE]] +; CHECK-NEXT: [[LOADED_SROA_SPECULATED:%.*]] = select i1 [[TEST]], i32 [[FR2]], i32 [[LOADED_SROA_SPECULATE_LOAD_FALSE]] ; CHECK-NEXT: ret i32 [[LOADED_SROA_SPECULATED]] ; Index: llvm/test/Transforms/SROA/phi-gep.ll =================================================================== --- llvm/test/Transforms/SROA/phi-gep.ll +++ llvm/test/Transforms/SROA/phi-gep.ll @@ -442,6 +442,7 @@ define void @constant_value_phi(i1 %c1) { ; CHECK-LABEL: @constant_value_phi( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze i16 undef ; CHECK-NEXT: br label [[LAND_LHS_TRUE_I:%.*]] ; CHECK: land.lhs.true.i: ; CHECK-NEXT: br i1 [[C1:%.*]], label [[COND_END_I:%.*]], label [[COND_END_I]] @@ -466,6 +467,9 @@ define i32 @test_sroa_phi_gep_multiple_values_from_same_block(i32 %arg) { ; CHECK-LABEL: @test_sroa_phi_gep_multiple_values_from_same_block( ; CHECK-NEXT: bb.1: +; CHECK-NEXT: 
[[FR2:%.*\.fr.*]] = freeze i32 undef +; CHECK-NEXT: [[FR3:%.*\.fr.*]] = freeze i32 undef +; CHECK-NEXT: [[FR4:%.*\.fr.*]] = freeze i32 undef ; CHECK-NEXT: switch i32 [[ARG:%.*]], label [[BB_3:%.*]] [ ; CHECK-NEXT: i32 1, label [[BB_2:%.*]] ; CHECK-NEXT: i32 2, label [[BB_2]] @@ -477,7 +481,7 @@ ; CHECK: bb.3: ; CHECK-NEXT: br label [[BB_4]] ; CHECK: bb.4: -; CHECK-NEXT: [[PHI_SROA_PHI_SROA_SPECULATED:%.*]] = phi i32 [ undef, [[BB_3]] ], [ undef, [[BB_2]] ], [ undef, [[BB_1:%.*]] ], [ undef, [[BB_1]] ] +; CHECK-NEXT: [[PHI_SROA_PHI_SROA_SPECULATED:%.*]] = phi i32 [ [[FR2]], [[BB_3]] ], [ [[FR3]], [[BB_2]] ], [ [[FR4]], [[BB_1:%.*]] ], [ [[FR4]], [[BB_1]] ] ; CHECK-NEXT: ret i32 [[PHI_SROA_PHI_SROA_SPECULATED]] ; bb.1: Index: llvm/test/Transforms/SROA/phi-with-duplicate-pred.ll =================================================================== --- llvm/test/Transforms/SROA/phi-with-duplicate-pred.ll +++ llvm/test/Transforms/SROA/phi-with-duplicate-pred.ll @@ -9,6 +9,7 @@ define void @f2(i1 %c1) { ; CHECK-LABEL: @f2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze i16 undef ; CHECK-NEXT: br i1 [[C1:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: ; CHECK-NEXT: br label [[CLEANUP:%.*]] @@ -21,7 +22,7 @@ ; CHECK: if.else: ; CHECK-NEXT: br label [[LBL1]] ; CHECK: lbl1: -; CHECK-NEXT: [[G_0_SROA_SPECULATED:%.*]] = phi i16 [ [[G_0_SROA_SPECULATE_LOAD_CLEANUP]], [[CLEANUP]] ], [ [[G_0_SROA_SPECULATE_LOAD_CLEANUP]], [[CLEANUP]] ], [ undef, [[IF_ELSE]] ] +; CHECK-NEXT: [[G_0_SROA_SPECULATED:%.*]] = phi i16 [ [[G_0_SROA_SPECULATE_LOAD_CLEANUP]], [[CLEANUP]] ], [ [[G_0_SROA_SPECULATE_LOAD_CLEANUP]], [[CLEANUP]] ], [ [[FR1]], [[IF_ELSE]] ] ; CHECK-NEXT: unreachable ; CHECK: cleanup7: ; CHECK-NEXT: ret void Index: llvm/test/Transforms/SROA/pr37267.ll =================================================================== --- llvm/test/Transforms/SROA/pr37267.ll +++ llvm/test/Transforms/SROA/pr37267.ll @@ -43,14 +43,16 @@ define i16 @f2() { ; 
CHECK-LABEL: @f2( ; CHECK-NEXT: bb1: -; CHECK-NEXT: [[A_3_SROA_2_2_INSERT_EXT:%.*]] = zext i16 undef to i32 +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze i16 undef +; CHECK-NEXT: [[FR2:%.*\.fr.*]] = freeze i16 undef +; CHECK-NEXT: [[A_3_SROA_2_2_INSERT_EXT:%.*]] = zext i16 [[FR2]] to i32 ; CHECK-NEXT: [[A_3_SROA_2_2_INSERT_MASK:%.*]] = and i32 undef, -65536 ; CHECK-NEXT: [[A_3_SROA_2_2_INSERT_INSERT:%.*]] = or i32 [[A_3_SROA_2_2_INSERT_MASK]], [[A_3_SROA_2_2_INSERT_EXT]] -; CHECK-NEXT: [[A_3_SROA_0_2_INSERT_EXT:%.*]] = zext i16 undef to i32 +; CHECK-NEXT: [[A_3_SROA_0_2_INSERT_EXT:%.*]] = zext i16 [[FR1]] to i32 ; CHECK-NEXT: [[A_3_SROA_0_2_INSERT_SHIFT:%.*]] = shl i32 [[A_3_SROA_0_2_INSERT_EXT]], 16 ; CHECK-NEXT: [[A_3_SROA_0_2_INSERT_MASK:%.*]] = and i32 [[A_3_SROA_2_2_INSERT_INSERT]], 65535 ; CHECK-NEXT: [[A_3_SROA_0_2_INSERT_INSERT:%.*]] = or i32 [[A_3_SROA_0_2_INSERT_MASK]], [[A_3_SROA_0_2_INSERT_SHIFT]] -; CHECK-NEXT: [[RC:%.*]] = add i16 2, undef +; CHECK-NEXT: [[RC:%.*]] = add i16 2, [[FR1]] ; CHECK-NEXT: ret i16 [[RC]] ; Index: llvm/test/Transforms/SROA/same-promoted-undefs.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/same-promoted-undefs.ll @@ -0,0 +1,164 @@ +; RUN: opt < %s -passes=sroa -S | FileCheck %s +; +; When sroa replaces loads with undefined values, the undefined values +; have an implicit value that must be preserved for comparison purposes +; +; Need to test 2 paths, one where alloca/loads/stores are in one basic block +; and when they are not in the same basic block + +%struct.array1 = type { [1 x i32] } +%struct.array2 = type { [1 x i32] } + +; Test all loads and references in same basic block +define void @SingleBlock() { +entry: + %a = alloca %struct.array1, align 4 + %b = load i32, ptr %a, align 4 + %c = load i32, ptr %a, align 4 + %0 = icmp sge i32 %b, %c + br i1 %0, label %lab, label %lab + +lab: + ret void +} + +; CHECK-LABEL: define void @SingleBlock +; CHECK-NOT: br +; 
CHECK: [[FR1:%.*\.fr.*]] = freeze i32 undef +; CHECK: {{.*}} = icmp sge i32 [[FR1]], [[FR1]] + +; Test all loads and references in same basic block but used elsewhere +define void @SingleBlockLoadsUsedOutside() { +entry: + %a = alloca %struct.array1, align 4 + %b = load i32, ptr %a, align 4 + %c = load i32, ptr %a, align 4 + br label %lab1 + +lab1: + %0 = icmp slt i32 %b, %c + br i1 %0, label %lab, label %lab + +lab: + ret void +} + +; CHECK-LABEL: define void @SingleBlockLoadsUsedOutside +; CHECK-NOT: br +; CHECK: [[FR2:%.*\.fr.*]] = freeze i32 undef +; CHECK-LABEL: lab1:{{.*}} +; CHECK-NOT: br +; CHECK: {{.*}} = icmp slt i32 [[FR2]], [[FR2]] + +; Test all loads and references in same basic block with different allocas +define void @SingleBlock2Allocas() { +entry: + %a1 = alloca %struct.array1, align 4 + %a2 = alloca %struct.array1, align 4 + %b = load i32, ptr %a1, align 4 + %c = load i32, ptr %a2, align 4 + %0 = icmp sge i32 %b, %c + br i1 %0, label %lab, label %lab + +lab: + ret void +} + +; CHECK-LABEL: define void @SingleBlock2Allocas +; CHECK-NOT: br +; CHECK: [[FR3:%.*\.fr.*]] = freeze i32 undef +; CHECK: [[FR4:%.*\.fr.*]] = freeze i32 undef +; CHECK: {{.*}} = icmp sge i32 [[FR3]], [[FR4]] + +; Test all loads and references in same basic block but used elsewhere +; with 2 allocas +define void @SingleBlockLoadsUsedOutside2Allocas() { +entry: + %a1 = alloca %struct.array1, align 4 + %a2 = alloca %struct.array2, align 4 + %b = load i32, ptr %a1, align 4 + %c = load i32, ptr %a2, align 4 + br label %lab1 + +lab1: + %0 = icmp slt i32 %b, %c + br i1 %0, label %lab, label %lab + +lab: + ret void +} + +; CHECK-LABEL: define void @SingleBlockLoadsUsedOutside2Allocas +; CHECK-NOT: br +; CHECK: [[FR5:%.*\.fr.*]] = freeze i32 undef +; CHECK: [[FR6:%.*\.fr.*]] = freeze i32 undef +; CHECK-LABEL: lab1:{{.*}} +; CHECK-NOT: br +; CHECK: {{.*}} = icmp slt i32 [[FR5]], [[FR6]] + +; Test multiblock scenario +define void @MultiBlock() { +entry: + %a = alloca %struct.array1, 
align 4 + br label %lab1 + +lab1: + %b = load i32, ptr %a, align 4 + br label %lab2 + +lab2: + %c = load i32, ptr %a, align 4 + br label %lab3 + +lab3: + %0 = icmp sle i32 %b, %c + br label %lab4 + +lab4: + br i1 %0, label %lab, label %lab + +lab: + ret void +} + +; CHECK-LABEL: define void @MultiBlock +; CHECK-NOT: br +; CHECK: [[FR7:%.*\.fr.*]] = freeze i32 undef +; CHECK-LABEL: lab3:{{.*}} +; CHECK-NOT: br +; CHECK: {{.*}} = icmp sle i32 [[FR7]], [[FR7]] + +; Test loads and references in different basic blocks with different allocas +define void @MultiBlock2Allocas() { +entry: + %a1 = alloca %struct.array1, align 4 + %a2 = alloca %struct.array2, align 4 + br label %lab1 + +lab1: + %b = load i32, ptr %a1, align 4 + br label %lab2 + +lab2: + %c = load i32, ptr %a2, align 4 + br label %lab3 + +lab3: + %0 = icmp sle i32 %b, %c + br label %lab4 + +lab4: + br i1 %0, label %lab, label %lab + +lab: + ret void +} + +; CHECK-LABEL: define void @MultiBlock2Allocas +; CHECK-NOT: br +; CHECK: [[FR8:%.*\.fr.*]] = freeze i32 undef +; CHECK: [[FR9:%.*\.fr.*]] = freeze i32 undef +; CHECK-LABEL: lab3:{{.*}} +; CHECK-NOT: br +; CHECK: {{.*}} = icmp sle i32 [[FR8]], [[FR9]] + Index: llvm/test/Transforms/SROA/scalable-vectors.ll =================================================================== --- llvm/test/Transforms/SROA/scalable-vectors.ll +++ llvm/test/Transforms/SROA/scalable-vectors.ll @@ -66,8 +66,9 @@ ; GEP where the element type is a scalable vector. 
define @cast_alloca_from_svint32_t() { ; CHECK-LABEL: @cast_alloca_from_svint32_t( +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze <16 x i32> undef ; CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 -; CHECK-NEXT: store <16 x i32> undef, ptr [[RETVAL_COERCE]], align 16 +; CHECK-NEXT: store <16 x i32> [[FR1]], ptr [[RETVAL_COERCE]], align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 16 ; CHECK-NEXT: ret [[TMP2]] ; Index: llvm/test/Transforms/SROA/select-load.ll =================================================================== --- llvm/test/Transforms/SROA/select-load.ll +++ llvm/test/Transforms/SROA/select-load.ll @@ -38,9 +38,11 @@ define void @test_multiple_loads_select(i1 %cmp){ ; CHECK-LABEL: @test_multiple_loads_select( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADDR_I8_SROA_SPECULATED:%.*]] = select i1 [[CMP:%.*]], ptr undef, ptr undef +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze ptr undef +; CHECK-NEXT: [[FR2:%.*\.fr.*]] = freeze ptr undef +; CHECK-NEXT: [[ADDR_I8_SROA_SPECULATED:%.*]] = select i1 [[CMP:%.*]], ptr [[FR2]], ptr [[FR1]] ; CHECK-NEXT: call void @foo_i8(ptr [[ADDR_I8_SROA_SPECULATED]]) -; CHECK-NEXT: [[ADDR_I32_SROA_SPECULATED:%.*]] = select i1 [[CMP]], ptr undef, ptr undef +; CHECK-NEXT: [[ADDR_I32_SROA_SPECULATED:%.*]] = select i1 [[CMP]], ptr [[FR2]], ptr [[FR1]] ; CHECK-NEXT: call void @foo_i32(ptr [[ADDR_I32_SROA_SPECULATED]]) ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/SROA/slice-width.ll =================================================================== --- llvm/test/Transforms/SROA/slice-width.ll +++ llvm/test/Transforms/SROA/slice-width.ll @@ -94,9 +94,10 @@ define i32 @memcpy_vec3float_widening(ptr %x) { ; CHECK-LABEL: @memcpy_vec3float_widening( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze <4 x float> undef ; CHECK-NEXT: [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[X:%.*]], align 4 ; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> 
[[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef +; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> [[FR1]] ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4 ; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> ; CHECK-NEXT: store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], ptr [[TMP2]], align 4 Index: llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll =================================================================== --- llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll +++ llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll @@ -237,10 +237,12 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0:%.*]] = alloca float, align 16 ; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4:%.*]] = alloca float, align 4 +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze float undef +; CHECK-NEXT: [[FR2:%.*\.fr.*]] = freeze float undef ; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_0]], i8 0, i32 4, i1 false) ; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 [[B_BLOCKWISE_COPY_SROA_4]], i8 0, i32 4, i1 false) -; CHECK-NEXT: [[TMP0:%.*]] = bitcast float undef to i32 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float undef to i32 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[FR1]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[FR2]] to i32 ; CHECK-NEXT: [[DATA:%.*]] = load [4 x float], ptr undef, align 4 ; CHECK-NEXT: [[DATA_FCA_0_EXTRACT:%.*]] = extractvalue [4 x float] [[DATA]], 0 ; CHECK-NEXT: store float [[DATA_FCA_0_EXTRACT]], ptr [[B_BLOCKWISE_COPY_SROA_0]], align 16 Index: llvm/test/Transforms/SROA/vector-conversion.ll =================================================================== --- 
llvm/test/Transforms/SROA/vector-conversion.ll +++ llvm/test/Transforms/SROA/vector-conversion.ll @@ -4,10 +4,11 @@ define <4 x i64> @vector_ptrtoint({<2 x ptr>, <2 x ptr>} %x) { ; CHECK-LABEL: @vector_ptrtoint( +; CHECK-NEXT: [[FR1:%.*\.fr]] = freeze <4 x i64> undef ; CHECK-NEXT: [[X_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[X:%.*]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr> [[X_FCA_0_EXTRACT]] to <2 x i64> ; CHECK-NEXT: [[A_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> -; CHECK-NEXT: [[A_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> , <4 x i64> [[A_SROA_0_0_VEC_EXPAND]], <4 x i64> undef +; CHECK-NEXT: [[A_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> , <4 x i64> [[A_SROA_0_0_VEC_EXPAND]], <4 x i64> [[FR1]] ; CHECK-NEXT: [[X_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[X]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <2 x ptr> [[X_FCA_1_EXTRACT]] to <2 x i64> ; CHECK-NEXT: [[A_SROA_0_16_VEC_EXPAND:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> @@ -25,10 +26,11 @@ define <4 x ptr> @vector_inttoptr({<2 x i64>, <2 x i64>} %x) { ; CHECK-LABEL: @vector_inttoptr( +; CHECK-NEXT: [[FR2:%.*\.fr]] = freeze <4 x ptr> undef ; CHECK-NEXT: [[X_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[X:%.*]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = inttoptr <2 x i64> [[X_FCA_0_EXTRACT]] to <2 x ptr> ; CHECK-NEXT: [[A_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <4 x i32> -; CHECK-NEXT: [[A_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> , <4 x ptr> [[A_SROA_0_0_VEC_EXPAND]], <4 x ptr> undef +; CHECK-NEXT: [[A_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> , <4 x ptr> [[A_SROA_0_0_VEC_EXPAND]], <4 x ptr> [[FR2]] ; CHECK-NEXT: [[X_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[X]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = inttoptr <2 x i64> [[X_FCA_1_EXTRACT]] to <2 x ptr> ; CHECK-NEXT: [[A_SROA_0_16_VEC_EXPAND:%.*]] = shufflevector <2 x ptr> [[TMP2]], <2 x 
ptr> poison, <4 x i32> @@ -46,10 +48,11 @@ define <2 x i64> @vector_ptrtointbitcast({<1 x ptr>, <1 x ptr>} %x) { ; CHECK-LABEL: @vector_ptrtointbitcast( +; CHECK-NEXT: [[FR3:%.*\.fr]] = freeze <2 x i64> undef ; CHECK-NEXT: [[X_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x ptr>, <1 x ptr> } [[X:%.*]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <1 x ptr> [[X_FCA_0_EXTRACT]] to <1 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[TMP1]] to i64 -; CHECK-NEXT: [[A_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0 +; CHECK-NEXT: [[A_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> [[FR3]], i64 [[TMP2]], i32 0 ; CHECK-NEXT: [[X_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x ptr>, <1 x ptr> } [[X]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint <1 x ptr> [[X_FCA_1_EXTRACT]] to <1 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to i64 Index: llvm/test/Transforms/SROA/vector-promotion.ll =================================================================== --- llvm/test/Transforms/SROA/vector-promotion.ll +++ llvm/test/Transforms/SROA/vector-promotion.ll @@ -241,7 +241,8 @@ define <4 x i32> @test_subvec_store() { ; CHECK-LABEL: @test_subvec_store( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> , <4 x i32> undef +; CHECK-NEXT: [[FR1:%.*\.fr.*]] = freeze <4 x i32> undef +; CHECK-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> , <4 x i32> [[FR1]] ; CHECK-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> , <4 x i32> [[A_0_VECBLEND]] ; CHECK-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> , <4 x i32> [[A_4_VECBLEND]] ; CHECK-NEXT: [[A_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[A_8_VECBLEND]], i32 3, i32 3 @@ -298,7 +299,8 @@ define <4 x float> @test_subvec_memset() { ; CHECK-LABEL: @test_subvec_memset( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> undef +; CHECK-NEXT: [[FR2:%.*\.fr.*]] = freeze <4 x float> undef +; 
CHECK-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> [[FR2]] ; CHECK-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> [[A_0_VECBLEND]] ; CHECK-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> [[A_4_VECBLEND]] ; CHECK-NEXT: [[A_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[A_8_VECBLEND]], float 0x38E0E0E0E0000000, i32 3 @@ -326,9 +328,10 @@ define <4 x float> @test_subvec_memcpy(ptr %x, ptr %y, ptr %z, ptr %f, ptr %out) { ; CHECK-LABEL: @test_subvec_memcpy( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[FR3:%.*\.fr.*]] = freeze <4 x float> undef ; CHECK-NEXT: [[A_0_COPYLOAD:%.*]] = load <2 x float>, ptr [[X:%.*]], align 1 ; CHECK-NEXT: [[A_0_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_0_COPYLOAD]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_0_VEC_EXPAND]], <4 x float> undef +; CHECK-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_0_VEC_EXPAND]], <4 x float> [[FR3]] ; CHECK-NEXT: [[A_4_COPYLOAD:%.*]] = load <2 x float>, ptr [[Y:%.*]], align 1 ; CHECK-NEXT: [[A_4_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_4_COPYLOAD]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_4_VEC_EXPAND]], <4 x float> [[A_0_VECBLEND]] @@ -365,8 +368,9 @@ define i32 @PR14212(<3 x i8> %val) { ; CHECK-LABEL: @PR14212( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[FR4:%.*\.fr.*]] = freeze i8 undef ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <3 x i8> [[VAL:%.*]] to i24 -; CHECK-NEXT: [[RETVAL_SROA_2_0_INSERT_EXT:%.*]] = zext i8 undef to i32 +; CHECK-NEXT: [[RETVAL_SROA_2_0_INSERT_EXT:%.*]] = zext i8 [[FR4]] to i32 ; CHECK-NEXT: [[RETVAL_SROA_2_0_INSERT_SHIFT:%.*]] = shl i32 [[RETVAL_SROA_2_0_INSERT_EXT]], 24 ; CHECK-NEXT: [[RETVAL_SROA_2_0_INSERT_MASK:%.*]] = and i32 undef, 16777215 ; CHECK-NEXT: [[RETVAL_SROA_2_0_INSERT_INSERT:%.*]] = or i32 [[RETVAL_SROA_2_0_INSERT_MASK]], [[RETVAL_SROA_2_0_INSERT_SHIFT]] 
@@ -410,8 +414,9 @@ define i32 @PR14349.2(<2 x i8> %x) { ; CHECK-LABEL: @PR14349.2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[FR5:%.*\.fr.*]] = freeze i16 undef ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i8> [[X:%.*]] to i16 -; CHECK-NEXT: [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i16 undef to i32 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i16 [[FR5]] to i32 ; CHECK-NEXT: [[A_SROA_2_0_INSERT_SHIFT:%.*]] = shl i32 [[A_SROA_2_0_INSERT_EXT]], 16 ; CHECK-NEXT: [[A_SROA_2_0_INSERT_MASK:%.*]] = and i32 undef, 65535 ; CHECK-NEXT: [[A_SROA_2_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_2_0_INSERT_MASK]], [[A_SROA_2_0_INSERT_SHIFT]] @@ -491,7 +496,8 @@ ; on a single load with a vector type. ; CHECK-LABEL: @test9( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0 +; CHECK-NEXT: [[FR6:%.*\.fr.*]] = freeze <2 x i32> undef +; CHECK-NEXT: [[A_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i32> [[FR6]], i32 [[X:%.*]], i32 0 ; CHECK-NEXT: [[A_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x i32> [[A_SROA_0_0_VEC_INSERT]], i32 [[Y:%.*]], i32 1 ; CHECK-NEXT: ret <2 x i32> [[A_SROA_0_4_VEC_INSERT]] ; Index: llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_asm.ll.expected =================================================================== --- llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_asm.ll.expected +++ llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_asm.ll.expected @@ -5,7 +5,8 @@ ; CHECK-LABEL: i64_test: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, s4, v0 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v0, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %loc = alloca i64 %j = load i64, i64 * %loc @@ -17,7 +18,7 @@ ; CHECK-LABEL: i32_test: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: 
v_add_i32_e32 v0, vcc, s4, v0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_setpc_b64 s[30:31] %loc = alloca i32 @@ -31,7 +32,8 @@ ; CHECK-LABEL: i16_test: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, s4, v0 +; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_setpc_b64 s[30:31] %loc = alloca i16 @@ -45,7 +47,8 @@ ; CHECK-LABEL: i8_test: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, s4, v0 +; CHECK-NEXT: v_and_b32_e32 v0, 0xff, v0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_setpc_b64 s[30:31] %loc = alloca i8 Index: llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_isel.ll.expected =================================================================== --- llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_isel.ll.expected +++ llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_isel.ll.expected @@ -3,12 +3,18 @@ define i64 @i64_test(i64 %i) nounwind readnone { ; CHECK-LABEL: i64_test: -; CHECK: SelectionDAG has 9 nodes: +; CHECK: SelectionDAG has 19 nodes: ; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t11: ch,glue = CopyToReg t0, Register:i32 $vgpr0, IMPLICIT_DEF:i32 -; CHECK-NEXT: t17: i32 = V_MOV_B32_e32 TargetConstant:i32<0> -; CHECK-NEXT: t13: ch,glue = CopyToReg t11, Register:i32 $vgpr1, t17, t11:1 -; CHECK-NEXT: t14: ch = SI_RETURN Register:i32 $vgpr0, Register:i32 $vgpr1, t13, t13:1 +; CHECK-NEXT: t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %0 +; CHECK-NEXT: t4: i32,ch = CopyFromReg # D:1 t0, Register:i32 %1 +; CHECK-NEXT: t31: i64 = REG_SEQUENCE # D:1 TargetConstant:i32<55>, t2, TargetConstant:i32<3>, t4, TargetConstant:i32<11> +; CHECK-NEXT: t7: i64 = COPY IMPLICIT_DEF:i64 +; CHECK-NEXT: t8: i64 = V_ADD_U64_PSEUDO # D:1 t31, t7 +; CHECK-NEXT: t21: 
i32 = EXTRACT_SUBREG # D:1 t8, TargetConstant:i32<3> +; CHECK-NEXT: t14: ch,glue = CopyToReg # D:1 t0, Register:i32 $vgpr0, t21 +; CHECK-NEXT: t25: i32 = EXTRACT_SUBREG # D:1 t8, TargetConstant:i32<11> +; CHECK-NEXT: t16: ch,glue = CopyToReg # D:1 t14, Register:i32 $vgpr1, t25, t14:1 +; CHECK-NEXT: t17: ch = SI_RETURN # D:1 Register:i32 $vgpr0, Register:i32 $vgpr1, t16, t16:1 ; CHECK-EMPTY: %loc = alloca i64 %j = load i64, i64 * %loc @@ -18,12 +24,15 @@ define i64 @i32_test(i32 %i) nounwind readnone { ; CHECK-LABEL: i32_test: -; CHECK: SelectionDAG has 8 nodes: -; CHECK-NEXT: t5: i32 = V_MOV_B32_e32 TargetConstant:i32<0> +; CHECK: SelectionDAG has 14 nodes: ; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t7: ch,glue = CopyToReg t0, Register:i32 $vgpr0, t5 -; CHECK-NEXT: t9: ch,glue = CopyToReg t7, Register:i32 $vgpr1, t5, t7:1 -; CHECK-NEXT: t10: ch = SI_RETURN Register:i32 $vgpr0, Register:i32 $vgpr1, t9, t9:1 +; CHECK-NEXT: t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %0 +; CHECK-NEXT: t4: i32 = COPY IMPLICIT_DEF:i32 +; CHECK-NEXT: t5: i32,i1 = V_ADD_CO_U32_e64 # D:1 t2, t4, TargetConstant:i1<0> +; CHECK-NEXT: t12: ch,glue = CopyToReg # D:1 t0, Register:i32 $vgpr0, t5 +; CHECK-NEXT: t20: i32 = V_MOV_B32_e32 TargetConstant:i32<0> +; CHECK-NEXT: t14: ch,glue = CopyToReg # D:1 t12, Register:i32 $vgpr1, t20, t12:1 +; CHECK-NEXT: t15: ch = SI_RETURN # D:1 Register:i32 $vgpr0, Register:i32 $vgpr1, t14, t14:1 ; CHECK-EMPTY: %loc = alloca i32 %j = load i32, i32 * %loc @@ -34,12 +43,17 @@ define i64 @i16_test(i16 %i) nounwind readnone { ; CHECK-LABEL: i16_test: -; CHECK: SelectionDAG has 8 nodes: -; CHECK-NEXT: t5: i32 = V_MOV_B32_e32 TargetConstant:i32<0> +; CHECK: SelectionDAG has 17 nodes: ; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t7: ch,glue = CopyToReg t0, Register:i32 $vgpr0, t5 -; CHECK-NEXT: t9: ch,glue = CopyToReg t7, Register:i32 $vgpr1, t5, t7:1 -; CHECK-NEXT: t10: ch = SI_RETURN Register:i32 $vgpr0, Register:i32 $vgpr1, t9, t9:1 +; 
CHECK-NEXT: t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %0 +; CHECK-NEXT: t18: i32 = COPY IMPLICIT_DEF:i32 +; CHECK-NEXT: t19: i32,i1 = V_ADD_CO_U32_e64 # D:1 t2, t18, TargetConstant:i1<0> +; CHECK-NEXT: t28: i32 = S_MOV_B32 TargetConstant:i32<65535> +; CHECK-NEXT: t29: i32 = V_AND_B32_e64 # D:1 t19, t28 +; CHECK-NEXT: t13: ch,glue = CopyToReg # D:1 t0, Register:i32 $vgpr0, t29 +; CHECK-NEXT: t38: i32 = V_MOV_B32_e32 TargetConstant:i32<0> +; CHECK-NEXT: t15: ch,glue = CopyToReg # D:1 t13, Register:i32 $vgpr1, t38, t13:1 +; CHECK-NEXT: t16: ch = SI_RETURN # D:1 Register:i32 $vgpr0, Register:i32 $vgpr1, t15, t15:1 ; CHECK-EMPTY: %loc = alloca i16 %j = load i16, i16 * %loc @@ -50,12 +64,17 @@ define i64 @i8_test(i8 %i) nounwind readnone { ; CHECK-LABEL: i8_test: -; CHECK: SelectionDAG has 8 nodes: -; CHECK-NEXT: t5: i32 = V_MOV_B32_e32 TargetConstant:i32<0> +; CHECK: SelectionDAG has 17 nodes: ; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t7: ch,glue = CopyToReg t0, Register:i32 $vgpr0, t5 -; CHECK-NEXT: t9: ch,glue = CopyToReg t7, Register:i32 $vgpr1, t5, t7:1 -; CHECK-NEXT: t10: ch = SI_RETURN Register:i32 $vgpr0, Register:i32 $vgpr1, t9, t9:1 +; CHECK-NEXT: t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %0 +; CHECK-NEXT: t18: i32 = COPY IMPLICIT_DEF:i32 +; CHECK-NEXT: t19: i32,i1 = V_ADD_CO_U32_e64 # D:1 t2, t18, TargetConstant:i1<0> +; CHECK-NEXT: t28: i32 = S_MOV_B32 TargetConstant:i32<255> +; CHECK-NEXT: t29: i32 = V_AND_B32_e64 # D:1 t19, t28 +; CHECK-NEXT: t13: ch,glue = CopyToReg # D:1 t0, Register:i32 $vgpr0, t29 +; CHECK-NEXT: t38: i32 = V_MOV_B32_e32 TargetConstant:i32<0> +; CHECK-NEXT: t15: ch,glue = CopyToReg # D:1 t13, Register:i32 $vgpr1, t38, t13:1 +; CHECK-NEXT: t16: ch = SI_RETURN # D:1 Register:i32 $vgpr0, Register:i32 $vgpr1, t15, t15:1 ; CHECK-EMPTY: %loc = alloca i8 %j = load i8, i8 * %loc