diff --git a/llvm/test/CodeGen/AMDGPU/coalescer_distribute.ll b/llvm/test/CodeGen/AMDGPU/coalescer_distribute.ll --- a/llvm/test/CodeGen/AMDGPU/coalescer_distribute.ll +++ b/llvm/test/CodeGen/AMDGPU/coalescer_distribute.ll @@ -2,13 +2,13 @@ ; This testcase produces a situation with unused value numbers in subregister ; liveranges that get distributed by ConnectedVNInfoEqClasses. -define amdgpu_kernel void @hoge() { +define amdgpu_kernel void @hoge(i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() - br i1 undef, label %bb2, label %bb23 + br i1 %c0, label %bb2, label %bb23 bb2: - br i1 undef, label %bb6, label %bb8 + br i1 %c1, label %bb6, label %bb8 bb6: %tmp7 = or i64 undef, undef @@ -20,7 +20,7 @@ br i1 %tmp10, label %bb11, label %bb23 bb11: - br i1 undef, label %bb20, label %bb17 + br i1 %c2, label %bb20, label %bb17 bb17: br label %bb20 @@ -36,10 +36,10 @@ bb25: %tmp26 = phi i32 [ %tmp24, %bb23 ], [ undef, %bb25 ] - br i1 undef, label %bb25, label %bb30 + br i1 %c3, label %bb25, label %bb30 bb30: - br i1 undef, label %bb32, label %bb34 + br i1 %c4, label %bb32, label %bb34 bb32: %tmp33 = zext i32 %tmp26 to i64 diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll --- a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll @@ -20,8 +20,8 @@ ; GCN: v_bfe_i32 ; GCN: v_bfe_i32 -define <2 x i16> @extract_2xi16(ptr addrspace(1) %p0, ptr addrspace(1) %p1) { - br i1 undef, label %T, label %F +define <2 x i16> @extract_2xi16(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) { + br i1 %c0, label %T, label %F T: %t = load volatile <8 x i16>, ptr addrspace(1) %p0 @@ -41,8 +41,8 @@ ; GCN-LABEL: extract_2xi64 ; GCN-COUNT-2: v_cndmask_b32 -define <2 x i64> @extract_2xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1) { - br i1 undef, label %T, label %F +define <2 x i64> @extract_2xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) { 
+ br i1 %c0, label %T, label %F T: %t = load volatile <8 x i64>, ptr addrspace(1) %p0 @@ -62,8 +62,8 @@ ; GCN-LABEL: extract_4xi64 ; GCN-COUNT-4: v_cndmask_b32 -define <4 x i64> @extract_4xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1) { - br i1 undef, label %T, label %F +define <4 x i64> @extract_4xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) { + br i1 %c0, label %T, label %F T: %t = load volatile <8 x i64>, ptr addrspace(1) %p0 @@ -83,8 +83,8 @@ ; GCN-LABEL: extract_8xi64 ; GCN-COUNT-8: v_cndmask_b32 -define <8 x i64> @extract_8xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1) { - br i1 undef, label %T, label %F +define <8 x i64> @extract_8xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) { + br i1 %c0, label %T, label %F T: %t = load volatile <16 x i64>, ptr addrspace(1) %p0 @@ -104,8 +104,8 @@ ; GCN-LABEL: extract_2xf64 ; GCN-COUNT-2: v_cndmask_b32 -define <2 x double> @extract_2xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1) { - br i1 undef, label %T, label %F +define <2 x double> @extract_2xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) { + br i1 %c0, label %T, label %F T: %t = load volatile <8 x double>, ptr addrspace(1) %p0 @@ -125,8 +125,8 @@ ; GCN-LABEL: extract_4xf64 ; GCN-COUNT-4: v_cndmask_b32 -define <4 x double> @extract_4xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1) { - br i1 undef, label %T, label %F +define <4 x double> @extract_4xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) { + br i1 %c0, label %T, label %F T: %t = load volatile <8 x double>, ptr addrspace(1) %p0 @@ -146,8 +146,8 @@ ; GCN-LABEL: extract_8xf64 ; GCN-COUNT-8: v_cndmask_b32 -define <8 x double> @extract_8xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1) { - br i1 undef, label %T, label %F +define <8 x double> @extract_8xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) { + br i1 %c0, label %T, label %F T: %t = load volatile <16 x double>, ptr addrspace(1) %p0 diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll 
b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll --- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll @@ -42,9 +42,9 @@ ; SI-LABEL: {{^}}vcopy_i1_undef ; SI: v_cndmask_b32_e64 ; SI: v_cndmask_b32_e64 -define <2 x float> @vcopy_i1_undef(ptr addrspace(1) %p) { +define <2 x float> @vcopy_i1_undef(ptr addrspace(1) %p, i1 %c0) { entry: - br i1 undef, label %exit, label %false + br i1 %c0, label %exit, label %false false: %x = load <2 x float>, ptr addrspace(1) %p diff --git a/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll b/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll --- a/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll +++ b/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll @@ -7,9 +7,9 @@ ; CHECK-NOT: COPY [[IMPDEF0]] ; CHECK-NOT: COPY [[IMPDEF1]] ; CHECK: .false: -define <2 x float> @vcopy_i1_undef(ptr addrspace(1) %p) { +define <2 x float> @vcopy_i1_undef(ptr addrspace(1) %p, i1 %c0) { entry: - br i1 undef, label %exit, label %false + br i1 %c0, label %exit, label %false false: %x = load <2 x float>, ptr addrspace(1) %p diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.ll --- a/llvm/test/CodeGen/AMDGPU/inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.ll @@ -299,9 +299,9 @@ ; Check aggregate types are handled properly. 
; CHECK-LABEL: mad_u64 ; CHECK: v_mad_u64_u32 -define void @mad_u64(i32 %x) { +define void @mad_u64(i32 %x, i1 %c0) { entry: - br i1 undef, label %exit, label %false + br i1 %c0, label %exit, label %false false: %s0 = tail call { i64, i64 } asm sideeffect "v_mad_u64_u32 $0, $1, $2, $3, $4", "=v,=s,v,v,v"(i32 -766435501, i32 %x, i64 0) diff --git a/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll b/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll --- a/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll @@ -4,10 +4,10 @@ @gv = external unnamed_addr addrspace(4) constant [239 x i32], align 4 -define amdgpu_kernel void @opencv_cvtfloat_crash(ptr addrspace(1) %out, i32 %x) nounwind { +define amdgpu_kernel void @opencv_cvtfloat_crash(ptr addrspace(1) %out, i32 %x, i1 %c0) nounwind { %val = load i32, ptr addrspace(4) getelementptr ([239 x i32], ptr addrspace(4) @gv, i64 0, i64 239), align 4 %mul12 = mul nsw i32 %val, 7 - br i1 undef, label %exit, label %bb + br i1 %c0, label %exit, label %bb bb: %cmp = icmp slt i32 %x, 0 diff --git a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll --- a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll @@ -4,7 +4,7 @@ @_RSENC_gDcd_______________________________ = external protected addrspace(1) externally_initialized global [4096 x i8], align 16 -define protected amdgpu_kernel void @_RSENC_PRInit__________________________________() local_unnamed_addr #0 { +define protected amdgpu_kernel void @_RSENC_PRInit__________________________________(i1 %c0) local_unnamed_addr #0 { entry: %runtimeVersionCopy = alloca [128 x i8], align 16, addrspace(5) %licenseVersionCopy = alloca [128 x i8], align 16, addrspace(5) @@ -18,7 +18,7 @@ br i1 %cmp13, label %cleanup.cont, label %if.end15 if.end15: ; preds = %if.end - br i1 undef, label %cleanup.cont, label %lor.lhs.false17 + 
br i1 %c0, label %cleanup.cont, label %lor.lhs.false17 lor.lhs.false17: ; preds = %if.end15 br label %while.cond.i diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll --- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll @@ -78,12 +78,12 @@ ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_call_multi_bb: ; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call_multi_bb(ptr addrspace(1) %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call_multi_bb(ptr addrspace(1) %arg, i1 %c0) #0 { bb1: %in.1 = load <32 x float>, ptr addrspace(1) %arg %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3) store <32 x float> %mai.1, ptr addrspace(1) %arg - br i1 undef, label %bb2, label %bb3 + br i1 %c0, label %bb2, label %bb3 br label %bb2 bb2: diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll @@ -13,10 +13,10 @@ ; CHECK: endif: ; CHECK: %phi.ptr = phi ptr addrspace(3) [ %arrayidx0, %if ], [ %arrayidx1, %else ] ; CHECK: store i32 0, ptr addrspace(3) %phi.ptr, align 4 -define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 { +define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b, i1 %c0) #0 { entry: %alloca = alloca [64 x i32], align 4, addrspace(5) - br i1 undef, label %if, label %else + br i1 %c0, label %if, label %else if: %arrayidx0 = getelementptr inbounds [64 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a @@ -34,10 +34,10 @@ ; CHECK-LABEL: @branch_ptr_phi_alloca_null_0( ; CHECK: %phi.ptr = phi ptr addrspace(3) [ %arrayidx0, %if ], [ null, %entry ] -define amdgpu_kernel void 
@branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 { +define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b, i1 %c0) #0 { entry: %alloca = alloca [64 x i32], align 4, addrspace(5) - br i1 undef, label %if, label %endif + br i1 %c0, label %if, label %endif if: %arrayidx0 = getelementptr inbounds [64 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a @@ -51,10 +51,10 @@ ; CHECK-LABEL: @branch_ptr_phi_alloca_null_1( ; CHECK: %phi.ptr = phi ptr addrspace(3) [ null, %entry ], [ %arrayidx0, %if ] -define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 { +define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b, i1 %c0) #0 { entry: %alloca = alloca [64 x i32], align 4, addrspace(5) - br i1 undef, label %if, label %endif + br i1 %c0, label %if, label %endif if: %arrayidx0 = getelementptr inbounds [64 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a @@ -97,10 +97,10 @@ ; CHECK: endif: ; CHECK: %phi.ptr = phi ptr addrspace(5) [ %arrayidx0, %if ], [ %arrayidx1, %else ] ; CHECK: store i32 0, ptr addrspace(5) %phi.ptr, align 4 -define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 { +define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b, i1 %c0) #0 { entry: %alloca = alloca [64 x i32], align 4, addrspace(5) - br i1 undef, label %if, label %else + br i1 %c0, label %if, label %else if: %arrayidx0 = getelementptr inbounds [64 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll @@ -77,13 +77,13 @@ ret void } -define amdgpu_kernel void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 { +define amdgpu_kernel void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c, i1 %c0) #0 { entry: %alloca = alloca [16 x i32], align 4, 
addrspace(5) %ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a %ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %b store i32 0, ptr addrspace(5) %ptr0 - br i1 undef, label %bb1, label %bb2 + br i1 %c0, label %bb1, label %bb2 bb1: %ptr2 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %c diff --git a/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll b/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll --- a/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll @@ -6,7 +6,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 -define amdgpu_kernel void @reg_coalescer_breaks_dead(ptr addrspace(1) nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3) #1 { +define amdgpu_kernel void @reg_coalescer_breaks_dead(ptr addrspace(1) nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, i1 %c0) #1 { bb: %id.x = call i32 @llvm.amdgcn.workitem.id.x() %cmp0 = icmp eq i32 %id.x, 0 @@ -18,7 +18,7 @@ bb4: ; preds = %bb6, %bb %tmp5 = phi <2 x i32> [ zeroinitializer, %bb ], [ %tmp13, %bb6 ] - br i1 undef, label %bb15, label %bb16 + br i1 %c0, label %bb15, label %bb16 bb6: ; preds = %bb6, %bb3 %tmp7 = phi <2 x i32> [ zeroinitializer, %bb3 ], [ %tmp13, %bb6 ] diff --git a/llvm/test/CodeGen/AMDGPU/rename-disconnected-bug.ll b/llvm/test/CodeGen/AMDGPU/rename-disconnected-bug.ll --- a/llvm/test/CodeGen/AMDGPU/rename-disconnected-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/rename-disconnected-bug.ll @@ -3,23 +3,23 @@ ; definition on every path (there should at least be IMPLICIT_DEF instructions). 
target triple = "amdgcn--" -define amdgpu_kernel void @func() { +define amdgpu_kernel void @func(i1 %c0, i1 %c1, i1 %c2) { B0: - br i1 undef, label %B1, label %B2 + br i1 %c0, label %B1, label %B2 B1: br label %B2 B2: %v0 = phi <4 x float> [ zeroinitializer, %B1 ], [ undef, %B0 ] - br i1 undef, label %B20.1, label %B20.2 + br i1 %c1, label %B20.1, label %B20.2 B20.1: br label %B20.2 B20.2: %v2 = phi <4 x float> [ zeroinitializer, %B20.1 ], [ %v0, %B2 ] - br i1 undef, label %B30.1, label %B30.2 + br i1 %c2, label %B30.1, label %B30.2 B30.1: %sub = fsub <4 x float> %v2, undef diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll @@ -11,7 +11,7 @@ ; GCN: s_and_saveexec_b64 ; GCN-NOT: s_endpgm ; GCN: .Lfunc_end0 -define amdgpu_kernel void @annotate_unreachable(ptr addrspace(1) noalias nocapture readonly %arg) #0 { +define amdgpu_kernel void @annotate_unreachable(ptr addrspace(1) noalias nocapture readonly %arg, i1 %c0) #0 { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() br label %bb1 @@ -20,7 +20,7 @@ %tmp2 = sext i32 %tmp to i64 %tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2 %tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16 - br i1 undef, label %bb3, label %bb5 ; label order reversed + br i1 %c0, label %bb3, label %bb5 ; label order reversed bb3: ; preds = %bb1 %tmp6 = extractelement <4 x float> %tmp4, i32 2 diff --git a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll --- a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll +++ b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll @@ -19,7 +19,7 @@ declare float @llvm.fmuladd.f32(float, float, float) #0 ; CHECK: s_endpgm -define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, 
ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3) local_unnamed_addr !reqd_work_group_size !0 { +define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) local_unnamed_addr !reqd_work_group_size !0 { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.y() %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -31,7 +31,7 @@ br label %bb12 bb11: ; preds = %bb30 - br i1 undef, label %bb37, label %bb38 + br i1 %c0, label %bb37, label %bb38 bb12: ; preds = %bb30, %bb br i1 false, label %.preheader, label %.loopexit145 @@ -43,7 +43,7 @@ %tmp14 = phi i32 [ %tmp5, %.loopexit145 ], [ %tmp20, %.loopexit ] %tmp15 = add nsw i32 %tmp14, -3 %tmp16 = mul i32 %tmp14, 21 - br i1 undef, label %bb17, label %.loopexit + br i1 %c1, label %bb17, label %.loopexit bb17: ; preds = %bb13 %tmp18 = mul i32 %tmp15, 224 @@ -52,7 +52,7 @@ .loopexit: ; preds = %bb21, %bb13 %tmp20 = add nuw nsw i32 %tmp14, 16 - br i1 undef, label %bb13, label %bb26 + br i1 %c2, label %bb13, label %bb26 bb21: ; preds = %bb21, %bb17 %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ] @@ -60,7 +60,7 @@ %tmp24 = getelementptr inbounds float, ptr addrspace(3) @0, i32 %tmp23 store float undef, ptr addrspace(3) %tmp24, align 4 %tmp25 = add nuw i32 %tmp22, 8 - br i1 undef, label %bb21, label %.loopexit + br i1 %c3, label %bb21, label %.loopexit bb26: ; preds = %.loopexit br label %bb31 @@ -72,7 +72,7 @@ br i1 %tmp29, label %.preheader, label %.loopexit145 bb30: ; preds = %bb31 - br i1 undef, label %bb11, label %bb12 + br i1 %c4, label %bb11, label %bb12 bb31: ; preds = %bb31, %bb26 %tmp32 = phi i32 [ %tmp9, %bb26 ], [ undef, %bb31 ] @@ -80,7 +80,7 @@ %tmp34 = load float, ptr addrspace(3) %tmp33, align 4 %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float undef, float undef) %tmp36 = 
tail call float @llvm.fmuladd.f32(float undef, float undef, float %tmp35) - br i1 undef, label %bb30, label %bb31 + br i1 %c5, label %bb30, label %bb31 bb37: ; preds = %bb11 br label %bb38 diff --git a/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll --- a/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll +++ b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s -define void @test() #1 { +define void @test(i1 %c0) #1 { ; Clean up the unreachable blocks introduced with LowerSwitch pass. ; This test ensures that, in the pass flow, UnreachableBlockElim pass ; follows the LowerSwitch. Otherwise, this testcase will crash @@ -22,7 +22,7 @@ ; GCN: bb.{{[0-9]+}}.UnifiedReturnBlock: entry: %idx = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - br i1 undef, label %entry.true.blk, label %entry.false.blk + br i1 %c0, label %entry.true.blk, label %entry.false.blk entry.true.blk: ; preds = %entry %exit.cmp = icmp ult i32 %idx, 3 diff --git a/llvm/test/CodeGen/AMDGPU/uniform-crash.ll b/llvm/test/CodeGen/AMDGPU/uniform-crash.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-crash.ll @@ -25,7 +25,7 @@ ; GCN: {{^}}[[LOOP:.L[A-Z0-9_]+]]: ; GCN: s_cbranch_scc1 [[LOOP]] ; GCN: {{^}}[[BB0]]: -define amdgpu_kernel void @fix_sgpr_live_ranges_crash(i32 %arg, i32 %arg1) { +define amdgpu_kernel void @fix_sgpr_live_ranges_crash(i32 %arg, i32 %arg1, i1 %c0) { bb: %cnd = trunc i32 %arg to i1 br i1 %cnd, label %bb2, label %bb5 @@ -45,7 +45,7 @@ br i1 %tmp10, label %bb11, label %bb12 bb11: ; preds = %bb11, %bb5 - br i1 undef, label %bb11, label %bb12 + br i1 %c0, label %bb11, label %bb12 bb12: ; preds = %bb11, %bb5 ret void diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll b/llvm/test/CodeGen/AMDGPU/v1024.ll --- 
a/llvm/test/CodeGen/AMDGPU/v1024.ll +++ b/llvm/test/CodeGen/AMDGPU/v1024.ll @@ -6,11 +6,11 @@ ; GCN-NOT: v_accvgpr ; GCN-COUNT-8: global_store_dwordx4 ; GCN-NOT: v_accvgpr -define amdgpu_kernel void @test_v1024() { +define amdgpu_kernel void @test_v1024(i1 %c0) { entry: %alloca = alloca <32 x i32>, align 16, addrspace(5) call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 128, i1 false) - br i1 undef, label %if.then.i.i, label %if.else.i + br i1 %c0, label %if.then.i.i, label %if.else.i if.then.i.i: ; preds = %entry call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 16 %alloca, ptr addrspace(5) align 4 undef, i64 128, i1 false) diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -2290,7 +2290,7 @@ ret void } -define amdgpu_kernel void @test_branch_true() #2 { +define amdgpu_kernel void @test_branch_true(i1 %c0) #2 { ; GFX1032-LABEL: test_branch_true: ; GFX1032: ; %bb.0: ; %entry ; GFX1032-NEXT: s_mov_b32 vcc_lo, exec_lo @@ -2327,7 +2327,7 @@ br label %for.body for.body: ; preds = %for.body, %for.body.lr.ph - br i1 undef, label %for.end, label %for.body + br i1 %c0, label %for.end, label %for.body for.end: ; preds = %for.body, %entry ret void