diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll
@@ -71,8 +71,8 @@
   ret void
 }
 
-define void @br_undef() {
+define void @br_undef(i1 %c0) {
 ; WAVE64-LABEL: br_undef:
 ; WAVE64: ; %bb.0: ; %.exit
 ; WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; WAVE64-NEXT: .LBB2_1: ; %bb0
@@ -84,7 +84,7 @@
 ; WAVE64-NEXT: ; %bb.2: ; %.exit5
 ; WAVE64-NEXT: s_setpc_b64 s[30:31]
 ;
 ; WAVE32-LABEL: br_undef:
 ; WAVE32: ; %bb.0: ; %.exit
 ; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
@@ -100,7 +100,7 @@
   br label %bb0
 
 bb0:
-  br i1 undef, label %.exit5, label %bb0
+  br i1 %c0, label %.exit5, label %bb0
 
 .exit5:
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -511,7 +511,7 @@
   ret void
 }
 
-define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg2, i64 %arg3, <2 x half> %arg4, <2 x half> %arg5) #3 {
+define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg2, i64 %arg3, <2 x half> %arg4, <2 x half> %arg5, i1 %c0) #3 {
 ; GFX908-LABEL: introduced_copy_to_sgpr:
 ; GFX908: ; %bb.0: ; %bb
 ; GFX908-NEXT: global_load_ushort v16, v[0:1], off glc
@@ -840,7 +840,7 @@
 bb9: ; preds = %bb12, %bb
   %i10 = phi i64 [ %arg3, %bb ], [ %i13, %bb12 ]
-  br i1 undef, label %bb14, label %bb12
+  br i1 %c0, label %bb14, label %bb12
 
 bb12: ; preds = %bb58, %bb9
   %i13 = add nuw nsw i64 %i10, %i8
diff --git a/llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll b/llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll
--- a/llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll
@@ -6,11 +6,11 @@
 ; Test for compilation only. This generated an invalid machine instruction
 ; by trying to commute the operands of a V_CMP_EQ_i32_e32 instruction, both
 ; of which were in SGPRs.
-define amdgpu_vs float @main(i32 %v) {
+define amdgpu_vs float @main(i32 %v, i1 %c0) {
 main_body:
   %d1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 960, i32 0)
   %d2 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 976, i32 0)
-  br i1 undef, label %ENDIF56, label %IF57
+  br i1 %c0, label %ENDIF56, label %IF57
 
 IF57: ; preds = %ENDIF
   %v.1 = mul i32 %v, 2
diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
--- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
@@ -225,7 +225,7 @@
   br i1 false, label %for.body, label %for.exit
 }
 
-define amdgpu_kernel void @loop_const_undef(ptr addrspace(3) %ptr, i32 %n) nounwind {
+define amdgpu_kernel void @loop_const_undef(ptr addrspace(3) %ptr, i32 %n, i1 %c0) nounwind {
 ; GCN-LABEL: loop_const_undef:
 ; GCN: ; %bb.0: ; %entry
 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
@@ -286,7 +286,7 @@
   %add = fadd float %vecload, 1.0
   store float %add, ptr addrspace(3) %arrayidx, align 8
   %inc = add i32 %indvar, 1
-  br i1 undef, label %for.body, label %for.exit
+  br i1 %c0, label %for.body, label %for.exit
 }
 
 define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll b/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
--- a/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
@@ -35,10 +35,10 @@
 ; GCN: buffer_store_dword
 ; GCN: s_endpgm
-define amdgpu_kernel void @sink_ubfe_i32(ptr addrspace(1) %out, i32 %arg1) #0 {
+define amdgpu_kernel void @sink_ubfe_i32(ptr addrspace(1) %out, i32 %arg1, i1 %c0) #0 {
 entry:
   %shr = lshr i32 %arg1, 8
-  br i1 undef, label %bb0, label %bb1
+  br i1 %c0, label %bb0, label %bb1
 
 bb0:
   %val0 = and i32 %shr, 255
@@ -75,10 +75,10 @@
 ; OPT: ret
 
 ; GCN-LABEL: {{^}}sink_sbfe_i32:
-define amdgpu_kernel void @sink_sbfe_i32(ptr addrspace(1) %out, i32 %arg1) #0 {
+define amdgpu_kernel void @sink_sbfe_i32(ptr addrspace(1) %out, i32 %arg1, i1 %c0) #0 {
 entry:
   %shr = ashr i32 %arg1, 8
-  br i1 undef, label %bb0, label %bb1
+  br i1 %c0, label %bb0, label %bb1
 
 bb0:
   %val0 = and i32 %shr, 255
@@ -132,10 +132,10 @@
 ; GCN: buffer_store_short
 ; GCN: s_endpgm
-define amdgpu_kernel void @sink_ubfe_i16(ptr addrspace(1) %out, i16 %arg1) #0 {
+define amdgpu_kernel void @sink_ubfe_i16(ptr addrspace(1) %out, i16 %arg1, i1 %c0) #0 {
 entry:
   %shr = lshr i16 %arg1, 4
-  br i1 undef, label %bb0, label %bb1
+  br i1 %c0, label %bb0, label %bb1
 
 bb0:
   %val0 = and i16 %shr, 255
@@ -183,10 +183,10 @@
 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]
 ; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(ptr addrspace(1) %out, i64 %arg1) #0 {
+define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(ptr addrspace(1) %out, i64 %arg1, i1 %c0) #0 {
 entry:
   %shr = lshr i64 %arg1, 30
-  br i1 undef, label %bb0, label %bb1
+  br i1 %c0, label %bb0, label %bb1
 
 bb0:
   %val0 = and i64 %shr, 255
@@ -231,10 +231,10 @@
 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f
 ; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @sink_ubfe_i64_low32(ptr addrspace(1) %out, i64 %arg1) #0 {
+define amdgpu_kernel void @sink_ubfe_i64_low32(ptr addrspace(1) %out, i64 %arg1, i1 %c0) #0 {
 entry:
   %shr = lshr i64 %arg1, 15
-  br i1 undef, label %bb0, label %bb1
+  br i1 %c0, label %bb0, label %bb1
 
 bb0:
   %val0 = and i64 %shr, 255
@@ -277,10 +277,10 @@
 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003
 ; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @sink_ubfe_i64_high32(ptr addrspace(1) %out, i64 %arg1) #0 {
+define amdgpu_kernel void @sink_ubfe_i64_high32(ptr addrspace(1) %out, i64 %arg1, i1 %c0) #0 {
 entry:
   %shr = lshr i64 %arg1, 35
-  br i1 undef, label %bb0, label %bb1
+  br i1 %c0, label %bb0, label %bb1
 
 bb0:
   %val0 = and i64 %shr, 255
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer_distribute.ll b/llvm/test/CodeGen/AMDGPU/coalescer_distribute.ll
--- a/llvm/test/CodeGen/AMDGPU/coalescer_distribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/coalescer_distribute.ll
@@ -2,13 +2,13 @@
 ; This testcase produces a situation with unused value numbers in subregister
 ; liveranges that get distributed by ConnectedVNInfoEqClasses.
-define amdgpu_kernel void @hoge() {
+define amdgpu_kernel void @hoge(i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
-  br i1 undef, label %bb2, label %bb23
+  br i1 %c0, label %bb2, label %bb23
 
 bb2:
-  br i1 undef, label %bb6, label %bb8
+  br i1 %c1, label %bb6, label %bb8
 
 bb6:
   %tmp7 = or i64 undef, undef
@@ -20,7 +20,7 @@
   br i1 %tmp10, label %bb11, label %bb23
 
 bb11:
-  br i1 undef, label %bb20, label %bb17
+  br i1 %c2, label %bb20, label %bb17
 
 bb17:
   br label %bb20
@@ -36,10 +36,10 @@
 
 bb25:
   %tmp26 = phi i32 [ %tmp24, %bb23 ], [ undef, %bb25 ]
-  br i1 undef, label %bb25, label %bb30
+  br i1 %c3, label %bb25, label %bb30
 
 bb30:
-  br i1 undef, label %bb32, label %bb34
+  br i1 %c4, label %bb32, label %bb34
 
 bb32:
   %tmp33 = zext i32 %tmp26 to i64
diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
--- a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -4,7 +4,7 @@
 
 ; Test that unused lanes in the s_xor result are masked out with v_cndmask.
 
-define i32 @combine_add_zext_xor() {
+define i32 @combine_add_zext_xor(i1 %c0) {
 ; GFX1010-LABEL: combine_add_zext_xor:
 ; GFX1010: ; %bb.0: ; %.entry
 ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -65,7 +65,7 @@
 
 .a: ; preds = %bb9, %.entry
   %.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
-  br i1 undef, label %bb9, label %bb
+  br i1 %c0, label %bb9, label %bb
 
 bb: ; preds = %.a
   %.i3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 %.2, i32 64, i32 1)
@@ -86,7 +86,7 @@
 
 ; Test that unused lanes in the s_xor result are masked out with v_cndmask.
 
-define i32 @combine_sub_zext_xor() {
+define i32 @combine_sub_zext_xor(i1 %c0) {
 ; GFX1010-LABEL: combine_sub_zext_xor:
 ; GFX1010: ; %bb.0: ; %.entry
 ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -147,7 +147,7 @@
 
 .a: ; preds = %bb9, %.entry
   %.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
-  br i1 undef, label %bb9, label %bb
+  br i1 %c0, label %bb9, label %bb
 
 bb: ; preds = %.a
   %.i3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 %.2, i32 64, i32 1)
@@ -168,7 +168,7 @@
 
 ; Test that unused lanes in the s_or result are masked out with v_cndmask.
 
-define i32 @combine_add_zext_or() {
+define i32 @combine_add_zext_or(i1 %c0) {
 ; GFX1010-LABEL: combine_add_zext_or:
 ; GFX1010: ; %bb.0: ; %.entry
 ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -227,7 +227,7 @@
 
 .a: ; preds = %bb9, %.entry
   %.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
-  br i1 undef, label %bb9, label %bb
+  br i1 %c0, label %bb9, label %bb
 
 bb: ; preds = %.a
   %.i3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 %.2, i32 64, i32 1)
@@ -249,7 +249,7 @@
 
 ; Test that unused lanes in the s_or result are masked out with v_cndmask.
-define i32 @combine_sub_zext_or() {
+define i32 @combine_sub_zext_or(i1 %c0) {
 ; GFX1010-LABEL: combine_sub_zext_or:
 ; GFX1010: ; %bb.0: ; %.entry
 ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -308,7 +308,7 @@
 
 .a: ; preds = %bb9, %.entry
   %.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
-  br i1 undef, label %bb9, label %bb
+  br i1 %c0, label %bb9, label %bb
 
 bb: ; preds = %.a
   %.i3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 %.2, i32 64, i32 1)
@@ -330,7 +330,7 @@
 
 ; Test that unused lanes in the s_and result are masked out with v_cndmask.
 
-define i32 @combine_add_zext_and() {
+define i32 @combine_add_zext_and(i1 %c0) {
 ; GFX1010-LABEL: combine_add_zext_and:
 ; GFX1010: ; %bb.0: ; %.entry
 ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -389,7 +389,7 @@
 
 .a: ; preds = %bb9, %.entry
   %.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
-  br i1 undef, label %bb9, label %bb
+  br i1 %c0, label %bb9, label %bb
 
 bb: ; preds = %.a
   %.i3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 %.2, i32 64, i32 1)
@@ -411,7 +411,7 @@
 
 ; Test that unused lanes in the s_and result are masked out with v_cndmask.
 
-define i32 @combine_sub_zext_and() {
+define i32 @combine_sub_zext_and(i1 %c0) {
 ; GFX1010-LABEL: combine_sub_zext_and:
 ; GFX1010: ; %bb.0: ; %.entry
 ; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -470,7 +470,7 @@
 
 .a: ; preds = %bb9, %.entry
   %.2 = phi i32 [ 0, %.entry ], [ %i11, %bb9 ]
-  br i1 undef, label %bb9, label %bb
+  br i1 %c0, label %bb9, label %bb
 
 bb: ; preds = %.a
   %.i3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 %.2, i32 64, i32 1)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll b/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
--- a/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
@@ -10,9 +10,9 @@
 ;
 ; GCN: s_endpgm
 
-define amdgpu_ps void @main(i32 %in1) local_unnamed_addr {
+define amdgpu_ps void @main(i32 %in1, i1 %c0) local_unnamed_addr {
 .entry:
-  br i1 undef, label %bb12, label %bb
+  br i1 %c0, label %bb12, label %bb
 
 bb:
   %__llpc_global_proxy_r5.12.vec.insert = insertelement <4 x i32> undef, i32 %in1, i32 3
diff --git a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
--- a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
+++ b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
@@ -384,9 +384,9 @@
 ; GCN: {{^}}; %bb.0:
 ; GCN-NEXT: s_load_dwordx2
 ; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 0, 1{{$}}
-define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, ptr addrspace(1) %out) {
+define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, ptr addrspace(1) %out, i1 %c0) {
 entry:
-  br i1 undef, label %else, label %if
+  br i1 %c0, label %else, label %if
 
 if:
   br label %done
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs -o - %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
 
-define <4 x i16> @vec_8xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
+define <4 x i16> @vec_8xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
 ; SI-LABEL: vec_8xi16_extract_4xi16:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -114,7 +114,7 @@
 ; GFX9-NEXT: v_perm_b32 v0, v0, v2, s4
 ; GFX9-NEXT: v_perm_b32 v1, v3, v1, s4
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
-  br i1 undef, label %T, label %F
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <8 x i16>, ptr addrspace(1) %p0
@@ -132,7 +132,7 @@
   ret <4 x i16> %r2
 }
 
-define <4 x i16> @vec_8xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
+define <4 x i16> @vec_8xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
 ; SI-LABEL: vec_8xi16_extract_4xi16_2:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -246,7 +246,7 @@
 ; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
 ; GFX9-NEXT: v_perm_b32 v1, v2, v1, s4
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
-  br i1 undef, label %T, label %F
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <8 x i16>, ptr addrspace(1) %p0
@@ -264,7 +264,7 @@
   ret <4 x i16> %r2
 }
 
-define <4 x half> @vec_8xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
+define <4 x half> @vec_8xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
 ; SI-LABEL: vec_8xf16_extract_4xf16:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -383,7 +383,7 @@
 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
 ; GFX9-NEXT: v_pack_b32_f16 v1, v5, v6
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
-  br i1 undef, label %T, label %F
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <8 x half>, ptr addrspace(1) %p0
@@ -401,7 +401,7 @@
   ret <4 x half> %r2
 }
 
-define <4 x i16> @vec_16xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
+define <4 x i16> @vec_16xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
 ;
 ; SI-LABEL: vec_16xi16_extract_4xi16:
 ; SI: ; %bb.0:
@@ -552,7 +552,7 @@
 ; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
 ; GFX9-NEXT: v_perm_b32 v1, v2, v1, s4
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
-  br i1 undef, label %T, label %F
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <16 x i16>, ptr addrspace(1) %p0
@@ -570,7 +570,7 @@
   ret <4 x i16> %r2
 }
 
-define <4 x i16> @vec_16xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
+define <4 x i16> @vec_16xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
 ;
 ; SI-LABEL: vec_16xi16_extract_4xi16_2:
 ; SI: ; %bb.0:
@@ -723,7 +723,7 @@
 ; GFX9-NEXT: v_perm_b32 v0, v0, v3, s4
 ; GFX9-NEXT: v_perm_b32 v1, v2, v1, s4
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
-  br i1 undef, label %T, label %F
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <16 x i16>, ptr addrspace(1) %p0
@@ -741,7 +741,7 @@
   ret <4 x i16> %r2
 }
 
-define <4 x half> @vec_16xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
+define <4 x half> @vec_16xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
 ;
 ; SI-LABEL: vec_16xf16_extract_4xf16:
 ; SI: ; %bb.0:
@@ -899,7 +899,7 @@
 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1
 ; GFX9-NEXT: v_pack_b32_f16 v1, v5, v6
 ; GFX9-NEXT: s_setpc_b64 s[30:31]
-  br i1 undef, label %T, label %F
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <16 x half>, ptr addrspace(1) %p0
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
@@ -20,8 +20,8 @@
 ; GCN: v_bfe_i32
 ; GCN: v_bfe_i32
 
-define <2 x i16> @extract_2xi16(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
-  br i1 undef, label %T, label %F
+define <2 x i16> @extract_2xi16(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <8 x i16>, ptr addrspace(1) %p0
@@ -41,8 +41,8 @@
 ; GCN-LABEL: extract_2xi64
 ; GCN-COUNT-2: v_cndmask_b32
 
-define <2 x i64> @extract_2xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
-  br i1 undef, label %T, label %F
+define <2 x i64> @extract_2xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <8 x i64>, ptr addrspace(1) %p0
@@ -62,8 +62,8 @@
 ; GCN-LABEL: extract_4xi64
 ; GCN-COUNT-4: v_cndmask_b32
 
-define <4 x i64> @extract_4xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
-  br i1 undef, label %T, label %F
+define <4 x i64> @extract_4xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <8 x i64>, ptr addrspace(1) %p0
@@ -83,8 +83,8 @@
 ; GCN-LABEL: extract_8xi64
 ; GCN-COUNT-8: v_cndmask_b32
 
-define <8 x i64> @extract_8xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
-  br i1 undef, label %T, label %F
+define <8 x i64> @extract_8xi64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <16 x i64>, ptr addrspace(1) %p0
@@ -104,8 +104,8 @@
 ; GCN-LABEL: extract_2xf64
 ; GCN-COUNT-2: v_cndmask_b32
 
-define <2 x double> @extract_2xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
-  br i1 undef, label %T, label %F
+define <2 x double> @extract_2xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <8 x double>, ptr addrspace(1) %p0
@@ -125,8 +125,8 @@
 ; GCN-LABEL: extract_4xf64
 ; GCN-COUNT-4: v_cndmask_b32
 
-define <4 x double> @extract_4xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
-  br i1 undef, label %T, label %F
+define <4 x double> @extract_4xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <8 x double>, ptr addrspace(1) %p0
@@ -146,8 +146,8 @@
 ; GCN-LABEL: extract_8xf64
 ; GCN-COUNT-8: v_cndmask_b32
 
-define <8 x double> @extract_8xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1) {
-  br i1 undef, label %T, label %F
+define <8 x double> @extract_8xf64(ptr addrspace(1) %p0, ptr addrspace(1) %p1, i1 %c0) {
+  br i1 %c0, label %T, label %F
 
 T:
   %t = load volatile <16 x double>, ptr addrspace(1) %p0
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
@@ -5,9 +5,9 @@
 ; SI-LABEL: {{^}}br_implicit_def:
 ; SI: %bb.0:
 ; SI-NEXT: s_cbranch_scc1
-define amdgpu_kernel void @br_implicit_def(ptr addrspace(1) %out, i32 %arg) #0 {
+define amdgpu_kernel void @br_implicit_def(ptr addrspace(1) %out, i32 %arg, i1 %c0) #0 {
 bb:
-  br i1 undef, label %bb1, label %bb2
+  br i1 %c0, label %bb1, label %bb2
 
 bb1:
   store volatile i32 123, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
@@ -42,9 +42,9 @@
 ; SI-LABEL: {{^}}vcopy_i1_undef
 ; SI: v_cndmask_b32_e64
 ; SI: v_cndmask_b32_e64
-define <2 x float> @vcopy_i1_undef(ptr addrspace(1) %p) {
+define <2 x float> @vcopy_i1_undef(ptr addrspace(1) %p, i1 %c0) {
 entry:
-  br i1 undef, label %exit, label %false
+  br i1 %c0, label %exit, label %false
 
 false:
   %x = load <2 x float>, ptr addrspace(1) %p
diff --git a/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll b/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll
--- a/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll
@@ -7,9 +7,9 @@
 ; CHECK-NOT: COPY [[IMPDEF0]]
 ; CHECK-NOT: COPY [[IMPDEF1]]
 ; CHECK: .false:
-define <2 x float> @vcopy_i1_undef(ptr addrspace(1) %p) {
+define <2 x float> @vcopy_i1_undef(ptr addrspace(1) %p, i1 %c0) {
 entry:
-  br i1 undef, label %exit, label %false
+  br i1 %c0, label %exit, label %false
 
 false:
   %x = load <2 x float>, ptr addrspace(1) %p
diff --git a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll
--- a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll
@@ -81,7 +81,7 @@
   ret void
 }
 
-define amdgpu_kernel void @infinite_loops(ptr addrspace(1) %out) {
+define amdgpu_kernel void @infinite_loops(ptr addrspace(1) %out, i1 %c0) {
 ; SI-LABEL: infinite_loops:
 ; SI: ; %bb.0: ; %entry
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -122,7 +122,7 @@
 ; SI-NEXT: s_endpgm
 ; IR-LABEL: @infinite_loops(
 ; IR-NEXT: entry:
-; IR-NEXT: br i1 undef, label [[LOOP1:%.*]], label [[LOOP2:%.*]]
+; IR-NEXT: br i1 [[C0:%.*]], label [[LOOP1:%.*]], label [[LOOP2:%.*]]
 ; IR: loop1:
 ; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
 ; IR-NEXT: br i1 true, label [[LOOP1]], label [[DUMMYRETURNBLOCK:%.*]]
@@ -133,7 +133,7 @@
 ; IR-NEXT: ret void
 ;
 entry:
-  br i1 undef, label %loop1, label %loop2
+  br i1 %c0, label %loop1, label %loop2
 
 loop1:
   store volatile i32 999, ptr addrspace(1) %out, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.ll
--- a/llvm/test/CodeGen/AMDGPU/inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-asm.ll
@@ -299,9 +299,9 @@
 ; Check aggregate types are handled properly.
 ; CHECK-LABEL: mad_u64
 ; CHECK: v_mad_u64_u32
-define void @mad_u64(i32 %x) {
+define void @mad_u64(i32 %x, i1 %c0) {
 entry:
-  br i1 undef, label %exit, label %false
+  br i1 %c0, label %exit, label %false
 
 false:
   %s0 = tail call { i64, i64 } asm sideeffect "v_mad_u64_u32 $0, $1, $2, $3, $4", "=v,=s,v,v,v"(i32 -766435501, i32 %x, i64 0)
diff --git a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll
--- a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll
+++ b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll
@@ -26,7 +26,7 @@
 declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) #1
 
 ; Function Attrs: norecurse
-define internal fastcc void @svm_node_closure_bsdf(ptr addrspace(1) %sd, ptr %stack, <4 x i32> %node, ptr %offset, i32 %0, i8 %trunc, float %1, float %2, float %mul80, i1 %cmp412.old, <4 x i32> %3, float %4, i32 %5, i1 %cmp440, i1 %cmp442, i1 %or.cond1306, float %.op, ptr addrspace(1) %arrayidx.i.i2202, ptr addrspace(1) %retval.0.i.i22089, ptr addrspace(1) %retval.1.i221310, i1 %cmp575, ptr addrspace(1) %num_closure_left.i2215, i32 %6, i1 %cmp.i2216, i32 %7, i64 %idx.ext.i2223, i32 %sub5.i2221) #2 {
+define internal fastcc void @svm_node_closure_bsdf(ptr addrspace(1) %sd, ptr %stack, <4 x i32> %node, ptr %offset, i32 %0, i8 %trunc, float %1, float %2, float %mul80, i1 %cmp412.old, <4 x i32> %3, float %4, i32 %5, i1 %cmp440, i1 %cmp442, i1 %or.cond1306, float %.op, ptr addrspace(1) %arrayidx.i.i2202, ptr addrspace(1) %retval.0.i.i22089, ptr addrspace(1) %retval.1.i221310, i1 %cmp575, ptr addrspace(1) %num_closure_left.i2215, i32 %6, i1 %cmp.i2216, i32 %7, i64 %idx.ext.i2223, i32 %sub5.i2221, i1 %c0, i1 %c1, i1 %c2) #2 {
 ; GCN-LABEL: {{^}}svm_node_closure_bsdf:
 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR:v[0-9]+]], s30,
 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
@@ -39,7 +39,7 @@
 entry:
   %8 = extractelement <4 x i32> %node, i64 0
   %cmp.i.not = icmp eq i32 undef, 0
-  br i1 undef, label %common.ret.critedge, label %cond.true
+  br i1 %c0, label %common.ret.critedge, label %cond.true
 
 cond.true: ; preds = %entry
   %9 = load float, ptr null, align 4
@@ -122,10 +122,10 @@
   br label %if.end627.sink.split
 
 if.else568: ; preds = %if.then413
-  br i1 undef, label %bsdf_alloc.exit2214, label %if.then.i2198
+  br i1 %c1, label %bsdf_alloc.exit2214, label %if.then.i2198
 
 if.then.i2198: ; preds = %if.else568
-  br i1 undef, label %closure_alloc.exit.i2210, label %if.end.i.i2207
+  br i1 %c2, label %closure_alloc.exit.i2210, label %if.end.i.i2207
 
 if.end.i.i2207: ; preds = %if.then.i2198
   %arrayidx.i.i22028 = getelementptr inbounds %struct.ShaderData, ptr addrspace(1) %sd, i64 0, i32 30, i64 undef
diff --git a/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll b/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll
--- a/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll
@@ -4,10 +4,10 @@
 
 @gv = external unnamed_addr addrspace(4) constant [239 x i32], align 4
 
-define amdgpu_kernel void @opencv_cvtfloat_crash(ptr addrspace(1) %out, i32 %x) nounwind {
+define amdgpu_kernel void @opencv_cvtfloat_crash(ptr addrspace(1) %out, i32 %x, i1 %c0) nounwind {
   %val = load i32, ptr addrspace(4) getelementptr ([239 x i32], ptr addrspace(4) @gv, i64 0, i64 239), align 4
   %mul12 = mul nsw i32 %val, 7
-  br i1 undef, label %exit, label %bb
+  br i1 %c0, label %exit, label %bb
 
 bb:
   %cmp = icmp slt i32 %x, 0
diff --git a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
--- a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
@@ -4,7 +4,7 @@
 
 @_RSENC_gDcd_______________________________ = external protected addrspace(1) externally_initialized global [4096 x i8], align 16
 
-define protected amdgpu_kernel void @_RSENC_PRInit__________________________________() local_unnamed_addr #0 {
+define protected amdgpu_kernel void @_RSENC_PRInit__________________________________(i1 %c0) local_unnamed_addr #0 {
 entry:
   %runtimeVersionCopy = alloca [128 x i8], align 16, addrspace(5)
   %licenseVersionCopy = alloca [128 x i8], align 16, addrspace(5)
@@ -18,7 +18,7 @@
   br i1 %cmp13, label %cleanup.cont, label %if.end15
 
 if.end15: ; preds = %if.end
-  br i1 undef, label %cleanup.cont, label %lor.lhs.false17
+  br i1 %c0, label %cleanup.cont, label %lor.lhs.false17
 
 lor.lhs.false17: ; preds = %if.end15
   br label %while.cond.i
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
--- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
@@ -78,12 +78,12 @@
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_call_multi_bb:
 ; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call_multi_bb(ptr addrspace(1) %arg) #0 {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call_multi_bb(ptr addrspace(1) %arg, i1 %c0) #0 {
 bb1:
   %in.1 = load <32 x float>, ptr addrspace(1) %arg
   %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3)
   store <32 x float> %mai.1, ptr addrspace(1) %arg
-  br i1 undef, label %bb2, label %bb3
+  br i1 %c0, label %bb2, label %bb3
 
 bb3:
   br label %bb2
 
 bb2:
diff --git a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
--- a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
@@ -55,11 +55,11 @@
 ; GCN: s_load_dword s
 ; GCN-NOT: global_load_dword
 ; GCN: global_store_dword
-define amdgpu_kernel void @memory_phi_no_clobber(ptr addrspace(1) %arg) {
+define amdgpu_kernel void @memory_phi_no_clobber(ptr addrspace(1) %arg, i1 %c0) {
 ; CHECK-LABEL: @memory_phi_no_clobber(
 ; CHECK-NEXT: bb:
 ; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
-; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
+; CHECK-NEXT: br i1 [[C0:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
 ; CHECK: if.then:
 ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT: br label [[IF_END:%.*]], !amdgpu.uniform !0
@@ -76,7 +76,7 @@
 ;
 bb:
   %i = load i32, ptr addrspace(1) %arg, align 4
-  br i1 undef, label %if.then, label %if.else
+  br i1 %c0, label %if.then, label %if.else
 
 if.then:
   tail call void @llvm.amdgcn.s.barrier()
@@ -101,11 +101,11 @@
 ; GCN: global_store_dword
 ; GCN: global_load_dword
 ; GCN: global_store_dword
-define amdgpu_kernel void @memory_phi_clobber1(ptr addrspace(1) %arg) {
+define amdgpu_kernel void @memory_phi_clobber1(ptr addrspace(1) %arg, i1 %c0) {
 ; CHECK-LABEL: @memory_phi_clobber1(
 ; CHECK-NEXT: bb:
 ; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
-; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
+; CHECK-NEXT: br i1 [[C0:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
 ; CHECK: if.then:
 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 3
 ; CHECK-NEXT: store i32 1, ptr addrspace(1) [[GEP]], align 4
@@ -123,7 +123,7 @@
 ;
 bb:
   %i = load i32, ptr addrspace(1) %arg, align 4
-  br i1 undef, label %if.then, label %if.else
+  br i1 %c0, label %if.then, label %if.else
 
 if.then:
   %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 3
@@ -149,11 +149,11 @@
 ; GCN: s_barrier
 ; GCN: global_load_dword
 ; GCN: global_store_dword
-define amdgpu_kernel void @memory_phi_clobber2(ptr addrspace(1) %arg) {
+define amdgpu_kernel void @memory_phi_clobber2(ptr addrspace(1) %arg, i1 %c0) {
 ; CHECK-LABEL: @memory_phi_clobber2(
 ; CHECK-NEXT: bb:
 ; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
-; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
+; CHECK-NEXT: br i1 [[C0:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
 ; CHECK: if.then:
 ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
 ; CHECK-NEXT: br label [[IF_END:%.*]], !amdgpu.uniform !0
@@ -171,7 +171,7 @@
 ;
 bb:
   %i = load i32, ptr addrspace(1) %arg, align 4
-  br i1 undef, label %if.then, label %if.else
+  br i1 %c0, label %if.then, label %if.else
 
 if.then:
   tail call void @llvm.amdgcn.s.barrier()
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
@@ -13,10 +13,10 @@
 ; CHECK: endif:
 ; CHECK: %phi.ptr = phi ptr addrspace(3) [ %arrayidx0, %if ], [ %arrayidx1, %else ]
 ; CHECK: store i32 0, ptr addrspace(3) %phi.ptr, align 4
-define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b, i1 %c0) #0 {
 entry:
   %alloca = alloca [64 x i32], align 4, addrspace(5)
-  br i1 undef, label %if, label %else
+  br i1 %c0, label %if, label %else
 
 if:
   %arrayidx0 = getelementptr inbounds [64 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
@@ -34,10 +34,10 @@
 
 ; CHECK-LABEL: @branch_ptr_phi_alloca_null_0(
 ; CHECK: %phi.ptr = phi ptr addrspace(3) [ %arrayidx0, %if ], [ null, %entry ]
-define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b, i1 %c0) #0 {
 entry:
   %alloca = alloca [64 x i32], align 4, addrspace(5)
-  br i1 undef, label %if, label %endif
+  br i1 %c0, label %if, label %endif
 
 if:
   %arrayidx0 = getelementptr inbounds [64 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
@@ -51,10 +51,10 @@
 
 ; CHECK-LABEL: @branch_ptr_phi_alloca_null_1(
 ; CHECK: %phi.ptr = phi ptr addrspace(3) [ null, %entry ], [ %arrayidx0, %if ]
-define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b, i1 %c0) #0 {
 entry:
   %alloca = alloca [64 x i32], align 4, addrspace(5)
-  br i1 undef, label %if, label %endif
+  br i1 %c0, label %if, label %endif
 
 if:
   %arrayidx0 = getelementptr inbounds [64 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
@@ -97,10 +97,10 @@
 
 ; CHECK: endif:
 ; CHECK: %phi.ptr = phi ptr addrspace(5) [ %arrayidx0, %if ], [ %arrayidx1, %else ]
 ; CHECK: store i32 0, ptr addrspace(5) %phi.ptr, align 4
-define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b, i1 %c0) #0 {
 entry:
   %alloca = alloca [64 x i32], align 4, addrspace(5)
-  br i1 undef, label %if, label %else
+  br i1 %c0, label %if, label %else
 
 if:
   %arrayidx0 = getelementptr inbounds [64 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
@@ -77,13 +77,13 @@
   ret void
 }
 
-define amdgpu_kernel void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 {
+define amdgpu_kernel void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c, i1 %c0) #0 {
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %ptr0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
   %ptr1 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %b
   store i32 0, ptr addrspace(5) %ptr0
-  br i1 undef, label %bb1, label %bb2
+  br i1 %c0, label %bb1, label %bb2
 
 bb1:
   %ptr2 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %c
diff --git a/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll b/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll
--- a/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll
@@ -6,7 +6,7 @@
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
-define amdgpu_kernel void @reg_coalescer_breaks_dead(ptr addrspace(1) nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3) #1 {
+define amdgpu_kernel void @reg_coalescer_breaks_dead(ptr addrspace(1) nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, i1 %c0) #1 {
 bb:
   %id.x = call i32 @llvm.amdgcn.workitem.id.x()
   %cmp0 = icmp eq i32 %id.x, 0
@@ -18,7 +18,7 @@
 
 bb4: ; preds = %bb6, %bb
   %tmp5 = phi <2 x i32> [ zeroinitializer, %bb ], [ %tmp13, %bb6 ]
-  br i1 undef, label %bb15, label %bb16
+  br i1 %c0, label %bb15, label %bb16
 
 bb6: ; preds = %bb6, %bb3
   %tmp7 = phi <2 x i32> [ zeroinitializer, %bb3 ], [ %tmp13, %bb6 ]
diff --git a/llvm/test/CodeGen/AMDGPU/rename-disconnected-bug.ll b/llvm/test/CodeGen/AMDGPU/rename-disconnected-bug.ll
--- a/llvm/test/CodeGen/AMDGPU/rename-disconnected-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/rename-disconnected-bug.ll
@@ -3,23 +3,23 @@
 ; definition on every path (there should at least be IMPLICIT_DEF instructions).
 target triple = "amdgcn--"
 
-define amdgpu_kernel void @func() {
+define amdgpu_kernel void @func(i1 %c0, i1 %c1, i1 %c2) {
 B0:
-  br i1 undef, label %B1, label %B2
+  br i1 %c0, label %B1, label %B2
 
 B1:
   br label %B2
 
 B2:
   %v0 = phi <4 x float> [ zeroinitializer, %B1 ], [ <float 0.0, float 0.0, float 0.0, float undef>, %B0 ]
-  br i1 undef, label %B20.1, label %B20.2
+  br i1 %c1, label %B20.1, label %B20.2
 
 B20.1:
   br label %B20.2
 
 B20.2:
   %v2 = phi <4 x float> [ zeroinitializer, %B20.1 ], [ %v0, %B2 ]
-  br i1 undef, label %B30.1, label %B30.2
+  br i1 %c2, label %B30.1, label %B30.2
 
 B30.1:
   %sub = fsub <4 x float> %v2, undef
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
@@ -9,7 +9,7 @@
 ; GCN: s_cbranch_scc1
 ; GCN-NOT: s_endpgm
 ; GCN: .Lfunc_end0
-define amdgpu_kernel void @annotate_unreachable_noloop(ptr addrspace(1) noalias nocapture readonly %arg) #0 {
+define amdgpu_kernel void @annotate_unreachable_noloop(ptr addrspace(1) noalias nocapture readonly %arg, i1 %c0) #0 {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
   br label %bb1
@@ -18,7 +18,7 @@
   %tmp2 = sext i32 %tmp to i64
   %tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
   %tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
-  br i1 undef, label %bb5, label %bb3
+  br i1 %c0, label %bb5, label %bb3
 
 bb3: ; preds = %bb1
   %tmp6 = extractelement <4 x float> %tmp4, i32 2
@@ -75,7 +75,7 @@
 ; GCN: s_cbranch_scc1
 ; GCN: s_endpgm
 ; GCN: .Lfunc_end
-define amdgpu_kernel void @uniform_annotate_ret_noloop(ptr addrspace(1) noalias nocapture readonly %arg, i32 %tmp) #0 {
+define amdgpu_kernel void @uniform_annotate_ret_noloop(ptr addrspace(1) noalias nocapture readonly %arg, i32 %tmp, i1 %c0) #0 {
 bb:
   br label %bb1
@@ -83,7 +83,7 @@
   %tmp2 = sext i32 %tmp to i64
   %tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
   %tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
-  br i1 undef, label %bb5, label %bb3
+  br i1 %c0, label %bb5, label %bb3
 
 bb3: ; preds = %bb1
   %tmp6 = extractelement <4 x float> %tmp4, i32 2
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll
@@ -11,7 +11,7 @@
 ; GCN: s_and_saveexec_b64
 ; GCN-NOT: s_endpgm
 ; GCN: .Lfunc_end0
-define amdgpu_kernel void @annotate_unreachable(ptr addrspace(1) noalias nocapture readonly %arg) #0 {
+define amdgpu_kernel void @annotate_unreachable(ptr addrspace(1) noalias nocapture readonly %arg, i1 %c0) #0 {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
   br label %bb1
@@ -20,7 +20,7 @@
   %tmp2 = sext i32 %tmp to i64
   %tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
   %tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
-  br i1 undef, label %bb3, label %bb5 ; label order reversed
+  br i1 %c0, label %bb3, label %bb5 ; label order reversed
 
 bb3: ; preds = %bb1
   %tmp6 = extractelement <4 x float> %tmp4, i32 2
diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll b/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
--- a/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
@@ -7,7 +7,7 @@
 ; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
 ; SI-NOT: v_readlane_b32 [[SAVED]]
 
-define amdgpu_ps void @main() #0 {
+define amdgpu_ps void @main(i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10, i1 %c11) #0 {
 main_body:
   %tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 16, i32 0)
   %tmp1 = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 32, i32 0)
@@ -306,7 +306,7 @@
   br label %ENDIF2564
 
 ELSE2632: ; preds = %ELSE2596
-  br i1 undef, label %ENDIF2594, label %ELSE2650
+  br i1 %c0, label %ENDIF2594, label %ELSE2650
 
 ELSE2650: ; preds = %ELSE2632
   %tmp226 = fcmp oeq float %temp292.11, %tmp110
@@ -315,7 +315,7 @@
   br i1 %tmp228, label %IF2667, label %ELSE2668
 
 IF2667: ; preds = %ELSE2650
-  br i1 undef, label %ENDIF2594, label %ELSE2671
+  br i1 %c1, label %ENDIF2594, label %ELSE2671
 
 ELSE2668: ; preds = %ELSE2650
   %tmp229 = fcmp oeq float %temp292.11, %tmp128
@@ -339,13 +339,13 @@
   br i1 %tmp237, label %ENDIF2594, label %ELSE2740
 
 ELSE2740: ; preds = %ELSE2704
-  br i1 undef, label %IF2757, label %ELSE2758
+  br i1 %c2, label %IF2757, label %ELSE2758
 
 IF2757: ; preds = %ELSE2740
-  br i1 undef, label %ENDIF2594, label %ELSE2761
+  br i1 %c3, label %ENDIF2594, label %ELSE2761
 
 ELSE2758: ; preds = %ELSE2740
-  br i1 undef, label %IF2775, label %ENDIF2594
+  br i1 %c4, label %IF2775, label %ENDIF2594
 
 ELSE2761: ; preds = %IF2757
   br label %ENDIF2594
@@ -355,10 +355,10 @@
   br i1 %tmp238, label %ENDIF2594, label %ELSE2779
 
 ELSE2779: ; preds = %IF2775
-  br i1 undef, label %ENDIF2594, label %ELSE2782
+  br i1 %c5, label %ENDIF2594, label %ELSE2782
 
 ELSE2782: ; preds = %ELSE2779
-  br i1 undef, label %ENDIF2594, label %ELSE2785
+  br i1 %c6, label %ENDIF2594, label %ELSE2785
 
 ELSE2785: ; preds = %ELSE2782
   %tmp239 = fcmp olt float undef, 0.000000e+00
@@ -399,7 +399,7 @@
   br label %LOOP
 
 ELSE2800: ; preds = %ELSE2797
-  br i1 undef, label %ENDIF2795, label %ELSE2803
+  br i1 %c7, label %ENDIF2795, label %ELSE2803
 
 ELSE2803: ; preds = %ELSE2800
   %tmp264 = fsub float %tmp20, undef
@@ -451,16 +451,16 @@
   br i1 %tmp306, label %ENDIF2795, label %ELSE2809
 
 ELSE2809: ; preds = %ELSE2806
-  br i1 undef, label %ENDIF2795, label %ELSE2812
+  br i1 %c8, label %ENDIF2795, label %ELSE2812
 
 ELSE2812: ; preds = %ELSE2809
-  br i1 undef, label %ENDIF2795, label %ELSE2815
+  br i1 %c9, label %ENDIF2795, label %ELSE2815
 
 ELSE2815: ; preds = %ELSE2812
-  br i1 undef, label %ENDIF2795, label %ELSE2818
+  br i1 %c10, label %ENDIF2795, label %ELSE2818
 
 ELSE2818: ; preds = %ELSE2815
-  br i1 undef, label %ENDIF2795, label %ELSE2821
+  br i1 %c11, label %ENDIF2795, label %ELSE2821
 
 ELSE2821: ; preds = %ELSE2818
   %tmp307 = fsub float %tmp56, undef
diff --git a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
--- a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
@@ -19,7 +19,7 @@
 declare float @llvm.fmuladd.f32(float, float, float) #0
 
 ; CHECK: s_endpgm
-define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3) local_unnamed_addr !reqd_work_group_size !0 {
+define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) local_unnamed_addr !reqd_work_group_size !0 {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.y()
   %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -31,7 +31,7 @@
   br label %bb12
 
 bb11: ; preds = %bb30
-  br i1 undef, label %bb37, label %bb38
+  br i1 %c0, label %bb37, label %bb38
 
 bb12: ; preds = %bb30, %bb
   br i1 false, label %.preheader, label %.loopexit145
@@ -43,7 +43,7 @@
   %tmp14 = phi i32 [ %tmp5, %.loopexit145 ], [ %tmp20, %.loopexit ]
   %tmp15 = add nsw i32 %tmp14, -3
   %tmp16 = mul i32 %tmp14, 21
-  br i1 undef, label %bb17, label %.loopexit
+  br i1 %c1, label %bb17, label %.loopexit
 
 bb17: ; preds = %bb13
   %tmp18 = mul i32 %tmp15, 224
@@ -52,7 +52,7 @@
 
 .loopexit: ; preds = %bb21, %bb13
   %tmp20 = add nuw nsw i32 %tmp14, 16
-  br i1 undef, label %bb13, label %bb26
+  br i1 %c2, label %bb13, label %bb26
 
 bb21: ; preds = %bb21, %bb17
   %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ]
@@ -60,7 +60,7 @@
   %tmp24 = getelementptr inbounds float, ptr addrspace(3) @0, i32 %tmp23
   store float undef, ptr addrspace(3) %tmp24, align 4
   %tmp25 = add nuw i32 %tmp22, 8
-  br i1 undef, label %bb21, label %.loopexit
+  br i1 %c3, label %bb21, label %.loopexit
 
 bb26: ; preds = %.loopexit
   br label %bb31
@@ -72,7 +72,7 @@
   br i1 %tmp29, label %.preheader, label %.loopexit145
 
 bb30: ; preds = %bb31
-  br i1 undef, label %bb11, label %bb12
+  br i1 %c4, label %bb11, label %bb12
 
 bb31: ; preds = %bb31, %bb26
   %tmp32 = phi i32 [ %tmp9, %bb26 ], [ undef, %bb31 ]
@@ -80,7 +80,7 @@
   %tmp34 = load float, ptr addrspace(3) %tmp33, align 4
   %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float undef, float undef)
   %tmp36 = tail call float @llvm.fmuladd.f32(float undef, float undef, float %tmp35)
-  br i1 undef, label %bb30, label %bb31
+  br i1 %c5, label %bb30, label %bb31
 
 bb37: ; preds = %bb11
   br label %bb38
diff --git a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
--- a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
@@ -9,10 +9,10 @@
 ; GCN: image_sample
 ; GCN: exp null
 
-define amdgpu_ps float @sinking_img_sample() {
+define amdgpu_ps float @sinking_img_sample(i1 %c0) {
 main_body:
   %i = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
-  br i1 undef, label %endif1, label %if1
+  br i1 %c0, label %endif1, label %if1
 
 if1: ; preds = %main_body
   call void @llvm.amdgcn.kill(i1 false) #4
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -1137,7 +1137,7 @@
 }
 
 ; bug 28550
-define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
+define amdgpu_ps void @phi_use_def_before_kill(float inreg %x, i1 %c0) #0 {
 ; SI-LABEL: phi_use_def_before_kill:
 ; SI: ; %bb.0: ; %bb
 ; SI-NEXT: v_add_f32_e64 v1, s0, 1.0
@@ -1269,7 +1269,7 @@
   %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
   %cmp.tmp2 = fcmp olt float %tmp2, 0.0
   call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
-  br i1 undef, label %phibb, label %bb8
+  br i1 %c0, label %phibb, label %bb8
 
 phibb:
   %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll
--- a/llvm/test/CodeGen/AMDGPU/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd.ll
@@ -686,9 +686,9 @@
 ; GCN: buffer_load_dword v0, v0,
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: ; return to shader part epilog
-define amdgpu_cs float @arg_divergence(i32 inreg %unused, <3 x i32> %arg4) #0 {
+define amdgpu_cs float @arg_divergence(i32 inreg %unused, <3 x i32> %arg4, i1 %c0) #0 {
 main_body:
-  br i1 undef, label %if1, label %endif1
+  br i1 %c0, label %if1, label %endif1
 
 if1: ; preds = %main_body
   store i32 0, ptr addrspace(3) undef, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/split-smrd.ll b/llvm/test/CodeGen/AMDGPU/split-smrd.ll
--- a/llvm/test/CodeGen/AMDGPU/split-smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-smrd.ll
@@ -6,11 +6,11 @@
 ; GCN-LABEL: {{^}}split_smrd_add_worklist:
 ; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1
 
-define amdgpu_ps void @split_smrd_add_worklist(ptr addrspace(4) inreg %arg) #0 {
+define amdgpu_ps void @split_smrd_add_worklist(ptr addrspace(4) inreg %arg, i1 %c0) #0 {
 bb:
   %tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 96, i32 0)
   %tmp1 = bitcast float %tmp to i32
-  br i1 undef, label %bb2, label %bb3
+  br i1 %c0, label %bb2, label %bb3
 
 bb2: ; preds = %bb
   unreachable
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
@@ -2,19 +2,19 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL:{{^}}row_filter_C1_D0:
-define amdgpu_kernel void @row_filter_C1_D0() #0 {
+define amdgpu_kernel void @row_filter_C1_D0(i1 %c0, i1 %c1, i1 %c2) #0 {
 entry:
-  br i1 undef, label %for.inc.1, label %do.body.preheader
+  br i1 %c0, label %for.inc.1, label %do.body.preheader
 
 do.body.preheader: ; preds = %entry
   %tmp = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
-  br i1 undef, label %do.body56.1, label %do.body90
+  br i1 %c1, label %do.body56.1, label %do.body90
 
 do.body90: ; preds = %do.body56.2, %do.body56.1, %do.body.preheader
   %tmp1 = phi <4 x i32> [ %tmp6, %do.body56.2 ], [ %tmp5, %do.body56.1 ], [ %tmp, %do.body.preheader ]
   %tmp2 = insertelement <4 x i32> %tmp1, i32 undef, i32 2
   %tmp3 = insertelement <4 x i32> %tmp2, i32 undef, i32 3
-  br i1 undef, label %do.body124.1, label %do.body.1562.preheader
+  br i1 %c2, label %do.body124.1, label %do.body.1562.preheader
 
 do.body.1562.preheader: ; preds = %do.body124.1, %do.body90
   %storemerge = phi <4 x i32> [ %tmp3, %do.body90 ], [ %tmp7, %do.body124.1 ]
@@ -42,23 +42,23 @@
 
 ; GCN-LABEL: {{^}}foo:
 ; GCN: s_endpgm
-define amdgpu_ps void @foo() #0 {
+define amdgpu_ps void @foo(i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6) #0 {
 bb:
-  br i1 undef, label %bb2, label %bb1
+  br i1 %c0, label %bb2, label %bb1
 
 bb1: ; preds = %bb
-  br i1 undef, label %bb4, label %bb6
+  br i1 %c1, label %bb4, label %bb6
 
 bb2: ; preds = %bb4, %bb
   %tmp = phi float [ %tmp5, %bb4 ], [ 0.000000e+00, %bb ]
-  br i1 undef, label %bb9, label %bb13
+  br i1 %c2, label %bb9, label %bb13
 
 bb4: ; preds = %bb7, %bb6, %bb1
   %tmp5 = phi float [ undef, %bb1 ], [ undef, %bb6 ], [ %tmp8, %bb7 ]
   br label %bb2
 
 bb6: ; preds = %bb1
-  br i1 undef, label %bb7, label %bb4
+  br i1 %c3, label %bb7, label %bb4
 
 bb7: ; preds = %bb6
   %tmp8 = fmul float undef, undef
@@ -71,7 +71,7 @@
   br label %bb14
 
 bb13: ; preds = %bb2
-  br i1 undef, label %bb23, label %bb24
+  br i1 %c4, label %bb23, label %bb24
 
 bb14: ; preds = %bb27, %bb24, %bb9
   %tmp15 = phi float [ %tmp12, %bb9 ], [ undef, %bb27 ], [ 0.000000e+00, %bb24 ]
@@ -82,11 +82,11 @@
   ret void
 
 bb23: ; preds = %bb13
-  br i1 undef, label %bb24, label %bb26
+  br i1 %c5, label %bb24, label %bb26
 
 bb24: ; preds = %bb26, %bb23, %bb13
   %tmp25 = phi float [ %tmp, %bb13 ], [ %tmp, %bb26 ], [ 0.000000e+00, %bb23 ]
-  br i1 undef, label %bb27, label %bb14
+  br i1 %c6, label %bb27, label %bb14
 
 bb26: ; preds = %bb23
   br label %bb24
diff --git a/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll
--- a/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll
+++ b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
-define void @test() #1 {
+define void @test(i1 %c0) #1 {
 ; Clean up the unreachable blocks introduced with LowerSwitch pass.
 ; This test ensures that, in the pass flow, UnreachableBlockElim pass
 ; follows the LowerSwitch. Otherwise, this testcase will crash
@@ -22,7 +22,7 @@
 ; GCN: bb.{{[0-9]+}}.UnifiedReturnBlock:
 entry:
   %idx = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  br i1 undef, label %entry.true.blk, label %entry.false.blk
+  br i1 %c0, label %entry.true.blk, label %entry.false.blk
 
 entry.true.blk: ; preds = %entry
   %exit.cmp = icmp ult i32 %idx, 3
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
--- a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
+++ b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
@@ -5,7 +5,7 @@
 ; We may have subregister live ranges that are undefined on some paths. The
 ; verifier should not complain about this.
 
-define amdgpu_kernel void @func() #0 {
+define amdgpu_kernel void @func(i1 %c0, i1 %c1) #0 {
 ; CHECK-LABEL: func:
 ; CHECK: ; %bb.0: ; %B0
 ; CHECK-NEXT: s_mov_b32 s0, 0
@@ -18,14 +18,14 @@
 ; CHECK-NEXT: ds_write_b32 v0, v0
 ; CHECK-NEXT: s_endpgm
 B0:
-  br i1 undef, label %B1, label %B2
+  br i1 %c0, label %B1, label %B2
 
 B1:
   br label %B2
 
 B2:
   %v0 = phi <4 x float> [ zeroinitializer, %B1 ], [ <float 0.0, float 0.0, float 0.0, float undef>, %B0 ]
-  br i1 undef, label %B30.1, label %B30.2
+  br i1 %c1, label %B30.1, label %B30.2
 
 B30.1:
   %sub = fsub <4 x float> %v0, undef
diff --git a/llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll b/llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll
--- a/llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll
+++ b/llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll
@@ -5,7 +5,7 @@
 ; SI hits an assertion at -O0, evergreen hits a not implemented unreachable.
 ; COMMON-LABEL: {{^}}branch_true:
-define amdgpu_kernel void @branch_true(ptr addrspace(1) nocapture %main, i32 %main_stride) #0 {
+define amdgpu_kernel void @branch_true(ptr addrspace(1) nocapture %main, i32 %main_stride, i1 %c0) #0 {
 entry:
   br i1 true, label %for.end, label %for.body.lr.ph
@@ -27,7 +27,7 @@
   %add.ptr3 = getelementptr inbounds i8, ptr addrspace(1) %main.addr.011, i32 %add.ptr4.sum
   %4 = load i32, ptr addrspace(1) %add.ptr3, align 4
   %add.ptr6 = getelementptr inbounds i8, ptr addrspace(1) %main.addr.011, i32 undef
-  br i1 undef, label %for.end, label %for.body
+  br i1 %c0, label %for.end, label %for.body
 
 for.end: ; preds = %for.body, %entry
   ret void
@@ -36,7 +36,7 @@
 ; COMMON-LABEL: {{^}}branch_false:
 ; SI: s_cbranch_scc1
 ; SI: s_endpgm
-define amdgpu_kernel void @branch_false(ptr addrspace(1) nocapture %main, i32 %main_stride) #0 {
+define amdgpu_kernel void @branch_false(ptr addrspace(1) nocapture %main, i32 %main_stride, i1 %c0) #0 {
 entry:
   br i1 false, label %for.end, label %for.body.lr.ph
@@ -58,19 +58,19 @@
   %add.ptr3 = getelementptr inbounds i8, ptr addrspace(1) %main.addr.011, i32 %add.ptr4.sum
   %4 = load i32, ptr addrspace(1) %add.ptr3, align 4
   %add.ptr6 = getelementptr inbounds i8, ptr addrspace(1) %main.addr.011, i32 undef
-  br i1 undef, label %for.end, label %for.body
+  br i1 %c0, label %for.end, label %for.body
 
 for.end: ; preds = %for.body, %entry
   ret void
 }
 
 ; COMMON-LABEL: {{^}}branch_undef:
 ; SI: s_cbranch_scc1
 ; SI: s_cbranch_scc1
 ; SI: s_endpgm
-define amdgpu_kernel void @branch_undef(ptr addrspace(1) nocapture %main, i32 %main_stride) #0 {
+define amdgpu_kernel void @branch_undef(ptr addrspace(1) nocapture %main, i32 %main_stride, i1 %c0, i1 %c1) #0 {
 entry:
-  br i1 undef, label %for.end, label %for.body.lr.ph
+  br i1 %c0, label %for.end, label %for.body.lr.ph
 
 for.body.lr.ph: ; preds = %entry
   %add.ptr.sum = shl i32 %main_stride, 1
@@ -90,7 +90,7 @@
   %add.ptr3 = getelementptr inbounds i8, ptr addrspace(1) %main.addr.011, i32 %add.ptr4.sum
   %4 = load i32, ptr addrspace(1) %add.ptr3, align 4
   %add.ptr6 = getelementptr inbounds i8, ptr addrspace(1) %main.addr.011, i32 undef
-  br i1 undef, label %for.end, label %for.body
+  br i1 %c1, label %for.end, label %for.body
 
 for.end: ; preds = %for.body, %entry
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
--- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -1134,7 +1134,7 @@
   ret void
 }
 
-define void @move_to_valu_vgpr_operand_phi(ptr addrspace(3) %out) {
+define void @move_to_valu_vgpr_operand_phi(ptr addrspace(3) %out, i1 %c0) {
 ; SI-LABEL: move_to_valu_vgpr_operand_phi:
 ; SI: ; %bb.0: ; %bb0
 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1187,7 +1187,7 @@
   %tmp0 = phi i32 [ 8, %bb0 ], [ %tmp4, %bb3 ]
   %tmp1 = add nsw i32 %tmp0, -1
   %tmp2 = getelementptr inbounds i32, ptr addrspace(3) %out, i32 %tmp1
-  br i1 undef, label %bb2, label %bb3
+  br i1 %c0, label %bb2, label %bb3
 
 bb2: ; preds = %bb1
   store volatile i32 1, ptr addrspace(3) %tmp2, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-crash.ll b/llvm/test/CodeGen/AMDGPU/uniform-crash.ll
--- a/llvm/test/CodeGen/AMDGPU/uniform-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-crash.ll
@@ -25,7 +25,7 @@
 ; GCN: {{^}}[[LOOP:.L[A-Z0-9_]+]]:
 ; GCN: s_cbranch_scc1 [[LOOP]]
 ; GCN: {{^}}[[BB0]]:
-define amdgpu_kernel void @fix_sgpr_live_ranges_crash(i32 %arg, i32 %arg1) {
+define amdgpu_kernel void @fix_sgpr_live_ranges_crash(i32 %arg, i32 %arg1, i1 %c0) {
 bb:
   %cnd = trunc i32 %arg to i1
   br i1 %cnd, label %bb2, label %bb5
@@ -45,7 +45,7 @@
   br i1 %tmp10, label %bb11, label %bb12
 
 bb11: ; preds = %bb11, %bb5
-  br i1 undef, label %bb11, label %bb12
+  br i1 %c0, label %bb11, label %bb12
 
 bb12: ; preds = %bb11, %bb5
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll b/llvm/test/CodeGen/AMDGPU/v1024.ll
--- a/llvm/test/CodeGen/AMDGPU/v1024.ll
+++ b/llvm/test/CodeGen/AMDGPU/v1024.ll
@@ -6,11 +6,11 @@
 ; GCN-NOT: v_accvgpr
 ; GCN-COUNT-32: v_mov_b32_e32
 ; GCN-NOT: v_accvgpr
-define amdgpu_kernel void @test_v1024() {
+define amdgpu_kernel void @test_v1024(i1 %c0) {
 entry:
   %alloca = alloca <32 x i32>, align 16, addrspace(5)
   call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 128, i1 false)
-  br i1 undef, label %if.then.i.i, label %if.else.i
+  br i1 %c0, label %if.then.i.i, label %if.else.i
 
 if.then.i.i: ; preds = %entry
   call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 16 %alloca, ptr addrspace(5) align 4 undef, i64 128, i1 false)
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -460,7 +460,7 @@
   ret float %r2
 }
 
-define amdgpu_kernel void @livevariables_update_missed_block(ptr addrspace(1) %src1) {
+define amdgpu_kernel void @livevariables_update_missed_block(ptr addrspace(1) %src1, i1 %c0) {
 ; SI-LABEL: name: livevariables_update_missed_block
 ; SI: bb.0.entry:
 ; SI-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
@@ -541,7 +541,7 @@
   ret void
 
 if.then9: ; preds = %entry
-  br i1 undef, label %sw.bb18, label %sw.bb
+  br i1 %c0, label %sw.bb18, label %sw.bb
 
 sw.bb: ; preds = %if.then9
   %i17 = load i8, ptr addrspace(1) null, align 1
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -891,7 +891,7 @@
 ; GCN-LABEL: {{^}}test_branch_true:
 ; GFX1032: s_mov_b32 vcc_lo, exec_lo
 ; GFX1064: s_mov_b64 vcc, exec
-define amdgpu_kernel void @test_branch_true() #2 {
+define amdgpu_kernel void @test_branch_true(i1 %c0) #2 {
 entry:
   br i1 true, label %for.end, label %for.body.lr.ph
@@ -899,7 +899,7 @@
   br label %for.body
 
 for.body: ; preds = %for.body, %for.body.lr.ph
-  br i1 undef, label %for.end, label %for.body
+  br i1 %c0, label %for.end, label %for.body
 
 for.end: ; preds = %for.body, %entry
   ret void
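
Every hunk above applies the same mechanical rewrite: the function gains a fresh `i1` argument and the branch tests that argument instead of `undef`, so the condition is a well-defined but opaque value. A minimal sketch of the pattern in isolation (hypothetical function and block names, not taken from any test in this patch):

; Before: branching on undef is legal IR, but optimizers may fold the
; branch either way, so the control flow the test depends on can vanish.
define void @sketch_before() {
entry:
  br i1 undef, label %left, label %right

left:
  ret void

right:
  ret void
}

; After: %c0 is an incoming value the optimizer cannot constant-fold,
; so both successors stay reachable and the tested CFG shape survives.
define void @sketch_after(i1 %c0) {
entry:
  br i1 %c0, label %left, label %right

left:
  ret void

right:
  ret void
}

Passing the condition as an argument (rather than, say, loading it from memory) keeps each test's instruction count and addressing patterns unchanged, which is why the FileCheck bodies in these diffs are largely untouched.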