Index: llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.ll
@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s
+
+; Regression test: llc used to crash here because the bundle produced to hint
+; register allocation into forming soft clauses was unallocatable.
+
+define amdgpu_vs void @main(<8 x i32> addrspace(6)* inreg noalias align 32 dereferenceable(18446744073709551615) %arg, <4 x i32> inreg %arg1, i32 %arg2) #0 {
+; CHECK-LABEL: main:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    s_mov_b32 s20, s1
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    s_mov_b32 s21, s2
+; CHECK-NEXT:    s_load_dwordx8 s[52:59], s[0:1], 0x4c0
+; CHECK-NEXT:    s_mov_b32 s2, 0.5
+; CHECK-NEXT:    s_mov_b32 s22, s3
+; CHECK-NEXT:    s_mov_b32 s3, s2
+; CHECK-NEXT:    s_clause 0x1
+; CHECK-NEXT:    s_load_dwordx8 s[44:51], s[0:1], 0x440
+; CHECK-NEXT:    s_load_dwordx4 s[24:27], s[0:1], 0x0
+; CHECK-NEXT:    v_mov_b32_e32 v6, s3
+; CHECK-NEXT:    v_mov_b32_e32 v5, s2
+; CHECK-NEXT:    s_movk_i32 s2, 0x4b0
+; CHECK-NEXT:    s_mov_b32 s3, s1
+; CHECK-NEXT:    s_load_dwordx8 s[36:43], s[0:1], 0x480
+; CHECK-NEXT:    s_load_dwordx4 s[28:31], s[2:3], 0x0
+; CHECK-NEXT:    s_movk_i32 s2, 0x4f0
+; CHECK-NEXT:    s_mov_b32 s23, s4
+; CHECK-NEXT:    s_load_dwordx4 s[88:91], s[2:3], 0x0
+; CHECK-NEXT:    s_movk_i32 s2, 0x530
+; CHECK-NEXT:    s_movk_i32 s4, 0x5b0
+; CHECK-NEXT:    s_mov_b32 s5, s1
+; CHECK-NEXT:    s_clause 0x1
+; CHECK-NEXT:    s_load_dwordx8 s[68:75], s[0:1], 0x0
+; CHECK-NEXT:    s_load_dwordx8 s[76:83], s[0:1], 0x5c0
+; CHECK-NEXT:    s_mov_b32 s8, s1
+; CHECK-NEXT:    s_mov_b32 s9, s1
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    v_writelane_b32 v15, s52, 0
+; CHECK-NEXT:    s_mov_b32 s10, s1
+; CHECK-NEXT:    s_mov_b32 s11, s1
+; CHECK-NEXT:    s_mov_b32 s12, s1
+; CHECK-NEXT:    s_mov_b32 s13, s1
+; CHECK-NEXT:    v_writelane_b32 v15, s53, 1
+; CHECK-NEXT:    s_mov_b32 s14, s1
+; CHECK-NEXT:    s_mov_b32 s15, s1
+; CHECK-NEXT:    s_mov_b32 s16, s1
+; CHECK-NEXT:    s_mov_b32 s17, s1
+; CHECK-NEXT:    v_writelane_b32 v15, s54, 2
+; CHECK-NEXT:    s_mov_b32 s18, s1
+; CHECK-NEXT:    s_mov_b32 s19, s1
+; CHECK-NEXT:    v_writelane_b32 v15, s55, 3
+; CHECK-NEXT:    v_writelane_b32 v15, s56, 4
+; CHECK-NEXT:    v_writelane_b32 v15, s57, 5
+; CHECK-NEXT:    v_writelane_b32 v15, s58, 6
+; CHECK-NEXT:    v_writelane_b32 v15, s59, 7
+; CHECK-NEXT:    s_load_dwordx8 s[52:59], s[0:1], 0x500
+; CHECK-NEXT:    s_load_dwordx4 s[92:95], s[2:3], 0x0
+; CHECK-NEXT:    s_load_dwordx8 s[60:67], s[0:1], 0x540
+; CHECK-NEXT:    s_movk_i32 s0, 0x5f0
+; CHECK-NEXT:    s_movk_i32 s2, 0x570
+; CHECK-NEXT:    s_load_dwordx4 s[96:99], s[4:5], 0x0
+; CHECK-NEXT:    s_nop 0
+; CHECK-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x0
+; CHECK-NEXT:    s_load_dwordx4 s[84:87], s[2:3], 0x0
+; CHECK-NEXT:    image_sample_lz v7, v[5:6], s[12:19], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; CHECK-NEXT:    image_sample_lz v8, v[5:6], s[44:51], s[24:27] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; CHECK-NEXT:    image_sample_lz v9, v[5:6], s[36:43], s[28:31] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; CHECK-NEXT:    v_readlane_b32 s8, v15, 0
+; CHECK-NEXT:    v_readlane_b32 s9, v15, 1
+; CHECK-NEXT:    v_readlane_b32 s10, v15, 2
+; CHECK-NEXT:    v_readlane_b32 s11, v15, 3
+; CHECK-NEXT:    v_readlane_b32 s12, v15, 4
+; CHECK-NEXT:    v_readlane_b32 s13, v15, 5
+; CHECK-NEXT:    v_readlane_b32 s14, v15, 6
+; CHECK-NEXT:    v_readlane_b32 s15, v15, 7
+; CHECK-NEXT:    s_nop 4
+; CHECK-NEXT:    image_sample_lz v10, v[5:6], s[8:15], s[88:91] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    image_sample_lz v11, v[5:6], s[52:59], s[92:95] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; CHECK-NEXT:    image_sample_lz v12, v[5:6], s[68:75], s[96:99] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; CHECK-NEXT:    image_sample_lz v14, v[5:6], s[76:83], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; CHECK-NEXT:    image_sample_lz v13, v[5:6], s[60:67], s[84:87] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; CHECK-NEXT:    buffer_load_format_xyzw v[1:4], v0, s[20:23], 0 idxen
+; CHECK-NEXT:    s_waitcnt vmcnt(7)
+; CHECK-NEXT:    v_max_f32_e32 v0, v7, v8
+; CHECK-NEXT:    s_waitcnt vmcnt(5)
+; CHECK-NEXT:    v_max3_f32 v0, v0, v9, v10
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_add_f32_e32 v1, 0xbf6dd2f2, v12
+; CHECK-NEXT:    v_add_f32_e32 v3, 0xbef8d4fe, v14
+; CHECK-NEXT:    v_max3_f32 v0, v0, v11, v13
+; CHECK-NEXT:    v_max3_f32 v0, v0, v1, v3
+; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x3d4ccccd, v0
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
+; CHECK-NEXT:    exp pos0 v0, v2, v0, v0 done vm
+; CHECK-NEXT:    s_endpgm
+bb:
+  %i = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %arg1, i32 %arg2, i32 0, i32 0, i32 0) #4 ; buffer load using %arg1 and %arg2; only element 1 of the result is used (second export operand)
+  %i3 = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float 5.000000e-01, float 5.000000e-01, <8 x i32> undef, <4 x i32> zeroinitializer, i1 false, i32 0, i32 0) #4 ; sample 0: undef image resource, all-zero sampler
+  %i4 = extractelement <4 x float> %i3, i32 0
+  %i5 = getelementptr inbounds <8 x i32>, <8 x i32> addrspace(6)* %arg, i32 34 ; element 34 of %arg: 34 * 32 bytes = byte offset 0x440
+  %i6 = load <8 x i32>, <8 x i32> addrspace(6)* %i5, align 32
+  %i7 = load <4 x i32>, <4 x i32> addrspace(6)* undef, align 16 ; sampler loaded through an undef pointer
+  %i8 = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float 5.000000e-01, float 5.000000e-01, <8 x i32> %i6, <4 x i32> %i7, i1 false, i32 0, i32 0) #4 ; sample 1
+  %i9 = extractelement <4 x float> %i8, i32 0
+  %i10 = call float @llvm.maxnum.f32(float %i4, float %i9) #4 ; start of the running maximum over element 0 of each sample
+  %i11 = getelementptr inbounds <8 x i32>, <8 x i32> addrspace(6)* %arg, i32 36 ; 36 * 32 bytes = byte offset 0x480
+  %i12 = load <8 x i32>, <8 x i32> addrspace(6)* %i11, align 32
+  %i13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(6)* null, i32 75 ; 75 * 16 bytes = byte offset 0x4b0 from null
+  %i14 = load <4 x i32>, <4 x i32> addrspace(6)* %i13, align 16
+  %i15 = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float 5.000000e-01, float 5.000000e-01, <8 x i32> %i12, <4 x i32> %i14, i1 false, i32 0, i32 0) #4 ; sample 2
+  %i16 = extractelement <4 x float> %i15, i32 0
+  %i17 = call float @llvm.maxnum.f32(float %i10, float %i16) #4
+  %i18 = getelementptr inbounds <8 x i32>, <8 x i32> addrspace(6)* %arg, i32 38 ; 38 * 32 bytes = byte offset 0x4c0
+  %i19 = load <8 x i32>, <8 x i32> addrspace(6)* %i18, align 32
+  %i20 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(6)* null, i32 79 ; 79 * 16 bytes = byte offset 0x4f0 from null
+  %i21 = load <4 x i32>, <4 x i32> addrspace(6)* %i20, align 16
+  %i22 = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float 5.000000e-01, float 5.000000e-01, <8 x i32> %i19, <4 x i32> %i21, i1 false, i32 0, i32 0) #4 ; sample 3
+  %i23 = extractelement <4 x float> %i22, i32 0
+  %i24 = call float @llvm.maxnum.f32(float %i17, float %i23) #4
+  %i25 = getelementptr inbounds <8 x i32>, <8 x i32> addrspace(6)* %arg, i32 40 ; 40 * 32 bytes = byte offset 0x500
+  %i26 = load <8 x i32>, <8 x i32> addrspace(6)* %i25, align 32
+  %i27 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(6)* null, i32 83 ; 83 * 16 bytes = byte offset 0x530 from null
+  %i28 = load <4 x i32>, <4 x i32> addrspace(6)* %i27, align 16
+  %i29 = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float 5.000000e-01, float 5.000000e-01, <8 x i32> %i26, <4 x i32> %i28, i1 false, i32 0, i32 0) #4 ; sample 4
+  %i30 = extractelement <4 x float> %i29, i32 0
+  %i31 = call float @llvm.maxnum.f32(float %i24, float %i30) #4
+  %i32 = getelementptr inbounds <8 x i32>, <8 x i32> addrspace(6)* %arg, i32 42 ; 42 * 32 bytes = byte offset 0x540
+  %i33 = load <8 x i32>, <8 x i32> addrspace(6)* %i32, align 32
+  %i34 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(6)* null, i32 87 ; 87 * 16 bytes = byte offset 0x570 from null
+  %i35 = load <4 x i32>, <4 x i32> addrspace(6)* %i34, align 16
+  %i36 = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float 5.000000e-01, float 5.000000e-01, <8 x i32> %i33, <4 x i32> %i35, i1 false, i32 0, i32 0) #4 ; sample 5
+  %i37 = extractelement <4 x float> %i36, i32 0
+  %i38 = call float @llvm.maxnum.f32(float %i31, float %i37) #4
+  %i39 = load <8 x i32>, <8 x i32> addrspace(6)* undef, align 32 ; image resource loaded through an undef pointer
+  %i40 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(6)* null, i32 91 ; 91 * 16 bytes = byte offset 0x5b0 from null
+  %i41 = load <4 x i32>, <4 x i32> addrspace(6)* %i40, align 16
+  %i42 = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float 5.000000e-01, float 5.000000e-01, <8 x i32> %i39, <4 x i32> %i41, i1 false, i32 0, i32 0) #4 ; sample 6
+  %i43 = extractelement <4 x float> %i42, i32 0
+  %i44 = fadd float %i43, 0xBFEDBA5E40000000 ; the constant has single-precision bit pattern 0xbf6dd2f2
+  %i45 = call float @llvm.maxnum.f32(float %i38, float %i44) #4
+  %i46 = getelementptr inbounds <8 x i32>, <8 x i32> addrspace(6)* %arg, i32 46 ; 46 * 32 bytes = byte offset 0x5c0
+  %i47 = load <8 x i32>, <8 x i32> addrspace(6)* %i46, align 32
+  %i48 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(6)* null, i32 95 ; 95 * 16 bytes = byte offset 0x5f0 from null
+  %i49 = load <4 x i32>, <4 x i32> addrspace(6)* %i48, align 16
+  %i50 = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float 5.000000e-01, float 5.000000e-01, <8 x i32> %i47, <4 x i32> %i49, i1 false, i32 0, i32 0) #4 ; sample 7
+  %i51 = extractelement <4 x float> %i50, i32 0
+  %i52 = fadd float %i51, 0xBFDF1A9FC0000000 ; the constant has single-precision bit pattern 0xbef8d4fe
+  %i53 = call float @llvm.maxnum.f32(float %i45, float %i52) #4 ; final maximum over all eight samples
+  %i54 = fcmp olt float %i53, 0x3FA99999A0000000 ; the threshold has single-precision bit pattern 0x3d4ccccd
+  %i55 = select i1 %i54, float 1.000000e+00, float 0.000000e+00 ; 1.0 when the maximum is below the threshold, otherwise 0.0
+  %i56 = extractelement <4 x float> %i, i32 1
+  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %i55, float %i56, float undef, float undef, i1 true, i1 true) #0 ; target 12 prints as pos0 in the expected output; the last two components are undef
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #1 ; called once, at the top of @main
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 ; called eight times in @main
+declare float @llvm.fabs.f32(float) #2 ; NOTE(review): not called anywhere in this file; candidate for removal
+declare float @llvm.maxnum.f32(float, float) #2 ; folds the eight sample results into one maximum in @main
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3 ; called once, as the last instruction before ret in @main
+
+attributes #0 = { nounwind } ; used by the @main definition and by the export call inside it
+attributes #1 = { nounwind readonly willreturn } ; used by the buffer-load and image-sample declarations
+attributes #2 = { nofree nosync nounwind readnone speculatable willreturn } ; used by the fabs and maxnum declarations
+attributes #3 = { inaccessiblememonly nounwind willreturn writeonly } ; used by the export declaration
+attributes #4 = { nounwind readnone } ; used at the buffer-load, image-sample and maxnum call sites in @main