// Patch summary (leading text before the first "diff --git" header is skipped by patch tools):
//  * GCNNSAReassign.cpp: when an address operand's live interval is empty, skip it when
//    updating MinInd/MaxInd instead of calling beginIndex()/endIndex() on it; when I == 0
//    both bounds are seeded from LIS->getInstructionIndex(*MI).
//  * llvm.amdgcn.image.nsa.ll: adds tests with undef address inputs that used to crash.
diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp --- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp @@ -286,8 +286,15 @@ } Intervals.push_back(LI); OrigRegs.push_back(VRM->getPhys(Reg)); - MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex(); - MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex(); + if (LI->empty()) { + // Empty interval: the address input is undef and adds nothing to the + // range. When I == 0, seed Min/MaxInd with this instruction's index. + if (I == 0) + MinInd = MaxInd = LIS->getInstructionIndex(*MI); + continue; + } + MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex(); + MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex(); } if (Intervals.empty()) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll @@ -80,6 +80,45 @@ ret <2 x float> %r } +; Test that undef inputs with NSA are handled safely; these tests used to crash. 
+ +; GCN-LABEL: {{^}}sample_undef_undef_undef_undef: +; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY +define amdgpu_ps float @sample_undef_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp) { + %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float undef, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + ret float %r +} + +; GCN-LABEL: {{^}}sample_undef_undef_undef_def: +; NONSA: v_mov_b32_e32 v3, v0 +; NONSA: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY +; NSA: image_sample_c_b v0, [v0, v0, v0, v0], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY +define amdgpu_ps float @sample_undef_undef_undef_def(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %layer) { + %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float %layer, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + ret float %r +} + +; GCN-LABEL: {{^}}sample_undef_undef_undef_def_rnd: +; GCN: v_rndne_f32_e32 v3, v0 +; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY +define amdgpu_ps float @sample_undef_undef_undef_def_rnd(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %layer) { + %layer_rnd = call float @llvm.rint.f32(float %layer) + %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float %layer_rnd, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + ret float %r +} + +; GCN-LABEL: {{^}}sample_def_undef_undef_undef: +; GCN: v_add_f32_e32 v0, 1.0, v0 +; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY +define amdgpu_ps float @sample_def_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %z0) { + ; The NSA reassign pass is conservative (quite reasonably!) 
when one of the operands + ; comes directly from a function argument (via COPY). To test that NSA can be + ; eliminated in the presence of undef, route the defined operand through an + ; arbitrary intermediate computation (the fadd below). + %c0 = fadd float %z0, 1.0 + %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float %c0, float undef, float undef, float undef, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) + ret float %r +} declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 @@ -88,4 +127,8 @@ declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare float @llvm.rint.f32(float) #2 +declare float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone speculatable willreturn }