Index: lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- lib/Transforms/InstCombine/InstCombineInternal.h
+++ lib/Transforms/InstCombine/InstCombineInternal.h
@@ -800,8 +800,7 @@
 
   Value *simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
                                                APInt DemandedElts,
-                                               int DmaskIdx = -1,
-                                               int TFCIdx = -1);
+                                               int DmaskIdx = -1);
 
   Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                     APInt &UndefElts, unsigned Depth = 0);
Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -966,25 +966,16 @@
 }
 
 /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
+///
+/// Note: This only supports non-TFE/LWE image intrinsic calls; TFE/LWE calls
+/// have struct returns.
 Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
                                                            APInt DemandedElts,
-                                                           int DMaskIdx,
-                                                           int TFCIdx) {
+                                                           int DMaskIdx) {
   unsigned VWidth = II->getType()->getVectorNumElements();
   if (VWidth == 1)
     return nullptr;
 
-  // Need to change to new instruction format
-  bool TFELWEEnabled = false;
-  if (TFCIdx > 0) {
-    if (ConstantInt *TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx)))
-      TFELWEEnabled =    TFC->getZExtValue() & 0x1  // TFE
-                      || TFC->getZExtValue() & 0x2; // LWE
-  }
-
-  if (TFELWEEnabled)
-    return nullptr; // TFE not yet supported
-
   ConstantInt *NewDMask = nullptr;
 
   if (DMaskIdx < 0) {
@@ -1648,9 +1639,17 @@
     case Intrinsic::amdgcn_struct_buffer_load_format:
       return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
     default: {
-      if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
-        return simplifyAMDGCNMemoryIntrinsicDemanded(
-            II, DemandedElts, 0, II->getNumArgOperands() - 2);
+      if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID())) {
+        // The TFC operand is the second-to-last call argument. TFE/LWE calls
+        // return a struct and are not expected on this vector-only path.
+        LLVM_DEBUG(
+          Value *TFC = II->getArgOperand(II->getNumArgOperands() - 2);
+          assert(!isa<ConstantInt>(TFC) ||
+                 cast<ConstantInt>(TFC)->getZExtValue() == 0);
+        );
+
+        return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
+      }
 
       break;
     }
Index: test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
===================================================================
--- test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
+++ test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
@@ -2404,6 +2404,23 @@
 declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
 declare <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 
+; --------------------------------------------------------------------
+; TFE / LWE
+; --------------------------------------------------------------------
+
+; The struct-returning image load must be left unchanged (dmask stays 15).
+; NOTE(review): the second-to-last operand is 0 here; confirm TFE is meant to be off.
+; CHECK-LABEL: @extract_elt0_tfe_image_load_1d_v4f32i32_i32(
+; CHECK-NEXT: %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
+define amdgpu_ps float @extract_elt0_tfe_image_load_1d_v4f32i32_i32(i32 %s, <8 x i32> inreg %rsrc) #0 {
+  %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
+  %rgba = extractvalue { <4 x float>, i32 } %data, 0
+  %elt0 = extractelement <4 x float> %rgba, i32 0
+  ret float %elt0
+}
+
+declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32, i32, <8 x i32>, i32, i32) #1
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 