Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2010,6 +2010,11 @@
     return performOrCombine(N, DCI);
   case AMDGPUISD::FP_CLASS:
     return performClassCombine(N, DCI);
+  case AMDGPUISD::TBUFFER_STORE_FORMAT:
+    // Delete undef stores: when operand 2 is undef, return operand 0 instead.
+    if (N->getOperand(2).getOpcode() == ISD::UNDEF)
+      return N->getOperand(0);
+    break;
   }
   return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
 }
Index: test/CodeGen/AMDGPU/tbuffer-dead-store.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/tbuffer-dead-store.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN: {{^}}main:
+; GCN: tbuffer_store_format_x v{{[0-9]+}}, 0x0
+; GCN-NOT: tbuffer_store_format
+; GCN: s_endpgm
+
+define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
+main_body:
+  %12 = getelementptr [9 x <16 x i8>], [9 x <16 x i8>] addrspace(2)* %0, i64 0, i64 0
+  %13 = load <16 x i8>, <16 x i8> addrspace(2)* %12, align 16, !tbaa !0
+  %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
+  %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !tbaa !0
+  %16 = add i32 %5, %8
+  %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %15, i32 0, i32 %16)
+  %18 = extractelement <4 x float> %17, i32 0
+  %19 = bitcast float %18 to i32
+  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %13, i32 %19, i32 1, i32 undef, i32 %7, i32 0, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0)
+  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %13, i32 undef, i32 1, i32 undef, i32 %7, i32 4, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0)
+  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %13, i32 undef, i32 1, i32 undef, i32 %7, i32 8, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0)
+  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %13, i32 undef, i32 1, i32 undef, i32 %7, i32 12, i32 4, i32 4, i32 0, i32 0, i32 1, i32 1, i32 0)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
+
+declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+
+attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
+attributes #1 = { nounwind readnone }
+
+!0 = !{!"const", null, i32 1}